/// <summary>
/// Creates a FastASequencePositionParser that works on the supplied stream.
/// The constructor only records its arguments and creates the internal
/// FastAParser; no read call is made on the stream here.
/// </summary>
/// <param name="stream">Stream to load; must not be null.</param>
/// <param name="reverseReversePairedRead">Flag to indicate to get the forward strand sequence of a reverse paired read.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="stream"/> is null.</exception>
public FastASequencePositionParser(Stream stream, bool reverseReversePairedRead = false)
{
    // Reject a missing stream before any instance state is touched.
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    // Remember the caller's inputs, then create the parser helper.
    this.stream = stream;
    this.reverseReversePairedRead = reverseReversePairedRead;
    this.fastaParser = new FastAParser();
}
/// <summary>
/// Parses the first sequence of "Query.txt", passes it to Mismatch_Test.GetMismatches
/// and checks the number of reported mismatches as well as the query offsets of the
/// first and the last one.
/// </summary>
public void TestMethod1()
{
    var queryParser = new FastAParser(@"Query.txt");
    var firstQuerySequence = queryParser.Parse().First();
    var mismatches = Mismatch_Test.GetMismatches(firstQuerySequence);

    // Two mismatches are expected: one at query offset 0 and one at query offset 179.
    Assert.AreEqual(2, mismatches.Count());
    Assert.AreEqual(0, mismatches.First().QuerySequenceOffset);
    Assert.AreEqual(179, mismatches.Last().QuerySequenceOffset);
}
/// <summary>
/// Executes the cross-link search for LC-IMS-TOF data, starting from input files.
/// The FASTA file, the LC-IMS-MS features file and the isotopic peaks file are read in
/// that order; the loaded objects are then passed to the Execute overload that works on
/// objects instead of file locations.
/// </summary>
/// <param name="settings">Settings object to control parameters for cross-linking.</param>
/// <param name="fastAFile">The FileInfo object for the FASTA file containing all protein sequences you want to search.</param>
/// <param name="featureFile">The FileInfo object for the LC-IMS-MS features file, created by the LC-IMS-MS Feature Finder.</param>
/// <param name="peaksFile">The FileInfo object for the Isotopic Peaks file, created by DeconTools.</param>
/// <returns>A list of CrossLinkResult objects.</returns>
/// <remarks>
/// Any exception raised while reading one of the files is reported on the console and
/// then rethrown unchanged.
/// </remarks>
public static IList<CrossLinkResult> Execute(CrossLinkSettings settings, FileInfo fastAFile, FileInfo featureFile, FileInfo peaksFile)
{
    Console.WriteLine();

    // Read in FASTA File
    IEnumerable<ISequence> proteinSequences = ReadInputFile<IEnumerable<ISequence>>(
        fastAFile,
        () => new FastAParser(fastAFile.FullName).Parse(),
        "FASTA file: ",
        "Error reading the FASTA file: ");

    // Read in LC-IMS-MS Features
    List<LcImsMsFeature> features = ReadInputFile<List<LcImsMsFeature>>(
        featureFile,
        () => LcImsMsFeatureReader.ReadFile(featureFile),
        "Features file: ",
        "Error reading the LCMSFeatures file: ");

    // Read in Isotopic Peaks (not Isotopic Profile)
    List<IsotopicPeak> peaks = ReadInputFile<List<IsotopicPeak>>(
        peaksFile,
        () => IsotopicPeakReader.ReadFile(peaksFile),
        "Peaks file: ",
        "Error reading the Isotopic Peaks file: ");

    // Now call the executor that expects the objects instead of the file locations
    return Execute(settings, proteinSequences, features, peaks);
}

/// <summary>
/// Runs one input-reading step: invokes <paramref name="read"/>, prints
/// <paramref name="successPrefix"/> followed by the relative path of <paramref name="file"/>,
/// and returns what was read. If anything in that sequence throws,
/// <paramref name="errorPrefix"/> plus the exception message is printed and the exception
/// is rethrown.
/// </summary>
private static T ReadInputFile<T>(FileInfo file, Func<T> read, string successPrefix, string errorPrefix)
{
    try
    {
        T loaded = read();
        Console.WriteLine(successPrefix + GetRelativePath(file.FullName));
        return loaded;
    }
    catch (Exception ex)
    {
        Console.WriteLine(errorPrefix + ex.Message);
        throw;
    }
}
/// <summary>
/// Parses the FastA file configured under Constants.SimpleFastaNodeName and validates the
/// first parsed sequence against the expected values from the test XML: its residues,
/// its length, its ID and the name of its alphabet.
/// </summary>
public void ValidateFastaAFileSequence()
{
    // Gets the expected sequence, the file path and the alphabet name from the Xml
    string expectedSequence = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.ExpectedSequenceNode);
    string fastAFilePath = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.FilePathNode);
    string alphabet = this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleFastaNodeName, Constants.AlphabetNameNode);

    Assert.IsTrue(File.Exists(fastAFilePath));

    // Logs information to the log file
    ApplicationLog.WriteLine(string.Concat(
        "Sequence BVT: The File exist in the Path ", fastAFilePath));

    // Parse the FastA file using the Parse method.
    FastAParser parser = new FastAParser();
    parser.Alphabet = Utility.GetAlphabet(alphabet);
    IEnumerable<ISequence> sequence = parser.Parse(fastAFilePath);
    Assert.IsNotNull(sequence);

    // Fetch the first sequence exactly once and reuse it below, so the parser's
    // result is not enumerated a second time just to read the same element.
    ISequence firstSequence = sequence.ElementAt(0);
    Sequence fastASequence = (Sequence)firstSequence;
    Assert.IsNotNull(fastASequence);

    // Residues: convert the sequence bytes to characters and compare as a string.
    string newSequence = new string(firstSequence.Select(a => (char)a).ToArray());
    Assert.AreEqual(expectedSequence, newSequence);
    ApplicationLog.WriteLine("Sequence BVT: The Sequence is as expected.");

    // Length: copy the bytes into a buffer sized by Count and compare its length.
    byte[] tmpEncodedSeq = new byte[fastASequence.Count];
    ((IEnumerable<byte>)fastASequence).ToArray().CopyTo(tmpEncodedSeq, 0);
    Assert.AreEqual(expectedSequence.Length, tmpEncodedSeq.Length);
    ApplicationLog.WriteLine("Sequence BVT: Sequence Length is as expected.");

    // NOTE(review): the expected ID is read from SimpleProteinAlphabetNode while every
    // other expected value comes from SimpleFastaNodeName - confirm this is intentional.
    Assert.AreEqual(this.utilityObj.xmlUtil.GetTextValue(
        Constants.SimpleProteinAlphabetNode, Constants.SequenceIdNode), fastASequence.ID);
    ApplicationLog.WriteLine("Sequence BVT: SequenceID is as expected.");

    // Expected value first, actual second, so that a failure message reads correctly.
    // The expected name is the alphabet value already read from the Xml above.
    Assert.AreEqual(alphabet, fastASequence.Alphabet.Name);
    ApplicationLog.WriteLine("Sequence BVT: Sequence Alphabet is as expected.");
}
/// <summary>
/// Builds a reference sequence and a list of query sequences (either from FastA files or
/// from values in the test configuration), searches the queries against a suffix tree of
/// the reference and validates the resulting matches according to
/// <paramref name="additionalParam"/>.
/// </summary>
/// <param name="nodeName">Configuration node that holds the inputs for this test case.</param>
/// <param name="isFilePath">True to read reference/query sequences from FastA files, false to read them from the configuration.</param>
/// <param name="additionalParam">Selects which validation is run on the matches.</param>
/// <param name="propParam">Passed through to the cluster builder validation.</param>
private void ValidateFindMatchSuffixGeneralTestCases(string nodeName, bool isFilePath,
    AdditionalParameters additionalParam,
    PropertyParameters propParam)
{
    // Always assigned from a freshly constructed Sequence, so it is declared as Sequence
    // and needs no cast when the suffix tree is built.
    Sequence referenceSeq;
    var searchSeqList = new List<ISequence>();

    if (isFilePath)
    {
        // Gets the reference sequence from the FastA file
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.", filePath));

        // Materialize the parse result once: it is needed both for the concatenation and
        // for the alphabet of the first sequence, and enumerating the parser's
        // IEnumerable twice may parse the file twice.
        List<ISequence> referenceSeqList = new FastAParser().Parse(filePath).ToList();
        referenceSeq = BuildConcatenatedReference(referenceSeqList);

        // Gets the query sequence from the FastA file
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null, "NUCmer P1 : Successfully validated the File Path '{0}'.", queryFilePath));
        searchSeqList.AddRange(new FastAParser().Parse(queryFilePath));
    }
    else
    {
        // Gets the reference & search sequences from the configuration file
        string[] referenceSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ReferenceSequencesNode);
        string[] searchSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SearchSequencesNode);
        IAlphabet seqAlphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

        List<ISequence> refSeqList = referenceSequences
            .Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t)))
            .Cast<ISequence>()
            .ToList();
        referenceSeq = BuildConcatenatedReference(refSeqList);

        searchSeqList.AddRange(searchSequences.Select(t => new Sequence(seqAlphabet, Encoding.ASCII.GetBytes(t))));
    }

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMLengthNode);

    // Builds the suffix tree for the reference sequence passed.
    var suffixTreeBuilder = new MultiWaySuffixTree(referenceSeq)
    {
        MinLengthOfMatch = long.Parse(mumLength, null)
    };

    // Collect the matches of every query sequence, in query order. No intermediate
    // dictionary keyed by sequence is used, so two queries that compare equal as keys
    // do not abort the test.
    var mums = new List<Match>();
    foreach (ISequence querySeq in searchSeqList)
    {
        mums.AddRange(suffixTreeBuilder.SearchMatchesUniqueInReference(querySeq));
    }

    switch (additionalParam)
    {
        case AdditionalParameters.FindUniqueMatches:
            // Validates the Unique Matches.
            ApplicationLog.WriteLine("NUCmer P1 : Validating the Unique Matches");
            Assert.IsTrue(this.ValidateUniqueMatches(mums, nodeName, isFilePath));
            ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
            break;
        case AdditionalParameters.PerformClusterBuilder:
            // Validates the matches using the Cluster Builder.
            ApplicationLog.WriteLine(
                "NUCmer P1 : Validating the Unique Matches using Cluster Builder");
            Assert.IsTrue(this.ValidateClusterBuilderMatches(mums, nodeName, propParam));
            ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the cluster builder matches for the sequences.");
            break;
        default:
            break;
    }

    // NOTE(review): this message is written for every value of additionalParam, including
    // the default case where no validation ran - confirm that is intended.
    ApplicationLog.WriteLine("NUCmer P1 : Successfully validated the all the unique matches for the sequences.");
}

/// <summary>
/// Concatenates the bytes of all given sequences, appending a '+' byte after each one,
/// and wraps the result in a Sequence that uses the MUMmer alphabet obtained from the
/// first sequence's alphabet. Throws if <paramref name="sequences"/> is empty, because
/// the first sequence is required for the alphabet.
/// </summary>
private static Sequence BuildConcatenatedReference(IList<ISequence> sequences)
{
    var concatenated = new List<byte>();
    foreach (ISequence seq in sequences)
    {
        concatenated.AddRange(seq);
        concatenated.Add((byte)'+');
    }

    return new Sequence(sequences.First().Alphabet.GetMummerAlphabet(), concatenated.ToArray());
}
请发表评论