Created
November 29, 2012 17:00
-
-
Save radaniba/4170388 to your computer and use it in GitHub Desktop.
Multifasta Parser is a new parser for FASTA files. It lets you extract the individual FASTA sequences from a multi-FASTA file, writing each one to its own file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| to compile: | |
| $gmcs multifasta-parser.cs -out:multifasta-parser | |
| to run: | |
| $mono multifasta-parser [/path/multifasta-file] | |
| */ | |
| using System; | |
| using System.IO; | |
| namespace BioMono.Fasta { | |
/// <summary>
/// Splits a multifasta text file into one .fasta file per sequence.
/// Call <see cref="Load"/> to open the input, then <see cref="Cut"/> to write
/// the per-sequence files next to it.
/// </summary>
public class CutFasta
{
    // Reader over the multifasta input; null until Load() succeeds and again after Cut().
    StreamReader sr;
    // Writer for the per-sequence file currently being filled; null when none is open.
    StreamWriter sw;
    // Directory that receives the per-sequence .fasta files (the input file's directory).
    string destDir;

    /// <summary>Creates a splitter with no input file attached.</summary>
    public CutFasta()
    {
        destDir = "";
    }

    /// <summary>
    /// Opens the multifasta file for streaming; the file is read line by line
    /// rather than loaded into memory.
    /// </summary>
    /// <param name="sourceFile">Path of the multifasta file; may be relative.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is null.</exception>
    public void Load(string sourceFile)
    {
        if (sourceFile == null)
        {
            throw new ArgumentNullException("sourceFile");
        }

        // Resolve first: for a bare file name Path.GetDirectoryName returns "",
        // which would send the output files to the filesystem root.
        string fullPath = Path.GetFullPath(sourceFile);

        // Open before touching any state so a failed Load leaves the object unchanged.
        StreamReader reader = File.OpenText(fullPath);
        if (sr != null)
        {
            sr.Close();
        }
        sr = reader;
        destDir = Path.GetDirectoryName(fullPath);
    }

    /// <summary>
    /// Reads the loaded file and writes every sequence to
    /// "&lt;destDir&gt;/&lt;name&gt;.fasta", where the name is the header text after
    /// '&gt;' up to the first space. Lines that precede the first header belong to
    /// no sequence and are skipped. I/O failures are reported on standard error
    /// and stop the extraction. The input reader is closed when the method
    /// returns, so <see cref="Load"/> must be called again before another Cut.
    /// </summary>
    /// <exception cref="InvalidOperationException">No file has been loaded.</exception>
    public void Cut()
    {
        if (sr == null)
        {
            throw new InvalidOperationException("Load must be called before Cut.");
        }

        int sequenceFastaSegmentCounter = -1;
        try
        {
            string line;
            // ReadLine returns null at end of input; test that instead of
            // relying on an exception to leave the loop.
            while ((line = sr.ReadLine()) != null)
            {
                int index = line.IndexOf('>');
                if (index == -1)
                {
                    // Sequence data: only meaningful once a header opened a file.
                    if (sw != null)
                    {
                        sw.WriteLine(line);
                    }
                    continue;
                }

                sequenceFastaSegmentCounter++;
                string seqName = line.Substring(index + 1);
                Console.WriteLine("seqName: {0}", seqName);
                seqName = ToFileName(seqName, sequenceFastaSegmentCounter);

                CloseWriter();
                string newfastafile = Path.Combine(destDir, seqName + ".fasta");
                Console.WriteLine("Creating new fasta file {0}", newfastafile);
                Console.WriteLine("Short seqName: {0}", seqName);
                sw = File.CreateText(newfastafile);
                sw.WriteLine(line);
            }
        }
        catch (IOException e)
        {
            Console.Error.WriteLine("Error while extracting fasta sequences: {0}", e.Message);
        }
        catch (UnauthorizedAccessException e)
        {
            Console.Error.WriteLine("Error while extracting fasta sequences: {0}", e.Message);
        }
        finally
        {
            // Release both files on every path, including unexpected exceptions.
            CloseWriter();
            sr.Close();
            sr = null;
        }
    }

    // Closes the current output file, if any, and forgets it.
    void CloseWriter()
    {
        if (sw != null)
        {
            sw.Close();
            sw = null;
        }
    }

    // Turns the header text into a safe file name: keeps the part before the first
    // space, replaces characters that are invalid in file names (including path
    // separators), and falls back to the sequence ordinal when nothing is left.
    static string ToFileName(string header, int ordinal)
    {
        string name = header;
        int space = name.IndexOf(' ');
        if (space != -1)
        {
            name = name.Substring(0, space);
        }

        foreach (char invalid in Path.GetInvalidFileNameChars())
        {
            name = name.Replace(invalid, '_');
        }

        if (name.Length == 0)
        {
            name = ordinal.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
        return name;
    }
}
| // testing main class | |
/// <summary>
/// Command-line driver: splits the multifasta file named by the first argument.
/// </summary>
public class testCutFasta
{
    /// <summary>
    /// Loads the file given as <c>args[0]</c> and extracts its sequences.
    /// Prints a usage line and sets a non-zero exit code when no path is given
    /// or the file cannot be opened.
    /// </summary>
    /// <param name="args">Command-line arguments; the first is the multifasta path.</param>
    public static void Main(string[] args)
    {
        // Guard against a missing argument instead of failing on args[0].
        if (args == null || args.Length == 0 || string.IsNullOrEmpty(args[0]))
        {
            Console.Error.WriteLine("usage: multifasta-parser [/path/multifasta-file]");
            Environment.ExitCode = 1;
            return;
        }

        CutFasta cf = new CutFasta();
        Console.WriteLine("Loading fasta file {0}", args[0]);
        try
        {
            cf.Load(args[0]);
        }
        catch (IOException e)
        {
            // Covers missing files and directories as well as sharing violations.
            Console.Error.WriteLine("Cannot open fasta file: {0}", e.Message);
            Environment.ExitCode = 1;
            return;
        }
        catch (UnauthorizedAccessException e)
        {
            Console.Error.WriteLine("Cannot open fasta file: {0}", e.Message);
            Environment.ExitCode = 1;
            return;
        }

        Console.WriteLine("Extracting from fasta file...");
        cf.Cut();
    }
}
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment