Created
February 24, 2013 19:03
-
-
Save lomereiter/5025051 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// API design sketch for the GFF3/GTF reader, writer and validator,
/// written as a unittest. The comments record the design decisions that
/// the library interface is expected to follow.
unittest {
    import std.conv;
    import std.stdio; // writeln, used when printing feature parents below

    import bio.gff3.reader;
    import bio.gtf.reader;
    import bio.gff3.writer;
    import bio.gff3.validator;

    // NOTE(review): `main` is a nested function of this unittest and nothing
    // visible here calls it -- confirm whether the unittest is meant to
    // invoke it or whether it only has to compile.
    void main() {
        // Keeps all comments and pragmas -- this is cheap.
        // If the user doesn't need them -- std.algorithm.filter to the rescue!
        //
        // One simple constructor taking a filename is enough.
        auto gff = new Gff3Reader("data/test.gff3");

        // GtfReader should also be provided, in bio.gtf.reader.
        //
        // These two should probably inherit from some abstract GffGtfReader
        // and differ only in the parameters they pass to set_data_format
        // in the constructor -- that is why two classes are needed.
        auto gtf = new GtfReader("data/test.gtf");

        auto validator = new Gff3Validator();
        validator.setStringency(ValidationStringency.silent);
        validator.setOutputFilename("validation.log"); // print there instead of stderr

        // enum ValidationStringency {
        //     silent,  /// skip validation altogether
        //     lenient, /// print warnings
        //     strict   /// throw exceptions
        // }
        gff.setValidator(validator); // will be used in later calls
                                     // default stringency should be 'strict'

        auto records = gff.records;

        // Inspect the first element of the range. The range variable is
        // `records`; `record` only exists as the loop variable further down.
        assert(records.front.is_comment);
        assert(!records.front.is_pragma);
        assert(!records.front.is_regular);

        records = gff.records; // gff.records gives a brand-new range each time

        // Having such trivial things might seem weird, but this makes for consistent syntax,
        // as for binary formats one can't just convert each record to a string and print it.
        // Among other advantages, Gff3Writer may use fewer allocations,
        // and users cannot forget to print a newline in some place.
        {
            auto w = new Gff3Writer(gff.filename ~ ".modified");

            // On leaving this scope: append the FASTA records after the
            // regular records, then close the writer.
            scope(exit) {
                foreach (fr; gff.fasta_records)
                    w.writeFastaRecord(fr);
                w.close();
            }

            foreach (record; records) {
                record.start += 1;   // Should be integer, not string!
                                     // If parsing time is a big deal - store it in string internally
                                     // but provide access via property.
                                     // Same for end.

                record.strand = '+'; // Strand should be of type 'char' and checked for sanity
                                     // (only '+', '-', '.', '?' values should be allowed).
                                     //
                                     // Or, better yet, give strand a type and use 'alias this' magic:
                                     // struct Strand { char strand; alias strand this;
                                     //   bool is_unknown() @property const { ... }     <- '?'
                                     //   bool is_undefined() @property const { ... } } <- '.'
                                     // And then implement a property of this type.

                // Explicit conversion is fine, but the word 'attributes' is way too long to type.
                //
                // It's easy to use opIndexAssign to delegate this to the dictionary.
                // Clearly, it should have at least two overloads -- for strings and arrays of strings.
                record["Awwww1"] = to!string(12345);
                record["Awwww2"] = ["1", "2", "3"];

                assert(record["Awwww1"] == "12345"); // checks that length is 1 and compares first element
                // i.e. opEquals should also have two overloads -- for strings and arrays of strings

                w.writeRecord(record);
            }
        }

        // How to access features?
        // I looked at FeatureRange, and almost all of its methods
        // do nothing but delegate to the underlying record range.
        //
        // Therefore, it makes sense to have a simple function
        // which will turn a record range into a feature range, with signature
        //   auto groupIntoFeatures(R)(R records, bool link=false, size_t cache_size=1024);
        auto features = gff.records.groupIntoFeatures();

        // Provide a few shortcuts as well in the Gff3Reader interface.
        features = gff.features;              // no cache size parameter for not linked features
        features = gff.linkedFeatures();      // use defaults
        features = gff.linkedFeatures(15000); // increased cache size

        foreach (feature; features) {
            writeln(feature.parent.id);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment