Created
March 8, 2016 16:05
-
-
Save rmzelle/256cbef7d594b518ac46 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##gff-version 3 | |
##sequence-region scaffold10_size537661 1 537661 | |
scaffold10_size537661 AUGUSTUS gene 745 1674 . - 0 ID=95_g3568;Name=RTM1_1;gene_id=95_g3568 | |
scaffold10_size537661 AUGUSTUS mRNA 745 1674 . - 0 Parent=95_g3568;gene_id=95_g3568;transcript_id=95_g3568.t1;product=Protein RTM1;locus_tag=95_g3568.1;accession=P40113;score=625;evalue=0.0;translation=MSNDSSGSEWELYRYTPSKGAAIALTVLFIVTTLIYSFQVVWDARKASKPEVDNPFDTPVDKCESITAISLGENYKKLTVRSTFSAFIPLFFGCIMEIVGYIARAVSSSNTKEIAPYVIQAVLLLIAPALYAATIYMLFGRLLHVMRCESLMIVSSRFGTSFFVFGDVVSFCLQAAGGGLMATVNGRTTGSNLITAGLVIQIVFFGVFIINEFRFSYSVARVCPFYRHISKKWWFLNLTLMLSSILIMVRSIVRLVEFVEGYDGFIISHEYFIYVFDAVPMLLAAIVFIVGSFFGNIFTTITECQSLKP | |
scaffold10_size537661 AUGUSTUS exon 748 1674 0.93 - 0 Parent=95_g3568;gene_id=95_g3568;transcript_id=95_g3568.t1 | |
scaffold10_size537661 AUGUSTUS CDS 748 1674 0.93 - 0 Parent=95_g3568;Name=RTM1_1;gene_id=95_g3568;transcript_id=95_g3568.t1 | |
scaffold10_size537661 AUGUSTUS gene 2374 4626 . - 0 ID=95_g3569;Name=-;gene_id=95_g3569 | |
scaffold10_size537661 AUGUSTUS mRNA 2374 4626 . - 0 Parent=95_g3569;gene_id=95_g3569;transcript_id=95_g3569.t1;product=putative transcriptional regulatory protein YER184C;locus_tag=95_g3569.1;accession=P39961;score=311;evalue=4e-92;translation=MIKGDEGNLSFYKQDDHILEQIKILKKSIDILEKLKLSVVTSSEYATNAIADINRQLETLLSFQHVNINSESVRTYKDNGALENQLLDGNYVAFDAFSTFENVSKGKAVDDIFGLYSPISVMSPKGICWLIQQLIVKSREKDTEETVYILLKFLDAGSSSYKWELIAPVYRLQFILIEFFGCKVPFTRNETLKEAIDAIPPSLRSELKEEGGNSIQDSRKLFMYCVKLLGKHFTSSRHLLRNINIFEQFFQVEELLSTLCYTFLENSLNFRLLDAEFLHDLLCFVKQRHWKDNSFIIGGVIAPLCRQVQDLGLSRWEYYLGMNEEHANPLREIWWDTYWWDKWYVVVTGKLPMIDSSTVTCLLPQQIMRLGVDDAMSSWQMLERVDFTYGSLKDHIMFGYIVLSISINDVFSNVLYNRKFTDYRVFSGITALDFKVVEELLTRINGIRKGFTVLKTTVVHELEKSLLDDDVFRFCIHFAYSRISCLRAIGNLLMRFKSIFHGTSSNLISDQIGECDKDILQTTVETFTFILRANDNIKIKEHVRPISEMGLNILLEAVKAPAGIDVYHISLFCGVASLFDRITCSENGAEKQYNHPGRIVLMEITCIFIFVRVCCLVYRQYKKVSKEELMAILTDFDHTTARFCNETLDIRSDLFQHIIRDKKKSDYHRDIIHGIEKVLGRDIITTIESCEREVISSDEYRQAQYMGNVATKDLDYLRYFLNLDIFPELNTDDELWDDLKEIDKYYCSSV | |
scaffold10_size537661 AUGUSTUS exon 2377 4626 0.97 - 0 Parent=95_g3569;gene_id=95_g3569;transcript_id=95_g3569.t1 | |
scaffold10_size537661 AUGUSTUS CDS 2377 4626 0.97 - 0 Parent=95_g3569;Name=-;gene_id=95_g3569;transcript_id=95_g3569.t1 | |
scaffold10_size537661 SignalP:4.1 signal_peptide 15225 15287 . - 0 product=putative signal peptide | |
scaffold10_size537661 AUGUSTUS gene 16555 17262 . + 0 ID=95_g3574;Name=-;gene_id=95_g3574 | |
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 Parent=95_g3574;gene_id=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV | |
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 Parent=95_g3574;gene_id=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Parent=95_g3574;Name=-;gene_id=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS gene 17890 19107 . + 0 ID=95_g3575;Name=ECM34;gene_id=95_g3575 | |
scaffold10_size537661 AUGUSTUS mRNA 17890 19107 . + 0 Parent=95_g3575;gene_id=95_g3575;transcript_id=95_g3575.t1;product=Protein ECM34;locus_tag=95_g3575.1;accession=P38728;score=353;evalue=2e-120;translation=MEGRKSEDEKNEAALVCDVFESSNAKLPKNVFRSSFTWYCYEVINRSAFHIWLLLCLTLIVGWKVFSGIGGRRPSDSNMDGPQTKHKRNPGFLRRHSTIVILVISLAVSFSWEAFKMYRERTFGKQITQFAKEIIKSAPSTDMESWDRVAADFNSYMYENKLWNTEYFFFDGSSCHVAFRRTLLWISSRIDGDYKIEYFRKHPYIEEALKVYFAEVDRKWNLNTSQQLLSNISVGNIKLPGQSCRFKLFHIFKKVMKQRFSQVATVIFFVMSIRSPRNLGFFFTLALFVVLVCSQEWFSFEMNRSCSMKVEHRMQFLSTIISEHQKSDVNCWDQIAKKMNVYLFEQKVSGSDVFFLDGADCERFFERNFLRYLPSRKSSHPDLPIAELLPYIRKADIACAGKQLI | |
scaffold10_size537661 AUGUSTUS exon 17890 19104 0.97 + 0 Parent=95_g3575;gene_id=95_g3575;transcript_id=95_g3575.t1 | |
scaffold10_size537661 AUGUSTUS CDS 17890 19104 0.97 + 0 Parent=95_g3575;Name=ECM34;gene_id=95_g3575;transcript_id=95_g3575.t1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##gff-version 3 | |
scaffold10_size537661 AUGUSTUS gene 745 1674 . - 0 ID=95_g3568;Name=RTM1_1;gene_id=95_g3568 | |
scaffold10_size537661 AUGUSTUS mRNA 745 1674 . - 0 gene_id=95_g3568;Parent=95_g3568;transcript_id=95_g3568.t1;product=Protein RTM1;locus_tag=95_g3568.1;accession=P40113;score=625;evalue=0.0;translation=MSNDSSGSEWELYRYTPSKGAAIALTVLFIVTTLIYSFQVVWDARKASKPEVDNPFDTPVDKCESITAISLGENYKKLTVRSTFSAFIPLFFGCIMEIVGYIARAVSSSNTKEIAPYVIQAVLLLIAPALYAATIYMLFGRLLHVMRCESLMIVSSRFGTSFFVFGDVVSFCLQAAGGGLMATVNGRTTGSNLITAGLVIQIVFFGVFIINEFRFSYSVARVCPFYRHISKKWWFLNLTLMLSSILIMVRSIVRLVEFVEGYDGFIISHEYFIYVFDAVPMLLAAIVFIVGSFFGNIFTTITECQSLKP | |
scaffold10_size537661 AUGUSTUS exon 748 1674 0.93 - 0 gene_id=95_g3568;Parent=95_g3568;transcript_id=95_g3568.t1 | |
scaffold10_size537661 AUGUSTUS CDS 748 1674 0.93 - 0 Name=RTM1_1;gene_id=95_g3568;Parent=95_g3568;transcript_id=95_g3568.t1 | |
scaffold10_size537661 AUGUSTUS gene 2374 4626 . - 0 ID=95_g3569;Name=-;gene_id=95_g3569 | |
scaffold10_size537661 AUGUSTUS mRNA 2374 4626 . - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1;product=putative transcriptional regulatory protein YER184C;locus_tag=95_g3569.1;accession=P39961;score=311;evalue=4e-92;translation=MIKGDEGNLSFYKQDDHILEQIKILKKSIDILEKLKLSVVTSSEYATNAIADINRQLETLLSFQHVNINSESVRTYKDNGALENQLLDGNYVAFDAFSTFENVSKGKAVDDIFGLYSPISVMSPKGICWLIQQLIVKSREKDTEETVYILLKFLDAGSSSYKWELIAPVYRLQFILIEFFGCKVPFTRNETLKEAIDAIPPSLRSELKEEGGNSIQDSRKLFMYCVKLLGKHFTSSRHLLRNINIFEQFFQVEELLSTLCYTFLENSLNFRLLDAEFLHDLLCFVKQRHWKDNSFIIGGVIAPLCRQVQDLGLSRWEYYLGMNEEHANPLREIWWDTYWWDKWYVVVTGKLPMIDSSTVTCLLPQQIMRLGVDDAMSSWQMLERVDFTYGSLKDHIMFGYIVLSISINDVFSNVLYNRKFTDYRVFSGITALDFKVVEELLTRINGIRKGFTVLKTTVVHELEKSLLDDDVFRFCIHFAYSRISCLRAIGNLLMRFKSIFHGTSSNLISDQIGECDKDILQTTVETFTFILRANDNIKIKEHVRPISEMGLNILLEAVKAPAGIDVYHISLFCGVASLFDRITCSENGAEKQYNHPGRIVLMEITCIFIFVRVCCLVYRQYKKVSKEELMAILTDFDHTTARFCNETLDIRSDLFQHIIRDKKKSDYHRDIIHGIEKVLGRDIITTIESCEREVISSDEYRQAQYMGNVATKDLDYLRYFLNLDIFPELNTDDELWDDLKEIDKYYCSSV | |
scaffold10_size537661 AUGUSTUS exon 2377 4626 0.97 - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1 | |
scaffold10_size537661 AUGUSTUS CDS 2377 4626 0.97 - 0 Name=-;gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1 | |
scaffold10_size537661 SignalP:4.1 signal_peptide 15225 15287 . - 0 product=putative signal peptide | |
scaffold10_size537661 AUGUSTUS gene 16555 17262 . + 0 ID=95_g3574;Name=-;gene_id=95_g3574 | |
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV | |
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Name=-;gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS gene 17890 19107 . + 0 ID=95_g3575;Name=ECM34;gene_id=95_g3575 | |
scaffold10_size537661 AUGUSTUS mRNA 17890 19107 . + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1;product=Protein ECM34;locus_tag=95_g3575.1;accession=P38728;score=353;evalue=2e-120;translation=MEGRKSEDEKNEAALVCDVFESSNAKLPKNVFRSSFTWYCYEVINRSAFHIWLLLCLTLIVGWKVFSGIGGRRPSDSNMDGPQTKHKRNPGFLRRHSTIVILVISLAVSFSWEAFKMYRERTFGKQITQFAKEIIKSAPSTDMESWDRVAADFNSYMYENKLWNTEYFFFDGSSCHVAFRRTLLWISSRIDGDYKIEYFRKHPYIEEALKVYFAEVDRKWNLNTSQQLLSNISVGNIKLPGQSCRFKLFHIFKKVMKQRFSQVATVIFFVMSIRSPRNLGFFFTLALFVVLVCSQEWFSFEMNRSCSMKVEHRMQFLSTIISEHQKSDVNCWDQIAKKMNVYLFEQKVSGSDVFFLDGADCERFFERNFLRYLPSRKSSHPDLPIAELLPYIRKADIACAGKQLI | |
scaffold10_size537661 AUGUSTUS exon 17890 19104 0.97 + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1 | |
scaffold10_size537661 AUGUSTUS CDS 17890 19104 0.97 + 0 Name=ECM34;gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##gff-version 3 | |
scaffold10_size537661 AUGUSTUS gene 745 1674 . - 0 ID=95_g3568;Name=RTM1_1;gene_id=95_g3568 | |
scaffold10_size537661 AUGUSTUS mRNA 2374 4626 . - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1;product=putative transcriptional regulatory protein YER184C;locus_tag=95_g3569.1;accession=P39961;score=311;evalue=4e-92;translation=MIKGDEGNLSFYKQDDHILEQIKILKKSIDILEKLKLSVVTSSEYATNAIADINRQLETLLSFQHVNINSESVRTYKDNGALENQLLDGNYVAFDAFSTFENVSKGKAVDDIFGLYSPISVMSPKGICWLIQQLIVKSREKDTEETVYILLKFLDAGSSSYKWELIAPVYRLQFILIEFFGCKVPFTRNETLKEAIDAIPPSLRSELKEEGGNSIQDSRKLFMYCVKLLGKHFTSSRHLLRNINIFEQFFQVEELLSTLCYTFLENSLNFRLLDAEFLHDLLCFVKQRHWKDNSFIIGGVIAPLCRQVQDLGLSRWEYYLGMNEEHANPLREIWWDTYWWDKWYVVVTGKLPMIDSSTVTCLLPQQIMRLGVDDAMSSWQMLERVDFTYGSLKDHIMFGYIVLSISINDVFSNVLYNRKFTDYRVFSGITALDFKVVEELLTRINGIRKGFTVLKTTVVHELEKSLLDDDVFRFCIHFAYSRISCLRAIGNLLMRFKSIFHGTSSNLISDQIGECDKDILQTTVETFTFILRANDNIKIKEHVRPISEMGLNILLEAVKAPAGIDVYHISLFCGVASLFDRITCSENGAEKQYNHPGRIVLMEITCIFIFVRVCCLVYRQYKKVSKEELMAILTDFDHTTARFCNETLDIRSDLFQHIIRDKKKSDYHRDIIHGIEKVLGRDIITTIESCEREVISSDEYRQAQYMGNVATKDLDYLRYFLNLDIFPELNTDDELWDDLKEIDKYYCSSV | |
scaffold10_size537661 AUGUSTUS gene 2374 4626 . - 0 ID=95_g3569;Name=-;gene_id=95_g3569 | |
scaffold10_size537661 AUGUSTUS exon 2377 4626 0.97 - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1 | |
scaffold10_size537661 AUGUSTUS CDS 2377 4626 0.97 - 0 Name=-;gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1 | |
scaffold10_size537661 SignalP:4.1 signal_peptide 15225 15287 . - 0 product=putative signal peptide | |
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV | |
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Name=-;gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS gene 16555 17262 . + 0 ID=95_g3574;Name=-;gene_id=95_g3574 | |
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV | |
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Name=-;gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1 | |
scaffold10_size537661 AUGUSTUS gene 17890 19107 . + 0 ID=95_g3575;Name=ECM34;gene_id=95_g3575 | |
scaffold10_size537661 AUGUSTUS mRNA 17890 19107 . + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1;product=Protein ECM34;locus_tag=95_g3575.1;accession=P38728;score=353;evalue=2e-120;translation=MEGRKSEDEKNEAALVCDVFESSNAKLPKNVFRSSFTWYCYEVINRSAFHIWLLLCLTLIVGWKVFSGIGGRRPSDSNMDGPQTKHKRNPGFLRRHSTIVILVISLAVSFSWEAFKMYRERTFGKQITQFAKEIIKSAPSTDMESWDRVAADFNSYMYENKLWNTEYFFFDGSSCHVAFRRTLLWISSRIDGDYKIEYFRKHPYIEEALKVYFAEVDRKWNLNTSQQLLSNISVGNIKLPGQSCRFKLFHIFKKVMKQRFSQVATVIFFVMSIRSPRNLGFFFTLALFVVLVCSQEWFSFEMNRSCSMKVEHRMQFLSTIISEHQKSDVNCWDQIAKKMNVYLFEQKVSGSDVFFLDGADCERFFERNFLRYLPSRKSSHPDLPIAELLPYIRKADIACAGKQLI | |
scaffold10_size537661 AUGUSTUS exon 17890 19104 0.97 + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1 | |
scaffold10_size537661 AUGUSTUS CDS 17890 19104 0.97 + 0 Name=ECM34;gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gffutils | |
import os | |
def writeGFF(gffutilsDB, outputFilename, order_by=('seqid', 'start')):
    """Write every feature of a gffutils database to a GFF3 file.

    The file starts with a ``##gff-version 3`` directive, followed by one
    line per feature (``str(feature)``).

    Args:
        gffutilsDB: object exposing ``all_features(order_by=...)``.
        outputFilename: path of the file to create or overwrite.
        order_by: ordering passed through to ``all_features``; the default
            keeps the original (seqid, start) ordering.
    """
    with open(outputFilename, 'w') as fout:
        fout.write('##gff-version 3\n')
        for feature in gffutilsDB.all_features(order_by=order_by):
            fout.write(str(feature) + '\n')
def gffExtract():
    """Build a gffutils database from 'test-small.gff' and dump it twice.

    The database is first written unchanged to 'test-unchanged.gff'. Every
    feature family whose parent has ``Name=-`` is then passed back, without
    modification, through ``db.update(..., merge_strategy="replace")`` and
    the database is written again to 'test-updated.gff'.
    """
    genomeGFF = 'test-small.gff'
    genomeGFFdb = 'test-small.db'

    db = gffutils.create_db(
        genomeGFF,
        dbfn=genomeGFFdb,
        force=True,
        keep_order=True,
        merge_strategy="warning",
    )
    writeGFF(db, "test-unchanged.gff")

    def generatorModifyFeatures(featureFamilies):
        """Yield (and print) each feature of families whose parent is named "-"."""
        for featureFamily in featureFamilies:
            # featureFamily[0] is the parent (featureType "gene")
            if featureFamily[0].attributes["Name"][0] != "-":
                continue
            for feature in featureFamily:
                # print() with a single argument behaves the same on
                # Python 2 and Python 3.
                print(feature)
                yield feature

    # Collect all families up front, before update() starts writing.
    featureFamilies = list(db.iter_by_parent_childs())

    # NOTE(review): the features are re-submitted exactly as read. If
    # gffutils derives primary keys from an ID attribute that a feature
    # lacks, the "replace" may land on a different feature's row -- confirm
    # against the gffutils id_spec documentation before relying on this.
    db.update(generatorModifyFeatures(featureFamilies), merge_strategy="replace")
    writeGFF(db, "test-updated.gff")
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    gffExtract()
Also, "test-unchanged.gff" and "test-updated.gff" come out identical when I instead use this simpler generator, which yields every feature unchanged:
def generatorModifyFeatures(featureFamilies):
    """Yield every feature of every family, unchanged and in order."""
    for featureFamily in featureFamilies:
        for feature in featureFamily:
            yield feature
The input file ("test-small.gff") passes validation with the GenomeTools GFF3 validator (http://genometools.org/cgi-bin/gff3validator.cgi).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Difference between "test-unchanged.gff" (left) and "test-updated.gff" (right):