Skip to content

Instantly share code, notes, and snippets.

@rmzelle
Created March 8, 2016 16:05
Show Gist options
  • Save rmzelle/256cbef7d594b518ac46 to your computer and use it in GitHub Desktop.
Save rmzelle/256cbef7d594b518ac46 to your computer and use it in GitHub Desktop.
##gff-version 3
##sequence-region scaffold10_size537661 1 537661
scaffold10_size537661 AUGUSTUS gene 745 1674 . - 0 ID=95_g3568;Name=RTM1_1;gene_id=95_g3568
scaffold10_size537661 AUGUSTUS mRNA 745 1674 . - 0 Parent=95_g3568;gene_id=95_g3568;transcript_id=95_g3568.t1;product=Protein RTM1;locus_tag=95_g3568.1;accession=P40113;score=625;evalue=0.0;translation=MSNDSSGSEWELYRYTPSKGAAIALTVLFIVTTLIYSFQVVWDARKASKPEVDNPFDTPVDKCESITAISLGENYKKLTVRSTFSAFIPLFFGCIMEIVGYIARAVSSSNTKEIAPYVIQAVLLLIAPALYAATIYMLFGRLLHVMRCESLMIVSSRFGTSFFVFGDVVSFCLQAAGGGLMATVNGRTTGSNLITAGLVIQIVFFGVFIINEFRFSYSVARVCPFYRHISKKWWFLNLTLMLSSILIMVRSIVRLVEFVEGYDGFIISHEYFIYVFDAVPMLLAAIVFIVGSFFGNIFTTITECQSLKP
scaffold10_size537661 AUGUSTUS exon 748 1674 0.93 - 0 Parent=95_g3568;gene_id=95_g3568;transcript_id=95_g3568.t1
scaffold10_size537661 AUGUSTUS CDS 748 1674 0.93 - 0 Parent=95_g3568;Name=RTM1_1;gene_id=95_g3568;transcript_id=95_g3568.t1
scaffold10_size537661 AUGUSTUS gene 2374 4626 . - 0 ID=95_g3569;Name=-;gene_id=95_g3569
scaffold10_size537661 AUGUSTUS mRNA 2374 4626 . - 0 Parent=95_g3569;gene_id=95_g3569;transcript_id=95_g3569.t1;product=putative transcriptional regulatory protein YER184C;locus_tag=95_g3569.1;accession=P39961;score=311;evalue=4e-92;translation=MIKGDEGNLSFYKQDDHILEQIKILKKSIDILEKLKLSVVTSSEYATNAIADINRQLETLLSFQHVNINSESVRTYKDNGALENQLLDGNYVAFDAFSTFENVSKGKAVDDIFGLYSPISVMSPKGICWLIQQLIVKSREKDTEETVYILLKFLDAGSSSYKWELIAPVYRLQFILIEFFGCKVPFTRNETLKEAIDAIPPSLRSELKEEGGNSIQDSRKLFMYCVKLLGKHFTSSRHLLRNINIFEQFFQVEELLSTLCYTFLENSLNFRLLDAEFLHDLLCFVKQRHWKDNSFIIGGVIAPLCRQVQDLGLSRWEYYLGMNEEHANPLREIWWDTYWWDKWYVVVTGKLPMIDSSTVTCLLPQQIMRLGVDDAMSSWQMLERVDFTYGSLKDHIMFGYIVLSISINDVFSNVLYNRKFTDYRVFSGITALDFKVVEELLTRINGIRKGFTVLKTTVVHELEKSLLDDDVFRFCIHFAYSRISCLRAIGNLLMRFKSIFHGTSSNLISDQIGECDKDILQTTVETFTFILRANDNIKIKEHVRPISEMGLNILLEAVKAPAGIDVYHISLFCGVASLFDRITCSENGAEKQYNHPGRIVLMEITCIFIFVRVCCLVYRQYKKVSKEELMAILTDFDHTTARFCNETLDIRSDLFQHIIRDKKKSDYHRDIIHGIEKVLGRDIITTIESCEREVISSDEYRQAQYMGNVATKDLDYLRYFLNLDIFPELNTDDELWDDLKEIDKYYCSSV
scaffold10_size537661 AUGUSTUS exon 2377 4626 0.97 - 0 Parent=95_g3569;gene_id=95_g3569;transcript_id=95_g3569.t1
scaffold10_size537661 AUGUSTUS CDS 2377 4626 0.97 - 0 Parent=95_g3569;Name=-;gene_id=95_g3569;transcript_id=95_g3569.t1
scaffold10_size537661 SignalP:4.1 signal_peptide 15225 15287 . - 0 product=putative signal peptide
scaffold10_size537661 AUGUSTUS gene 16555 17262 . + 0 ID=95_g3574;Name=-;gene_id=95_g3574
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 Parent=95_g3574;gene_id=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 Parent=95_g3574;gene_id=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Parent=95_g3574;Name=-;gene_id=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS gene 17890 19107 . + 0 ID=95_g3575;Name=ECM34;gene_id=95_g3575
scaffold10_size537661 AUGUSTUS mRNA 17890 19107 . + 0 Parent=95_g3575;gene_id=95_g3575;transcript_id=95_g3575.t1;product=Protein ECM34;locus_tag=95_g3575.1;accession=P38728;score=353;evalue=2e-120;translation=MEGRKSEDEKNEAALVCDVFESSNAKLPKNVFRSSFTWYCYEVINRSAFHIWLLLCLTLIVGWKVFSGIGGRRPSDSNMDGPQTKHKRNPGFLRRHSTIVILVISLAVSFSWEAFKMYRERTFGKQITQFAKEIIKSAPSTDMESWDRVAADFNSYMYENKLWNTEYFFFDGSSCHVAFRRTLLWISSRIDGDYKIEYFRKHPYIEEALKVYFAEVDRKWNLNTSQQLLSNISVGNIKLPGQSCRFKLFHIFKKVMKQRFSQVATVIFFVMSIRSPRNLGFFFTLALFVVLVCSQEWFSFEMNRSCSMKVEHRMQFLSTIISEHQKSDVNCWDQIAKKMNVYLFEQKVSGSDVFFLDGADCERFFERNFLRYLPSRKSSHPDLPIAELLPYIRKADIACAGKQLI
scaffold10_size537661 AUGUSTUS exon 17890 19104 0.97 + 0 Parent=95_g3575;gene_id=95_g3575;transcript_id=95_g3575.t1
scaffold10_size537661 AUGUSTUS CDS 17890 19104 0.97 + 0 Parent=95_g3575;Name=ECM34;gene_id=95_g3575;transcript_id=95_g3575.t1
##gff-version 3
scaffold10_size537661 AUGUSTUS gene 745 1674 . - 0 ID=95_g3568;Name=RTM1_1;gene_id=95_g3568
scaffold10_size537661 AUGUSTUS mRNA 745 1674 . - 0 gene_id=95_g3568;Parent=95_g3568;transcript_id=95_g3568.t1;product=Protein RTM1;locus_tag=95_g3568.1;accession=P40113;score=625;evalue=0.0;translation=MSNDSSGSEWELYRYTPSKGAAIALTVLFIVTTLIYSFQVVWDARKASKPEVDNPFDTPVDKCESITAISLGENYKKLTVRSTFSAFIPLFFGCIMEIVGYIARAVSSSNTKEIAPYVIQAVLLLIAPALYAATIYMLFGRLLHVMRCESLMIVSSRFGTSFFVFGDVVSFCLQAAGGGLMATVNGRTTGSNLITAGLVIQIVFFGVFIINEFRFSYSVARVCPFYRHISKKWWFLNLTLMLSSILIMVRSIVRLVEFVEGYDGFIISHEYFIYVFDAVPMLLAAIVFIVGSFFGNIFTTITECQSLKP
scaffold10_size537661 AUGUSTUS exon 748 1674 0.93 - 0 gene_id=95_g3568;Parent=95_g3568;transcript_id=95_g3568.t1
scaffold10_size537661 AUGUSTUS CDS 748 1674 0.93 - 0 Name=RTM1_1;gene_id=95_g3568;Parent=95_g3568;transcript_id=95_g3568.t1
scaffold10_size537661 AUGUSTUS gene 2374 4626 . - 0 ID=95_g3569;Name=-;gene_id=95_g3569
scaffold10_size537661 AUGUSTUS mRNA 2374 4626 . - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1;product=putative transcriptional regulatory protein YER184C;locus_tag=95_g3569.1;accession=P39961;score=311;evalue=4e-92;translation=MIKGDEGNLSFYKQDDHILEQIKILKKSIDILEKLKLSVVTSSEYATNAIADINRQLETLLSFQHVNINSESVRTYKDNGALENQLLDGNYVAFDAFSTFENVSKGKAVDDIFGLYSPISVMSPKGICWLIQQLIVKSREKDTEETVYILLKFLDAGSSSYKWELIAPVYRLQFILIEFFGCKVPFTRNETLKEAIDAIPPSLRSELKEEGGNSIQDSRKLFMYCVKLLGKHFTSSRHLLRNINIFEQFFQVEELLSTLCYTFLENSLNFRLLDAEFLHDLLCFVKQRHWKDNSFIIGGVIAPLCRQVQDLGLSRWEYYLGMNEEHANPLREIWWDTYWWDKWYVVVTGKLPMIDSSTVTCLLPQQIMRLGVDDAMSSWQMLERVDFTYGSLKDHIMFGYIVLSISINDVFSNVLYNRKFTDYRVFSGITALDFKVVEELLTRINGIRKGFTVLKTTVVHELEKSLLDDDVFRFCIHFAYSRISCLRAIGNLLMRFKSIFHGTSSNLISDQIGECDKDILQTTVETFTFILRANDNIKIKEHVRPISEMGLNILLEAVKAPAGIDVYHISLFCGVASLFDRITCSENGAEKQYNHPGRIVLMEITCIFIFVRVCCLVYRQYKKVSKEELMAILTDFDHTTARFCNETLDIRSDLFQHIIRDKKKSDYHRDIIHGIEKVLGRDIITTIESCEREVISSDEYRQAQYMGNVATKDLDYLRYFLNLDIFPELNTDDELWDDLKEIDKYYCSSV
scaffold10_size537661 AUGUSTUS exon 2377 4626 0.97 - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1
scaffold10_size537661 AUGUSTUS CDS 2377 4626 0.97 - 0 Name=-;gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1
scaffold10_size537661 SignalP:4.1 signal_peptide 15225 15287 . - 0 product=putative signal peptide
scaffold10_size537661 AUGUSTUS gene 16555 17262 . + 0 ID=95_g3574;Name=-;gene_id=95_g3574
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Name=-;gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS gene 17890 19107 . + 0 ID=95_g3575;Name=ECM34;gene_id=95_g3575
scaffold10_size537661 AUGUSTUS mRNA 17890 19107 . + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1;product=Protein ECM34;locus_tag=95_g3575.1;accession=P38728;score=353;evalue=2e-120;translation=MEGRKSEDEKNEAALVCDVFESSNAKLPKNVFRSSFTWYCYEVINRSAFHIWLLLCLTLIVGWKVFSGIGGRRPSDSNMDGPQTKHKRNPGFLRRHSTIVILVISLAVSFSWEAFKMYRERTFGKQITQFAKEIIKSAPSTDMESWDRVAADFNSYMYENKLWNTEYFFFDGSSCHVAFRRTLLWISSRIDGDYKIEYFRKHPYIEEALKVYFAEVDRKWNLNTSQQLLSNISVGNIKLPGQSCRFKLFHIFKKVMKQRFSQVATVIFFVMSIRSPRNLGFFFTLALFVVLVCSQEWFSFEMNRSCSMKVEHRMQFLSTIISEHQKSDVNCWDQIAKKMNVYLFEQKVSGSDVFFLDGADCERFFERNFLRYLPSRKSSHPDLPIAELLPYIRKADIACAGKQLI
scaffold10_size537661 AUGUSTUS exon 17890 19104 0.97 + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1
scaffold10_size537661 AUGUSTUS CDS 17890 19104 0.97 + 0 Name=ECM34;gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1
##gff-version 3
scaffold10_size537661 AUGUSTUS gene 745 1674 . - 0 ID=95_g3568;Name=RTM1_1;gene_id=95_g3568
scaffold10_size537661 AUGUSTUS mRNA 2374 4626 . - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1;product=putative transcriptional regulatory protein YER184C;locus_tag=95_g3569.1;accession=P39961;score=311;evalue=4e-92;translation=MIKGDEGNLSFYKQDDHILEQIKILKKSIDILEKLKLSVVTSSEYATNAIADINRQLETLLSFQHVNINSESVRTYKDNGALENQLLDGNYVAFDAFSTFENVSKGKAVDDIFGLYSPISVMSPKGICWLIQQLIVKSREKDTEETVYILLKFLDAGSSSYKWELIAPVYRLQFILIEFFGCKVPFTRNETLKEAIDAIPPSLRSELKEEGGNSIQDSRKLFMYCVKLLGKHFTSSRHLLRNINIFEQFFQVEELLSTLCYTFLENSLNFRLLDAEFLHDLLCFVKQRHWKDNSFIIGGVIAPLCRQVQDLGLSRWEYYLGMNEEHANPLREIWWDTYWWDKWYVVVTGKLPMIDSSTVTCLLPQQIMRLGVDDAMSSWQMLERVDFTYGSLKDHIMFGYIVLSISINDVFSNVLYNRKFTDYRVFSGITALDFKVVEELLTRINGIRKGFTVLKTTVVHELEKSLLDDDVFRFCIHFAYSRISCLRAIGNLLMRFKSIFHGTSSNLISDQIGECDKDILQTTVETFTFILRANDNIKIKEHVRPISEMGLNILLEAVKAPAGIDVYHISLFCGVASLFDRITCSENGAEKQYNHPGRIVLMEITCIFIFVRVCCLVYRQYKKVSKEELMAILTDFDHTTARFCNETLDIRSDLFQHIIRDKKKSDYHRDIIHGIEKVLGRDIITTIESCEREVISSDEYRQAQYMGNVATKDLDYLRYFLNLDIFPELNTDDELWDDLKEIDKYYCSSV
scaffold10_size537661 AUGUSTUS gene 2374 4626 . - 0 ID=95_g3569;Name=-;gene_id=95_g3569
scaffold10_size537661 AUGUSTUS exon 2377 4626 0.97 - 0 gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1
scaffold10_size537661 AUGUSTUS CDS 2377 4626 0.97 - 0 Name=-;gene_id=95_g3569;Parent=95_g3569;transcript_id=95_g3569.t1
scaffold10_size537661 SignalP:4.1 signal_peptide 15225 15287 . - 0 product=putative signal peptide
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Name=-;gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS gene 16555 17262 . + 0 ID=95_g3574;Name=-;gene_id=95_g3574
scaffold10_size537661 AUGUSTUS mRNA 16555 17262 . + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1;product=DUP240 protein YHL044W;locus_tag=95_g3574.1;accession=P38727;score=468;evalue=3e-167;translation=MSSELLISDSKPRPEGLRKLCEGETVILPRDITPSKCAYFLKQNIVFISYIFIHIIITIILNRLALSAHGNTLIVILAALLITISLFLLLLLPYLNCSRYKLRCLDDDCKFKLLAEVITHKPNVDLSTWDRIAYDMNQFVYDRRICADRSFFYDGSSCYQVFKKLVATPYLVNSNMNSIDADLEMRSNAATNINDSGNSSLHIELGTYIFKALAVFRNSVDKYWEDKYPEMGVTV
scaffold10_size537661 AUGUSTUS exon 16555 17259 1 + 0 gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS CDS 16555 17259 1 + 0 Name=-;gene_id=95_g3574;Parent=95_g3574;transcript_id=95_g3574.t1
scaffold10_size537661 AUGUSTUS gene 17890 19107 . + 0 ID=95_g3575;Name=ECM34;gene_id=95_g3575
scaffold10_size537661 AUGUSTUS mRNA 17890 19107 . + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1;product=Protein ECM34;locus_tag=95_g3575.1;accession=P38728;score=353;evalue=2e-120;translation=MEGRKSEDEKNEAALVCDVFESSNAKLPKNVFRSSFTWYCYEVINRSAFHIWLLLCLTLIVGWKVFSGIGGRRPSDSNMDGPQTKHKRNPGFLRRHSTIVILVISLAVSFSWEAFKMYRERTFGKQITQFAKEIIKSAPSTDMESWDRVAADFNSYMYENKLWNTEYFFFDGSSCHVAFRRTLLWISSRIDGDYKIEYFRKHPYIEEALKVYFAEVDRKWNLNTSQQLLSNISVGNIKLPGQSCRFKLFHIFKKVMKQRFSQVATVIFFVMSIRSPRNLGFFFTLALFVVLVCSQEWFSFEMNRSCSMKVEHRMQFLSTIISEHQKSDVNCWDQIAKKMNVYLFEQKVSGSDVFFLDGADCERFFERNFLRYLPSRKSSHPDLPIAELLPYIRKADIACAGKQLI
scaffold10_size537661 AUGUSTUS exon 17890 19104 0.97 + 0 gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1
scaffold10_size537661 AUGUSTUS CDS 17890 19104 0.97 + 0 Name=ECM34;gene_id=95_g3575;Parent=95_g3575;transcript_id=95_g3575.t1
import gffutils
import os
def writeGFF(gffutilsDB, outputFilename):
    """Dump every feature of a gffutils database to a GFF3 text file.

    The output starts with a '##gff-version 3' header line, followed by one
    line per feature (its ``str()`` form), with features requested from the
    database ordered by seqid and then start.

    gffutilsDB     -- object exposing ``all_features(order_by=...)``; the
                      caller in this file passes the result of
                      ``gffutils.create_db``.
    outputFilename -- path of the file to create or overwrite.
    """
    with open(outputFilename, 'w') as gffFile:
        gffFile.write('##gff-version 3\n')
        # Query only after the header is written, so the file is created
        # (and holds the header) even if the query itself fails.
        records = gffutilsDB.all_features(order_by=('seqid', 'start'))
        gffFile.writelines(str(record) + '\n' for record in records)
def gffExtract():
    """Round-trip 'test-small.gff' through a gffutils database, before and after an update.

    Steps:
      1. Build the database 'test-small.db' from 'test-small.gff'.
      2. Write its contents to 'test-unchanged.gff'.
      3. Feed every feature belonging to a gene whose Name is "-" back into
         the database, unmodified, via ``db.update(..., merge_strategy="replace")``.
      4. Write the contents again to 'test-updated.gff'.

    Each re-submitted feature is also printed to stdout.
    """
    genomeGFF = 'test-small.gff'
    genomeGFFdb = 'test-small.db'
    db = gffutils.create_db(genomeGFF, dbfn=genomeGFFdb, force=True, keep_order=True, merge_strategy="warning")
    writeGFF(db, "test-unchanged.gff")

    def generatorModifyFeatures(featureFamilies):
        """Yield all members of each family whose parent gene is named "-"."""
        for featureFamily in featureFamilies:
            # featureFamily[0] is the parent (featureType "gene")
            if featureFamily[0].attributes["Name"][0] == "-":
                for feature in featureFamily:
                    # Function-call form: valid on both Python 2 and Python 3
                    # (the bare `print feature` statement is Python 2 only).
                    print(feature)
                    yield feature

    # The families are materialised into a list before db.update() runs.
    # NOTE(review): presumably so the database is not being read while it is
    # being written -- confirm.
    featureFamilies = list(db.iter_by_parent_childs())
    # NOTE(review): the mRNA/exon/CDS rows of the sample input have no ID
    # attribute. The reported difference between the two output files may stem
    # from how gffutils assigns IDs to such features during update() --
    # confirm against the gffutils documentation (id_spec / merge_strategy).
    db.update(generatorModifyFeatures(featureFamilies), merge_strategy="replace")
    writeGFF(db, "test-updated.gff")
# Module-level entry point: runs the whole round-trip whenever this file is executed.
gffExtract()
@rmzelle
Copy link
Author

rmzelle commented Mar 8, 2016

Difference between "test-unchanged.gff" (left) and "test-updated.gff" (right):

image

@rmzelle
Copy link
Author

rmzelle commented Mar 8, 2016

Also, "test-unchanged.gff" and "test-updated.gff" turn out the same when I just use:

    def generatorModifyFeatures(featureFamilies):
        """Yield every member of every family, unchanged and in the given order."""
        flattened = (member for family in featureFamilies for member in family)
        for member in flattened:
            yield member

@rmzelle
Copy link
Author

rmzelle commented Mar 8, 2016

The input file ("test-small.gff") validates according to http://genometools.org/cgi-bin/gff3validator.cgi.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment