Skip to content

Instantly share code, notes, and snippets.

@epaule
Created February 27, 2017 12:23
Show Gist options
  • Save epaule/7b870a805552ddc1d933f9d014dca906 to your computer and use it in GitHub Desktop.
Save epaule/7b870a805552ddc1d933f9d014dca906 to your computer and use it in GitHub Desktop.
Scriptlet to create sample meta table entries for an Ensembl core database.
#!/usr/bin/env python
# random_sample_chooser.py <GFF3>
import re
import sys
import random
# Attribute patterns; each accepts the attribute at the end of the line as
# well as followed by ';' (Parent is frequently the last attribute).
NAME_RE = re.compile(r"Name=(.*?)(;|$)")
PARENT_RE = re.compile(r"Parent=(.*?)(;|$)")


def parse_mrna_records(lines):
    """Collect mRNA features from an iterable of GFF3 lines.

    Returns a dict mapping the mRNA ``Name`` attribute to a two-item list
    ``[parent_id, region]`` where ``region`` is ``"<col1>:<col4>-<col5>"``
    of the feature line. A later record with the same Name replaces an
    earlier one.

    Lines that start with '#', have fewer than five tab-separated columns,
    are not of type ``mRNA``, or lack a Name or Parent attribute are skipped.
    """
    records = {}
    for raw in lines:
        # Only a leading '#' marks a comment/directive; a '#' inside an
        # attribute value must not discard the record.
        if raw.startswith("#"):
            continue
        # Drop the line terminator so a trailing '\r' cannot leak into the
        # last attribute value.
        line = raw.rstrip("\r\n")
        columns = line.split("\t")
        # Blank lines and non-tabular lines (e.g. embedded sequence) do not
        # have the columns used below.
        if len(columns) < 5 or columns[2] != "mRNA":
            continue
        name_match = NAME_RE.search(line)
        parent_match = PARENT_RE.search(line)
        if name_match is None or parent_match is None:
            continue
        # Sequence name and start-end position of the transcript.
        region = columns[0] + ":" + columns[3] + "-" + columns[4]
        records[name_match.group(1)] = [parent_match.group(1), region]
    return records


def sample_meta_lines(transcript, geneid, region):
    """Return the ``meta.sample.*`` output lines for one chosen transcript."""
    return [
        "meta.sample.gene_param: " + geneid,
        "meta.sample.gene_text: " + geneid,
        "meta.sample.location_param: " + region,
        "meta.sample.location_text: " + region,
        "meta.sample.search_text: ribosomal",
        "meta.sample.transcript_param: " + transcript,
        "meta.sample.transcript_text: " + transcript,
    ]


def main(argv=None):
    """Pick a random mRNA from the GFF3 file named in argv[1] and print it.

    Exits with a message when no file argument is given or when the file
    contains no usable mRNA records.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: random_sample_chooser.py <GFF3>")
    # The context manager closes the file even if parsing fails.
    with open(argv[1], "r") as gff:
        records = parse_mrna_records(gff)
    if not records:
        sys.exit("no mRNA records with Name and Parent found in " + argv[1])
    # list(...) keeps this working where dict.keys() is a view, not a list.
    sample = random.choice(list(records))
    geneid, region = records[sample]
    for entry in sample_meta_lines(sample, geneid, region):
        print(entry)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment