Use inside notebook (uses !
syntax):
import json
import urllib.parse
import urllib.request

import pandas as pd
def get_gene_length(gene, *, server="http://grch37.rest.ensembl.org", timeout=30.0):
    """Return the genomic span of a human gene as reported by the Ensembl REST API.

    The gene symbol is first resolved to an Ensembl identifier through the
    ``xrefs/symbol/homo_sapiens`` endpoint (the first hit is used), and that
    identifier is then passed to the ``lookup/id`` endpoint to obtain the
    ``start`` and ``end`` coordinates.

    The requests are made with the standard library instead of a
    ``!curl | jq | xargs`` shell pipeline, so the function runs both inside a
    notebook and in a plain Python script, needs no external command-line
    tools, and never passes the gene symbol through a shell.

    Args:
        gene: Gene symbol to look up (converted with ``str`` and URL-encoded).
        server: Base URL of the REST server. Defaults to the GRCh37 server.
        timeout: Timeout in seconds applied to each of the two requests.

    Returns:
        ``max(start, end) - min(start, end)`` for the looked-up record.

    Raises:
        KeyError: If the symbol has no cross-reference, or the lookup record
            has no ``start``/``end`` field.
        urllib.error.URLError: If a request fails (``HTTPError`` for non-2xx
            responses).
    """
    base = server.rstrip("/")

    # Encode the symbol so characters such as "/" or spaces cannot alter the
    # request path.
    symbol = urllib.parse.quote(str(gene), safe="")
    xrefs = _fetch_json(base + "/xrefs/symbol/homo_sapiens/" + symbol, timeout)
    if not xrefs:
        # Fail here with a clear message instead of looking up a bogus id and
        # failing later on a missing 'start' key.
        raise KeyError(
            "no Ensembl cross-reference found for gene symbol {!r}".format(gene)
        )

    gene_id = urllib.parse.quote(str(xrefs[0]["id"]), safe="")
    record = _fetch_json(base + "/lookup/id/" + gene_id, timeout)

    start, end = record["start"], record["end"]
    # NOTE(review): if start/end are inclusive coordinates (believed to be
    # Ensembl's convention -- confirm), the base-pair count is this span + 1.
    # The value is kept as the plain difference to preserve existing results.
    return max(start, end) - min(start, end)


def _fetch_json(url, timeout):
    """GET *url* asking for a JSON response and return the decoded payload."""
    request = urllib.request.Request(
        url, headers={"Content-type": "application/json"}
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return json.loads(response.read().decode("utf-8"))
# Map every distinct value of the GENE column (after resetting the index of
# ``df``) to its length; each gene triggers one get_gene_length() call.
gene_lengths = pd.Series(
    {
        symbol: get_gene_length(symbol)
        for symbol in df.reset_index()["GENE"].unique()
    }
)