Skip to content

Instantly share code, notes, and snippets.

@alexlenail
Created November 11, 2016 15:58
Show Gist options
  • Save alexlenail/d74890f978c172cbee15975aff949753 to your computer and use it in GitHub Desktop.
Save alexlenail/d74890f978c172cbee15975aff949753 to your computer and use it in GitHub Desktop.
import pdb
import pandas as pd
import os
import json
import requests
from argparse import ArgumentParser
# Enrichr endpoints used by this script: one to upload a gene list and one
# to export the enrichment results for an uploaded list.
ENRICHR_ADDLIST = 'http://amp.pharm.mssm.edu/Enrichr/addList'
ENRICHR_EXPORT = 'http://amp.pharm.mssm.edu/Enrichr/export'

# Libraries queried for every cluster (see get_enrichment).
DATABASES = [
    'KEGG_2016',
    'Reactome_2016',
    'BioCarta_2016',
]
# Alternative selection of libraries, kept for reference:
# DATABASES =['KEGG_2016', 'GO_Biological_Process_2015', 'GO_Cellular_Component_2015', 'GO_Molecular_Function_2015', 'WikiPathways_2016']
def main():
    """
    Command-line entry point.

    Takes the path to the output of a communities (clustering) algorithm as
    the single positional argument and passes it to get_enrichment, which
    looks up enriched terms for each cluster through the Enrichr API.

    VERSION 2016-10-25
    """
    cli = ArgumentParser(
        description='Read in communities from clustering algoritithm')
    cli.add_argument(
        'filename', help='Input file with the cluster communities')
    get_enrichment(cli.parse_args().filename)
def get_enrichment(filename):
    """
    Upload each cluster's genes to Enrichr and download its enrichments.

    Reads a header-less table with pd.read_table, names its two columns
    'cluster_number' and 'genes', and groups the genes by cluster number.
    Each group is uploaded with add_gene_list, then download_enrichr is
    called once per (cluster, database) pair for every entry of DATABASES.
    Output files are written to the directory containing the input file.

    filename -- path to the cluster communities file.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('The file for clustering is ' + filename)
    outputDir = os.path.dirname(os.path.abspath(filename))

    df = pd.read_table(filename, header=None)
    df.columns = ['cluster_number', 'genes']

    # Group by the scalar column name rather than a one-element list: with a
    # list, pandas >= 2.0 yields 1-tuples as keys, which would turn the
    # cluster name into e.g. "Cluster_(1,)".
    for clusterID, genes in df.groupby('cluster_number')['genes']:
        # Drop missing entries and coerce to str so that numeric gene IDs or
        # blank cells do not make str.join raise TypeError.
        genes_str = '\n'.join(genes.dropna().astype(str))
        clusterID = 'Cluster_' + str(clusterID)
        userListID = add_gene_list(genes_str, clusterID)
        for database in DATABASES:
            download_enrichr(userListID, clusterID, outputDir, database)
def add_gene_list(genes_str, clusterID):
    """
    Upload a list of genes to Enrichr and return the ID assigned to it.

    genes_str -- the genes to upload, as one string.
    clusterID -- sent as the description of the uploaded list.

    Returns the 'userListId' value from the JSON response.
    Raises Exception if the response status is not OK.
    """
    # Each field is a (None, value) tuple passed through `files=`, so the
    # request is sent as multipart form data without a file name.
    list_field = (None, genes_str)
    description_field = (None, clusterID)
    reply = requests.post(
        ENRICHR_ADDLIST,
        files={'list': list_field, 'description': description_field},
    )

    if not reply.ok:
        raise Exception('Error analyzing gene list')

    return json.loads(reply.text)['userListId']
def download_enrichr(userListID, clusterID, outputDir, database, max_retries=3):
    """
    Download the enrichments for an uploaded gene list and write them to disk.

    userListID  -- ID returned by add_gene_list for the uploaded genes.
    clusterID   -- cluster label; used in the output file name and messages.
    outputDir   -- directory in which the result file is created.
    database    -- name of the library to query (sent as backgroundType).
    max_retries -- how many times a failed request is retried before giving
                   up (default 3). Negative values are treated as 0.

    The result is written to '<outputDir>/<database>_<clusterID>.txt'.
    Raises Exception if every attempt returns a non-OK response.
    """
    filename = os.path.join(outputDir, database + '_' + clusterID)

    # Passing the query as `params` lets requests URL-encode the values; the
    # file name contains a filesystem path and may hold characters that are
    # not safe in a hand-built query string. A list keeps the field order.
    query = [
        ('userListId', userListID),
        ('filename', filename),
        ('backgroundType', database),
    ]

    # Bounded retry loop: the previous implementation recursed on every
    # failure with no limit, so a persistent server error never terminated
    # cleanly.
    for attempt in range(max(0, max_retries) + 1):
        if attempt:
            print("failed once, trying again: ")
        print("Querying " + database + " for " + clusterID)
        response = requests.get(ENRICHR_EXPORT, params=query, stream=True)
        if response.ok:
            break
        # Release the connection of the failed, still-open streamed response.
        response.close()
    else:
        raise Exception(
            'Error downloading %s enrichment for %s (HTTP status %s)'
            % (database, clusterID, response.status_code))

    try:
        with open(filename + '.txt', 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    finally:
        # A streamed response holds its connection until it is closed.
        response.close()
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment