Created
November 11, 2016 15:58
-
-
Save alexlenail/d74890f978c172cbee15975aff949753 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pdb | |
import pandas as pd | |
import os | |
import json | |
import requests | |
from argparse import ArgumentParser | |
# Enrichr endpoints: one to upload a gene list, one to export enrichment results.
ENRICHR_ADDLIST = 'http://amp.pharm.mssm.edu/Enrichr/addList'
ENRICHR_EXPORT = 'http://amp.pharm.mssm.edu/Enrichr/export'

# Gene-set libraries queried for every cluster.
DATABASES = [
    'KEGG_2016',
    'Reactome_2016',
    'BioCarta_2016',
]
# Alternative library selection, kept for reference:
# DATABASES = ['KEGG_2016', 'GO_Biological_Process_2015', 'GO_Cellular_Component_2015', 'GO_Molecular_Function_2015', 'WikiPathways_2016']
def main():
    """
    Command-line entry point.

    Parses the single positional argument (the path to a file produced by a
    community/clustering algorithm) and runs the Enrichr enrichment lookup
    on it.

    VERSION 2016-10-25
    """
    cli = ArgumentParser(description='Read in communities from clustering algoritithm')
    cli.add_argument('filename', help='Input file with the cluster communities')
    get_enrichment(cli.parse_args().filename)
def get_enrichment(filename):
    """
    Run an Enrichr enrichment analysis for every cluster listed in a file.

    The file is read as a header-less, tab-separated table with two columns:
    the cluster identifier and a gene. Genes are grouped by cluster; each
    group is uploaded to Enrichr as one gene list, and the enrichment table
    for every library in DATABASES is downloaded into the directory that
    contains the input file.

    Parameters:
        filename: path to the cluster/gene table.
    """
    print('The file for clustering is ' + filename)
    outputDir = os.path.dirname(os.path.abspath(filename))

    df = pd.read_table(filename, header=None)
    df.columns = ['cluster_number', 'genes']

    # Group on the bare column name rather than a one-element list: with a
    # list key, newer pandas yields 1-tuples such as (3,) as group keys, which
    # would turn the label into 'Cluster_(3,)'.
    for cluster_number, genes in df.groupby('cluster_number')['genes']:
        genes_str = '\n'.join(genes.tolist())  # one gene per line for the upload
        clusterID = 'Cluster_' + str(cluster_number)
        userListID = add_gene_list(genes_str, clusterID)
        for database in DATABASES:
            download_enrichr(userListID, clusterID, outputDir, database)
def add_gene_list(genes_str, clusterID):
    """
    Upload a gene list to Enrichr and return the ID assigned to it.

    Parameters:
        genes_str: the genes to upload, as a single string.
        clusterID: text sent as the list's description.

    Returns:
        The 'userListId' value from Enrichr's JSON reply.

    Raises:
        Exception: if the server answers with a non-OK status.
    """
    # Each field is a (filename, value) tuple with no filename, posted via `files=`.
    form_fields = {
        'list': (None, genes_str),
        'description': (None, clusterID)
    }
    reply = requests.post(ENRICHR_ADDLIST, files=form_fields)
    if reply.ok:
        return json.loads(reply.text)['userListId']
    raise Exception('Error analyzing gene list')
def download_enrichr(userListID, clusterID, outputDir, database, max_retries=3):
    """
    Download the enrichment results for one gene list and one library.

    Requests the Enrichr export endpoint and streams the reply into
    `<outputDir>/<database>_<clusterID>.txt`.

    Parameters:
        userListID: ID of a previously uploaded gene list.
        clusterID: cluster label, used in the output file name.
        outputDir: directory the result file is written to.
        database: name of the Enrichr library to query.
        max_retries: how many extra attempts to make after a failed request
            before giving up (default 3).

    Raises:
        Exception: if no attempt returns an OK response.
    """
    filename = os.path.join(outputDir, database + '_' + clusterID)
    # Let requests build the query string so that special characters in the
    # path (spaces, '&', '#', ...) are URL-encoded instead of corrupting it.
    params = {
        'userListId': userListID,
        'filename': filename,
        'backgroundType': database,
    }
    print("Querying " + database + " for " + clusterID)

    # Bounded retry loop: the previous unbounded recursion would only stop
    # with a RecursionError when the service stayed unavailable.
    for attempt in range(max_retries + 1):
        response = requests.get(ENRICHR_EXPORT, params=params, stream=True)
        if response.ok:
            break
        response.close()  # release the connection held by the streamed reply
        if attempt < max_retries:
            print("failed once, trying again: ")
    else:
        raise Exception(
            'Error fetching enrichment results for ' + clusterID
            + ' from ' + database
        )

    try:
        with open(filename + '.txt', 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    finally:
        response.close()
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment