Last active
December 12, 2015 00:59
-
-
Save hannes-brt/4688151 to your computer and use it in GitHub Desktop.
Readbase API example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Map Bodymap STAR" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import urllib2\n", | |
"import subprocess\n", | |
"import os\n", | |
"import json" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Query the database for Bodymap 2.0 files (`study_id=ERP000546`) that are single end (`library_layout=single`)\n", | |
"Read all of the results (`.read()`), and split on newlines (the results are returned one uid by line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"uids = urllib2.urlopen('http://128.100.241.98/readbase/query?study_id=ERP000546&library_layout=single').read().split('\\n')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Now request the JSON metadata for all the experiments. The uids are given as a comma-separated list in the URL and the result is automatically parsed by the JSON module." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"metadata_json = json.load(urllib2.urlopen('http://128.100.241.98/readbase/get-json?uid=' + ','.join(uids)))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 19 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Here I extract the run ids for all runs and all experiments (an experiment can have more than one run, so we need to iterate over all runs for the experiment)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"run_ids = [run['run_id'] for experiment in metadata_json for run in experiment['runs']]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 29 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Load the genome into shared memory for the STAR mapping algorithm." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"subprocess.check_output(['/home/hannes/usr/bin/STAR', '--genomeDir', '/data/hg19/', '--genomeLoad', 'LoadAndExit'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 33, | |
"text": [ | |
"'Jan 31 17:14:32 ..... Started STAR run\\n'" | |
] | |
} | |
], | |
"prompt_number": 33 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Parameter for STAR\n", | |
"star_parameters = [\n", | |
" '--genomeDir', '/data/hg19',\n", | |
" '--runThreadN', '32']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 36 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Run STAR for each file in a new directory\n", | |
"for run_id in run_ids:\n", | |
" os.chdir('/data/Bodymap/STAR_maps/')\n", | |
" if not os.path.exists(run_id): os.mkdir(run_id)\n", | |
" os.chdir(run_id)\n", | |
" print run_id\n", | |
" path = '/data/Bodymap/ERP000546_fastq/' + run_id + '.fastq'\n", | |
" subprocess.check_output(['/home/hannes/usr/bin/STAR'] + star_parameters + ['--readFilesIn'] + [path])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"ERR030865\n", | |
"ERR030861" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030864" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030902" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030871" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030857" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030903" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030888" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030898" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030863" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030900" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030895" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030866" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030856" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030858" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030896" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030860" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030870" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030862" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030893" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030899" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030868" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030894" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030890" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030869" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030889" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030891" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030867" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030859" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030897" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030901" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"ERR030892" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 34 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Remove the index from the shared memory\n", | |
"os.chdir('/data/Bodymap/STAR_maps/')\n", | |
"subprocess.check_output(['/home/hannes/usr/bin/STAR', '--genomeDir', '/data/hg19/', '--genomeLoad', 'Remove'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 35, | |
"text": [ | |
"'Jan 31 19:12:47 ..... Started STAR run\\n'" | |
] | |
} | |
], | |
"prompt_number": 35 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment