Created
January 29, 2019 08:37
-
-
Save tbrittoborges/c9400b69ec7bb320f0c392238dc6a7b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# __author__ = 'tbrittoborges' | |
import os
import subprocess
import sys
import tempfile

import utils
# Load the sequences to annotate.
# NOTE(review): utils.fasta_parser is project-local; the loop below iterates
# its result and indexes it by key, so it is presumably a mapping of
# sequence id -> sequence string -- confirm against utils.
seqs = utils.fasta_parser('.fasta')

# Submit one InterProScan job per sequence, one after the other.
for pid in seqs:
    output_file = 'interproscan/{}'.format(pid)

    # The client is given its query as a file path, so write this single
    # record to a temporary FASTA file. Text mode keeps the write valid on
    # both Python 2 and Python 3, and leaving the `with` block closes (and
    # therefore flushes) the file before the child process reads it.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp:
        temp.write(">{}\n{}".format(pid, seqs[pid]))

    try:
        # Launch the job.
        # NOTE(review): the --email value below is not a usable address;
        # replace it with a real contact e-mail before running.
        p = subprocess.Popen(["python", "/iprscan5_soappy.py",
                              "--goterms", "--pathways",
                              "--email", "[email protected]",
                              '--sequence', temp.name, "--outformat", "tsv",
                              '--outfile', output_file],
                             stdout=subprocess.PIPE)
        # Drain the client's stdout until it exits. This keeps the jobs
        # sequential without relying on matching a status line.
        p.communicate()
        if p.returncode != 0:
            sys.stderr.write(
                "InterProScan job for {} exited with status {}\n".format(
                    pid, p.returncode))
    finally:
        # delete=False means the temporary file is ours to clean up.
        os.remove(temp.name)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
__author__ = 'tbrittoborges' | |
""" | |
Created on 11:13 19/09/2014 2014 | |
""" | |
import os | |
import csv | |
import pandas as pd | |
# Display settings for inspecting the results table.
# "display.mpl_style" was deprecated and later removed from pandas; assigning
# to an option that does not exist raises pandas' OptionError, which derives
# from both AttributeError and KeyError. The setting is purely cosmetic, so
# its absence must not abort the script.
try:
    pd.options.display.mpl_style = "default"
except (AttributeError, KeyError):
    pass
pd.options.display.max_columns = 50
# https://code.google.com/p/interproscan/wiki/InterProScan5OutputFormats
# Protein Accession (e.g. P51587)
# Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579)
# Sequence Length (e.g. 3418)
# Analysis (e.g. Pfam / PRINTS / Gene3D)
# Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140)
# Signature Description (e.g. BRCA2 repeat profile)
# Start location
# Stop location
# Score - the e-value of the match reported by the member database method
#   (e.g. 3.1E-52)
# Status - the status of the match (T: true)
# Date - the date of the run
# (InterPro annotations - accession (e.g. IPR002093) - optional column;
#   only displayed if the -iprscan option is switched on)
# (InterPro annotations - description (e.g. BRCA2 repeat) - optional column;
#   only displayed if the -iprscan option is switched on)
# (GO annotations (e.g. GO:0005515) - optional column;
#   only displayed if the --goterms option is switched on)
# (Pathways annotations (e.g. REACT_71) - optional column;
#   only displayed if the --pathways option is switched on)
# Directory scanned for InterProScan result files.
interproscan_path = "path/to/results/"
# Accumulates every row from every result file.
f_list = []
# Column labels for the resulting table, in the order the fields appear in
# each tab-separated row.
names = ['pid', 'md5', 'len', 'analysis', 'signature', 'description', 'start',
         'end', 'score', 'status', 'date', 'ips_id', 'ips_des', 'go', 'pathway']
for f_name in os.listdir(interproscan_path):
    # Only visible ".txt" files are treated as result files.
    if f_name.endswith(".txt") and not f_name.startswith('.'):
        # Open each file exactly once and let the context manager close it.
        with open(os.path.join(interproscan_path, f_name)) as f_handle:
            for row in csv.reader(f_handle, delimiter='\t'):
                # Rows may omit trailing fields; pad them with None so the
                # table can always be built with the full set of columns.
                row.extend([None] * (len(names) - len(row)))
                f_list.append(row)
# One row per match, one column per entry in `names`.
df = pd.DataFrame(f_list, columns=names)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment