Skip to content

Instantly share code, notes, and snippets.

@tbrittoborges
Created January 29, 2019 08:37
Show Gist options
  • Save tbrittoborges/c9400b69ec7bb320f0c392238dc6a7b0 to your computer and use it in GitHub Desktop.
Save tbrittoborges/c9400b69ec7bb320f0c392238dc6a7b0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# __author__ = 'tbrittoborges'
import os
import subprocess
import sys
import tempfile

import utils
def _run_interproscan(pid, sequence, output_file):
    """Run the InterProScan client on a single sequence and wait for it.

    The sequence is written, under the FASTA header ``>pid``, to a temporary
    file that is handed to ``/iprscan5_soappy.py`` via ``--sequence``.  The
    call blocks until the client process exits and the temporary file is
    removed afterwards.

    Args:
        pid: identifier used as the FASTA header.
        sequence: sequence text written below the header.
        output_file: value passed to the client's ``--outfile`` option.

    Returns:
        The exit code of the client process.
    """
    # Text mode: NamedTemporaryFile defaults to binary ('w+b'), which rejects
    # str on Python 3.  delete=False so the file survives the close below.
    with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp:
        temp.write(">{}\n{}".format(pid, sequence))
    # The file is closed (and therefore flushed to disk) at this point, so
    # the client always sees the complete record when it opens it by name.
    try:
        # NOTE(review): the --email value is a scrubbed placeholder, not a
        # usable address -- replace it before running.
        p = subprocess.Popen(["python", "/iprscan5_soappy.py",
                              "--goterms", "--pathways",
                              "--email", "[email protected]",
                              "--sequence", temp.name, "--outformat", "tsv",
                              "--outfile", output_file],
                             stdout=subprocess.PIPE)
        # Drain stdout and wait for the job to end; the client's output is
        # discarded, exactly as the former read loop did.
        p.communicate()
        return p.returncode
    finally:
        # delete=False means nobody else cleans this up.
        os.remove(temp.name)


# presumably a mapping of sequence id -> sequence (it is iterated and then
# indexed by the iterated key); verify against utils.fasta_parser
seqs = utils.fasta_parser('.fasta')
# iterate over the fasta file, one job per sequence, run sequentially
for pid in seqs:
    output_file = 'interproscan/{}'.format(pid)
    if _run_interproscan(pid, seqs[pid], output_file) != 0:
        sys.stderr.write("InterProScan job failed for {}\n".format(pid))
#!/usr/bin/env python
__author__ = 'tbrittoborges'
"""
Created on 11:13 19/09/2014 2014
"""
import os
import csv
import pandas as pd
# Cosmetic plotting style.  The ``display.mpl_style`` option only exists in
# old pandas releases; on versions where it has been removed the assignment
# raises OptionError (a subclass of both AttributeError and KeyError), which
# must not abort the script, so it is applied on a best-effort basis.
try:
    pd.options.display.mpl_style = "default"
except (AttributeError, KeyError):
    pass
# Show up to 50 columns when a DataFrame is printed.
pd.options.display.max_columns = 50
#https://code.google.com/p/interproscan/wiki/InterProScan5OutputFormats
# Protein Accession (e.g. P51587)
# Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579)
# Sequence Length (e.g. 3418)
# Analysis (e.g. Pfam / PRINTS / Gene3D)
# Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140)
# Signature Description (e.g. BRCA2 repeat profile)
# Start location
# Stop location
# Score - is the e-value of the match reported by member database method
# (e.g. 3.1E-52)
# Status - is the status of the match (T: true)
# Date - is the date of the run
# (InterPro annotations - accession (e.g. IPR002093) - optional column;
# only displayed if -iprscan option is switched on)
# (InterPro annotations - description (e.g. BRCA2 repeat) - optional column;
# only displayed if -iprscan option is switched on)
# (GO annotations (e.g. GO:0005515) - optional column;
# only displayed if --goterms option is switched on)
# (Pathways annotations (e.g. REACT_71) - optional column;
# only displayed if --pathways option is switched on)
# Directory holding the InterProScan TSV result files (placeholder path).
interproscan_path = "path/to/results/"
# Column labels, in the order of the TSV fields described in the comment
# block above; the last four fields are optional in the output.
names = ['pid', 'md5', 'len', 'analysis', 'signature', 'description', 'start',
         'end', 'score', 'status', 'date', 'ips_id', 'ips_des', 'go',
         'pathway']
# Accumulates one list of fields per result line, across all files.
f_list = []
for f_name in os.listdir(interproscan_path):
    # Only non-hidden ``.txt`` files are result files.
    if not f_name.endswith(".txt") or f_name.startswith('.'):
        continue
    # Read through the handle managed by ``with`` so every file is closed;
    # opening the file a second time for csv.reader leaked a descriptor.
    with open(os.path.join(interproscan_path, f_name)) as f_handle:
        for row in csv.reader(f_handle, delimiter='\t'):
            if not row:
                # csv.reader yields [] for blank lines; they carry no match.
                continue
            # Pad rows that lack the optional trailing columns so the frame
            # can be built even when no row has all the fields.
            f_list.append(row + [None] * (len(names) - len(row)))
# All values are kept as read from the files (strings, or None when padded).
df = pd.DataFrame(f_list, columns=names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment