# Author: Eli Draizen (GitHub: edraizen)
from collections import defaultdict

import requests
from bs4 import BeautifulSoup

# Integer counters keyed on demand; a missing key starts at 0.
# NOTE(review): neither `num_sequences` nor `soup` is read in the visible
# part of this script -- the code that fills the counters is not shown here.
num_sequences = defaultdict(int)

with open(
    "/Users/edraizen/Dropbox/Membrane-Proteins/data/pfam-uniprot/pfam-polytopic_201601.txt"
) as pfam_f:
    # Discard the first line (presumably a header row -- confirm against the
    # data file). The builtin next() works on Python 2 and 3, and the default
    # keeps an empty file from raising StopIteration.
    next(pfam_f, None)
    for line in pfam_f:
        fields = line.split()
        if len(fields) < 2:
            # Blank or malformed line: there is no second column to read.
            continue
        # The second whitespace-separated column is substituted into the
        # family URL below.
        pfam = fields[1]
        r = requests.get("http://pfam.xfam.org/family/{}#tabview=tab3".format(pfam))
        # Name the parser explicitly so the parse does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(r.text, "html.parser")