Skip to content

Instantly share code, notes, and snippets.

@monk1337
Created April 7, 2021 06:43
Show Gist options
  • Save monk1337/547cc3e862f41adc3bfc7b93f7433a4a to your computer and use it in GitHub Desktop.
Save monk1337/547cc3e862f41adc3bfc7b93f7433a4a to your computer and use it in GitHub Desktop.
from metapub import PubMedFetcher
import pickle as pk
from tqdm import tqdm
import uuid
# Shared PubMed client; get_keywords() below issues all of its queries through it.
fetch = PubMedFetcher()
import os
# NOTE(review): the variable name ends in "KEYY" -- presumably "NCBI_API_KEY"
# was intended, and "abcd" looks like a placeholder value. It is also assigned
# after PubMedFetcher() has been constructed; confirm when metapub reads the
# key before relying on it.
os.environ["NCBI_API_KEYY"] = "abcd"
# Not referenced by any function visible in this file.
final_data = {}
def get_keywords(keyword, max_n):
    """Return the abstracts of the PubMed articles found for *keyword*.

    Args:
        keyword: Query handed to ``fetch.pmids_for_query``.
        max_n: Passed to the query as ``retmax``.

    Returns:
        A dict mapping each PMID returned by the query to the ``abstract``
        attribute of the article fetched for that PMID.
    """
    matching_ids = fetch.pmids_for_query(keyword, retmax=max_n)
    # One article lookup per PMID; an exception from any lookup propagates
    # to the caller.
    return {
        article_id: fetch.article_by_pmid(article_id).abstract
        for article_id in matching_ids
    }
def get_abstracts(abs_limit):
    """Fetch abstracts for every topic in ``topics.pk`` and pickle the results.

    The unique entries of the collection pickled in ``topics.pk`` are each
    passed to ``get_keywords``. Every topic's result is written to its own
    ``./data/<uuid4>.pk`` file as soon as it has been fetched, and the
    accumulated results are written to
    ``./data/all_data_in_one_<last topic>.pk`` once all topics are processed.

    Args:
        abs_limit: Forwarded to ``get_keywords`` as its ``max_n`` argument.

    Returns:
        A dict mapping each topic whose fetch succeeded to the value
        ``get_keywords`` returned for it. Empty when ``topics.pk`` holds no
        topics (no combined file is written in that case).
    """
    out_dir = './data/'
    # Without the directory every dump below fails: the per-topic ones are
    # swallowed by the except clause and the final one raises, discarding
    # everything that was fetched.
    os.makedirs(out_dir, exist_ok=True)

    # NOTE: unpickling can execute arbitrary code -- only load a topics.pk
    # that you produced yourself.
    with open('topics.pk', 'rb') as f:
        ks = pk.load(f)

    # De-duplicate while keeping first-seen order, so the processing order
    # (and therefore the name of the combined file) is reproducible.
    all_keywords = list(dict.fromkeys(ks))

    result = {}
    if not all_keywords:
        # Nothing to fetch; the combined file name needs a last topic.
        return result

    for i in tqdm(all_keywords):
        try:
            keywrs = get_keywords(i, abs_limit)
            result[i] = keywrs
            # Per-topic checkpoint so a later failure does not lose this fetch.
            with open(out_dir + str(uuid.uuid4()) + '.pk', 'wb') as f:
                pk.dump({i: keywrs}, f)
        except Exception as e:
            # Best effort: report the failure and continue with the next topic.
            print(e)

    # Path separators inside a topic (e.g. "HIV/AIDS") would otherwise point
    # the combined file at a non-existent sub-directory.
    suffix = str(all_keywords[-1]).replace('/', '_').replace('\\', '_')
    with open(out_dir + 'all_data_in_one_' + suffix + '.pk', 'wb') as f:
        pk.dump(result, f)
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment