Last active
August 4, 2023 13:48
-
-
Save victormurcia/22a5aabd07892fee06529ec4c55d3516 to your computer and use it in GitHub Desktop.
Get information for a given search term using PubMed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio import Entrez | |
from Bio.Medline import parse | |
from io import StringIO | |
import pandas as pd | |
def fetch_pubmed_data(search_term, email, retmax=100):
    """
    Fetches data from PubMed related to a specific search term.

    Runs an Entrez ``esearch`` for ``search_term``, then an ``efetch`` of the
    returned ids in MEDLINE text format, and converts every parsed record
    into one row of the result.

    Parameters:
    search_term (str): The term to search for in the PubMed database.
    email (str): The email address to be used for accessing PubMed's API.
                 It is assigned to ``Entrez.email``, i.e. it is set on the
                 ``Entrez`` module rather than passed per request.
    retmax (int, optional): The maximum number of results to retrieve. Defaults to 100.

    Returns:
    pandas.DataFrame: A DataFrame containing the details of the PubMed entries, including
                      PMID, Title, Authors, Abstract, Publication Date, Journal, Volume,
                      Issue, Pages, Affiliation, Article ID, E-Publication Date, Place of
                      Publication, Journal Abbreviation, Language, Publication Type, and MeSH Terms.
                      A field missing from a record is filled with "N/A". When the search
                      returns no ids, an empty DataFrame with the same columns is returned.
    """
    # (output column, MEDLINE tag, join the tag's values with ", ").
    # The order of this table is the column order of the returned frame.
    fields = [
        ("PMID", "PMID", False),
        ("Title", "TI", False),
        ("Authors", "AU", True),
        ("Abstract", "AB", False),
        ("Publication Date", "DP", False),
        ("Journal", "JT", False),
        ("Volume", "VI", False),
        ("Issue", "IP", False),
        ("Pages", "PG", False),
        # NOTE(review): "AD" is stored as returned by the parser, not joined;
        # confirm whether the parser yields a string or a list for this tag.
        ("Affiliation", "AD", False),
        ("Article ID", "AID", True),
        ("E-Publication Date", "DEP", False),
        ("Place of Publication", "PL", False),
        ("Journal Abbreviation", "TA", False),
        ("Language", "LA", True),
        ("Publication Type", "PT", True),
        ("MeSH Terms", "MH", True),
    ]
    columns = [column for column, _tag, _joined in fields]

    Entrez.email = email

    # Step 1: search for matching ids. Close the handle even if parsing fails.
    handle = Entrez.esearch(db="pubmed", term=search_term, retmax=retmax)
    try:
        search_result = Entrez.read(handle)
    finally:
        handle.close()

    idlist = search_result["IdList"]
    if not idlist:
        # Nothing matched: skip the fetch request entirely instead of asking
        # the service for an empty id list.
        return pd.DataFrame(columns=columns)

    # Step 2: download the matching records as MEDLINE-formatted text.
    handle = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")
    try:
        medline_text = handle.read()
    finally:
        handle.close()

    # Step 3: collect plain dicts and build the frame once at the end;
    # concatenating onto the frame per record re-copies it every iteration.
    rows = []
    for entry in parse(StringIO(medline_text)):
        row = {}
        for column, tag, joined in fields:
            if joined:
                row[column] = ", ".join(entry.get(tag, ["N/A"]))
            else:
                row[column] = entry.get(tag, "N/A")
        rows.append(row)

    return pd.DataFrame(rows, columns=columns)
# Example usage: query PubMed for one term and print the first rows of the
# resulting DataFrame.
search_term = "Chronic Inflammatory Demyelinating Polyneuropathy (CIDP)"
# NOTE(review): the literal below is not a valid email address (presumably
# redacted); supply a real contact address before running.
email = "[email protected]"
df = fetch_pubmed_data(search_term=search_term, email=email)
print(df.head())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment