Created
September 25, 2023 23:40
-
-
Save corneliusroemer/5f294dbe753ef611c9800465b484d357 to your computer and use it in GitHub Desktop.
Automatically generate faculty profiles using PubMed abstracts and GPT-4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import sys
from math import e

import openai
from Bio import Entrez
def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
    """Collect abstracts of PubMed papers authored by ``faculty_name``.

    Runs an ``esearch`` for ``"{faculty_name}[Author]"`` and then retrieves
    all matching records with a single ``efetch`` request.

    :param faculty_name: Author name used in the PubMed query.
    :param source: Literature source. Only ``"Pubmed"`` is handled; any other
        value yields an empty list.
    :param num_papers: Maximum number of papers to request (``retmax``).
    :return: List of abstract strings, one per paper that has an abstract.
        Papers without an abstract are skipped. The sections of a structured
        abstract are joined with single spaces.
    """
    abstracts = []
    if source != "Pubmed":
        return abstracts

    # NOTE(review): this address looks like a redaction placeholder rather
    # than a real mailbox; NCBI asks for a valid contact e-mail -- replace it
    # before running against the live service.
    Entrez.email = "[email protected]"

    handle = Entrez.esearch(
        db="pubmed",
        term=f"{faculty_name}[Author]",
        retmax=num_papers,
        # NOTE(review): the function name says "latest" but results are
        # ranked by relevance; confirm whether "pub_date" was intended.
        sort="relevance",
        retmode="xml",
    )
    try:
        results = Entrez.read(handle)
    finally:
        # Close the handle even when parsing fails.
        handle.close()

    paper_ids = list(results["IdList"])
    if not paper_ids:
        return abstracts

    # Fetch every record in one request instead of one round-trip per paper.
    handle = Entrez.efetch(db="pubmed", id=",".join(paper_ids), retmode="xml")
    try:
        papers = Entrez.read(handle)
    finally:
        handle.close()

    for article in papers.get("PubmedArticle", []):
        try:
            sections = article["MedlineCitation"]["Article"]["Abstract"][
                "AbstractText"
            ]
        except KeyError:
            # Record has no abstract; skip it.
            continue
        # Structured abstracts come as several sections; keep all of them
        # rather than only the first.
        text = " ".join(str(section) for section in sections)
        if text:
            abstracts.append(text)
    return abstracts
def generate_summary(abstracts, name, api_key, modeltype="chat"):
    """Ask the OpenAI API for a short faculty profile of ``name``.

    :param abstracts: Abstract texts used as background; the list is
        interpolated into the prompt as-is.
    :param name: Name of the faculty member the profile is about.
    :param api_key: OpenAI API key. When falsy, the key already configured on
        the ``openai`` module is left untouched.
    :param modeltype: ``"completion"`` uses ``openai.Completion`` with
        ``gpt-3.5-turbo-instruct``; any other value (default ``"chat"``) uses
        ``openai.ChatCompletion`` with ``gpt-4``.
    :return: The raw API response object, not the extracted text. ``main``
        reads chat responses via ``["choices"][0]["message"]["content"]``.
        NOTE(review): completion-type responses presumably carry the text
        under ``["choices"][0]["text"]`` instead -- confirm before switching
        ``modeltype``.
    """
    # Do not overwrite an already-configured key with None.
    if api_key:
        openai.api_key = api_key

    if modeltype == "completion":
        completion = openai.Completion.create(
            prompt=f"You are the head of a university department. Please produce a faculty profile of around 150 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Please start now:",
            model="gpt-3.5-turbo-instruct",
            max_tokens=500,
        )
    else:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": "You are the head of a university department. If you don't know something, don't guess. Just say you don't know. You tend to be conservative, you don't exaggerate. You are not a salesman. You write succinctly.",
                },
                {
                    "role": "user",
                    "content": f"Please produce a faculty profile of around 100 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. Just refer to them by name. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Don't go into too much detail as research areas often change. Everyone knows that this is a faculty profile, definitely do no say things like '{name} is a faculty member'. Start with '{name} specializes in' or '{name} is an expert in'.",
                },
            ],
            max_tokens=500,
        )
    return completion
def main():
    """Command-line entry point: fetch abstracts and print a generated profile.

    Parses the arguments, retrieves abstracts for the given faculty member,
    optionally echoes them (``--verbose``), requests a profile from the
    OpenAI API and prints the returned text. Exits with an error message when
    no abstracts were found, since there is nothing to base a profile on.
    """
    parser = argparse.ArgumentParser(
        description="Fetch the latest papers' abstracts for a faculty from PubMed."
    )
    parser.add_argument("faculty_name", help="Name of the faculty member.")
    parser.add_argument(
        "--source",
        choices=["Pubmed"],
        default="Pubmed",
        help="Source to fetch papers from. Currently supports only 'Pubmed'.",
    )
    parser.add_argument(
        "--num-papers",
        type=int,
        default=5,
        help="Number of papers to retrieve. Default is 5.",
    )
    parser.add_argument(
        "--api-key",
        # Fall back to the environment so the secret need not appear on the
        # command line (and thus in shell history / process listings).
        default=os.environ.get("OPENAI_API_KEY"),
        help="OpenAI API key. Defaults to the OPENAI_API_KEY environment variable.",
    )
    parser.add_argument("--verbose", action="store_true", help="Print verbose output.")
    args = parser.parse_args()

    abstracts = search_latest_papers(args.faculty_name, args.source, args.num_papers)
    if not abstracts:
        # Without abstracts the model could only guess; stop before spending
        # an API call on an ungrounded profile.
        sys.exit(f"No abstracts found for {args.faculty_name!r}; nothing to summarise.")

    if args.verbose:
        for idx, abstract in enumerate(abstracts, 1):
            print(f"Abstract {idx}: {abstract}\n")

    summary = generate_summary(abstracts, args.faculty_name, args.api_key)
    # The chat response keeps the generated text under the first choice.
    extraction = summary["choices"][0]["message"]["content"]
    print(extraction)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment