corneliusroemer · September 26, 2023 02:24
diff --git a/auto-faculty-profile.py b/auto-faculty-profile.py
 import argparse
 from math import e
 from Bio import Entrez
 import openai

 def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
    """
    Look up papers by an author and collect their abstracts and author surnames.

    :param faculty_name: Author name; searched as ``"<faculty_name>[Author]"``.
    :param source: Literature source. Only ``"Pubmed"`` is handled; any other
        value yields an empty list.
    :param num_papers: Maximum number of search hits to request.
    :return: List of dicts with keys ``'abstract'`` (all abstract sections
        joined with a space, as a plain ``str``) and ``'authors'`` (last names
        in author order). Papers lacking an abstract or an author list are
        skipped.
    """
    papers_info = []
    if source != "Pubmed":
        return papers_info

    Entrez.email = "[email protected]"
    # NOTE(review): hits are ranked by relevance, not by date, although the
    # function name says "latest" - confirm which ordering is intended.
    handle = Entrez.esearch(
        db="pubmed",
        term=f"{faculty_name}[Author]",
        retmax=num_papers,
        sort="relevance",
        retmode="xml",
    )
    try:
        results = Entrez.read(handle)
    finally:
        # Release the connection even when parsing fails.
        handle.close()

    for paper_id in results["IdList"]:
        handle = Entrez.efetch(db="pubmed", id=paper_id, retmode="xml")
        try:
            papers = Entrez.read(handle)
        finally:
            handle.close()

        try:
            article = papers["PubmedArticle"][0]["MedlineCitation"]["Article"]
            sections = article["Abstract"]["AbstractText"]
            authors = article["AuthorList"]
        except (IndexError, KeyError):
            # Record has no article, abstract or author list: nothing usable.
            continue
        if not sections:
            continue

        # An abstract may be split into several sections; keep all of them
        # rather than only the first, and flatten to plain text.
        abstract = " ".join(str(section) for section in sections)
        # Author entries without a last name are dropped individually instead
        # of discarding the whole paper.
        author_names = [
            str(author["LastName"]) for author in authors if "LastName" in author
        ]
        papers_info.append({'abstract': abstract, 'authors': author_names})

    return papers_info


 def generate_summary(abstracts, name, api_key, model):
    """
    Ask the OpenAI API to write a short faculty profile from recent abstracts.

    :param abstracts: Abstracts used as background; interpolated into the
        prompt as-is.
    :param name: Full name of the faculty member the profile is about.
    :param api_key: OpenAI API key. When ``None``, the key already configured
        on the ``openai`` module is left untouched.
    :param model: Model name. ``"gpt-3.5-turbo-instruct"`` is sent to the
        completion endpoint; any other value is sent to the chat endpoint.
    :return: The raw API response object (the caller extracts the text).
    """
    # Assigning None would overwrite a key the library may already hold
    # (presumably picked up from the environment - verify for the installed
    # openai version), so only override when a key was actually supplied.
    if api_key is not None:
        openai.api_key = api_key

    if model == "gpt-3.5-turbo-instruct":
        return openai.Completion.create(
            prompt=f"You are the head of a university department. Please produce a faculty profile of around 150 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Please start now:",
            model=model,
            max_tokens=500,
        )

    # Honour the requested model instead of always falling back to "gpt-4".
    return openai.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are the head of a university department. If you don't know something, don't guess. Just say you don't know. You tend to be conservative, you don't exaggerate. You are not a salesman. You write succinctly.",
            },
            {
                "role": "user",
                "content": f"Please produce a faculty profile of around 100 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. Just refer to them by name. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Don't go into too much detail as research areas often change. Everyone knows that this is a faculty profile, definitely do no say things like '{name} is a faculty member'. Start with '{name} specializes in' or '{name} is an expert in'.",
            },
        ],
        max_tokens=500,
    )


 def main():
    """
    Command-line entry point.

    Fetches papers for the named faculty member, keeps those where the last
    name appears among the first ``--relevant-first`` or last
    ``--relevant-last`` authors, requests a profile from the OpenAI API and
    prints it.
    """
    parser = argparse.ArgumentParser(
        description="Fetch the latest papers' abstracts for a faculty from PubMed."
    )
    parser.add_argument("first", help="First Name of the faculty member.")
    parser.add_argument("last", help="Last Name of the faculty member.")
    parser.add_argument(
        "--source",
        choices=["Pubmed"],
        default="Pubmed",
        help="Source to fetch papers from. Currently supports only 'Pubmed'.",
    )
    parser.add_argument(
        "--num-papers",
        type=int,
        default=5,
        help="Number of papers to retrieve. Default is 5.",
    )
    parser.add_argument("--api-key", help="OpenAI API key.")
    parser.add_argument("--verbose", action="store_true", help="Print verbose output.")
    parser.add_argument("--model", help="OpenAI model to use.", default="gpt-3.5-turbo")
    # type=int is required: without it a user-supplied value arrives as a
    # string and cannot be used as a slice bound.
    parser.add_argument(
        "--relevant-first",
        type=int,
        default=2,
        help="Number of first author positions that are relevant.",
    )
    parser.add_argument(
        "--relevant-last",
        type=int,
        default=2,
        help="Number of last author positions that are relevant.",
    )

    args = parser.parse_args()
    if args.relevant_first < 0 or args.relevant_last < 0:
        parser.error("--relevant-first and --relevant-last must be non-negative.")

    full_name = f"{args.first} {args.last}"
    papers = search_latest_papers(full_name, args.source, args.num_papers)

    # Keep only abstracts where the faculty member holds one of the leading
    # or trailing author positions.
    relevant_abstracts = []
    for paper in papers:
        authors = paper['authors']
        leading = authors[:args.relevant_first]
        # authors[-0:] is the whole list, so a count of zero needs its own case.
        trailing = authors[-args.relevant_last:] if args.relevant_last else []
        is_relevant = args.last in leading or args.last in trailing
        if is_relevant:
            relevant_abstracts.append(paper['abstract'])
        if args.verbose:
            label = "Relevant" if is_relevant else "Ignoring"
            print(f"{label} abstract: {paper['abstract']}\n")

    summary = generate_summary(relevant_abstracts, full_name, args.api_key, model=args.model)

    # Chat responses carry the text under message.content; completion
    # responses (the instruct model) carry it under text.
    choice = summary["choices"][0]
    if "message" in choice:
        extraction = choice["message"]["content"]
    else:
        extraction = choice["text"]

    print(extraction)


 if __name__ == "__main__":
    main()
	import argparse
	from math import e
	from Bio import Entrez
	import openai

	def search_latest_papers(faculty_name, source="Pubmed", num_papers=5):
	    """
	    Look up papers by an author and collect their abstracts and author surnames.

	    :param faculty_name: Author name; searched as ``"<faculty_name>[Author]"``.
	    :param source: Literature source. Only ``"Pubmed"`` is handled; any other
	        value yields an empty list.
	    :param num_papers: Maximum number of search hits to request.
	    :return: List of dicts with keys ``'abstract'`` (all abstract sections
	        joined with a space, as a plain ``str``) and ``'authors'`` (last names
	        in author order). Papers lacking an abstract or an author list are
	        skipped.
	    """
	    papers_info = []
	    if source != "Pubmed":
	        return papers_info

	    Entrez.email = "[email protected]"
	    # NOTE(review): hits are ranked by relevance, not by date, although the
	    # function name says "latest" - confirm which ordering is intended.
	    handle = Entrez.esearch(
	        db="pubmed",
	        term=f"{faculty_name}[Author]",
	        retmax=num_papers,
	        sort="relevance",
	        retmode="xml",
	    )
	    try:
	        results = Entrez.read(handle)
	    finally:
	        # Release the connection even when parsing fails.
	        handle.close()

	    for paper_id in results["IdList"]:
	        handle = Entrez.efetch(db="pubmed", id=paper_id, retmode="xml")
	        try:
	            papers = Entrez.read(handle)
	        finally:
	            handle.close()

	        try:
	            article = papers["PubmedArticle"][0]["MedlineCitation"]["Article"]
	            sections = article["Abstract"]["AbstractText"]
	            authors = article["AuthorList"]
	        except (IndexError, KeyError):
	            # Record has no article, abstract or author list: nothing usable.
	            continue
	        if not sections:
	            continue

	        # An abstract may be split into several sections; keep all of them
	        # rather than only the first, and flatten to plain text.
	        abstract = " ".join(str(section) for section in sections)
	        # Author entries without a last name are dropped individually instead
	        # of discarding the whole paper.
	        author_names = [
	            str(author["LastName"]) for author in authors if "LastName" in author
	        ]
	        papers_info.append({'abstract': abstract, 'authors': author_names})

	    return papers_info


	def generate_summary(abstracts, name, api_key, model):
	    """
	    Ask the OpenAI API to write a short faculty profile from recent abstracts.

	    :param abstracts: Abstracts used as background; interpolated into the
	        prompt as-is.
	    :param name: Full name of the faculty member the profile is about.
	    :param api_key: OpenAI API key. When ``None``, the key already configured
	        on the ``openai`` module is left untouched.
	    :param model: Model name. ``"gpt-3.5-turbo-instruct"`` is sent to the
	        completion endpoint; any other value is sent to the chat endpoint.
	    :return: The raw API response object (the caller extracts the text).
	    """
	    # Assigning None would overwrite a key the library may already hold
	    # (presumably picked up from the environment - verify for the installed
	    # openai version), so only override when a key was actually supplied.
	    if api_key is not None:
	        openai.api_key = api_key

	    if model == "gpt-3.5-turbo-instruct":
	        return openai.Completion.create(
	            prompt=f"You are the head of a university department. Please produce a faculty profile of around 150 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Please start now:",
	            model=model,
	            max_tokens=500,
	        )

	    # Honour the requested model instead of always falling back to "gpt-4".
	    return openai.ChatCompletion.create(
	        model=model,
	        messages=[
	            {
	                "role": "system",
	                "content": "You are the head of a university department. If you don't know something, don't guess. Just say you don't know. You tend to be conservative, you don't exaggerate. You are not a salesman. You write succinctly.",
	            },
	            {
	                "role": "user",
	                "content": f"Please produce a faculty profile of around 100 words for {name}. It shouldn't be too technical. Useful for general educated audience. These are some recent abstracts {abstracts}. Only use these abstracts for background. It's not important what the exact research is. More about the field and general topics. Make sure you don't assume someone is a Dr. or Professor. Just refer to them by name. And don't be too positive. Understatement is better, be very conservative in calling someone 'prominent'. If a paper doesn't fit the others, assume it's a different researcher and ignore. Don't go into too much detail as research areas often change. Everyone knows that this is a faculty profile, definitely do no say things like '{name} is a faculty member'. Start with '{name} specializes in' or '{name} is an expert in'.",
	            },
	        ],
	        max_tokens=500,
	    )


	def main():
	    """
	    Command-line entry point.

	    Fetches papers for the named faculty member, keeps those where the last
	    name appears among the first ``--relevant-first`` or last
	    ``--relevant-last`` authors, requests a profile from the OpenAI API and
	    prints it.
	    """
	    parser = argparse.ArgumentParser(
	        description="Fetch the latest papers' abstracts for a faculty from PubMed."
	    )
	    parser.add_argument("first", help="First Name of the faculty member.")
	    parser.add_argument("last", help="Last Name of the faculty member.")
	    parser.add_argument(
	        "--source",
	        choices=["Pubmed"],
	        default="Pubmed",
	        help="Source to fetch papers from. Currently supports only 'Pubmed'.",
	    )
	    parser.add_argument(
	        "--num-papers",
	        type=int,
	        default=5,
	        help="Number of papers to retrieve. Default is 5.",
	    )
	    parser.add_argument("--api-key", help="OpenAI API key.")
	    parser.add_argument("--verbose", action="store_true", help="Print verbose output.")
	    parser.add_argument("--model", help="OpenAI model to use.", default="gpt-3.5-turbo")
	    # type=int is required: without it a user-supplied value arrives as a
	    # string and cannot be used as a slice bound.
	    parser.add_argument(
	        "--relevant-first",
	        type=int,
	        default=2,
	        help="Number of first author positions that are relevant.",
	    )
	    parser.add_argument(
	        "--relevant-last",
	        type=int,
	        default=2,
	        help="Number of last author positions that are relevant.",
	    )

	    args = parser.parse_args()
	    if args.relevant_first < 0 or args.relevant_last < 0:
	        parser.error("--relevant-first and --relevant-last must be non-negative.")

	    full_name = f"{args.first} {args.last}"
	    papers = search_latest_papers(full_name, args.source, args.num_papers)

	    # Keep only abstracts where the faculty member holds one of the leading
	    # or trailing author positions.
	    relevant_abstracts = []
	    for paper in papers:
	        authors = paper['authors']
	        leading = authors[:args.relevant_first]
	        # authors[-0:] is the whole list, so a count of zero needs its own case.
	        trailing = authors[-args.relevant_last:] if args.relevant_last else []
	        is_relevant = args.last in leading or args.last in trailing
	        if is_relevant:
	            relevant_abstracts.append(paper['abstract'])
	        if args.verbose:
	            label = "Relevant" if is_relevant else "Ignoring"
	            print(f"{label} abstract: {paper['abstract']}\n")

	    summary = generate_summary(relevant_abstracts, full_name, args.api_key, model=args.model)

	    # Chat responses carry the text under message.content; completion
	    # responses (the instruct model) carry it under text.
	    choice = summary["choices"][0]
	    if "message" in choice:
	        extraction = choice["message"]["content"]
	    else:
	        extraction = choice["text"]

	    print(extraction)


	if __name__ == "__main__":
	    main()