@htlin222
Created September 2, 2023 13:29
In Zotero, export the collection (including notes) to a CSV file, then run this script. It uses the PubMed API to fetch each article's abstract, asks ChatGPT to convert the abstract into bullet points, and finally writes everything out as a Marp markdown slide deck.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# title: getab
# date: "2023-09-02"
# author: Hsieh-Ting Lin, the Lizard 🦎
import re
import subprocess
import sys
import openai
import pandas as pd
import requests
from Bio import Entrez

def respond(prompt):
    """Send a prompt to the OpenAI completion API and return the generated text."""
    openai.api_key = "YOUR_API_KEY"
    completions = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=1000,
        n=1,
        stop=None,
        temperature=0.5,
    )
    message = completions.choices[0].text
    return message

def doi_to_pmid(doi):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {"db": "pubmed", "term": f"{doi}[DOI]", "retmode": "json"}
    response = requests.get(base_url, params=params)
    response_json = response.json()
    if "esearchresult" in response_json and "idlist" in response_json["esearchresult"]:
        pmids = response_json["esearchresult"]["idlist"]
        if pmids:
            # Return the first PMID if there are multiple matches (which is rare)
            return pmids[0]
    return None

def get_abstract_from_pmid(pmid):
    """Fetch the abstract text for a PMID via Entrez efetch."""
    Entrez.email = "[email protected]"
    handle = Entrez.efetch(db="pubmed", id=pmid, retmode="xml")
    records = Entrez.read(handle)
    try:
        abstract = records["PubmedArticle"][0]["MedlineCitation"]["Article"]["Abstract"]["AbstractText"][0]
    except (KeyError, IndexError):
        abstract = "Abstract not available"
    return abstract

def get_abstract(doi):
    pmid = doi_to_pmid(doi)
    print(pmid)
    if pmid:
        return get_abstract_from_pmid(pmid)
    else:
        print("No PMID found for this DOI")
        return None

def clean_text(string):
    # Split the string by lines
    lines = string.split("\n")
    # Filter out lines that don't start with "- "
    filtered_lines = [line for line in lines if line.startswith("- ")]
    # Add a "\n\n---\n\n" separator every 3 bullet points
    grouped_lines = []
    for i in range(0, len(filtered_lines), 3):
        grouped_lines.extend(filtered_lines[i:i + 3])
        if i + 3 < len(filtered_lines):
            grouped_lines.append("\n\n---\n\n")
    # Join the lines back into a string and return
    return "\n".join(grouped_lines)

def extract_plain_text_line_by_line(string: str) -> str:
    # Split by closing tags
    lines = re.split(r"</[^>]+>", string)
    # Remove all other HTML tags and strip each line
    lines = [
        re.sub(r"<[^>]+>", "", line).strip() for line in lines
        if re.sub(r"<[^>]+>", "", line).strip()
    ]
    combined = "- " + "\n- ".join(lines)
    combined = clean_text(combined)
    return combined

if __name__ == "__main__":
    # Check that the correct number of arguments is passed
    if len(sys.argv) != 3:
        print("Usage: script_name.py -f filename.csv")
        sys.exit(1)
    flag, filename = sys.argv[1], sys.argv[2]
    # Check that the correct flag is used
    if flag != "-f":
        print("Usage: script_name.py -f filename.csv")
        sys.exit(1)
    # Read the CSV exported from Zotero
    df = pd.read_csv(filename)
    doi_list = df["DOI"].dropna().unique().tolist()
    markdown_content = "# Title Page\n\n---\n\n"
    for doi in doi_list:
        # Fetch the title and notes for this DOI from the DataFrame
        title = df[df["DOI"] == doi]["Title"].iloc[0]
        note = df[df["DOI"] == doi]["Notes"].iloc[0]
        if isinstance(note, str) and len(note) > 0:
            cleaned_note = (f"## {title}\n\n### Highlights\n\n" +
                            extract_plain_text_line_by_line(note) +
                            "\n\n---\n\n")
        else:
            cleaned_note = ""
        print(title)
        # Get the abstract using the DOI
        abstract = get_abstract(doi)
        if not abstract:
            abstract = "Abstract not available"
        response = respond(
            f"Convert the following abstract into markdown bullet points, "
            f"like '- content\n- content':\n{abstract}"
        )
        lines = [line for line in response.splitlines() if line.strip()]
        bullet_points = "\n".join(lines)
        bullet_points = clean_text(bullet_points)
        # Fetch the publication year for the DOI
        year = df[df["DOI"] == doi]["Publication Year"].iloc[0]
        # Format the information in the desired markdown structure
        markdown_content += f"{cleaned_note}## {title}\n\n### Summary\n\n{bullet_points}\n\n<!-- {abstract} -->\n\n> {title} ({year}). DOI: {doi}\n\n---\n\n"
    markdown_content += "\n\n## Thank You for Listening\n"
    # Export the markdown content to a .md file
    with open("output.md", "w") as file:
        file.write(markdown_content)
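
For reference, the script above expects the Zotero CSV export to contain at least the columns DOI, Title, Notes, and Publication Year. A minimal sketch of a compatible input file, using placeholder values rather than a real paper:

import pandas as pd

# Hypothetical example row; the column names match what csv_to_marp.py reads.
sample = pd.DataFrame({
    "Title": ["An Example Trial of Drug X"],
    "DOI": ["10.1000/example.doi"],
    "Notes": ["<p>Key sentence highlighted in Zotero</p>"],
    "Publication Year": [2023],
})
sample.to_csv("my_collection.csv", index=False)
# Then run: python csv_to_marp.py -f my_collection.csv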
htlin222 commented Sep 2, 2023

Automatically generating slides

Introduction

🪴 I have shared before that when presenting a journal article, it is best to at least skim every reference so the presentation is more substantial. But recently, while preparing journal readings, I noticed I was spending far too much time as a porter, endlessly copying and pasting material from PDFs into slides.

🪴 Just organizing these papers, reading them, summarizing them, and listing the key points is already enough internet for one day; having to turn them into slides on top of that is poor value for the effort and feels like a waste of life. Better to endure ten awkward minutes and sleep three more hours. So I have long been thinking about how to automate as much of this workflow as possible.

Method

🪴 In Zotero (a reference manager), install the muisedestiny/zotero-reference plugin. It automatically lists a paper's citations, so I can add them all to a collection in one go. I then work through those papers, highlighting the sentences I consider important in Zotero. Zotero has a nice feature that adds your highlights to the item's notes as you go. Finally, I export the collection as a CSV file.

🪴 This CSV file is the distilled essence: it contains each paper's title, DOI, and my note contents. I then run python csv_to_marp.py -f "my_collection.csv", and the script generates the slides for me (all of the code was written for me by ChatGPT):

  1. Take each article's DOI and use Python's PubMed API (Biopython's Entrez) to fetch the full abstract.
  2. Feed the abstract to ChatGPT with a prompt asking it to turn the text into bullet points.
  3. Clean the output and paginate it with a regex so no single slide overflows.
  4. Add my own notes as bullet points.
  5. Convert everything to markdown that follows Marp's conventions; there are not really many rules, it is ordinary markdown with --- as the page break.
  6. Feed the markdown to Marp (a markdown-to-slides tool; TL;DR: plain text in, slides out), as sketched below.
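
A minimal sketch of that last step, assuming the Marp CLI (@marp-team/marp-cli) is installed and on the PATH; the output filename is just an example:

import subprocess

# Render the markdown written by csv_to_marp.py into HTML slides with the Marp CLI.
# "slides.html" is an arbitrary name; Marp infers the output format from the extension.
subprocess.run(["marp", "output.md", "-o", "slides.html"], check=True)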

Result

🪴 The result looks roughly like the screenshot. This saves me the mindless porter time; at most I still drop in a few important figures and tables. In the time it takes to visit the bathroom, I get a deck with the titles and bulleted summaries of 30 papers, which genuinely puts me in a good mood.

Discussion

🪴 This approach does have some technical barriers, though. You first need to know what markdown is, how to manage references with Zotero, why DOIs are useful, how to call the PubMed and ChatGPT APIs from Python, how to read a CSV, and how to generate slides with Marp.

🪴 Other possible ways to play with this: use PubMed's advanced search for, say, "articles published in NEJM in the past week" or "recently published trials on some topic", save all the hits, run them through the pipeline above, and then use edge-tts (which I have shared before) to have Microsoft's voice read the result into an mp3. That way I could hack together a weekly Chinese-language podcast.
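
A rough sketch of that narration step, assuming the edge-tts Python package; the voice and file names below are illustrative choices, not part of the original workflow:

import asyncio

import edge_tts


async def summary_to_mp3(text: str, outfile: str = "weekly_digest.mp3") -> None:
    # zh-TW-HsiaoChenNeural is one of the Taiwanese Mandarin voices offered by edge-tts;
    # swap in whichever voice `edge-tts --list-voices` shows on your machine.
    communicate = edge_tts.Communicate(text, voice="zh-TW-HsiaoChenNeural")
    await communicate.save(outfile)


# For example, narrate the generated summary (stripping the markdown first would sound nicer).
with open("output.md", encoding="utf-8") as f:
    asyncio.run(summary_to_mp3(f.read()))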

🪴 Medical and academic institutions are welcome to reach out and collaborate 🙏
