Created
July 24, 2025 15:21
-
-
Save libcrack/972c30f9296f4cc5399c58ad6b026ea6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from dotenv import load_dotenv | |
from langchain.document_loaders import PyPDFLoader | |
#from langchain.document_loaders import PDFMinerLoader | |
#from langchain.document_loaders import Docx2txtLoader | |
from langchain.llms import OpenAI | |
from langchain.chains.summarize import load_summarize_chain | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.prompts import PromptTemplate | |
import textwrap | |
import os | |
# Load variables from a local .env file (if one exists) into the process
# environment before looking the key up. load_dotenv is imported at the top
# of this file; by default it does not override variables that are already
# set (see the python-dotenv documentation).
load_dotenv()

# Index os.environ directly instead of calling os.environ.get(): .get()
# returns None for a missing variable and never raises, so an except clause
# around it can never run and the script would continue with LLM_KEY = None.
try:
    LLM_KEY = os.environ["OPENAI_API_KEY"]
except KeyError:
    print("ERROR: Cannot read OpenAI API key from the environment")
    # Bare raise keeps the original traceback intact.
    raise
# Candidate input documents; only the first entry is used below.
filenames = [
    "AMD Technical Update - Speculative Return Stack Overflow.pdf",
    "AMD Technical Guidance for Mitigating BTC (Branch Type Confusion).pdf",
]

# Resolve the home directory with expanduser() rather than reading $HOME
# directly: os.environ.get('HOME') yields None when the variable is unset,
# which would silently build a path that starts with "None/".
filename = os.path.join(os.path.expanduser("~"), "Downloads", filenames[0])
print(f"Reading file {filename}")

# NOTE: Docx2txtLoader(filename) or PDFMinerLoader(filename) were previously
# tried here as alternatives; their imports are commented out at the top of
# the file and must be re-enabled before switching loaders.
loader = PyPDFLoader(filename)

# `text` holds whatever load_and_split() returns for the PDF; it is the
# input handed to the summarization step later in the script.
text = loader.load_and_split()
# Prompt used by the summarization chain; {text} is its only input variable.
prompt_template = PromptTemplate(
    template="""Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:""",
    input_variables=["text"],
)

# Define the LLM: the OpenAI wrapper imported from langchain.llms, with
# temperature=0.
llm = OpenAI(temperature=0)

# Build the summarize chain with the custom prompt and run it on the loaded
# document chunks.
chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt_template)
summarized_text = chain.run(text)

# Wrap each line of the summary separately. With replace_whitespace=False,
# textwrap.fill() keeps embedded newlines but still counts them as ordinary
# characters when measuring line width, so wrapping a multi-line string in a
# single call breaks lines at odd places. The textwrap documentation
# recommends splitting into paragraphs first and wrapping each on its own.
summarized_text = "\n".join(
    textwrap.fill(
        paragraph,
        width=100,
        break_long_words=False,
        replace_whitespace=False,
    )
    for paragraph in summarized_text.splitlines()
)
print(summarized_text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment