Skip to content

Instantly share code, notes, and snippets.

@libcrack
Last active July 24, 2025 15:47
Show Gist options
  • Save libcrack/946685662c2591003335cd87fffd5f53 to your computer and use it in GitHub Desktop.
Save libcrack/946685662c2591003335cd87fffd5f53 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import CharacterTextSplitter
import textwrap
import os
import fnmatch
# Populate the process environment from a local .env file (if one exists)
# before looking up the key; load_dotenv is imported by this file but was
# never actually called.
load_dotenv()

# Index os.environ directly: a missing variable raises KeyError, whereas
# os.environ.get() returns None and would never reach the error handler.
try:
    LLM_KEY = os.environ["OPENAI_API_KEY"]
except KeyError:
    print("ERROR: Cannot read OpenAI API key from the environment")
    raise
def file_search_match(pattern, directory=None) -> list:
    """Return the entry names in a directory that match a shell-style pattern.

    Args:
        pattern: An ``fnmatch``-style glob such as ``"*.pdf"``.
        directory: Directory whose entries are listed. Defaults to the
            current working directory when omitted or ``None``.

    Returns:
        A list of bare entry names (not full paths) matching ``pattern``, in
        the order ``os.listdir`` produced them. Empty if nothing matches.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    if directory is None:
        directory = os.getcwd()
    # fnmatch.filter applies the same matching rules as fnmatch.fnmatch to
    # every name and keeps the ones that match.
    return fnmatch.filter(os.listdir(directory), pattern)
def main() -> None:
    """Summarize the PDF files in the current directory with an OpenAI LLM.

    Every file matching the patterns below is loaded with ``PyPDFLoader``;
    the text of all pages is concatenated, split into overlapping chunks and
    passed to a ``map_reduce`` summarize chain. The summary is printed to
    stdout.

    Raises:
        SystemExit: If no text could be extracted (e.g. no matching files).
    """
    size = 1000     # chunk_size passed to the text splitter
    overlap = 100   # chunk_overlap passed to the text splitter
    # NOTE(review): the previous (commented-out) patterns were "*.md" and
    # "*.txt", but the loader used here is PyPDFLoader, so PDFs are assumed
    # to be the intended input -- confirm.
    patterns = ["*.pdf"]

    page_texts = []
    for pattern in patterns:
        # file_search_match returns a list of names, so iterate it rather
        # than handing the whole list to the loader.
        for f in file_search_match(pattern):
            print(f"-> Reading {f}")
            pages = PyPDFLoader(f).load()
            page_texts.extend(page.page_content for page in pages)

    # Join once instead of repeated "+=", then normalize tabs to spaces.
    text = "".join(page_texts).replace('\t', ' ')
    if not text:
        raise SystemExit("ERROR: No PDF text found to summarize")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=size,
        chunk_overlap=overlap,
    )
    texts = text_splitter.create_documents([text])

    llm = OpenAI(temperature=0)
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summarized_text = chain.run(texts)
    print(summarized_text)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment