Skip to content

Instantly share code, notes, and snippets.

@youtube-jocoding
Created August 26, 2023 08:54
Show Gist options
  • Save youtube-jocoding/a909a52065766328a9eb934b6b891e59 to your computer and use it in GitHub Desktop.
Save youtube-jocoding/a909a52065766328a9eb934b6b891e59 to your computer and use it in GitHub Desktop.
from dotenv import load_dotenv

# Populate the process environment from a local .env file before anything
# else runs. NOTE(review): presumably this supplies the OpenAI API key that
# OpenAIEmbeddings() needs below -- confirm against the project's .env file.
load_dotenv()

# Kept after load_dotenv() on purpose, so the environment is already set up
# if any of these modules reads it at import time.
from langchain.document_loaders import PyPDFLoader  # noqa: E402
from langchain.embeddings import OpenAIEmbeddings  # noqa: E402
from langchain.text_splitter import RecursiveCharacterTextSplitter  # noqa: E402

# Loader
# Use load() rather than load_and_split(): load_and_split() runs the loader's
# default text splitter first, so the documents would be chunked twice (once
# with the default settings, then again below). All chunking is done by the
# explicit splitter configured in the next step.
loader = PyPDFLoader("unsu.pdf")
pages = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=300,
    chunk_overlap=20,
    # Measure chunk length in characters.
    length_function=len,
    # Treat separators as literal strings, not regular expressions.
    is_separator_regex=False,
)
texts = text_splitter.split_documents(pages)

# Embedding
# Only the embedding client is created here; nothing in this script embeds
# `texts` yet.
embeddings_model = OpenAIEmbeddings()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment