Skip to content

Instantly share code, notes, and snippets.

@larkintuckerllc
Last active October 24, 2025 10:18
Show Gist options
  • Save larkintuckerllc/5bdb8f40322605394156d4e61ff5b54c to your computer and use it in GitHub Desktop.
Save larkintuckerllc/5bdb8f40322605394156d4e61ff5b54c to your computer and use it in GitHub Desktop.
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
def _chunk_ids(chunks):
    """Return one deterministic, unique ID string per chunk.

    The base ID is ``"<source>:<start_index>"`` read from each chunk's
    ``metadata`` mapping (missing keys fall back to ``""`` and ``-1``).
    If the same base ID occurs more than once, later occurrences get a
    ``#<n>`` suffix so a single batch never contains duplicate IDs.
    """
    occurrences = {}
    ids = []
    for chunk in chunks:
        source = chunk.metadata.get("source", "")
        start_index = chunk.metadata.get("start_index", -1)
        base = f"{source}:{start_index}"
        count = occurrences.get(base, 0)
        occurrences[base] = count + 1
        ids.append(base if count == 0 else f"{base}#{count}")
    return ids


def main():
    """Index the ``.txt`` files under ``./data/`` into a persisted Chroma store.

    Loads every file matching ``**/*.txt`` below ``./data/``, splits the
    documents with ``chunk_size=1000`` and ``chunk_overlap=200``, embeds the
    chunks with the Ollama ``llama3`` model and writes them to the
    ``example_collection`` collection persisted in ``./chroma_langchain_db``.

    Chunks are stored under deterministic IDs (see ``_chunk_ids``) so that
    running the script again over unchanged files overwrites the existing
    entries instead of appending a second copy of every chunk.
    """
    loader = DirectoryLoader("./data/", glob="**/*.txt", loader_cls=TextLoader)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        # Records each chunk's offset in metadata["start_index"]; the
        # deterministic IDs below are built from it.
        add_start_index=True,
    )
    all_splits = text_splitter.split_documents(documents)

    embeddings = OllamaEmbeddings(model="llama3")
    vector_store = Chroma(
        collection_name="example_collection",
        embedding_function=embeddings,
        persist_directory="./chroma_langchain_db",
    )

    if not all_splits:
        # An empty batch has nothing to embed; skip the write instead of
        # handing an empty ID list to the vector store.
        print("No text chunks found under ./data/; nothing to index.")
        return

    vector_store.add_documents(all_splits, ids=_chunk_ids(all_splits))
# Run the ingestion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment