Skip to content

Instantly share code, notes, and snippets.

@larkintuckerllc
Created October 27, 2025 11:37
Show Gist options
  • Save larkintuckerllc/fe74a2d2a0a13e373588bb2b8b714a0e to your computer and use it in GitHub Desktop.
Save larkintuckerllc/fe74a2d2a0a13e373588bb2b8b714a0e to your computer and use it in GitHub Desktop.
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
def main() -> None:
    """Index every ``.txt`` file under ``./data/`` into a persistent Chroma collection.

    Steps:
      1. Load all ``*.txt`` files (recursively) from ``./data/``.
      2. Split them into overlapping chunks of up to 1000 characters
         (200-character overlap), recording each chunk's start offset in
         its metadata.
      3. Embed the chunks through an OpenAI-compatible endpoint at
         ``http://localhost:8000/v1`` and store them in the
         ``example_collection`` collection persisted under
         ``./chroma_langchain_db``.
    """
    loader = DirectoryLoader("./data/", glob="**/*.txt", loader_cls=TextLoader)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    all_splits = text_splitter.split_documents(documents)
    if not all_splits:
        # Nothing to embed; avoid handing an empty batch to the vector store.
        print("No text chunks found under ./data/; nothing to index.")
        return

    embeddings = OpenAIEmbeddings(
        model="Qwen/Qwen3-Embedding-0.6B",
        openai_api_base="http://localhost:8000/v1",
        # NOTE(review): an empty key may be treated as unset by the client
        # (falling back to OPENAI_API_KEY) -- confirm against the server setup.
        openai_api_key="",
        # The model is not an OpenAI model: skip the client-side tiktoken
        # length check so raw text, not OpenAI-tokenizer token IDs, is sent
        # to the local embedding server.
        check_embedding_ctx_length=False,
    )

    vector_store = Chroma(
        collection_name="example_collection",
        embedding_function=embeddings,
        persist_directory="./chroma_langchain_db",
    )
    vector_store.add_documents(all_splits)
# Run the indexing pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment