Skip to content

Instantly share code, notes, and snippets.

@larkintuckerllc
Last active October 24, 2025 10:18
Show Gist options
  • Save larkintuckerllc/0ea27bc49b8a25e6d8c8c4204549455f to your computer and use it in GitHub Desktop.
Save larkintuckerllc/0ea27bc49b8a25e6d8c8c4204549455f to your computer and use it in GitHub Desktop.
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
def main():
    """Load text files from ``./data/`` and split them into sub-documents.

    Files matching the glob ``**/*.txt`` under ``./data/`` are loaded with
    ``TextLoader`` via a ``DirectoryLoader``. The loaded documents are then
    passed through a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000``, ``chunk_overlap=200`` and ``add_start_index=True``,
    and the number of resulting sub-documents is printed to stdout.

    Returns:
        None. The only output is the printed summary line.
    """
    # Gather the source documents from disk.
    source_docs = DirectoryLoader(
        "./data/",
        glob="**/*.txt",
        loader_cls=TextLoader,
    ).load()

    # Splitter settings are kept together so the chunking policy is easy to see.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )

    chunks = splitter.split_documents(source_docs)
    print(f"Split blog post into {len(chunks)} sub-documents.")
# Run the splitting pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment