Skip to content

Instantly share code, notes, and snippets.

@hitripod
Last active December 30, 2024 05:46
Show Gist options
  • Save hitripod/87d25310c0a2469c68603e07d6c35621 to your computer and use it in GitHub Desktop.
Save hitripod/87d25310c0a2469c68603e07d6c35621 to your computer and use it in GitHub Desktop.
This script fetches all pages from a GitBook URL and saves their content to a single text file.
import sys
import os
from langchain_community.document_loaders import GitbookLoader
def save_gitbook_to_txt(url: str, output_file: str) -> bool:
    """Fetch every page of a GitBook and write the text to a single file.

    Pages are loaded with ``GitbookLoader(url, load_all_paths=True)`` and
    their ``page_content`` values are joined with one blank line between
    consecutive pages.

    Args:
        url: Address of the GitBook to load.
        output_file: Path of the UTF-8 text file to create or overwrite.

    Returns:
        True when the file was written, False when loading or writing
        failed (the error is reported on stderr).
    """
    try:
        # Initialize the loader with all paths enabled.
        loader = GitbookLoader(url, load_all_paths=True)
        # Load all pages from the GitBook.
        all_pages_data = loader.load()
        # Combine all page content into a single string.
        combined_content = "\n\n".join(doc.page_content for doc in all_pages_data)
        # Save the combined content to a file.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(combined_content)
    except Exception as e:
        # Command-line boundary: report the failure on stderr (so it does not
        # mix with normal output) and tell the caller instead of crashing.
        print(f"An error occurred: {e}", file=sys.stderr)
        return False

    print(f"Successfully saved {len(all_pages_data)} pages to {output_file}.")
    return True
if __name__ == "__main__":
    # Provide a default USER_AGENT without clobbering one the caller has
    # already exported in the environment.
    # NOTE(review): this runs after the module-level langchain_community
    # import; if the library reads USER_AGENT at import time, this is too
    # late to take effect -- confirm against the library.
    os.environ.setdefault('USER_AGENT', 'myagent')

    # Exactly one positional argument (the GitBook URL) is expected.
    if len(sys.argv) != 2:
        print("Usage: python fetch_gitbook.py <GitBook URL>", file=sys.stderr)
        sys.exit(1)

    gitbook_url = sys.argv[1]
    # The output always lands in the current working directory.
    output_file = os.path.join(os.getcwd(), "gitbook_content.txt")

    # Compare with `is False` so that a None return (no explicit status) is
    # not mistaken for a failure; only an explicit False yields exit code 1.
    if save_gitbook_to_txt(gitbook_url, output_file) is False:
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.