Last active
October 16, 2025 23:44
-
-
Save ivy/c86ab5b8145add9afe3b580d694856fc to your computer and use it in GitHub Desktop.
Use Claude Code to customize Claude Code by scraping the official docs into digestible context!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# fetch-claude-code-docs.sh - Claude Code Documentation Scraper
#
# I wanted to vibe code customizations for Claude Code *with* Claude Code. This
# script scrapes the official documentation and combines it into a single file
# that can be ingested as context by Claude Code itself!
#
# I would have used gitingest but Claude Code is closed source and the docs are
# only available on Anthropic's website. So, feel free to use this script to go
# get 'em!
#
# Usage:
#   ./fetch-claude-code-docs.sh                 # Output combined docs to stdout
#   ./fetch-claude-code-docs.sh -o combined.md  # Save to file
#   ./fetch-claude-code-docs.sh -h              # Show help
#   DEBUG=1 ./fetch-claude-code-docs.sh         # Show debug information
#
# Copyright (c) 2025 Ivy Evans <[email protected]>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
# Turn on command tracing first so everything below is traced. Any non-empty
# DEBUG value enables tracing; the debug() helper only prints when DEBUG is
# exactly 1.
if [[ -n "${DEBUG:-}" ]]; then
  set -o xtrace
fi

set -o errexit  # exit on the first unhandled non-zero status
set -o errtrace # make the ERR trap apply inside functions and subshells too
set -o nounset  # expanding an unset variable is an error
set -o pipefail # a pipeline fails when any of its stages fails

# Split words on newline and tab only, never on spaces.
IFS=$'\n\t'

# Passed to curl as the User-Agent by fetch_md; constant for the whole run.
readonly USER_AGENT='fetch-claude-code-docs/1.0 (+https://gist.github.com/ivy/c86ab5b8145add9afe3b580d694856fc)'

# Report the line of any command whose failure trips the ERR trap.
trap 'echo "[!] Error at line $LINENO" >&2' ERR
# Helper functions

# Print an informational message to stderr as "[*] <message>".
# Arguments: the message; several arguments are joined with single spaces.
log() {
  # The script sets IFS to newline+tab, so "$*" would otherwise join the
  # arguments with newlines. Pin IFS for this call only.
  local IFS=' '
  printf '[*] %s\n' "$*" >&2
}
# Print a debug message to stderr as "[D] <message>", only when DEBUG is
# exactly 1. Always returns 0 so it is safe to call under errexit.
# Arguments: the message; several arguments are joined with single spaces.
debug() {
  # The script sets IFS to newline+tab, so "$*" would otherwise join the
  # arguments with newlines. Pin IFS for this call only.
  local IFS=' '
  if [ "${DEBUG:-}" = 1 ]; then
    # Diagnostics are best-effort: a failed write must not abort the script.
    printf '[D] %s\n' "$*" >&2 || true
  fi
  return 0
}
# Print an error message to stderr as "[!] <message>" and exit with status 1.
# Arguments: the message; several arguments are joined with single spaces.
die() {
  # The script sets IFS to newline+tab, so "$*" would otherwise join the
  # arguments with newlines. Pin IFS for this call only.
  local IFS=' '
  printf '[!] %s\n' "$*" >&2
  exit 1
}
# Abort via die() unless every named command can be resolved by the shell.
# Arguments: one or more command names.
require_cmd() {
  local cmd
  for cmd in "$@"; do
    command -v "$cmd" >/dev/null 2>&1 || die "Missing dependency: $cmd"
  done
}
# Turn a string into a lowercase, file-name-friendly slug on stdout.
# Arguments: $1 - the string to sanitize
# Every run of characters outside [a-z0-9._-] becomes a single "-", and
# leading/trailing dashes are removed.
sanitize_name() {
  # LC_ALL=C keeps the case mapping and the a-z range byte-wise, so the result
  # does not depend on the caller's locale.
  printf '%s' "$1" |
    LC_ALL=C tr '[:upper:]' '[:lower:]' |
    LC_ALL=C sed -E 's/[^a-z0-9._-]+/-/g; s/^-+|-+$//g'
}
# Print the documentation page URLs to fetch, one per line, on stdout.
discover_urls() {
  # NOTE: The previous dynamic discovery method no longer works because the HTML
  # served by the server differs from what's rendered after JavaScript executes.
  # The navigation links are generated client-side, so grep/sed cannot extract them.
  #
  # To update this list in the future, visit:
  #   https://docs.claude.com/en/docs/claude-code/overview
  #
  # Then run this JavaScript snippet in the browser console:
  #   let links = Array.from(document.querySelectorAll('li[id^="/en/docs/claude-code"] a')).map(a => a.href);
  #   console.log(JSON.stringify(links));
  #
  # A more robust dynamic method would require a headless browser like Playwright,
  # but for now we use a hardcoded list (last updated: 2025-10-16).
  #
  # The heredoc delimiter is quoted, so the lines are emitted literally.
  cat <<'EOF'
https://docs.claude.com/en/docs/claude-code/overview
https://docs.claude.com/en/docs/claude-code/quickstart
https://docs.claude.com/en/docs/claude-code/common-workflows
https://docs.claude.com/en/docs/claude-code/sub-agents
https://docs.claude.com/en/docs/claude-code/plugins
https://docs.claude.com/en/docs/claude-code/skills
https://docs.claude.com/en/docs/claude-code/output-styles
https://docs.claude.com/en/docs/claude-code/hooks-guide
https://docs.claude.com/en/docs/claude-code/headless
https://docs.claude.com/en/docs/claude-code/github-actions
https://docs.claude.com/en/docs/claude-code/gitlab-ci-cd
https://docs.claude.com/en/docs/claude-code/mcp
https://docs.claude.com/en/docs/claude-code/troubleshooting
https://docs.claude.com/en/docs/claude-code/sdk/migration-guide
https://docs.claude.com/en/docs/claude-code/third-party-integrations
https://docs.claude.com/en/docs/claude-code/amazon-bedrock
https://docs.claude.com/en/docs/claude-code/google-vertex-ai
https://docs.claude.com/en/docs/claude-code/network-config
https://docs.claude.com/en/docs/claude-code/llm-gateway
https://docs.claude.com/en/docs/claude-code/devcontainer
https://docs.claude.com/en/docs/claude-code/setup
https://docs.claude.com/en/docs/claude-code/iam
https://docs.claude.com/en/docs/claude-code/security
https://docs.claude.com/en/docs/claude-code/data-usage
https://docs.claude.com/en/docs/claude-code/monitoring-usage
https://docs.claude.com/en/docs/claude-code/costs
https://docs.claude.com/en/docs/claude-code/analytics
https://docs.claude.com/en/docs/claude-code/plugin-marketplaces
https://docs.claude.com/en/docs/claude-code/settings
https://docs.claude.com/en/docs/claude-code/vs-code
https://docs.claude.com/en/docs/claude-code/jetbrains
https://docs.claude.com/en/docs/claude-code/terminal-config
https://docs.claude.com/en/docs/claude-code/model-config
https://docs.claude.com/en/docs/claude-code/memory
https://docs.claude.com/en/docs/claude-code/statusline
https://docs.claude.com/en/docs/claude-code/cli-reference
https://docs.claude.com/en/docs/claude-code/interactive-mode
https://docs.claude.com/en/docs/claude-code/slash-commands
https://docs.claude.com/en/docs/claude-code/checkpointing
https://docs.claude.com/en/docs/claude-code/hooks
https://docs.claude.com/en/docs/claude-code/plugins-reference
https://docs.claude.com/en/docs/claude-code/legal-and-compliance
EOF
}
# Download the Markdown rendition of a documentation page to stdout.
# Arguments: $1 - page URL without the ".md" suffix
# Returns:   curl's exit status, or 2 when no usable URL was given.
fetch_md() {
  local url="${1:-}"

  # Drop trailing slashes so "https://host/page/" requests "page.md" and not
  # "page/.md".
  while [[ "$url" == */ ]]; do
    url="${url%/}"
  done

  if [[ -z "$url" ]]; then
    printf '[!] fetch_md: missing URL\n' >&2
    return 2
  fi

  debug "Fetching: ${url}.md"

  # --compressed         ask for a compressed response and decode it
  # --fail               turn HTTP error responses into a non-zero exit status
  # --location           follow redirects
  # --max-time 20        cap the whole transfer at 20 seconds
  # --retry 3            retry transient failures up to three times
  # --silent/--show-error  no progress meter, but still print errors
  curl \
    --compressed \
    --fail \
    --location \
    --max-time 20 \
    --retry 3 \
    --silent \
    --show-error \
    --user-agent "$USER_AGENT" \
    "${url}.md"
}
# Read URLs from stdin (one per line) and print a numbered index to stdout,
# one "NN - <sanitized last path segment>" line per URL, counting from 01.
render_index() {
  local i=1
  local url name
  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r url || [[ -n "$url" ]]; do
    name=$(sanitize_name "$(basename "$url")")
    printf '%02d - %s\n' "$i" "$name"
    i=$((i + 1))
  done
}
# Entry point: parse options, fetch every documentation page and write the
# combined document to stdout or to a file.
# Arguments: [-h|--help] [-o|--output FILE]; FILE "-" (the default) is stdout.
# Outputs:   combined document on stdout/FILE; progress and errors on stderr.
# Exits:     1 when an option is invalid or at least one page failed to fetch.
main() {
  local output_file="-"
  local tmp_out=""
  local arg line url name
  local i=1
  local -a urls=()
  local -a failed_urls=()

  # Check dependencies (only the external tools this script actually runs)
  require_cmd curl
  require_cmd basename
  require_cmd sed
  require_cmd tr

  # Parse flags (support -h/--help, -o/--output)
  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        printf 'Usage: %s [--output FILE]\n' "$0"
        printf ' -h, --help Show this help message\n'
        printf ' -o, --output Output to FILE instead of stdout\n'
        printf '\nEnvironment variables:\n'
        printf ' DEBUG=1 Show debug information\n'
        exit 0
        ;;
      -o | --output)
        [ $# -ge 2 ] || die "Option $1 requires an argument"
        output_file="$2"
        shift 2
        ;;
      --)
        shift
        break
        ;;
      -*)
        die "Unknown option: $1"
        ;;
      *)
        break
        ;;
    esac
  done

  # An empty name would only fail later, at the final mv.
  [ -n "$output_file" ] || die "Output file name must not be empty"

  # Positional arguments are not used; say so instead of dropping them silently.
  for arg in "$@"; do
    log "Ignoring unexpected argument: $arg"
  done

  # Build URL list using a portable while-read loop
  log "Discovering documentation URLs..."
  while IFS= read -r line; do
    [ -n "$line" ] || continue
    urls+=("$line")
  done < <(discover_urls)
  [ ${#urls[@]} -gt 0 ] || die "No documentation URLs discovered"
  log "Found ${#urls[@]} documentation pages"

  # Setup atomic output if writing to file: write to a sibling temporary file
  # and rename it over the target once everything has been generated.
  if [ "$output_file" != "-" ]; then
    tmp_out="${output_file}.tmp.$$"
    # Expand the path now (shell-quoted) so the cleanup does not depend on
    # this function's local variable still being in scope at exit.
    # shellcheck disable=SC2064
    trap "rm -f -- $(printf '%q' "$tmp_out")" EXIT
    exec >"$tmp_out"
    debug "Writing to temporary file: $tmp_out"
  fi

  # Generate output
  printf 'Directory structure:\n'
  printf '%s\n' "${urls[@]}" | render_index
  printf '\n\n'

  for url in "${urls[@]}"; do
    name=$(sanitize_name "$(basename "$url")")
    log "Fetching $i/${#urls[@]}: $name"
    printf '================================================\n'
    printf 'FILE: %02d-%s.md\n' "$i" "$name"
    printf '================================================\n\n'
    # A failed page is recorded and reported at the end; the run continues.
    if ! fetch_md "$url"; then
      printf '[!] Failed to fetch: %s\n' "${url}.md" >&2
      failed_urls+=("$url")
    fi
    printf '\n\n'
    # Pause between requests; there is nothing to wait for after the last one.
    if [ "$i" -lt "${#urls[@]}" ]; then
      sleep 1
    fi
    i=$((i + 1))
  done

  # Move temporary file to final location if writing to file
  if [ "$output_file" != "-" ]; then
    mv -- "$tmp_out" "$output_file"
    trap - EXIT
    log "Output written to: $output_file"
  fi

  # Report results
  local successful=$((${#urls[@]} - ${#failed_urls[@]}))
  log "Combined $successful/${#urls[@]} documents successfully"
  if [ ${#failed_urls[@]} -gt 0 ]; then
    printf '[!] Failed to fetch %d URLs:\n' "${#failed_urls[@]}" >&2
    printf '%s\n' "${failed_urls[@]}" >&2
    exit 1
  fi

  # Debug: show what URLs were processed
  if [ "${DEBUG:-}" = "1" ]; then
    printf '[D] URLs processed:\n' >&2
    printf '%s\n' "${urls[@]}" >&2
  fi
}
# Run the script, forwarding all command-line arguments unchanged.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment