ivy · October 16, 2025 23:44
diff --git a/fetch-claude-code-docs.sh b/fetch-claude-code-docs.sh
 #!/usr/bin/env bash
 # fetch-claude-code-docs.sh - Claude Code Documentation Scraper
 #
 # I wanted to vibe code customizations for Claude Code *with* Claude Code. This
 # script scrapes the official documentation and combines it into a single file
 # that can be ingested as context by Claude Code itself!
 #
 # I would have used gitingest but Claude Code is closed source and the docs are
 # only available on Anthropic's website. So, feel free to use this script to go
 # get 'em!
 #
 # Usage:
 #   ./fetch-claude-code-docs.sh                        # Output combined docs to stdout
 #   ./fetch-claude-code-docs.sh -o combined.md         # Save to file
 #   ./fetch-claude-code-docs.sh -h                     # Show help
 #   DEBUG=1 ./fetch-claude-code-docs.sh                # Show debug information
 #
 # Copyright (c) 2025 Ivy Evans <[email protected]>
 #
 # Permission to use, copy, modify, and/or distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
 # copyright notice and this permission notice appear in all copies.
 #
 # THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 # REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 # AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 # INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 # LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 # PERFORMANCE OF THIS SOFTWARE.

 # Trace every command when DEBUG is set to any non-empty value.
 if [[ -n "${DEBUG:-}" ]]; then
   set -o xtrace
 fi

 # Strict mode: abort on errors (also inside functions and pipelines) and on
 # use of unset variables.
 set -o errexit -o errtrace -o nounset -o pipefail

 # Split words on newlines and tabs only, never on spaces.
 IFS=$'\n\t'

 # Passed to curl as the User-Agent header by fetch_md.
 USER_AGENT='fetch-claude-code-docs/1.0 (+https://gist.github.com/ivy/c86ab5b8145add9afe3b580d694856fc)'

 # Report the line number of the command that triggered the ERR trap.
 trap 'echo "[!] Error at line $LINENO" >&2' ERR

 # Helper functions
 # log MESSAGE...
 # Print an informational line, prefixed with "[*] ", to stderr. All arguments
 # are joined into a single message using the first character of IFS.
 log() {
   local message="$*"
   printf '[*] %s\n' "$message" 1>&2
 }
 # debug MESSAGE...
 # Print a "[D] "-prefixed line to stderr, but only when DEBUG is exactly "1".
 # Always returns 0 so that a disabled (or failed) debug line can never abort
 # the script under errexit.
 debug() {
   if [ "${DEBUG:-}" = 1 ]; then
     printf '[D] %s\n' "$*" >&2 || true
   fi
 }
 # die MESSAGE...
 # Print a "[!] "-prefixed error line to stderr and terminate the shell with
 # exit status 1.
 die() {
   local reason="$*"
   printf '[!] %s\n' "$reason" 1>&2
   exit 1
 }
 # require_cmd NAME
 # Succeed silently when NAME resolves to a command; otherwise abort through
 # die with a "Missing dependency" message.
 require_cmd() {
   local tool="$1"
   if ! command -v "$tool" >/dev/null 2>&1; then
     die "Missing dependency: $tool"
   fi
 }

 # sanitize_name STRING
 # Write a lowercase version of STRING to stdout in which every run of
 # characters outside [a-z0-9._-] is replaced by a single "-", with leading
 # and trailing dashes removed.
 sanitize_name() {
   local raw="$1"
   printf '%s' "$raw" |
     tr '[:upper:]' '[:lower:]' |
     sed -E -e 's/[^a-z0-9._-]+/-/g' -e 's/^-+|-+$//g'
 }

 # discover_urls
 # Print the URL of every Claude Code documentation page, one per line, in
 # navigation order.
 #
 # NOTE: The previous dynamic discovery method no longer works because the HTML
 # served by the server differs from what's rendered after JavaScript executes.
 # The navigation links are generated client-side, so grep/sed cannot extract
 # them.
 #
 # To update this list in the future, visit:
 #   https://docs.claude.com/en/docs/claude-code/overview
 #
 # Then run this JavaScript snippet in the browser console:
 #   let links = Array.from(document.querySelectorAll('li[id^="/en/docs/claude-code"] a')).map(a => a.href);
 #   console.log(JSON.stringify(links));
 #
 # A more robust dynamic method would require a headless browser like
 # Playwright, but for now we use a hardcoded list (last updated: 2025-10-16).
 discover_urls() {
   local -r base='https://docs.claude.com/en/docs/claude-code'

   # Page paths relative to $base. An array is used instead of a here-document
   # so the list keeps working however this function is indented (a
   # here-document terminator must sit at the very start of its line).
   local -a slugs=(
     overview
     quickstart
     common-workflows
     sub-agents
     plugins
     skills
     output-styles
     hooks-guide
     headless
     github-actions
     gitlab-ci-cd
     mcp
     troubleshooting
     sdk/migration-guide
     third-party-integrations
     amazon-bedrock
     google-vertex-ai
     network-config
     llm-gateway
     devcontainer
     setup
     iam
     security
     data-usage
     monitoring-usage
     costs
     analytics
     plugin-marketplaces
     settings
     vs-code
     jetbrains
     terminal-config
     model-config
     memory
     statusline
     cli-reference
     interactive-mode
     slash-commands
     checkpointing
     hooks
     plugins-reference
     legal-and-compliance
   )

   local slug
   for slug in "${slugs[@]}"; do
     printf '%s/%s\n' "$base" "$slug"
   done
 }

 # fetch_md URL
 # Download the Markdown rendition of a documentation page (URL with ".md"
 # appended) and write it to stdout. Returns curl's exit status.
 fetch_md() {
   local page_url="$1"
   local md_url="${page_url}.md"

   # --fail turns HTTP errors into a non-zero exit status; --silent together
   # with --show-error hides the progress meter but keeps error messages.
   local -a curl_opts=(
     --compressed
     --fail
     --location
     --max-time 20
     --retry 3
     --silent
     --show-error
     --user-agent "$USER_AGENT"
   )

   debug "Fetching: ${md_url}"
   curl "${curl_opts[@]}" "$md_url"
 }

 # render_index
 # Read URLs from stdin, one per line, and print a numbered index line
 # ("NN - name") for each, where name is the sanitized last path component.
 render_index() {
   # Keep the loop variables local so a caller's own `url` is not clobbered
   # when this function runs outside a pipeline.
   local url name
   local i=1

   # `|| [[ -n "$url" ]]` also processes a final line that lacks a newline.
   while IFS= read -r url || [[ -n "$url" ]]; do
     name=$(sanitize_name "$(basename -- "$url")")
     printf '%02d - %s\n' "$i" "$name"
     i=$((i + 1))
   done
 }

 # main [-h | --help] [-o | --output FILE]
 #
 # Parses the command line, collects the page list from discover_urls, then
 # writes an index followed by every page (each under a "FILE:" banner) to
 # stdout or, with --output, to FILE by way of a temporary sibling file.
 #
 # Arguments: the script's command-line arguments
 # Outputs:   combined document on stdout (or in FILE); progress on stderr
 # Exits:     0 on success or after --help; 1 on a usage error or when at
 #            least one page could not be fetched
 main() {
   local output_file="-"
   local tmp_out=""
   local cmd line url name
   local i=1
   local -a urls=()
   local -a failed_urls=()

   # Check dependencies: only the external tools this script really invokes.
   for cmd in basename curl sed tr; do
     require_cmd "$cmd"
   done

   # Parse flags (support -h/--help, -o/--output)
   while [ $# -gt 0 ]; do
     case "$1" in
     -h | --help)
       printf 'Usage: %s [--output FILE]\n' "$0"
       printf '  -h, --help     Show this help message\n'
       printf '  -o, --output   Output to FILE instead of stdout\n'
       printf '\nEnvironment variables:\n'
       printf '  DEBUG=1        Show debug information\n'
       exit 0
       ;;
     -o | --output)
       [ $# -ge 2 ] || die "Option $1 requires an argument"
       output_file="$2"
       shift 2
       ;;
     --)
       shift
       break
       ;;
     -*)
       die "Unknown option: $1"
       ;;
     *)
       break
       ;;
     esac
   done

   # Build URL list using a portable while-read loop
   log "Discovering documentation URLs..."
   while IFS= read -r line; do
     urls+=("$line")
   done < <(discover_urls)

   [ "${#urls[@]}" -gt 0 ] || die "No documentation URLs discovered"
   log "Found ${#urls[@]} documentation pages"

   # Setup atomic output if writing to file. The cleanup trap is installed
   # before the temporary file is created so no exit path can leave it behind.
   # tmp_out is local to main; the trap is cleared again before main returns.
   if [ "$output_file" != "-" ]; then
     tmp_out="${output_file}.tmp.$$"
     trap 'rm -f -- "$tmp_out"' EXIT
     exec >"$tmp_out"
     debug "Writing to temporary file: $tmp_out"
   fi

   # Generate output
   printf 'Directory structure:\n'
   printf '%s\n' "${urls[@]}" | render_index
   printf '\n\n'

   for url in "${urls[@]}"; do
     name=$(sanitize_name "$(basename -- "$url")")

     log "Fetching $i/${#urls[@]}: $name"

     printf '================================================\n'
     printf 'FILE: %02d-%s.md\n' "$i" "$name"
     printf '================================================\n\n'

     # A failed page is recorded and skipped; the remaining pages still run.
     if ! fetch_md "$url"; then
       printf '[!] Failed to fetch: %s\n' "${url}.md" >&2
       failed_urls+=("$url")
     fi

     printf '\n\n'
     i=$((i + 1))
     sleep 1 # pause between requests
   done

   # Move temporary file to final location if writing to file. This happens
   # even when some fetches failed; the exit status below reports that.
   if [ "$output_file" != "-" ]; then
     mv -- "$tmp_out" "$output_file"
     trap - EXIT
     log "Output written to: $output_file"
   fi

   # Report results
   local successful=$((${#urls[@]} - ${#failed_urls[@]}))
   log "Combined $successful/${#urls[@]} documents successfully"

   if [ "${#failed_urls[@]}" -gt 0 ]; then
     printf '[!] Failed to fetch %d URLs:\n' "${#failed_urls[@]}" >&2
     printf '%s\n' "${failed_urls[@]}" >&2
     exit 1
   fi

   # Debug: show what URLs were processed
   if [ "${DEBUG:-}" = "1" ]; then
     printf '[D] URLs processed:\n' >&2
     printf '%s\n' "${urls[@]}" >&2
   fi
 }

 # Entry point: run main with the script's command-line arguments.
 main "$@"
	#!/usr/bin/env bash
	# fetch-claude-code-docs.sh - Claude Code Documentation Scraper
	#
	# I wanted to vibe code customizations for Claude Code with Claude Code. This
	# script scrapes the official documentation and combines it into a single file
	# that can be ingested as context by Claude Code itself!
	#
	# I would have used gitingest but Claude Code is closed source and the docs are
	# only available on Anthropic's website. So, feel free to use this script to go
	# get 'em!
	#
	# Usage:
	# ./fetch-claude-code-docs.sh # Output combined docs to stdout
	# ./fetch-claude-code-docs.sh -o combined.md # Save to file
	# ./fetch-claude-code-docs.sh -h # Show help
	# DEBUG=1 ./fetch-claude-code-docs.sh # Show debug information
	#
	# Copyright (c) 2025 Ivy Evans <[email protected]>
	#
	# Permission to use, copy, modify, and/or distribute this software for any
	# purpose with or without fee is hereby granted, provided that the above
	# copyright notice and this permission notice appear in all copies.
	#
	# THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
	# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
	# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
	# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
	# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
	# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
	# PERFORMANCE OF THIS SOFTWARE.

	# Trace every command when DEBUG is set to any non-empty value.
	if [[ -n "${DEBUG:-}" ]]; then
	  set -o xtrace
	fi

	# Strict mode: abort on errors (also inside functions and pipelines) and on
	# use of unset variables.
	set -o errexit -o errtrace -o nounset -o pipefail

	# Split words on newlines and tabs only, never on spaces.
	IFS=$'\n\t'

	# Passed to curl as the User-Agent header by fetch_md.
	USER_AGENT='fetch-claude-code-docs/1.0 (+https://gist.github.com/ivy/c86ab5b8145add9afe3b580d694856fc)'

	# Report the line number of the command that triggered the ERR trap.
	trap 'echo "[!] Error at line $LINENO" >&2' ERR

	# Helper functions
	# log MESSAGE...
	# Print an informational line, prefixed with "[*] ", to stderr. All arguments
	# are joined into a single message using the first character of IFS.
	log() {
	  # "$*" (not a bare "$") is what expands to the joined arguments.
	  printf '[*] %s\n' "$*" >&2
	}
	# debug MESSAGE...
	# Print a "[D] "-prefixed line to stderr, but only when DEBUG is exactly "1".
	# Always returns 0 so that a disabled (or failed) debug line can never abort
	# the script under errexit.
	debug() {
	  if [ "${DEBUG:-}" = 1 ]; then
	    printf '[D] %s\n' "$*" >&2 || true
	  fi
	}
	# die MESSAGE...
	# Print a "[!] "-prefixed error line to stderr and terminate the shell with
	# exit status 1.
	die() {
	  local reason="$*"
	  printf '[!] %s\n' "$reason" 1>&2
	  exit 1
	}
	# require_cmd NAME
	# Succeed silently when NAME resolves to a command; otherwise abort through
	# die with a "Missing dependency" message.
	require_cmd() {
	  if ! command -v "$1" >/dev/null 2>&1; then
	    die "Missing dependency: $1"
	  fi
	}

	# sanitize_name STRING
	# Write a lowercase version of STRING to stdout in which every run of
	# characters outside [a-z0-9._-] is replaced by a single "-", with leading
	# and trailing dashes removed.
	sanitize_name() {
	  printf '%s' "$1" |
	    tr '[:upper:]' '[:lower:]' |
	    sed -E 's/[^a-z0-9._-]+/-/g; s/^-+|-+$//g'
	}

	# discover_urls
	# Print the URL of every Claude Code documentation page, one per line, in
	# navigation order.
	#
	# NOTE: The previous dynamic discovery method no longer works because the HTML
	# served by the server differs from what's rendered after JavaScript executes.
	# The navigation links are generated client-side, so grep/sed cannot extract
	# them.
	#
	# To update this list in the future, visit:
	#   https://docs.claude.com/en/docs/claude-code/overview
	#
	# Then run this JavaScript snippet in the browser console:
	#   let links = Array.from(document.querySelectorAll('li[id^="/en/docs/claude-code"] a')).map(a => a.href);
	#   console.log(JSON.stringify(links));
	#
	# A more robust dynamic method would require a headless browser like
	# Playwright, but for now we use a hardcoded list (last updated: 2025-10-16).
	discover_urls() {
	  local -r base='https://docs.claude.com/en/docs/claude-code'

	  # Page paths relative to $base. An array is used instead of a here-document
	  # so the list keeps working however this function is indented (a
	  # here-document terminator must sit at the very start of its line).
	  local -a slugs=(
	    overview
	    quickstart
	    common-workflows
	    sub-agents
	    plugins
	    skills
	    output-styles
	    hooks-guide
	    headless
	    github-actions
	    gitlab-ci-cd
	    mcp
	    troubleshooting
	    sdk/migration-guide
	    third-party-integrations
	    amazon-bedrock
	    google-vertex-ai
	    network-config
	    llm-gateway
	    devcontainer
	    setup
	    iam
	    security
	    data-usage
	    monitoring-usage
	    costs
	    analytics
	    plugin-marketplaces
	    settings
	    vs-code
	    jetbrains
	    terminal-config
	    model-config
	    memory
	    statusline
	    cli-reference
	    interactive-mode
	    slash-commands
	    checkpointing
	    hooks
	    plugins-reference
	    legal-and-compliance
	  )

	  local slug
	  for slug in "${slugs[@]}"; do
	    printf '%s/%s\n' "$base" "$slug"
	  done
	}

	# fetch_md URL
	# Download the Markdown rendition of a documentation page (URL with ".md"
	# appended) and write it to stdout. Returns curl's exit status.
	fetch_md() {
	  local page_url="$1"
	  local md_url="${page_url}.md"

	  # --fail turns HTTP errors into a non-zero exit status; --silent together
	  # with --show-error hides the progress meter but keeps error messages.
	  local -a curl_opts=(
	    --compressed
	    --fail
	    --location
	    --max-time 20
	    --retry 3
	    --silent
	    --show-error
	    --user-agent "$USER_AGENT"
	  )

	  debug "Fetching: ${md_url}"
	  curl "${curl_opts[@]}" "$md_url"
	}

	# render_index
	# Read URLs from stdin, one per line, and print a numbered index line
	# ("NN - name") for each, where name is the sanitized last path component.
	render_index() {
	  # Keep the loop variables local so a caller's own `url` is not clobbered
	  # when this function runs outside a pipeline.
	  local url name
	  local i=1

	  # `|| [[ -n "$url" ]]` also processes a final line that lacks a newline.
	  while IFS= read -r url || [[ -n "$url" ]]; do
	    name=$(sanitize_name "$(basename -- "$url")")
	    printf '%02d - %s\n' "$i" "$name"
	    i=$((i + 1))
	  done
	}

	# main [-h | --help] [-o | --output FILE]
	#
	# Parses the command line, collects the page list from discover_urls, then
	# writes an index followed by every page (each under a "FILE:" banner) to
	# stdout or, with --output, to FILE by way of a temporary sibling file.
	#
	# Arguments: the script's command-line arguments
	# Outputs:   combined document on stdout (or in FILE); progress on stderr
	# Exits:     0 on success or after --help; 1 on a usage error or when at
	#            least one page could not be fetched
	main() {
	  local output_file="-"
	  local tmp_out=""
	  local cmd line url name
	  local i=1
	  local -a urls=()
	  local -a failed_urls=()

	  # Check dependencies: only the external tools this script really invokes.
	  for cmd in basename curl sed tr; do
	    require_cmd "$cmd"
	  done

	  # Parse flags (support -h/--help, -o/--output)
	  while [ $# -gt 0 ]; do
	    case "$1" in
	    -h | --help)
	      printf 'Usage: %s [--output FILE]\n' "$0"
	      printf '  -h, --help     Show this help message\n'
	      printf '  -o, --output   Output to FILE instead of stdout\n'
	      printf '\nEnvironment variables:\n'
	      printf '  DEBUG=1        Show debug information\n'
	      exit 0
	      ;;
	    -o | --output)
	      [ $# -ge 2 ] || die "Option $1 requires an argument"
	      output_file="$2"
	      shift 2
	      ;;
	    --)
	      shift
	      break
	      ;;
	    -*)
	      die "Unknown option: $1"
	      ;;
	    *)
	      break
	      ;;
	    esac
	  done

	  # Build URL list using a portable while-read loop
	  log "Discovering documentation URLs..."
	  while IFS= read -r line; do
	    urls+=("$line")
	  done < <(discover_urls)

	  [ "${#urls[@]}" -gt 0 ] || die "No documentation URLs discovered"
	  log "Found ${#urls[@]} documentation pages"

	  # Setup atomic output if writing to file. The cleanup trap is installed
	  # before the temporary file is created so no exit path can leave it behind.
	  # tmp_out is local to main; the trap is cleared again before main returns.
	  if [ "$output_file" != "-" ]; then
	    tmp_out="${output_file}.tmp.$$"
	    trap 'rm -f -- "$tmp_out"' EXIT
	    exec >"$tmp_out"
	    debug "Writing to temporary file: $tmp_out"
	  fi

	  # Generate output
	  printf 'Directory structure:\n'
	  printf '%s\n' "${urls[@]}" | render_index
	  printf '\n\n'

	  for url in "${urls[@]}"; do
	    name=$(sanitize_name "$(basename -- "$url")")

	    log "Fetching $i/${#urls[@]}: $name"

	    printf '================================================\n'
	    printf 'FILE: %02d-%s.md\n' "$i" "$name"
	    printf '================================================\n\n'

	    # A failed page is recorded and skipped; the remaining pages still run.
	    if ! fetch_md "$url"; then
	      printf '[!] Failed to fetch: %s\n' "${url}.md" >&2
	      failed_urls+=("$url")
	    fi

	    printf '\n\n'
	    i=$((i + 1))
	    sleep 1 # pause between requests
	  done

	  # Move temporary file to final location if writing to file. This happens
	  # even when some fetches failed; the exit status below reports that.
	  if [ "$output_file" != "-" ]; then
	    mv -- "$tmp_out" "$output_file"
	    trap - EXIT
	    log "Output written to: $output_file"
	  fi

	  # Report results
	  local successful=$((${#urls[@]} - ${#failed_urls[@]}))
	  log "Combined $successful/${#urls[@]} documents successfully"

	  if [ "${#failed_urls[@]}" -gt 0 ]; then
	    printf '[!] Failed to fetch %d URLs:\n' "${#failed_urls[@]}" >&2
	    printf '%s\n' "${failed_urls[@]}" >&2
	    exit 1
	  fi

	  # Debug: show what URLs were processed
	  if [ "${DEBUG:-}" = "1" ]; then
	    printf '[D] URLs processed:\n' >&2
	    printf '%s\n' "${urls[@]}" >&2
	  fi
	}

	# Entry point: run main with the script's command-line arguments.
	main "$@"