Skip to content

Instantly share code, notes, and snippets.

@ristomatti
Last active March 9, 2025 00:28
Show Gist options
  • Save ristomatti/d3322510f2985e3e69f102baa2444ea2 to your computer and use it in GitHub Desktop.
Save ristomatti/d3322510f2985e3e69f102baa2444ea2 to your computer and use it in GitHub Desktop.
Markdown LLM summarizer
#!/usr/bin/env bash
#
# web2md - Convert one or more web pages to a Markdown document, optionally
# summarizing the content.
#
# Requirements
# - percollate: https://github.com/danburzo/percollate
# - aichat: https://github.com/sigoden/aichat
#
# Copyright (c) 2025 Ristomatti Airo <[email protected]>
#
# This work is licensed under the terms of the MIT license.
# For a copy, see <https://opensource.org/licenses/MIT>.
# shellcheck disable=SC2068
# Strict mode: abort on command errors (errexit), let functions and subshells
# inherit the ERR trap (errtrace), reject unset variables (nounset) and make a
# pipeline fail when any of its stages fails (pipefail).
set -o errexit -o errtrace -o nounset -o pipefail

# Script version, shown by --help and --version.
VERSION='0.4.0'
# Model used for summarization unless --model overrides it.
DEFAULT_MODEL='openai:o1-mini'
# Instruction prompt passed to aichat, ahead of the Markdown document, when a
# summary is requested. The heredoc delimiter is quoted so the text is taken
# literally: nothing in it is expanded and the backticks need no escaping.
PROMPT=$(cat <<'EOF'
Summarize text paragraphs, bullet points and tabular content of this Markdown document.
Rules:
1. RETAIN document structure: heading, bullet points, tables and source links.
2. RETAIN key information.
3. REMOVE common postfixes from headings.
4. REMOVE author information.
5. ENSURE there's only one top level heading by incrementing the rest accordingly.
6. ADD Obsidian TOC plugin snippet before the second heading:
```table-of-contents
minLevel: 2
maxLevel: 2
```
7. DO NOT wrap output in a codeblock.
EOF
)
# Run cleanup() on interrupt, termination, command errors and normal exit.
trap 'cleanup' INT TERM ERR EXIT
# File name of this script, used in usage and error messages.
script_name="$(basename "${BASH_SOURCE[0]}")"
# Print usage information to stdout and exit with status 0.
# Globals: script_name, VERSION, DEFAULT_MODEL (read)
#          BOLD, YELLOW, RESET (read; treated as empty when unset)
print_help() {
  # Default the colour variables to empty so that `set -u` cannot abort the
  # help output when one of them was never assigned.
  local bold="${BOLD-}" yellow="${YELLOW-}" reset="${RESET-}"

  # The default model is taken from DEFAULT_MODEL so the help text cannot
  # drift from the value the script actually uses.
  cat <<EOF
${script_name} ${VERSION}
${bold}USAGE${reset}
${yellow}${script_name}${reset} [OPTIONS] URL [URL]...
${bold}OPTIONS${reset}
-t, --title <title> Document title
-o, --output <path> Output file path
-s, --summary Summarize output
-m, --model <model> AI model for summarization (default: ${DEFAULT_MODEL})
-h, --help Print help
-v, --version Print version
EOF
  exit 0
}
# Report the program name and version through die() with exit status 0.
# Globals: BOLD, RESET, script_name, VERSION (read)
print_version() {
  local banner="${BOLD}${script_name}${RESET} ${VERSION}"
  die "$banner" 0
}
cleanup() {
trap - SIGINT SIGTERM ERR EXIT
# script cleanup here
}
# Initialise the colour/style globals used in messages.
# Globals: RESET, BOLD, DIM, GREEN, YELLOW (written); NO_COLOR, TERM (read)
# Escape sequences are produced only when stderr is a terminal, NO_COLOR is
# empty or unset, TERM is not "dumb" and tput is available. In every other
# case all five variables are set to the empty string.
setup_colors() {
  # Define every variable up front: the script runs under `set -u`, so a
  # colour variable left unset would abort the first message that uses it.
  RESET="" BOLD="" DIM="" GREEN="" YELLOW=""

  if [[ -t 2 && -z "${NO_COLOR-}" && "${TERM-}" != "dumb" ]] \
    && command -v tput &>/dev/null; then
    RESET=$(tput sgr0) BOLD=$(tput bold) DIM=$(tput setaf 11) \
      GREEN=$(tput setaf 2) YELLOW=$(tput setaf 3)
  fi
}
# Write one message line to stderr, interpreting backslash escapes (echo -e).
# Arguments: $1 - text to print (optional; an empty line is printed without it)
msg() {
  local text="${1-}"
  echo -e "$text" 1>&2
}
# Print a message through msg() and terminate the script.
# Arguments: $1 - message text
#            $2 - exit status (optional, defaults to 1)
die() {
  local -r text=$1 exit_code=${2-1}
  msg "$text"
  exit "$exit_code"
}
# Verify that the external tools the script relies on can be found.
# Globals: script_name (read)
# Exits through die() naming the first tool that is missing.
validate_deps() {
  local tool
  for tool in percollate aichat; do
    command -v "$tool" &>/dev/null \
      || die "${script_name} requires ${tool}. Please install it first."
  done
}
# Parse command line options and collect the remaining arguments as URLs.
# Globals:   DEFAULT_MODEL, script_name (read)
#            title, output_path, summary, model, urls (written)
# Arguments: the script's command line ("$@")
# Exits:     through print_help / print_version for -h / -v, and through
#            die() on an unknown option, an option that is missing its
#            value, or when no URL is left after the options.
parse_options() {
  title=
  output_path=
  summary=0
  model="$DEFAULT_MODEL"
  urls=()

  while (($# > 0)); do
    case "$1" in
      -h | --help) print_help ;;
      -v | --version) print_version ;;
      -s | --summary) summary=1 ;;
      -t | --title | -o | --output | -m | --model)
        # A trailing option without a value would otherwise shift past the
        # end of the argument list and abort under `set -e` with no message.
        if (($# < 2)); then
          die "${script_name}: option $1 requires an argument"
        fi
        case "$1" in
          -t | --title) title=$2 ;;
          -o | --output) output_path=$2 ;;
          -m | --model) model=$2 ;;
        esac
        shift
        ;;
      --)
        # Conventional end-of-options marker: everything after it is a URL.
        shift
        break
        ;;
      -?*) die "Unknown option: $1" ;;
      *) break ;;
    esac
    shift
  done

  urls=("$@")

  # Validate arguments
  if ((${#urls[@]} == 0)); then
    die "${script_name}: no arguments provided"
  fi
  return 0
}
# Main entry point: convert the given pages to Markdown, optionally summarize
# the result, then write it to the output file or display it.
# Globals:   title, output_path, summary, urls (assigned by parse_options)
#            DIM, RESET, YELLOW, GREEN (read; treated as empty when unset)
# Arguments: the script's command line ("$@")
# Outputs:   progress messages on stderr; the document on stdout (through
#            glow or bat when one is installed) unless an output path is set
web2md() {
  local output url
  local -a percollate_args=(md --no-toc)

  setup_colors
  validate_deps
  parse_options "$@"

  if [[ -n "$title" ]]; then
    msg "${DIM-}Title:${RESET-} ${title}"
    percollate_args+=(--title "$title")
  fi
  if [[ -n "$output_path" ]]; then
    msg "${DIM-}Output path:${RESET-} ${output_path}"
  fi
  msg "${DIM-}Pages:${RESET-}"
  for url in "${urls[@]}"; do
    msg "- ${url}"
  done

  msg "\n${YELLOW-}Converting to Markdown...${RESET-}\n"
  percollate_args+=(--output -)
  # Quoted expansion hands each URL to percollate as exactly one argument;
  # unquoted, characters such as ?, * and [ in a URL are open to pathname
  # expansion and embedded whitespace to word splitting.
  output="$(percollate "${percollate_args[@]}" "${urls[@]}")"

  if [[ $summary -eq 1 ]]; then
    output="$(summarize "$output")"
  fi

  if [[ -n "$output_path" ]]; then
    printf '%s\n' "$output" > "$output_path"
    msg "\n${GREEN-}Done!${RESET-}"
  else
    # Use glow or bat if available
    if command -v glow &>/dev/null; then
      glow --pager <<< "$output"
    elif command -v bat &>/dev/null; then
      bat --style=plain --language markdown <<< "$output"
    else
      cat <<< "$output"
    fi
  fi
}
# Summarize a Markdown document with aichat.
# Globals:   model, PROMPT (read); YELLOW, RESET (read; empty when unset)
# Arguments: $1 - Markdown text to summarize
# Outputs:   aichat's response on stdout; a progress message on stderr
summarize() {
  # ${1-} expands to the argument, or to nothing when it is absent. The
  # spelling "$1-" would instead append a literal dash to the document.
  local -r input="${1-}"
  msg "\n${YELLOW-}Summarizing...${RESET-}"
  aichat --no-stream --model "$model" "$PROMPT" <<< "$input"
}
# Script entry point: run web2md with the command line arguments unchanged.
web2md "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment