Skip to content

Instantly share code, notes, and snippets.

@sebastiancarlos
Created March 28, 2024 19:56
Show Gist options
  • Save sebastiancarlos/42f343ef420105be72ec43f4dd879a9c to your computer and use it in GitHub Desktop.
Save sebastiancarlos/42f343ef420105be72ec43f4dd879a9c to your computer and use it in GitHub Desktop.
gutenberg-dl - download text books from Project Gutenberg
#! /usr/bin/env bash
# All my gist code is licensed under the MIT license.
# gutenberg-dl <search-term>
# - Download books from Project Gutenberg in plain text format
# - Uses "gutendex" to look up the book and its download link. It seems to be
# the best third-party Project Gutenberg API currently available
# - Source: https://github.com/garethbjohnson/gutendex
# Make the builtin `echo` expand backslash escapes by default. The style
# variables below store escape sequences as literal text (e.g. `\x1b[1m`), so
# any `echo` that prints them depends on this option to render them.
shopt -s xpg_echo

# Define the ANSI style variables used in messages: bold, green, blue, reset.
# They are set to empty strings when stdout is not a terminal or when the
# NO_COLOR environment variable is non-empty, so that redirected or piped
# output does not contain raw escape codes.
setup_colors () {
  if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
    bold='\x1b[1m'
    green='\x1b[32m'
    blue='\x1b[34m'
    reset='\x1b[0m'
  else
    bold=''
    green=''
    blue=''
    reset=''
  fi
}
setup_colors
# Print the help text to stdout and exit the script with status 0.
# Globals read: bold, reset (style escape sequences; may be empty).
# printf's %b expands the backslash escapes stored in those variables, so the
# output is rendered correctly regardless of the shell's xpg_echo setting.
usage () {
  printf '%b\n' \
    "${bold}Usage:${reset} gutenberg-dl [OPTIONS] <search-term>" \
    " - Download books from Project Gutenberg in plain text format" \
    " - If multiple matching books are found, the first one is downloaded." \
    " Books are sorted by popularity, so this should be ok in most cases." \
    " - The output file will be named '<title>_<author>.txt', with spaces" \
    " replaced by underscores." \
    "" \
    "${bold}Options:${reset}" \
    " -h, --help Display this help message" \
    ' -d, --output-dir DIR Output directory (default: current directory)'
  exit 0
}
# Parse the command line.
# - With no arguments at all, print the help text (usage exits the script).
# - Leading options are consumed until the first non-option word (or `--`);
#   whatever is left in "$@" afterwards is the search term.
if [[ "$#" -eq 0 ]]; then
  usage
fi

output_dir="."
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    -h|--help)
      # Honor the help flag anywhere among the leading options, not only when
      # it is the sole argument.
      usage
      ;;
    -d|--output-dir)
      # The option needs a value. Without this check a trailing `-d` would
      # silently set an empty directory and leave no search term.
      if [[ "$#" -lt 2 || -z "$2" ]]; then
        printf 'Error: %s requires a directory argument\n' "$1" >&2
        exit 1
      fi
      output_dir="$2"
      shift 2
      ;;
    --)
      # Explicit end of options; allows a search term that starts with '-'.
      shift
      break
      ;;
    *)
      break
      ;;
  esac
done

# A search term is mandatory once the options have been consumed.
if [[ "$#" -eq 0 ]]; then
  printf 'Error: missing <search-term> (see --help)\n' >&2
  exit 1
fi

# Fail before any network request if the download target cannot be used.
if [[ ! -d "$output_dir" ]]; then
  printf 'Error: output directory does not exist: %s\n' "$output_dir" >&2
  exit 1
fi
# Percent-encode a string for use inside a URL query component.
# Arguments: $1 - the string to encode
# Outputs:   the encoded string on stdout, without a trailing newline
# Unreserved characters (A-Z a-z 0-9 - _ . ~) are kept as-is; every other
# byte is written as %XX with uppercase hex digits.
url_encode () {
  # Force the C locale inside this function so the string is walked byte by
  # byte (multi-byte UTF-8 characters are encoded one byte at a time) and the
  # a-z / A-Z ranges below match ASCII letters only.
  local LC_ALL=C
  local input=$1
  local encoded=''
  local char code hex i

  for (( i = 0; i < ${#input}; i++ )); do
    char=${input:i:1}
    case "$char" in
      [a-zA-Z0-9.~_-])
        encoded+=$char
        ;;
      *)
        # A leading quote makes printf yield the numeric value of the
        # character; masking to 8 bits keeps the raw byte value however the
        # C library represents bytes above 0x7F.
        printf -v code '%d' "'$char"
        printf -v hex '%%%02X' "$(( code & 0xFF ))"
        encoded+=$hex
        ;;
    esac
  done

  printf '%s' "$encoded"
}
# --- Search Gutendex for the book -------------------------------------------
# jq is needed to read the JSON response; fail early with a clear message.
if ! command -v jq > /dev/null; then
  printf 'Error: jq is required but was not found in PATH\n' >&2
  exit 1
fi

endpoint="https://gutendex.com/books?search="
search_term=$(url_encode "$*")
url="$endpoint$search_term"

# --fail makes curl exit non-zero on HTTP errors (4xx/5xx) instead of passing
# an error page on as if it were the search result.
if ! response=$(curl --fail --location --header 'Accept: application/json' --silent "$url"); then
  printf 'Error: Could not fetch data from Gutendex\n - URL: %s\n' "$url" >&2
  exit 1
fi

# --- Extract id, title and first author of the top result -------------------
# The response is fed to jq through a here-string: an unquoted `echo` would
# word-split and glob it and, with xpg_echo enabled, expand the backslash
# escapes inside the JSON. Each field is read with its own jq call so that a
# title containing '|' or a line break cannot shift the fields.
if ! id=$(jq -r '.results[0].id // empty' <<< "$response"); then
  printf 'Error: Could not parse the response from Gutendex\n - URL: %s\n' "$url" >&2
  exit 1
fi

# An empty result list yields no id; the id is also interpolated into the
# download URL below, so only accept a plain number.
if [[ ! "$id" =~ ^[0-9]+$ ]]; then
  printf 'Error: No book found for "%s"\n' "$*" >&2
  exit 1
fi

# Fall back to placeholders when the record has no title or no author.
title=$(jq -r '.results[0].title // "untitled"' <<< "$response")
author=$(jq -r '.results[0].authors[0].name // "unknown"' <<< "$response")

# Style codes go through %b (escape expansion); book data goes through %s so
# backslashes in a title or author name are printed literally.
printf 'Found book: %b%s%b by %b%s%b\n' \
  "${bold}${blue}" "$title" "$reset" \
  "${bold}${blue}" "$author" "$reset"

# --- Build the output filename ----------------------------------------------
# Join title and author, drop every byte that is not an ASCII letter, digit,
# underscore or space (this also removes line breaks), turn spaces into
# underscores and lowercase the result. LC_ALL=C keeps the character ranges
# independent of the user's locale.
filename=$(printf '%s' "${title}_${author}" \
  | LC_ALL=C tr -cd 'a-zA-Z0-9_ ' \
  | tr ' ' '_' \
  | tr '[:upper:]' '[:lower:]')

# Titles and authors written entirely in non-ASCII characters leave nothing
# but underscores; use the book id so the name is still meaningful.
if [[ -z "${filename//_/}" ]]; then
  filename="gutenberg_$id"
fi
filename="$filename.txt"

# --- Download the book ------------------------------------------------------
book_url="https://www.gutenberg.org/ebooks/$id.txt.utf-8"
output_file="$output_dir/$filename"
printf 'Downloading book to %s\n - From: %s\n' "$output_file" "$book_url"

# --fail prevents an HTTP error page from being saved as the book text.
if ! curl --fail --location --silent --output "$output_file" "$book_url"; then
  printf 'Error: Could not download book\n - URL: %s\n' "$book_url" >&2
  exit 1
fi

printf '%b\n' "${green}Done!${reset}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment