ddg.sh - Search the internet with DuckDuckGo from your terminal
#!/bin/sh
# ddg.sh - Search the internet with DuckDuckGo from your terminal
# Made with boredom, on Christmas Eve, by mftrhu
# Requirements:
#   awk, curl OR wget, sed, head, tail, grep, tput (MAYBE)
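# Example usage (see usage() below for the full option list):
#   ddg.sh posix shell        # search and page through the results
#   ddg.sh -p 2 posix shell   # show the second page of results
#   ddg.sh -l posix shell     # open the first result in the browser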
available () {
    command -v "$1" >/dev/null 2>&1
}
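# split SEP N - print the Nth SEP-separated field of standard input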
split () {
    awk -F "$1" "{print \$$2}" -
}
between () {
    split "$1" 2 | split "$2" 1
}
bold () {
    sed -r -e "s|<b>|$(tput bold)|g" \
           -e "s|</b>|$(tput sgr0)|g"
}
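# urldecode - turn %XX escapes back into characters. Relies on GNU awk:
# -n reads the "0x.." string as hex, -i ord provides chr(), and the regex
# RS makes RT hold the matched %XX sequence.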
urldecode () {
    awk -niord '{ printf RT?$0chr("0x"substr(RT, 2)):$0 }' RS=%..
}
urlencode () {
    #TODO: actually do more shit here
    sed -e 's/ /\+/g'
}
htmlstrip () {
    sed -r -e "s|</?[a-zA-Z]+>||g"
}
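# htmldecode - map a few common HTML entities back to plain characters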
htmldecode () {
    sed -e "s/&quot;/'/g" -e "s/&#x27;/'/g" -e 's/&amp;/\&/g' # | \
    # awk -niord '{ printf RT?$0chr("0x"substr(RT, 4)):$0 }' RS='&#x..;'
    #BUG: awk gives up the ghost, sometimes, first seen with `-p 4 bah`
}
usage () {
    printf "Usage: %s [-hlduqcn] [-g N] [-p P] QUERY...\n" "$0"
    printf "Performs a DuckDuckGo search and prints the results.\n" | fmt -w "$cols"
    printf "\n"
    printf "DuckDuckGo bangs are (mostly) unsupported, and will just be opened in the browser.\n" | fmt -w "$cols"
    printf "\n"
    printf " -h    shows this help and quits\n"
    printf " -l    opens the first result in the browser\n"
    printf " -g N  opens the Nth result in the browser\n"
    printf " -p P  shows the Pth page of results\n"
    printf " -q    does not print the descriptions\n"
    printf " -d    dumps the results as tab-separated values\n"
    printf " -u    prints only the result URLs\n"
    printf " -c    enable colors (requires tput)\n"
    printf " -n    disable colors\n"
    exit "$1"
}
error() {
    printf "${bold}${error_c}Error:${reset} %s.\n" "$2" >&2
    if [ -n "$3" ]; then
        printf "\n%s\n" "$3" >&2
    fi
    exit "$1"
}
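# download URL - fetch a page with curl or wget, whichever is available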
download () {
    if available curl; then
        curl "$1" 2>/dev/null; return $?
    elif available wget; then
        wget "$1" -q -O-; return $?
    fi
    error 4 "no downloader installed" \
        "Install curl or wget before proceeding."
}
pager () {
    if [ -n "$DDG_PAGER" ] && available "$DDG_PAGER"; then
        "$DDG_PAGER"; return $?
    elif [ -n "$PAGER" ] && available "$PAGER"; then
        "$PAGER"; return $?
    elif available less; then
        less -r; return $?
    fi
    cat; return $?
}
browser () {
    if [ -n "$DDG_BROWSER" ] && available "$DDG_BROWSER"; then
        "$DDG_BROWSER" "$1"; return $?
    elif [ -n "$BROWSER" ] && available "$BROWSER"; then
        "$BROWSER" "$1"; return $?
    elif available lynx; then
        lynx "$1"; return $?
    elif available w3m; then
        w3m "$1"; return $?
    fi
    error 4 "no browser installed" \
        "Install a browser before proceeding."
}
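# Results are cached under ~/.cache/ddg (query, raw, urls, titles, desc) so
# that later pages and -g jumps can reuse them without hitting the network.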
cache_dir="$HOME/.cache/ddg"
purge_cache () {
    rm -f "${cache_dir}/query"
    rm -f "${cache_dir}/raw"
    rm -f "${cache_dir}/urls"
    rm -f "${cache_dir}/titles"
    rm -f "${cache_dir}/desc"
}
check_age () {
    #TODO
    true
}
check_cache () {
    mkdir -p "${cache_dir}"
    if [ -r "${cache_dir}/query" ]; then
        old_query=$(cat "${cache_dir}/query")
        if [ -z "$query" ] || [ "${query}" = "${old_query}" ]; then
            check_age
            if [ ! -s "${cache_dir}/urls" ]; then
                purge_cache
                error 2 "no cached search results" \
                    "Try again with a non-empty QUERY."
            fi
        else
            purge_cache
        fi
    else
        if [ -z "$query" ]; then
            error 2 "no cached search results" \
                "Try again with a non-empty QUERY."
        fi
        purge_cache
    fi
}
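# clean_up - drop everything up to the last sponsored result (or, failing
# that, the closing </style> tag) so only organic results remain in "raw"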
clean_up () {
    line_cut=$(grep -n "${cache_dir}/tmp" -e "result-sponsored" | \
        cut -d ':' -f 1 | \
        tail -1)
    [ -z "$line_cut" ] && line_cut=$(grep -n "${cache_dir}/tmp" -e "</style>" | \
        cut -d ':' -f 1)
    tail -n +"${line_cut}" "${cache_dir}/tmp" > "${cache_dir}/raw"
    rm -f "${cache_dir}/tmp"
}
extract_urls () {
    grep "${cache_dir}/raw" -e "result-link" | \
        cut -d '"' -f 4 | \
        cut -d '=' -f 3 | \
        urldecode >> "${cache_dir}/urls"
}
extract_titles () {
    grep "${cache_dir}/raw" -e "result-link" | \
        cut -d ">" -f 2- | \
        split "</a>" 1 | \
        htmlstrip | \
        htmldecode >> "${cache_dir}/titles"
}
extract_descriptions () {
    grep "${cache_dir}/raw" -e "result-snippet" -A 2 | \
        grep -v -e "<td.*>" -e "</td>" -e "--" | \
        sed 's/^ \+//g' | \
        bold | \
        htmldecode >> "${cache_dir}/desc"
}
collate () {
    paste "${cache_dir}/titles" "${cache_dir}/urls" "${cache_dir}/desc"
}
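# pull_results - keep fetching pages from the lite endpoint until the cache
# holds enough results for the requested page (or for the -g index)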
pull_results () {
    results_wanted=$((page * page_size))
    [ "$index" -gt "$results_wanted" ] && results_wanted="$index"
    if [ -r "${cache_dir}/urls" ]; then
        len=$(wc -l "${cache_dir}/urls" | cut -d ' ' -f 1)
    else
        len=0
    fi
    while [ "$len" -lt "$results_wanted" ]; do
        download "https://duckduckgo.com/lite?q=${query}&sc=${len}" > "${cache_dir}/tmp"
        if [ ! $? -eq 0 ]; then
            error 3 "no connection to the internet" \
                "Check your connection and try again."
        fi
        clean_up
        extract_urls
        extract_titles
        extract_descriptions
        len=$(wc -l "${cache_dir}/urls" | cut -d ' ' -f 1)
    done
}
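# paginate - keep only the lines belonging to the requested page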
paginate () {
    tail -n +"$(( ( page - 1 ) * page_size + 1 ))" | \
        head -n "$(( page_size ))"
}
do_jump () {
    url=$(tail "${cache_dir}/urls" -n +"$1" | head -1)
    browser "$url"
    exit 0
}
do_urls () {
    paginate < "${cache_dir}/urls"
    exit 0
}
do_dump () {
    collate | paginate
    exit 0
}
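# do_show - pretty-print numbered titles, URLs and (unless -q was given)
# descriptions, wrapped to the terminal width and fed through the pager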
do_show () {
    counter="$(( (page - 1) * page_size ))"
    collate | paginate | while read -r line; do
        url="$(echo "$line" | cut -f 2)"
        title="$(echo "$line" | cut -f 1 | fmt -s -w "$width")"
        counter=$((counter + 1))
        echo "$title" | while read -r l; do
            if [ -z "$n" ] ; then
                printf "[%3d] ${bold}${title_c}%s${reset}\n" "$counter" "$l"
            else
                printf "      ${bold}${title_c}%s${reset}\n" "$l"
            fi
            n=$((n + 1))
        done
        printf "      ${url_c}<%s>${reset}\n" "$url"
        if $descriptions; then
            desc="$(echo "$line" | cut -f 3 | fmt -w "$width")"
            echo "$desc" | while read -r l; do
                printf "      %s\n" "$l"
            done
        fi
    done | pager
    exit 0
}
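# check_query - intercept DuckDuckGo !bangs: !g/!google is handed off to
# google.sh, any other bang is simply opened in the browser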
check_query () {
    first_char=$(echo "$query" | cut -c 1)
    if [ "${first_char}" = "!" ]; then
        accel=$(echo "$query" | cut -d ' ' -f 1)
        rest=$(echo "$query" | cut -d ' ' -f 2-)
        case "${accel}" in
            (!g|!google) ./google.sh "${rest}" ;;
            (*) browser "https://duckduckgo.com/lite?q=${query}" ;;
        esac
        exit 0
    fi
}
color () {
    case "$1" in
        (black)   tput setaf 0 ;;
        (red)     tput setaf 1 ;;
        (green)   tput setaf 2 ;;
        (yellow)  tput setaf 3 ;;
        (blue)    tput setaf 4 ;;
        (magenta) tput setaf 5 ;;
        (cyan)    tput setaf 6 ;;
        (white)   tput setaf 7 ;;
    esac
}
init_colors () {
    [ -z "$DDG_TITLE_COLOR" ] && title_c="yellow" || title_c="$DDG_TITLE_COLOR"
    [ -z "$DDG_URL_COLOR" ] && url_c="blue" || url_c="$DDG_URL_COLOR"
    title_c=$(color "$title_c")
    url_c=$(color "$url_c")
    error_c=$(color "red")
    bold=$(tput bold)
    reset=$(tput sgr0)
}
init_vars () {
    cols=80
    available tput && cols=$(tput cols)
    width=$(( cols - 6 - 4 ))
    page_size="$DDG_PAGE_SIZE"
    [ -z "$DDG_PAGE_SIZE" ] && page_size=10
    colors="$DDG_COLORS"
    [ -z "$DDG_COLORS" ] && colors=true
    secret_plan="show"
    descriptions=true
    index=0
    page=1
    error_c=""
    title_c=""
    url_c=""
    bold=""
    reset=""
}
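# Main body: set defaults, parse the command-line options, build the query,
# then dispatch on secret_plan.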
init_vars
while getopts "hlduqg:p:nc" o; do
    case "${o}" in
        (d) secret_plan="dump" ;;
        (l) secret_plan="jump"
            index=1 ;;
        (g) secret_plan="jump"
            index="$OPTARG" ;;
        (p) page="$OPTARG" ;;
        (q) descriptions=false ;;
        (u) secret_plan="urls" ;;
        (n) colors=false ;;
        (c) colors=true ;;
        (h) usage 0 ;;
        (*) usage 1 ;;
    esac
done
shift $((OPTIND - 1))
query=$(printf '%s' "$*" | urlencode)
if "$colors" && available tput; then
    init_colors
fi
check_query
check_cache
printf '%s' "$query" > "${cache_dir}/query"
pull_results
case "${secret_plan}" in
    (dump) do_dump ;;
    (jump) do_jump "$index" ;;
    (urls) do_urls ;;
    (show) do_show ;;
esac