Unix shell script to crawl a list of website URLs using curl
#!/bin/bash
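#
# Crawl a list of website URLs with curl and log the HTTP status code, the
# final (effective) URL and the crawl time for each URL. The log is written
# next to this script as <scriptname>.log; uncomment 'mailto' below to have
# it emailed once the run completes.
#
# Usage: /path/to/this-script.sh /path/to/urls.txt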
timezone="Europe/Zurich"
# List of valid timezones: wikipedia.org/wiki/List_of_tz_database_time_zones
script="${0##*/}"
rootdir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
logfile="$script.log"
log="$rootdir/$logfile"
now=$(TZ=":$timezone" date)
# Uncomment 'mailto=' (remove #) to enable emailing the log upon completion
#mailto="user@example.com"
mailsubj="$script log from $now"
# logging <level> <result>: append a timestamped entry to the logfile
logging() {
    now=$(TZ=":$timezone" date)
    if [[ -z "$1" || -z "$2" ]]; then
        printf '%s [ERROR] Nothing to log. Use:\nlogging <level> <result>\n' "$now" >&2
        exit 2
    else
        echo "$now [$1] $2" >> "$log"
    fi
}
if [ -z "$1" ]; then
echo "$now [ERROR] Missing file input. Use:\n$rootdir/$script /path/to/urls.txt"
exit 2
else
input="$1"
fi
logging "INFO" "Reading file: $input"
while IFS= read -r line; do
    logging "INFO" "Crawling URL: $line"
    curlstart=$(date +"%s")
    curlresult=$(curl -sSL -w '%{http_code} %{url_effective}' "$line" -o /dev/null)
    # curl parameters: -s = silent; -S = show errors despite -s; -L = follow redirects;
    # -w = custom output format (HTTP status code + effective URL); -o /dev/null = discard the response body
    logging "INFO" "$curlresult"
    curldone=$(date +"%s")
    difftime=$((curldone - curlstart))
    logging "INFO" "Crawl-time: $((difftime / 3600)):$(( (difftime % 3600) / 60 )):$((difftime % 60))"
done < "$input"
logging "INFO" "Done reading file: $input"
if [ -n "$mailto" ]; then
    logging "INFO" "Sending Email to: $mailto"
    # Use the system's mail command (e.g. provided by postfix) to email the logfile contents
    mail -s "$mailsubj" "$mailto" < "$log"
fi
exit 0
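A minimal way to run the crawler, assuming the script was saved as curl-crawler.sh (the filename is an assumption) next to a urls.txt like the one below:

chmod +x curl-crawler.sh
./curl-crawler.sh ./urls.txt
cat curl-crawler.sh.log    # the log is written next to the script as <scriptname>.log

A sample urls.txt, one URL per line: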
https://www.apple.com/
https://wikipedia.org
https://swissmacuser.ch/
https://twitter.com/swissmacuser
# Example output generated by curl-crawler
Sun Feb 19 21:56:07 CET 2017 [INFO] Reading file: ./urls.txt
Sun Feb 19 21:56:07 CET 2017 [INFO] Crawling URL: https://www.apple.com/
Sun Feb 19 21:56:07 CET 2017 [INFO] 200 https://www.apple.com/
Sun Feb 19 21:56:07 CET 2017 [INFO] Crawl-time: 0:0:0
Sun Feb 19 21:56:07 CET 2017 [INFO] Crawling URL: https://wikipedia.org
Sun Feb 19 21:56:07 CET 2017 [INFO] 200 https://www.wikipedia.org/
Sun Feb 19 21:56:07 CET 2017 [INFO] Crawl-time: 0:0:0
Sun Feb 19 21:56:07 CET 2017 [INFO] Crawling URL: https://swissmacuser.ch/
Sun Feb 19 21:56:08 CET 2017 [INFO] 200 https://swissmacuser.ch/
Sun Feb 19 21:56:08 CET 2017 [INFO] Crawl-time: 0:0:1
Sun Feb 19 21:56:08 CET 2017 [INFO] Crawling URL: https://twitter.com/swissmacuser
Sun Feb 19 21:56:09 CET 2017 [INFO] 200 https://twitter.com/swissmacuser
Sun Feb 19 21:56:09 CET 2017 [INFO] Crawl-time: 0:0:1
Sun Feb 19 21:56:09 CET 2017 [INFO] Done reading file: ./urls.txt
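Since the optional mailto setting points at unattended runs, a crontab entry along these lines could schedule the crawl (a sketch; the paths and schedule are placeholders):

# crawl the URL list every day at 06:00; logging and optional emailing are handled by the script itself
0 6 * * * /path/to/curl-crawler.sh /path/to/urls.txt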