Skip to content

Instantly share code, notes, and snippets.

@linuxsocist
Last active July 29, 2017 23:02
Show Gist options
  • Save linuxsocist/6f916c22b3725e1dfbb11c61986e7816 to your computer and use it in GitHub Desktop.
Save linuxsocist/6f916c22b3725e1dfbb11c61986e7816 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Scrapes the road status page of www.tararuadc.govt.nz, prints one record per
# road to the terminal and writes the same records as a JSON array of objects
# (keys: update, area, road, status, comment - all string values) to
# '/tmp/roads.json'.
#
# Requires curl, GNU sed/date and html-xml-utils (hxnormalize, hxtabletrans).
# html-xml-utils can be installed in ubuntu via 'sudo apt install html-xml-utils'
# or compiled from source found here: https://www.w3.org/Tools/HTML-XML-utils/
#
# Exit status: 0 on success; 1 if a required tool is missing or the download
# fails (in which case an existing /tmp/roads.json is left untouched).
#
# NOTE: 'pipefail' is deliberately not enabled. hxnormalize may exit non-zero
# when it had to repair malformed HTML while still producing usable output, so
# only the download step is checked explicitly.
set -eu

readonly STATUS_URL='http://www.tararuadc.govt.nz/Services/Roading_Transportation/District_Road_Status'
readonly JSON_FILE='/tmp/roads.json'
readonly RULE='============================================================'
# Cell separator used while flattening the table. The ASCII unit separator
# cannot occur in the page text, unlike a printable character such as '`'.
readonly SEP=$'\x1f'

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print a value as a JSON string literal, surrounding quotes included.
# Whitespace runs are collapsed to one space and both ends are trimmed, then
# backslashes and double quotes are escaped.
# Arguments: $1 - raw cell text (single line)
# Outputs:   the JSON string literal on stdout, without trailing newline
#######################################
json_string() {
  local IFS=$' \t\n'
  local backslash='\' quote='"' text
  local -a words=()
  read -r -a words <<<"$1" || true
  text=${words[*]:-}
  # Pattern and replacement are quoted so they are always taken literally.
  text=${text//"$backslash"/"$backslash$backslash"}
  text=${text//"$quote"/"$backslash$quote"}
  printf '"%s"' "$text"
}

#######################################
# Download the page, print every road record and write the JSON file.
# Globals:   STATUS_URL, JSON_FILE, RULE, SEP (all read)
# Outputs:   records on stdout, diagnostics on stderr, JSON in $JSON_FILE
#######################################
main() {
  local tool
  for tool in curl hxnormalize hxtabletrans; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  # Fetch first and check the result on its own, so that a network or HTTP
  # error (-f) aborts before the previous JSON file is overwritten.
  local html table
  html=$(curl -fsS "$STATUS_URL") || die "failed to download $STATUS_URL"

  # Transpose the table so every original column becomes one line, put SEP
  # between cells and strip all remaining tags.
  table=$(
    printf '%s\n' "$html" \
      | hxnormalize -x \
      | hxtabletrans \
      | tr -d '\n\r' \
      | sed -r 's/<tr>/\n/g' \
      | sed -e 's/<*br>//g' \
        -e "s/<\/th><td>/${SEP}/g" \
        -e "s/<\/td><td>/${SEP}/g" \
        -e 's/<[^>]*>//g'
  )

  # Line 1 is whatever preceded the first <tr>; lines 2-6 are used as
  # area, road, comment, update date and status (in that order).
  local area_row='' road_row='' comment_row='' date_row='' status_row=''
  {
    IFS= read -r _ || true
    IFS= read -r area_row || true
    IFS= read -r road_row || true
    IFS= read -r comment_row || true
    IFS= read -r date_row || true
    IFS= read -r status_row || true
  } <<<"$table"

  # Squeeze repeated spaces in the comments for display.
  comment_row=$(tr -s ' ' <<<"$comment_row")

  # One record per separator in the area line; cell 0 is the row header.
  local seps=${area_row//[!$SEP]/}
  local count=${#seps}
  (( count > 0 )) || printf 'warning: no road entries found in the page\n' >&2

  local -a areas=() roads=() comments=() dates=() statuses=()
  IFS=$SEP read -r -a areas <<<"$area_row" || true
  IFS=$SEP read -r -a roads <<<"$road_row" || true
  IFS=$SEP read -r -a comments <<<"$comment_row" || true
  IFS=$SEP read -r -a dates <<<"$date_row" || true
  IFS=$SEP read -r -a statuses <<<"$status_row" || true

  local json='' col day month year updated area road status comment
  for (( col = 1; col <= count; col++ )); do
    area=${areas[col]-}
    road=${roads[col]-}
    status=${statuses[col]-}
    comment=${comments[col]-}

    # The date is swapped from dd/mm/yyyy to mm/dd/yyyy before being handed
    # to date(1), then converted to epoch seconds. An unparsable date
    # yields an empty "update" value (date reports the problem on stderr).
    IFS=/ read -r day month year _ <<<"${dates[col]-}" || true
    updated=$(date -d "${month}/${day}/${year}" '+%s') || updated=''

    printf '%s\n' "$RULE" \
      "Update: $updated" \
      "Area: $area" \
      "Road: $road" \
      "Status: $status" \
      "Comment: $comment"

    json+="${json:+,}{\"update\":$(json_string "$updated")"
    json+=",\"area\":$(json_string "$area")"
    json+=",\"road\":$(json_string "$road")"
    json+=",\"status\":$(json_string "$status")"
    json+=",\"comment\":$(json_string "$comment")}"
  done
  printf '%s\n' "$RULE"

  printf '[%s]\n' "$json" >"$JSON_FILE"
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment