Last active
July 29, 2017 23:02
-
-
Save linuxsocist/6f916c22b3725e1dfbb11c61986e7816 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# This script scrapes the road status page of www.tararuadc.govt.nz and displays a list in the terminal
# as well as generating a JSON file which can be found at '/tmp/roads.json'.
#
# requires curl, GNU sed/date and html-xml-utils (hxnormalize, hxtabletrans), which can be
# installed in ubuntu via 'sudo apt install html-xml-utils'
# or compiled from source found here: https://www.w3.org/Tools/HTML-XML-utils/

set -euo pipefail
shopt -s extglob # enables the +( ) patterns used below to squeeze whitespace

readonly STATUS_URL='http://www.tararuadc.govt.nz/Services/Roading_Transportation/District_Road_Status'
readonly JSON_FILE='/tmp/roads.json'
readonly RULE='============================================================'

# Scratch file holding the flattened table; created in main, removed on exit.
scratch_file=''

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# EXIT trap: remove the scratch file if it was created.
cleanup() {
  if [[ -n "$scratch_file" ]]; then
    rm -f -- "$scratch_file"
  fi
}

#######################################
# Render a value as the inside of a JSON string literal.
# Whitespace runs become a single space (the effect the previous unquoted
# echo had on ordinary values), remaining control characters are dropped,
# and backslashes / double quotes are escaped so the output stays valid JSON.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_string() {
  local text="${1//+([[:space:]])/ }"
  text=${text//[[:cntrl:]]/}
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

#######################################
# Download the status page and flatten its transposed table into a file:
# one line per <tr>, cells separated by a backtick, all other tags removed.
# Arguments: $1 - file to write
# Returns:   non-zero if any stage of the pipeline fails
#######################################
fetch_table() {
  local out=$1
  curl -fsS "$STATUS_URL" \
    | hxnormalize -x \
    | hxtabletrans \
    | tr -d '\n\r' \
    | sed -r 's/<tr>/\n/g' \
    | sed -e 's/<*br>//g' -e 's/<\/th><td>/`/g' -e 's/<\/td><td>/`/g' -e 's/<[^>]*>//g' \
      > "$out"
}

main() {
  local tool
  for tool in curl hxnormalize hxtabletrans; do
    command -v "$tool" > /dev/null || die "missing required tool: $tool"
  done

  scratch_file=$(mktemp) || die 'could not create a temporary file'
  trap cleanup EXIT

  fetch_table "$scratch_file" || die "failed to download or parse $STATUS_URL"

  # Line 1 is whatever preceded the first <tr>. Lines 2-6 are the transposed
  # rows, each starting with its header cell followed by one cell per road:
  # 2 = area, 3 = road, 4 = comment, 5 = last update (dd/mm/yyyy), 6 = status.
  local -a areas=() roads=() comments=() dates=() statuses=()
  IFS='`' read -r -a areas <<< "$(sed -n 2p "$scratch_file")"
  IFS='`' read -r -a roads <<< "$(sed -n 3p "$scratch_file")"
  IFS='`' read -r -a comments <<< "$(sed -n 4p "$scratch_file")"
  IFS='`' read -r -a dates <<< "$(sed -n 5p "$scratch_file")"
  IFS='`' read -r -a statuses <<< "$(sed -n 6p "$scratch_file")"

  # Number of roads = cells in the area row minus its header cell. Counting
  # fields (instead of deleting lines that match "Area") keeps area values
  # that happen to contain the word "Area".
  local count
  count=$(awk -F '`' 'NR == 2 { print NF - 1; exit }' "$scratch_file")
  count=${count:-0}
  if ((count <= 0)); then
    printf 'warning: no road entries found in the status table\n' >&2
  fi

  local json='' object i updated area road status comment
  local -a dmy
  # Index 0 of every array is the header cell, so entries start at 1.
  for ((i = 1; i <= count; i++)); do
    area=${areas[i]:-}
    road=${roads[i]:-}
    status=${statuses[i]:-}
    comment=${comments[i]:-}
    comment=${comment//+( )/ } # squeeze runs of spaces, as 'tr -s " "' did

    # The page gives dd/mm/yyyy; date -d wants mm/dd/yyyy. An unparsable
    # date leaves the field empty instead of aborting the whole run.
    dmy=()
    IFS=/ read -r -a dmy <<< "${dates[i]:-}"
    updated=$(date -d "${dmy[1]:-}/${dmy[0]:-}/${dmy[2]:-}" '+%s') || updated=''

    printf '%s\n' "$RULE"
    printf 'Update: %s\nArea: %s\nRoad: %s\nStatus: %s\nComment: %s\n' \
      "$updated" "$area" "$road" "$status" "$comment"

    printf -v object '{"update":"%s","area":"%s","road":"%s","status":"%s","comment":"%s"}' \
      "$(json_string "$updated")" \
      "$(json_string "$area")" \
      "$(json_string "$road")" \
      "$(json_string "$status")" \
      "$(json_string "$comment")"
    json+="${json:+,}${object}"
  done
  printf '%s\n' "$RULE"

  printf '[%s]\n' "$json" > "$JSON_FILE"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment