Skip to content

Instantly share code, notes, and snippets.

@markrwilliams
Created January 31, 2014 22:13
Show Gist options
  • Save markrwilliams/8744350 to your computer and use it in GitHub Desktop.
Save markrwilliams/8744350 to your computer and use it in GitHub Desktop.
#!/bin/sh -x
# Print an error message on stderr and terminate the script with status 1.
# Arguments:
#   $1 - message to print
die () {
  # printf with a quoted argument emits the message verbatim: no word
  # splitting, no glob expansion, and no surprises if it starts with "-".
  printf '%s\n' "$1" 1>&2
  exit 1
}
# Create the SQLite database from a remote schema file unless the database
# file already exists.
# Arguments:
#   $1 - URL of the SQL schema text (lines starting with '#' are dropped)
#   $2 - path of the SQLite database file to create
# Outputs:
#   "<db path>: schema prepared" on stdout after the schema has been loaded
# Exits (via die) when the download or the schema load fails.
prep_sqlite() {
  url="$1"
  sqlite="$2"
  # Test the database path that was passed in, not the caller's global.
  if [ ! -f "$sqlite" ]
  then
    # Fetch first and check wget's own status: in a plain sh pipeline only the
    # last command's status is visible, so a failed download would otherwise
    # go unnoticed and sqlite3 would be fed empty input.
    schema=$(wget -O - "$url") || die "couldn't download"
    printf '%s\n' "$schema" | grep -v '^#' | sqlite3 "$sqlite" \
      || die "couldn't load schema into $sqlite"
    echo "$sqlite: schema prepared"
  fi
}
# Fetch the "md5sums" index from a remote directory into the download dir.
# Arguments:
#   $1 - base URL of the remote directory (a trailing slash is tolerated)
#   $2 - local directory that receives the file as <dir>/md5sums
# Outputs:
#   "md5sums retrieved" on stdout on success
# Exits (via die) when the directory cannot be created or the download fails.
download_md5sums() {
  # Drop one trailing slash so the request is not made for ".../​/md5sums".
  url="${1%/}"
  download_dir="$2"
  # On a fresh checkout the download directory does not exist yet and
  # wget -O cannot create missing parent directories.
  mkdir -p "$download_dir" || die "couldn't create $download_dir"
  wget -O "$download_dir/md5sums" "$url/md5sums" || die "couldn't download"
  echo "md5sums retrieved"
}
# Download, verify, unpack and import every archive listed in an md5sums file.
# Archives already present in the download directory are skipped.
# Arguments:
#   $1 - base URL the archives are fetched from (trailing slash tolerated)
#   $2 - path of the md5sums file ("<md5>  <file name>" per line)
#   $3 - directory the archives are downloaded to and unpacked in
#   $4 - path of the SQLite database receiving the rows
# Outputs:
#   "downloaded <file name>" on stdout for each fetched archive
# Exits (via die) on download, checksum, unzip or import failure.
# NOTE: after an unzip or import failure the verified archive is left in
# place, so a later run will skip it; remove it by hand to retry.
retrieve_check_and_load() {
  url="${1%/}"
  md5sum_file="$2"
  download_dir="$3"
  sqlite="$4"
  while read -r line
  do
    expected=$(printf '%s\n' "$line" | awk '{ print $1 }')
    fn=$(printf '%s\n' "$line" | awk '{ print $2 }')
    # md5sum marks binary-mode entries with a leading '*' on the file name.
    fn="${fn#\*}"
    # Ignore blank or malformed lines instead of trying to fetch "<dir>/".
    [ -n "$fn" ] || continue
    path="$download_dir/$fn"
    if [ ! -f "$path" ]
    then
      if ! wget -O "$path" "$url/$fn"
      then
        # wget -O creates the target even on failure; remove it so the next
        # run does not mistake a partial file for a finished download.
        rm -f "$path"
        die "couldn't download"
      fi
      echo "downloaded $fn"
      # Compare the archive's actual digest with the published one.
      actual=$(md5sum < "$path" | awk '{ print $1 }')
      if [ "$actual" != "$expected" ]
      then
        rm -f "$path"
        die "bad md5"
      fi
      # </dev/null keeps unzip from consuming the md5sums lines that feed the
      # loop; -o avoids an interactive overwrite prompt.
      (cd "$download_dir" && unzip -o "$fn" < /dev/null) || die "couldn't unzip"
      unzipped="${path%.zip}"
      # NOTE(review): GDELT export files are tab-delimited per the GDELT docs;
      # the separator is written as an escape so it survives whitespace
      # mangling - confirm against the data.
      sqlite3 "$sqlite" <<EOF || die "couldn't import $unzipped"
.separator "\t"
.import "$unzipped" GDELT_DAILYUPDATES
EOF
    fi
  done < "$md5sum_file"
}
# Local artefacts: the download directory and the SQLite database file.
DOWNLOADS=downloads
SQLITE=gdelt.sqlite

# Remote locations: the daily-update directory and the table-creation SQL.
DAILY_URL='http://www.gdeltproject.org/data/dailyupdates/'
SCHEMA_URL='http://www.gdeltproject.org/data/lookups/SQL.tablecreate.txt'

# Run the three stages in order: schema, md5sums index, then the archives.
prep_sqlite "$SCHEMA_URL" "$SQLITE"
download_md5sums "$DAILY_URL" "$DOWNLOADS"
retrieve_check_and_load "$DAILY_URL" "$DOWNLOADS/md5sums" "$DOWNLOADS" "$SQLITE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment