Skip to content

Instantly share code, notes, and snippets.

@gbraccialli
Created January 29, 2016 14:42
Show Gist options
  • Save gbraccialli/ee7f80f84413b00153d6 to your computer and use it in GitHub Desktop.
Save gbraccialli/ee7f80f84413b00153d6 to your computer and use it in GitHub Desktop.
-- Recreate the Hive table that exposes the downloaded Wikipedia pagecount files.
--
-- The table is EXTERNAL, so dropping it removes only the metastore entry; files
-- already uploaded to HDFS are left in place. IF EXISTS keeps this script
-- re-runnable on a fresh metastore even when hive.exec.drop.ignorenonexistent
-- has been switched off.
drop table if exists wikipedia;
-- Each input line is four space-separated fields, mapped in order to
-- domain, page, count and size. Data is partitioned by day: datewiki is an
-- integer of the form YYYYMMDD, and partitions are registered separately
-- (see the "alter table ... add partition" statement issued by wikiday.sh).
create external table wikipedia(
domain string,
page string,
count bigint,
size bigint
)
partitioned by (datewiki int)
row format delimited fields terminated by ' '
stored as textfile
;
vi wikiday.sh
#!/usr/bin/env bash
#
# wikiday.sh - upload one day of hourly Wikipedia pagecount dumps to HDFS and
# register that day as a partition of the Hive table `wikipedia`.
#
# Usage: ./wikiday.sh YYYY MM DD
#   YYYY  four-digit year  (e.g. 2015)
#   MM    two-digit month  (e.g. 01)
#   DD    two-digit day    (e.g. 01)
#
# Exit status:
#   0  every hourly file was uploaded and the partition was registered
#   1  the HDFS directory, at least one download, or the Hive statement failed
#   2  bad usage
set -u

# The arguments end up in a URL, an HDFS path and a HiveQL statement, so accept
# digits only; this also rules out shell/SQL injection through them.
if [[ $# -ne 3 || ! $1 =~ ^[0-9]{4}$ || ! $2 =~ ^[0-9]{2}$ || ! $3 =~ ^[0-9]{2}$ ]]; then
  printf 'Usage: %s YYYY MM DD\n' "${0##*/}" >&2
  exit 2
fi

datewiki="$1$2$3"
hdfs_dir="/test/wikipedia/datewiki=${datewiki}"
base_url="https://dumps.wikimedia.org/other/pagecounts-raw/$1/$1-$2"

echo "start $1 $2 $3"
echo "creating hdfs dir..."
# -p creates missing parents and does not fail when the directory already
# exists, so the script can be re-run for the same day.
if ! sudo -u hive hadoop fs -mkdir -p "${hdfs_dir}"; then
  echo "error: cannot create HDFS directory ${hdfs_dir}" >&2
  exit 1
fi
echo "done creating hdfs "

failed=0
for ((hour = 0; hour < 24; hour++)); do
  printf -v i '%02d' "${hour}"
  echo "start download ${i}..."
  url="${base_url}/pagecounts-${datewiki}-${i}0000.gz"
  dest="${hdfs_dir}/${i}0000.gz"
  # Stream the download straight into HDFS. pipefail makes a wget failure
  # (e.g. HTTP 404) visible instead of being hidden by the exit status of
  # `hadoop fs -put`. URL and destination are passed as positional
  # parameters rather than being interpolated into the command string.
  if sudo -u hive bash -c \
    'set -o pipefail; wget -O - -o /dev/null "$1" | hadoop fs -put - "$2"' \
    _ "${url}" "${dest}"; then
    echo "done download ${i}"
  else
    # Keep going so the remaining hours are still loaded; the failure is
    # reflected in the exit status. NOTE: an empty or partial file may have
    # been left at ${dest} and may need to be removed by hand.
    echo "error: download failed for ${url} -> ${dest}" >&2
    failed=$((failed + 1))
  fi
done

echo "adding hive partition"
# IF NOT EXISTS keeps a re-run for an already registered day from failing.
if ! beeline -u "jdbc:hive2://localhost:10000/default" -n "admin" -e "alter table wikipedia add if not exists partition (datewiki=${datewiki}) location '${hdfs_dir}/';"; then
  echo "error: could not add hive partition datewiki=${datewiki}" >&2
  exit 1
fi
echo "done $1 $2 $3"

if ((failed > 0)); then
  echo "error: ${failed} hourly download(s) failed for ${datewiki}" >&2
  exit 1
fi
vi wiki.sh
#!/usr/bin/env bash
#
# wiki.sh - run ./wikiday.sh once per day over a range of dates.
#
# Usage: ./wiki.sh [START [END]]
#   START  first day to load, YYYY-MM-DD          (default: 2015-01-01)
#   END    day to stop at, exclusive, YYYY-MM-DD  (default: 2015-01-04)
#
# Requires GNU date (-I and -d with relative items).
# Exit status: 0 if every day succeeded, 1 otherwise.
set -u

#######################################
# Iterate from START (inclusive) to END (exclusive), calling ./wikiday.sh
# with the year, month and day of each date as separate arguments.
# Arguments: $1 - start date (optional), $2 - end date (optional)
# Outputs:   one "YYYY MM DD" line per day on stdout; diagnostics on stderr
# Returns:   0 on success, 1 on invalid dates or if any day failed
#######################################
main() {
  local start=${1:-2015-01-01}
  local end=${2:-2015-01-04}
  local d
  local failed=0

  # Normalise both bounds to ISO form; this also rejects invalid dates up
  # front. TZ=UTC keeps the arithmetic away from local DST transitions
  # (midnight does not exist on some DST change days in some time zones).
  if ! start=$(TZ=UTC date -I -d "${start}" 2>/dev/null); then
    echo "error: invalid start date: ${1:-}" >&2
    return 1
  fi
  if ! end=$(TZ=UTC date -I -d "${end}" 2>/dev/null); then
    echo "error: invalid end date: ${2:-}" >&2
    return 1
  fi

  d=${start}
  # ISO dates sort lexicographically, so "<" ends the loop even when START is
  # already past END (a "!=" comparison would never terminate in that case).
  while [[ "${d}" < "${end}" ]]; do
    echo "${d:0:4}" "${d:5:2}" "${d:8:2}"
    if ! ./wikiday.sh "${d:0:4}" "${d:5:2}" "${d:8:2}"; then
      # Carry on with the remaining days, but remember the failure.
      echo "warning: wikiday.sh failed for ${d}" >&2
      failed=1
    fi
    # Stop instead of looping forever on an empty value if date fails.
    if ! d=$(TZ=UTC date -I -d "${d} + 1 day"); then
      echo "error: cannot compute the day after the current date" >&2
      return 1
    fi
  done

  return "${failed}"
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment