Skip to content

Instantly share code, notes, and snippets.

@allixender
Last active September 1, 2015 13:46
Show Gist options
  • Save allixender/ccc5831e726f5fc7679d to your computer and use it in GitHub Desktop.
Save allixender/ccc5831e726f5fc7679d to your computer and use it in GitHub Desktop.
# Open an SSH session on the Compute Engine instance "cloud-instance1"
# (project "cloud-project1", zone "us-central1-f").
gcloud compute --project "cloud-project1" ssh --zone "us-central1-f" "cloud-instance1"
# Refresh the package index and upgrade the installed packages.
sudo apt-get update
sudo apt-get upgrade
# Install OpenJDK 7 and git, then the GDAL (including Java bindings) and
# NetCDF packages.
sudo apt-get install openjdk-7-jdk git
sudo apt-get install libgdal-java libgdal-dev gdal-bin netcdf-bin libnetcdf-dev
# Download and unpack Cassandra 2.1.7 and 2.0.16 into the current directory.
# Only the 2.1.7 directory is referenced by the later commands in these notes.
wget http://mirror.cc.columbia.edu/pub/software/apache/cassandra/2.1.7/apache-cassandra-2.1.7-bin.tar.gz
tar -xvzf apache-cassandra-2.1.7-bin.tar.gz
wget http://apache.osuosl.org/cassandra/2.0.16/apache-cassandra-2.0.16-bin.tar.gz
tar -xvzf apache-cassandra-2.0.16-bin.tar.gz
# Download and unpack the Spark 1.2.2 build for Hadoop 2.4.
wget http://apache.osuosl.org/spark/spark-1.2.2/spark-1.2.2-bin-hadoop2.4.tgz
tar -xvzf spark-1.2.2-bin-hadoop2.4.tgz
# Remove the existing gcloud / gsutil / bq / gcutil entry points
# (presumably to make way for the SDK install on the next line).
sudo rm /home/user1/gsutil/gsutil /usr/bin/gcloud /usr/bin/git-credential-gcloud.sh /usr/bin/bq /usr/bin/gsutil /usr/bin/gcutil
# Install the Google Cloud SDK by piping the installer script into bash.
curl https://sdk.cloud.google.com | bash
# Authenticate gcloud as a service account using the key file
# cloud-project1.json from the current directory.
gcloud auth activate-service-account --key-file cloud-project1.json
# Copy the Spark and Cassandra configuration files into the home directory of
# user1 on the instance.
# NOTE(review): the source paths start with dev/, so these two commands
# presumably run on the workstation, not on the instance - confirm.
# NOTE(review): the Cassandra path says 2.1.2 while the instance downloads
# 2.1.7 - verify the yaml matches the installed version.
gcloud compute --project "cloud-project1" copy-files dev/spark-1.2.2-bin-hadoop2.4/conf/spark-env.sh user1@cloud-instance1:~ --zone "us-central1-f"
gcloud compute --project "cloud-project1" copy-files dev/apache-cassandra-2.1.2/conf/cassandra.yaml user1@cloud-instance1:~ --zone "us-central1-f"
# Log back in to the instance.
gcloud compute --project "cloud-project1" ssh --zone "us-central1-f" "cloud-instance1"
# Start a standalone Spark master plus one worker, then start Cassandra.
# Both cd commands are anchored at the home directory: the original relative
# "cd apache-cassandra-2.1.7/" was issued from inside the Spark directory,
# where that path does not exist.
cd ~/spark-1.2.2-bin-hadoop2.4/
sbin/start-master.sh
# Follow the master log; interrupt with Ctrl-C once the master is up.
tail -f /home/user1/spark-1.2.2-bin-hadoop2.4/sbin/../logs/spark-user1-org.apache.spark.deploy.master.Master-1-cloud-instance1.out
# Run one worker in the background against the local master; its stdout is
# appended to worker1.log in the Spark directory.
bin/spark-class org.apache.spark.deploy.worker.Worker spark://127.0.0.1:7077 >> worker1.log &
cd ~/apache-cassandra-2.1.7/
bin/cassandra
# Follow the Cassandra system log; interrupt with Ctrl-C.
tail -f logs/system.log
cqlsh> DESCRIBE KEYSPACES;
system_traces system gtkeyspace
cqlsh> DESCRIBE KEYSPACE gtkeyspace ;
CREATE KEYSPACE gtkeyspace WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true;
cqlsh> use gtkeyspace ;
cqlsh:gtkeyspace> DESCRIBE tables;
# Prepare GDAL 1.10.1 with Java bindings and expose the native libraries to
# the JVM.
# Position-independent code is required for the shared JNI libraries.
export CXXFLAGS="$CXXFLAGS -fPIC"
sudo apt-get install build-essential autoconf automake libtool zlib1g-dev swig ant
sudo apt-get install libstdc++6-4.6-dev libstdc++5 libstdc++-4.8-dev libc++-dev
wget http://download.osgeo.org/gdal/1.10.1/gdal-1.10.1.tar.gz
tar -xvzf gdal-1.10.1.tar.gz
cd gdal-1.10.1/
./configure --prefix=/usr --with-netcdf --with-libtiff --with-sqlite3 --with-geotiff --with-python --with-curl --with-hdf5 \
  --with-perl --with-geos --with-spatialite --with-java=/usr/lib/jvm/java-7-openjdk-amd64 \
  --with-jvm-lib=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/server/libjvm.so --with-jvm-lib-add-rpath=yes

# Directory that the exports below and the later spark-submit commands use
# for the GDAL JNI libraries and gdal.jar.
gdal_java_dir=/home/user1/gdal-1.10.1/swig/java
mkdir -p "$gdal_java_dir"

# Link the system-installed GDAL libraries and jar into that directory.
# The original created an unrelated gdal-1.11.2 path and then ran "cd .",
# so the links ended up in the wrong place. The subshell keeps the caller's
# working directory unchanged; -f makes the step safe to re-run.
(
  cd "$gdal_java_dir" || exit 1
  for lib in \
    /usr/lib/ogdi/libgdal.so \
    /usr/lib/libgdal.so.1.17.1 \
    /usr/lib/jni/libgdalconstjni.so \
    /usr/lib/jni/libgdaljni.so \
    /usr/lib/libgdal.a \
    /usr/share/java/gdal.jar; do
    ln -sf "$lib" .
  done
)

export LD_LIBRARY_PATH="$gdal_java_dir"
export JAVA_OPTS="-Djava.library.path=$LD_LIBRARY_PATH"
export JAVA_GDAL_DIR="$LD_LIBRARY_PATH"
# Fetch and build the GeoTrellis fork and the benchmark project.
# Everything is anchored at the home directory because the later spark-submit
# commands reference /home/user1/geotrellis and /home/user1/benchmark; without
# the explicit "cd ~" the clones land in whatever directory the previous step
# left behind.
cd ~
git clone https://github.com/allixender/geotrellis.git
git clone https://github.com/allixender/benchmark.git
export SPARK_HOME=~/spark-1.2.2-bin-hadoop2.4

# GeoTrellis: check out the Cassandra branch (detached HEAD on the
# remote-tracking ref), run the project's build and publish scripts, then
# assemble the spark sub-project jar.
cd ~/geotrellis
git checkout origin/feature/cassandra-merge-master
./buildall.sh
./publish-local.sh
./sbt "project spark" assembly

# Benchmark: check out the Cassandra branch, compile the geotiff and spark
# sub-projects, and assemble the spark jar.
cd ~/benchmark
git checkout origin/feature/cassandra
./sbt "project geotiff" compile "project spark" compile assembly
cd ~
# Please instead use:
# - ./spark-submit with --driver-class-path to augment the driver classpath
# - spark.executor.extraClassPath to augment the executor classpath
04:07:34 SparkConf: Setting 'spark.executor.extraClassPath' to '/home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar' as a work-around.
04:07:34 SparkConf: Setting 'spark.driver.extraClassPath' to '/home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar' as a work-around.
# Copy the two tasmax NetCDF files from the gs://mybucket bucket into the
# current directory.
gsutil cp gs://mybucket/tasmax_day_CCSM4_rcp60_r1i1p1_20060101-20401231.nc .
gsutil cp gs://mybucket/tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc .
-rw-rw-r-- 1 user1 user1 2.7G Jun 30 10:43 tasmax_day_CCSM4_rcp60_r1i1p1_20060101-20401231.nc
-rw-rw-r-- 1 user1 user1 1.9G Jun 30 00:26 tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc
### INGEST NETCDF
### based on cassandra geotiff ingest, with a script from https://github.com/kyeah/gt-admin/blob/master/ingest-cassandra.sh
# Ingest the 2101-2124 NetCDF file into Cassandra keyspace gtkeyspace as
# layer/table tasmaxrcp60ccsm4, running Spark in local mode.
# --driver-library-path points at the directory holding the GDAL JNI
# libraries; --driver-class-path adds that directory and gdal.jar to the
# driver classpath. The original omitted the --driver-class-path flag, so
# spark-submit would have taken the classpath string as the application jar
# and passed every following option to it as an application argument.
spark-1.2.2-bin-hadoop2.4/bin/spark-submit --master local --conf spark.shuffle.consolidateFiles=true \
  --class climate.cmd.CassandraIngestCommand \
  --jars /home/user1/geotrellis/spark/target/scala-2.10/geotrellis-spark-assembly-0.10.0-SNAPSHOT.jar \
  --conf spark.cassandra.connection.host=127.0.0.1 --conf spark.cassandra.connection.rpc.port=9160 \
  --conf spark.cassandra.connection.native.port=9042 \
  --driver-library-path /home/user1/gdal-1.10.1/swig/java \
  --driver-class-path /home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar \
  --driver-memory 12G --executor-memory 8G \
  /home/user1/benchmark/spark/target/scala-2.10/benchmark-spark-assembly-0.1.0.jar --host 127.0.0.1 --keyspace gtkeyspace --crs EPSG:4326 \
  --pyramid true --clobber true --input file:/home/user1/tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc \
  --layerName tasmaxrcp60ccsm4 --table tasmaxrcp60ccsm4
# from empty cassandra (the netcdf was 1.8 GB)
user1@cloud-instance1:~$ du -sh apache-cassandra-2.1.7/data/*
2.5G apache-cassandra-2.1.7/data/commitlog
5.9G apache-cassandra-2.1.7/data/data
4.0K apache-cassandra-2.1.7/data/saved_caches
# benchmark
# NOTE(author): the benchmark evidently needs two layers, but only one is
# passed via --layers below - TODO confirm the expected --layers format.
# Run climate.cmd.CasBenchmark in local mode against keyspace gtkeyspace.
# --driver-class-path was missing in the original, which made spark-submit
# take the classpath string as the application jar; it is now given
# explicitly, ahead of the real application jar.
spark-1.2.2-bin-hadoop2.4/bin/spark-submit --master local --class climate.cmd.CasBenchmark \
  --jars /home/user1/geotrellis/spark/target/scala-2.10/geotrellis-spark-assembly-0.10.0-SNAPSHOT.jar \
  --conf spark.cassandra.connection.host=127.0.0.1 --conf spark.cassandra.connection.rpc.port=9160 \
  --conf spark.cassandra.connection.native.port=9042 \
  --driver-library-path /home/user1/gdal-1.10.1/swig/java \
  --driver-class-path /home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar \
  /home/user1/benchmark/spark/target/scala-2.10/benchmark-spark-assembly-0.1.0.jar \
  --host 127.0.0.1 --keyspace gtkeyspace --layers "tasmaxrcp60ccsm4:3"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment