gcloud compute --project "cloud-project1" ssh --zone "us-central1-f" "cloud-instance1"
sudo apt-get update
sudo apt-get upgrade
sudo apt-get install openjdk-7-jdk git
sudo apt-get install libgdal-java libgdal-dev gdal-bin netcdf-bin libnetcdf-dev
wget http://mirror.cc.columbia.edu/pub/software/apache/cassandra/2.1.7/apache-cassandra-2.1.7-bin.tar.gz
tar -xvzf apache-cassandra-2.1.7-bin.tar.gz
wget http://apache.osuosl.org/cassandra/2.0.16/apache-cassandra-2.0.16-bin.tar.gz
tar -xvzf apache-cassandra-2.0.16-bin.tar.gz
wget http://apache.osuosl.org/spark/spark-1.2.2/spark-1.2.2-bin-hadoop2.4.tgz
tar -xvzf spark-1.2.2-bin-hadoop2.4.tgz
sudo rm /home/user1/gsutil/gsutil /usr/bin/gcloud /usr/bin/git-credential-gcloud.sh /usr/bin/bq /usr/bin/gsutil /usr/bin/gcutil
curl https://sdk.cloud.google.com | bash
gcloud auth activate-service-account --key-file cloud-project1.json
gcloud compute --project "cloud-project1" copy-files dev/spark-1.2.2-bin-hadoop2.4/conf/spark-env.sh user1@cloud-instance1:~ --zone "us-central1-f"
gcloud compute --project "cloud-project1" copy-files dev/apache-cassandra-2.1.2/conf/cassandra.yaml user1@cloud-instance1:~ --zone "us-central1-f"
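# The copied spark-env.sh and cassandra.yaml are not included in this gist.
# A minimal single-node sketch (hypothetical values, adjust to the instance):
#   spark-env.sh    ->  SPARK_MASTER_IP=127.0.0.1 ; SPARK_WORKER_MEMORY=8g
#   cassandra.yaml  ->  listen_address: 127.0.0.1 ; rpc_address: 127.0.0.1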
gcloud compute --project "cloud-project1" ssh --zone "us-central1-f" "cloud-instance1"
cd spark-1.2.2-bin-hadoop2.4/
sbin/start-master.sh
tail -f /home/user1/spark-1.2.2-bin-hadoop2.4/sbin/../logs/spark-user1-org.apache.spark.deploy.master.Master-1-cloud-instance1.out
bin/spark-class org.apache.spark.deploy.worker.Worker spark://127.0.0.1:7077 >> worker1.log &
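# Sanity check (assuming the default standalone web UI port 8080): the master UI
# should list the freshly started worker and the spark://127.0.0.1:7077 master URL.
curl -s http://localhost:8080 | grep -i worker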
cd apache-cassandra-2.1.7/
bin/cassandra
tail -f logs/system.log
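# Once startup has settled, the single node should report UN (Up/Normal):
bin/nodetool status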
cqlsh> DESCRIBE KEYSPACES;
system_traces  system  gtkeyspace
cqlsh> DESCRIBE KEYSPACE gtkeyspace ;
CREATE KEYSPACE gtkeyspace WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'} AND durable_writes = true;
cqlsh> use gtkeyspace ;
cqlsh:gtkeyspace> DESCRIBE tables;
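# If the keyspace does not exist yet, it can be created up front from the shell
# (sketch, matching the keyspace definition shown above):
bin/cqlsh -e "CREATE KEYSPACE IF NOT EXISTS gtkeyspace WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'};"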
export CXXFLAGS="$CXXFLAGS -fPIC"
sudo apt-get install build-essential autoconf automake libtool zlib1g-dev swig ant
sudo apt-get install libstdc++6-4.6-dev libstdc++5 libstdc++-4.8-dev libc++-dev
wget http://download.osgeo.org/gdal/1.10.1/gdal-1.10.1.tar.gz
tar -xvzf gdal-1.10.1.tar.gz
cd gdal-1.10.1/
./configure --prefix=/usr --with-netcdf --with-libtiff --with-sqlite3 --with-geotiff --with-python --with-curl --with-hdf5 \
  --with-perl --with-geos --with-spatialite --with-java=/usr/lib/jvm/java-7-openjdk-amd64 \
  --with-jvm-lib=/usr/lib/jvm/java-7-openjdk-amd64/jre/lib/amd64/server/libjvm.so --with-jvm-lib-add-rpath=yes
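# The gist skips the build itself; a sketch of the usual GDAL 1.x sequence
# (the Java bindings live under swig/java and need ant, installed above):
make -j"$(nproc)"
sudo make install
cd swig/java && make && cd ../..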
mkdir -p /home/user1/dev/build/gdal-java/gdal-1.11.2/swig/java
cd /home/user1/dev/build/gdal-java/gdal-1.11.2/swig/java
for i in /usr/lib/ogdi/libgdal.so /usr/lib/libgdal.so.1.17.1 /usr/lib/jni/libgdalconstjni.so /usr/lib/jni/libgdaljni.so /usr/lib/libgdal.a /usr/share/java/gdal.jar; do ln -s $i . ; done
cd -
export LD_LIBRARY_PATH=/home/user1/gdal-1.10.1/swig/java
export JAVA_OPTS="-Djava.library.path=$LD_LIBRARY_PATH"
export JAVA_GDAL_DIR=$LD_LIBRARY_PATH
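# Quick check that the exports point at a usable GDAL (assuming the source-built
# bindings in ~/gdal-1.10.1/swig/java):
ls -l "$JAVA_GDAL_DIR"/libgdaljni.so "$JAVA_GDAL_DIR"/gdal.jar
gdalinfo --version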
git clone https://github.com/allixender/geotrellis.git
git clone https://github.com/allixender/benchmark.git
export SPARK_HOME=~/spark-1.2.2-bin-hadoop2.4
cd geotrellis
git checkout origin/feature/cassandra-merge-master
./buildall.sh
./publish-local.sh
./sbt "project spark" assembly
cd ..
cd benchmark
git checkout origin/feature/cassandra
./sbt "project geotiff" compile "project spark" compile assembly
cd ..
# spark-submit warned that SPARK_CLASSPATH is deprecated:
# Please instead use:
#  - ./spark-submit with --driver-class-path to augment the driver classpath
#  - spark.executor.extraClassPath to augment the executor classpath
04:07:34 SparkConf: Setting 'spark.executor.extraClassPath' to '/home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar' as a work-around.
04:07:34 SparkConf: Setting 'spark.driver.extraClassPath' to '/home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar' as a work-around.
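# The recommended fix, per the warning above: drop SPARK_CLASSPATH and pass the GDAL
# classpath to spark-submit directly, e.g.
#   --driver-class-path /home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar \
#   --conf spark.executor.extraClassPath=/home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar \
# (the ingest and benchmark invocations below follow this pattern)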
gsutil cp gs://mybucket/tasmax_day_CCSM4_rcp60_r1i1p1_20060101-20401231.nc .
gsutil cp gs://mybucket/tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc .
-rw-rw-r-- 1 user1 user1 2.7G Jun 30 10:43 tasmax_day_CCSM4_rcp60_r1i1p1_20060101-20401231.nc
-rw-rw-r-- 1 user1 user1 1.9G Jun 30 00:26 tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc
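# netcdf-bin was installed above, so the file headers can be inspected before the ingest:
ncdump -h tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc | head -n 40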
### INGEST NETCDF
### based on the Cassandra GeoTiff ingest, with a script adapted from https://github.com/kyeah/gt-admin/blob/master/ingest-cassandra.sh
spark-1.2.2-bin-hadoop2.4/bin/spark-submit --master local --conf spark.shuffle.consolidateFiles=true \
  --class climate.cmd.CassandraIngestCommand \
  --jars /home/user1/geotrellis/spark/target/scala-2.10/geotrellis-spark-assembly-0.10.0-SNAPSHOT.jar \
  --conf spark.cassandra.connection.host=127.0.0.1 --conf spark.cassandra.connection.rpc.port=9160 \
  --conf spark.cassandra.connection.native.port=9042 \
  --driver-library-path /home/user1/gdal-1.10.1/swig/java \
  --driver-class-path /home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar \
  --driver-memory 12G --executor-memory 8G \
  /home/user1/benchmark/spark/target/scala-2.10/benchmark-spark-assembly-0.1.0.jar --host 127.0.0.1 --keyspace gtkeyspace --crs EPSG:4326 \
  --pyramid true --clobber true --input file:/home/user1/tasmax_day_CCSM4_rcp60_r1i1p1_21010101-21241231.nc \
  --layerName tasmaxrcp60ccsm4 --table tasmaxrcp60ccsm4
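# Verify the ingest from cqlsh (the exact table layout depends on the GeoTrellis
# Cassandra backend, so just list what was created in gtkeyspace):
apache-cassandra-2.1.7/bin/cqlsh -k gtkeyspace -e "DESCRIBE TABLES;"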
# Cassandra disk usage after ingesting into an empty database (the input NetCDF was ~1.9 GB, see the listing above)
user1@cloud-instance1:~$ du -sh apache-cassandra-2.1.7/data/*
2.5G	apache-cassandra-2.1.7/data/commitlog
5.9G	apache-cassandra-2.1.7/data/data
4.0K	apache-cassandra-2.1.7/data/saved_caches
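# Optional: flush memtables and look at per-table stats; commitlog segments get
# recycled after a flush, so on-disk usage can shrink.
apache-cassandra-2.1.7/bin/nodetool flush gtkeyspace
apache-cassandra-2.1.7/bin/nodetool cfstats gtkeyspace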
# benchmark
# note: the benchmark expects two layers, but only one was ingested so far
spark-1.2.2-bin-hadoop2.4/bin/spark-submit --master local --class climate.cmd.CasBenchmark \
  --jars /home/user1/geotrellis/spark/target/scala-2.10/geotrellis-spark-assembly-0.10.0-SNAPSHOT.jar \
  --conf spark.cassandra.connection.host=127.0.0.1 --conf spark.cassandra.connection.rpc.port=9160 \
  --conf spark.cassandra.connection.native.port=9042 \
  --driver-library-path /home/user1/gdal-1.10.1/swig/java \
  --driver-class-path /home/user1/gdal-1.10.1/swig/java:/home/user1/gdal-1.10.1/swig/java/gdal.jar \
  /home/user1/benchmark/spark/target/scala-2.10/benchmark-spark-assembly-0.1.0.jar \
  --host 127.0.0.1 --keyspace gtkeyspace --layers "tasmaxrcp60ccsm4:3"
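# To satisfy the two-layer benchmark, the second NetCDF downloaded above could be
# ingested the same way and both layers passed to the benchmark (sketch only; the
# second layer/table name and the multi-layer --layers syntax are assumptions):
#   ... --input file:/home/user1/tasmax_day_CCSM4_rcp60_r1i1p1_20060101-20401231.nc \
#       --layerName tasmaxrcp60ccsm4b --table tasmaxrcp60ccsm4b
#   ... --layers "tasmaxrcp60ccsm4:3,tasmaxrcp60ccsm4b:3"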