# Install CDH 5 (Cloudera's Hadoop distribution) and Spark on RHEL/CentOS 6.
# NOTE(review): commands are interactive (no -y flags) — assumes a human at
# the console; add -y before using this in unattended provisioning.

# Fetch Cloudera's "one-click" repo RPM, which registers the CDH 5 yum repository.
wget http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm
# --nogpgcheck: install the (unsigned) repo RPM just downloaded above.
sudo yum --nogpgcheck localinstall cloudera-cdh-5-0.x86_64.rpm
# Drop cached repo metadata so the newly added CDH repo is picked up.
sudo yum clean all
# HDFS NameNode daemon package.
sudo yum install hadoop-hdfs-namenode
# R and git, used by the SparkR workflow further down this file.
sudo yum install R git
# Spark core plus the standalone master daemon and Python (PySpark) support.
sudo yum install spark-core spark-master spark-python
# Return to $HOME.
cd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Build /tmp/internal: one "<hostname> <internal-ip>" line per slave.
#
# For every host listed in /root/mesos-ec2/slaves, ssh in and print the remote
# hostname followed by the 10.x address nslookup reports for it; awk then keeps
# column 1 (hostname) and column 3 (the IP after nslookup's "Address:" label).
#
# Fixes vs. original: removed " | |" scrape artifacts that broke the syntax,
# replaced `for i in $(cat …)` word-splitting with a read loop, quoted the
# host variable, and used $( ) instead of backticks in the remote command.
while IFS= read -r slave; do
  # Single quotes: hostname/nslookup run on the remote side, not locally.
  ssh "$slave" 'echo -n "$(hostname) "; nslookup "$(hostname)" | grep Address | grep 10'
done < /root/mesos-ec2/slaves | awk '{print $1" "$3}' > /tmp/internal
for i in `cat /root/mesos-ec2/slaves`; | |
do | |
ssh $i "echo -n $i' '; nslookup $i | grep Address | grep 10"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[info] ReplSuite:
[info] - simple foreach with accumulator
[info] - external vars
[warn] /home/shivaram/projects/spark/core/src/test/scala/spark/FileServerSuite.scala:49: method toURL in class File is deprecated: see corresponding Javadoc for more information.
[warn] val partitionSumsWithSplit = nums.mapPartitionsWithSplit {
[warn] ^
[info] - external classes
[info] - external functions
[warn] Note: /home/shivaram/projects/spark/streaming/src/test/java/spark/streaming/JavaAPISuite.java uses unchecked or unsafe operations.
[warn] Note: Recompile with -Xlint:unchecked for details.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.apache.spark.scheduler | |
import scala.util.Random | |
import org.apache.spark.storage.BlockManagerId | |
import org.apache.spark.util.collection.{Utils => CollectionUtils} | |
object TopkBench extends testing.Benchmark { | |
val toTake = sys.props("toTake").toInt | |
val numHosts = 1000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Shows Spark PR diff using cdiff https://github.com/ymattw/cdiff
# Usage: spark-diff <pr_num>
#
# Fixes vs. original: removed " | |" scrape artifacts, moved "then" onto the
# if line, and routed diagnostics to stderr.

# Require exactly one argument: the pull-request number.
if [[ $# -ne 1 ]]; then
  echo "Usage spark-diff <pr_num>" >&2
  exit 1
fi

# Bail out early if the cdiff viewer is not on PATH.
command -v cdiff >/dev/null 2>&1 || { echo >&2 "Install cdiff using pip."; exit 1; }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Resolve the directory containing this script (the usual "cd to dirname,
# then pwd" idiom) so the script works when invoked via a relative path.
# Fixes vs. original: removed " | |" scrape artifacts, replaced backticks
# with $( ), quoted $FWDIR, and added -r to read.
FWDIR="$(cd "$(dirname "$0")"; pwd)"
pushd "$FWDIR" >/dev/null

# Prompt for the JIRA password without echoing it to the terminal.
# -s: silent (no echo); -r: do not interpret backslashes in the input.
echo -n "JIRA Password:"
read -r -s password
# read -s suppresses the user's newline; emit one so later output is clean.
echo ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SparkR + spark-csv quickstart notes (Spark 1.4).
# Fixes vs. original: removed " | |" scrape artifacts only.
#
# Download Spark 1.4 from http://spark.apache.org/downloads.html
#
# Download the nyc flights dataset as a CSV from https://s3-us-west-2.amazonaws.com/sparkr-data/nycflights13.csv
# Launch SparkR using
# ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
# The SparkSQL context should already be created for you as sqlContext
sqlContext
# Java ref type org.apache.spark.sql.SQLContext id 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach SparkR from a local Spark 1.4.1 install and run a tiny DataFrame demo.
# Fixes vs. original: removed " | |" scrape artifacts; added comments.
Sys.setenv(SPARK_HOME="/Users/shivaram/spark-1.4.1")
# Put Spark's bundled R library directory ahead of the default library paths
# so library(SparkR) picks up the version shipped with this Spark install.
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
library(SparkR)
# Local (single-machine) Spark context, and a SQL context on top of it.
sc <- sparkR.init(master="local")
sqlContext <- sparkRSQL.init(sc)
# Distribute R's built-in 'faithful' data set as a Spark DataFrame.
df <- createDataFrame(sqlContext, faithful)
# Select one column
head(select(df, df$eruptions))
OlderNewer