I hereby claim:
- I am shivaram on github.
- I am shivaram (https://keybase.io/shivaram) on keybase.
- I have a public key ASAcRXjNGcE3Wivd9PLqf-4EpoDcjMUuhxSEANR88silxQo
To claim this, I am signing this object:
# If you are using Spark 1.4.0, launch SparkR with the command
#
#   ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
#
# as the `sparkPackages=` flag was only added in Spark 1.4.1.

# This will work in Spark 1.4.1; spark_link is your Spark master URL.
sc <- sparkR.init(spark_link, sparkPackages = "com.databricks:spark-csv_2.10:1.0.3")
sqlContext <- sparkRSQL.init(sc)
flights <- read.df(sqlContext, "s3n://sparkr-data/nycflights13.csv", "com.databricks.spark.csv", header = "true")
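# As a quick sanity check that the load worked, ordinary DataFrame
# operations can be run on `flights` (a sketch; the `dep_delay` column
# assumes the standard nycflights13 schema):
head(select(flights, flights$dep_delay))
count(flights)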
# Point R at a local Spark installation and load the bundled SparkR package
Sys.setenv(SPARK_HOME="/Users/shivaram/spark-1.4.1")
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
library(SparkR)

sc <- sparkR.init(master="local")
sqlContext <- sparkRSQL.init(sc)

# Convert a local R data.frame into a Spark DataFrame
df <- createDataFrame(sqlContext, faithful)

# Select one column
head(select(df, df$eruptions))
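# Filtering works the same way as select (a short sketch continuing the
# quick start, using the `waiting` column of `faithful`):
head(filter(df, df$waiting < 50))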
# Download Spark 1.4 from http://spark.apache.org/downloads.html
#
# Download the nyc flights dataset as a CSV from https://s3-us-west-2.amazonaws.com/sparkr-data/nycflights13.csv
#
# Launch SparkR using
#
#   ./bin/sparkR --packages com.databricks:spark-csv_2.10:1.0.3
#
# The SparkSQL context should already be created for you as sqlContext
sqlContext
# Java ref type org.apache.spark.sql.SQLContext id 1
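# From here the downloaded CSV can be loaded through the spark-csv source
# (a sketch; the local path assumes you saved the file to the current directory):
flights <- read.df(sqlContext, "./nycflights13.csv", "com.databricks.spark.csv", header = "true")
head(flights)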
#!/bin/bash
# Prompt for a JIRA password without echoing it to the terminal
FWDIR="$(cd "$(dirname "$0")"; pwd)"
pushd "$FWDIR" > /dev/null

echo -n "JIRA Password: "
read -s password
echo ""
#!/bin/bash
# Shows Spark PR diff using cdiff https://github.com/ymattw/cdiff
if [[ $# -ne 1 ]]; then
  echo "Usage: spark-diff <pr_num>"
  exit 1
fi
command -v cdiff > /dev/null 2>&1 || { echo >&2 "Install cdiff using pip."; exit 1; }

# Fetch the PR's diff and page it through cdiff (assumes GitHub's
# pull/<num>.diff endpoint; adjust if your setup differs)
curl -sL "https://github.com/apache/spark/pull/$1.diff" | cdiff
package org.apache.spark.scheduler

import scala.util.Random

import org.apache.spark.storage.BlockManagerId
import org.apache.spark.util.collection.{Utils => CollectionUtils}

object TopkBench extends testing.Benchmark {
  val toTake = sys.props("toTake").toInt
  val numHosts = 1000
  // Hypothetical completion: take the `toTake` smallest of `numHosts` random keys
  def run() {
    val keys = Seq.fill(numHosts)(Random.nextInt())
    CollectionUtils.takeOrdered(keys.iterator, toTake).toSeq
  }
}
# Install CDH 5 (HDFS, Spark) on a RHEL/CentOS 6 node via Cloudera's one-click repo
wget http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm
sudo yum --nogpgcheck localinstall cloudera-cdh-5-0.x86_64.rpm
sudo yum clean all
sudo yum install hadoop-hdfs-namenode
sudo yum install R git
sudo yum install spark-core spark-master spark-python
cd
[info] ReplSuite:
[info] - simple foreach with accumulator
[info] - external vars
[warn] /home/shivaram/projects/spark/core/src/test/scala/spark/FileServerSuite.scala:49: method toURL in class File is deprecated: see corresponding Javadoc for more information.
[warn] val partitionSumsWithSplit = nums.mapPartitionsWithSplit {
[warn]                              ^
[info] - external classes
[info] - external functions
[warn] Note: /home/shivaram/projects/spark/streaming/src/test/java/spark/streaming/JavaAPISuite.java uses unchecked or unsafe operations.
[warn] Note: Recompile with -Xlint:unchecked for details.