Skip to content

Instantly share code, notes, and snippets.

@ianblenke
Last active August 29, 2015 14:10
Show Gist options
  • Save ianblenke/a4bcee000533bb988c25 to your computer and use it in GitHub Desktop.
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
object Lab1a {
  /**
   * Lab exercise: basic RDD operations (parallelize, filter, cache) and the
   * classic word-count example, followed by simple line-count/filter actions.
   *
   * NOTE(review): the original gist contained duplicate `val f`/`val wc`
   * definitions and a block of PySpark (Python) syntax pasted into this Scala
   * object — both were compile errors. The PySpark fragment has been
   * translated to equivalent Scala below.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("lab1a")
    val sc = new SparkContext(conf)

    // Create an RDD from a local range of integers.
    val data = 1 to 10000
    val distData = sc.parallelize(data)

    // Use a filter to select values less than 10 and print them on the driver.
    distData.filter(_ < 10).collect().foreach(println)

    // Cache the RDD so the next action reuses the in-memory copy
    // instead of recomputing the lineage.
    distData.cache()
    distData.filter(_ < 10).collect().foreach(println)

    // Simple Spark app: word count over README.md.
    val f = sc.textFile("README.md")
    val wc = f.flatMap(l => l.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
    wc.collect().foreach(println)

    // Check the app name configured above.
    println(sc.appName)

    // Quick-start style actions (translated from the PySpark original).
    val textFile = sc.textFile("data/README.md")
    // Number of items in this RDD.
    println(textFile.count())
    // First item in this RDD.
    println(textFile.first())
    val linesWithSpark = textFile.filter(line => line.contains("Spark"))
    println(linesWithSpark.first())
    // How many lines contain "Spark"?
    println(textFile.filter(line => line.contains("Spark")).count())

    // Cleanly shut down the SparkContext (the original called exit()).
    sc.stop()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment