Skip to content

Instantly share code, notes, and snippets.

@ianblenke
Last active August 29, 2015 14:10
Show Gist options
  • Save ianblenke/a4bcee000533bb988c25 to your computer and use it in GitHub Desktop.
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
object Lab1a {
  /**
   * Lab exercise: basic RDD operations (parallelize, filter, cache) and the
   * classic word-count example, followed by simple line-count/filter actions.
   *
   * NOTE(review): the original gist contained duplicate `val f`/`val wc`
   * definitions and a block of PySpark (Python) syntax pasted into this Scala
   * object — both were compile errors. The PySpark fragment has been
   * translated to equivalent Scala below.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("lab1a")
    val sc = new SparkContext(conf)

    // Create an RDD from a local range of integers.
    val data = 1 to 10000
    val distData = sc.parallelize(data)

    // Use a filter to select values less than 10 and print them on the driver.
    distData.filter(_ < 10).collect().foreach(println)

    // Cache the RDD so the next action reuses the in-memory copy
    // instead of recomputing the lineage.
    distData.cache()
    distData.filter(_ < 10).collect().foreach(println)

    // Simple Spark app: word count over README.md.
    val f = sc.textFile("README.md")
    val wc = f.flatMap(l => l.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
    wc.collect().foreach(println)

    // Check the app name configured above.
    println(sc.appName)

    // Quick-start style actions (translated from the PySpark original).
    val textFile = sc.textFile("data/README.md")
    // Number of items in this RDD.
    println(textFile.count())
    // First item in this RDD.
    println(textFile.first())
    val linesWithSpark = textFile.filter(line => line.contains("Spark"))
    println(linesWithSpark.first())
    // How many lines contain "Spark"?
    println(textFile.filter(line => line.contains("Spark")).count())

    // Cleanly shut down the SparkContext (the original called exit()).
    sc.stop()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment