Skip to content

Instantly share code, notes, and snippets.

// Map of orders keyed by order id. A plain Map(...) is the immutable implementation
// unless scala.collection.mutable.Map has been imported.
val m = Map(
  1 -> Order(1, "2017-01-01", 100, "COMPLETE"),
  2 -> Order(2, "2017-01-01", 20, "CLOSED")
)
// m is a val holding an immutable Map, so +=, ++=, -= and --= cannot be applied to it.
// `+` leaves m untouched and evaluates to a new Map that also contains the added entry.
// The arrow form and the explicit tuple form below build the same key/value pair.
m + (3 -> Order(3, "2017-01-01", 301, "PENDING"))
m + ((3, Order(3, "2017-01-01", 301, "PENDING")))
// A larger Map of orders, bound to `os`.
// NOTE(review): the original comment announced a mutable Map named m, but a plain
// Map(...) is immutable unless scala.collection.mutable.Map was imported earlier — confirm.
val os = Map(
  1 -> Order(1, "2017-01-01", 100, "COMPLETE"),
  2 -> Order(2, "2017-01-01", 20, "CLOSED"),
  3 -> Order(3, "2017-01-01", 301, "PENDING"),
  4 -> Order(4, "2017-01-01", 202, "CLOSED"),
  5 -> Order(5, "2017-01-01", 3013, "COMPLETE")
)
// Direct lookup of key 1 in m; a missing key raises NoSuchElementException.
m.apply(1)
// get yields an Option; the trailing .get unwraps it and also throws when the key is absent.
m.get(1).get
val t = List((1, Set(
Order(1, "2017-01-01", 100, "COMPLETE"),
Order(2, "2017-01-01", 20, "CLOSED"),
Order(3, "2017-01-01", 301, "PENDING"),
Order(4, "2017-01-01", 202, "CLOSED"),
Order(5, "2017-01-01", 3013, "COMPLETE"),
Order(6, "2017-01-01", 203, "PENDING"),
Order(7, "2017-01-01", 3014, "COMPLETE"),
Order(8, "2017-01-01", 20, "NEW"),
Order(9, "2017-01-01", 301, "PENDING"),
// sbt settings (presumably the contents of a build.sbt): project name, project version,
// and the Scala version the project is built with.
name := "jdbcdemo"
version := "1.0"
scalaVersion := "2.11.8"
/** Minimal command-line greeter. */
object hw {

  /**
   * Prints "Hello <name>" to standard output.
   *
   * @param args command-line arguments; the first one is used as the name.
   *             When no argument is supplied the greeting falls back to "World"
   *             instead of failing with ArrayIndexOutOfBoundsException.
   */
  def main(args: Array[String]): Unit = {
    // headOption avoids the unchecked args(0) access on an empty array.
    val name = args.headOption.getOrElse("World")
    println(s"Hello $name")
  }
}
// The integers 1 through 1,000,000 as a local Range of Int.
val data = 1 to 1000000
// Distribute the range as an RDD. `sc` is not defined in this snippet —
// presumably the SparkContext supplied by spark-shell.
val dataRDD = sc.parallelize(data)
// Sum all elements. The total (500,000,500,000) exceeds Int.MaxValue, so each element
// is widened to Long first; adding the raw Ints would silently wrap around.
dataRDD.map(_.toLong).reduce(_ + _)
import org.apache.spark.{SparkConf,SparkContext}
// Spark configuration: application name "Spark Demo", master "local".
val conf = new SparkConf()
  .setMaster("local")
  .setAppName("Spark Demo")
// Context created from the configuration above.
val sc = new SparkContext(conf)
// Base directory of the retail_db data set. The original line offered two alternatives
// joined by "or", which is not valid Scala; keep exactly one definition active.
val path = "/public/retail_db"
// val path = "/Users/itversity/Research/data/retail_db"

// Read the orders data as an RDD of text lines.
val rdd = sc.textFile(path + "/orders")
// First line only.
rdd.first
// First ten lines as a local array.
rdd.take(10)
// Every line, brought back to the driver as a local array.
rdd.collect
// Print the first ten lines.
rdd.take(10).foreach(println)
// Print the first two comma-separated fields of the first ten lines, tab-separated.
// Each line is split once instead of once per field.
rdd.take(10).foreach { line =>
  val fields = line.split(",")
  println(fields(0) + "\t" + fields(1))
}
// Base directory of the retail_db data set. The original line offered two alternatives
// joined by "or", which is not valid Scala; keep exactly one definition active.
val path = "/public/retail_db"
// val path = "/Users/itversity/Research/data/retail_db"

// Read the orders data as an RDD of text lines.
val rdd = sc.textFile(path + "/orders")
// Keep the record whose third comma-separated field, parsed as Int, is smaller;
// when the two values are equal the right-hand record (ele) is kept.
rdd.reduce((agg, ele) => {
  if (agg.split(",")(2).toInt < ele.split(",")(2).toInt) agg else ele
})
// The two largest lines under the default String ordering.
rdd.top(2)
// The five records with the largest third field: the Int ordering is reversed, so
// takeOrdered (which returns the smallest elements) yields the largest values first.
rdd.takeOrdered(5)(Ordering[Int].reverse.on(x => x.split(",")(2).toInt)).foreach(println)
// Two alternative ways to load the orders data set as an RDD of text lines;
// use the one that matches your environment.
// NOTE(review): entered line by line in a REPL, the second definition replaces the first;
// in a compiled file the duplicate `orders` would not compile — confirm intended usage.
val orders = sc.textFile("/public/retail_db/orders") // On the lab, accessing HDFS
val orders = sc.textFile("/Users/itversity/Research/data/retail_db/orders") // Accessing locally on the PC
// Change to a valid path as per your preference. Make sure the orders directory exists at that path (locally or on HDFS).
// Print the first ten lines as a quick check of what was read.
orders.take(10).foreach(println)
// Records whose fourth comma-separated field is exactly "COMPLETE"
// (presumably the order status — confirm against the data layout).
val completedOrders = orders.filter { line =>
  val fourthField = line.split(",")(3)
  fourthField == "COMPLETE"
}
// Records whose fourth field contains "PENDING" or equals "PROCESSING", and whose
// second field contains "2013-08" (presumably status and order date — confirm).
val pendingOrders = orders.filter { line =>
  val fields = line.split(",")
  val status = fields(3)
  val statusMatches = status.contains("PENDING") || status == "PROCESSING"
  // The second field is only inspected when the status already matched.
  statusMatches && fields(1).contains("2013-08")
}