Skip to content

Instantly share code, notes, and snippets.

// Base directory of the retail_db data set. The original line joined two
// alternative definitions with "or", which is not valid Scala; keep exactly one
// definition active and leave the other location as a commented-out alternative.
val path = "/public/retail_db"
// val path = "/Users/itversity/Research/data/retail_db"

// (field 0 as Int, field 1) for every orders record whose field 1 contains
// "2013-12" (presumably the order date - confirm against the data set schema).
// Each record is split once and the resulting fields are reused.
val orders201312 = sc.textFile(path + "/orders").
  map(order => order.split(",")).
  filter(fields => fields(1).contains("2013-12")).
  map(fields => (fields(0).toInt, fields(1)))

// (field 1 as Int, field 2 as Int) for every order_items record
// (presumably order id and product id - confirm against the data set schema).
val orderItems = sc.textFile(path + "/order_items").
  map(rec => {
    val fields = rec.split(",")
    (fields(1).toInt, fields(2).toInt)
  })
val distinctProducts201312 = orders201312.
// Base directory of the retail_db data set. The original line joined two
// alternative definitions with "or", which is not valid Scala; keep exactly one
// definition active and leave the other location as a commented-out alternative.
val path = "/public/retail_db"
// val path = "/Users/itversity/Research/data/retail_db"

// Pair each orders record with its first comma-separated field parsed as Int.
val orders = sc.textFile(path + "/orders").
  map(rec => (rec.split(",")(0).toInt, rec))

// Pair each order_items record with its second comma-separated field parsed as Int.
val orderItems = sc.textFile(path + "/order_items").
  map(rec => (rec.split(",")(1).toInt, rec))

// Join the two pair RDDs on their Int keys and print a sample of ten results.
val ordersJoin = orders.join(orderItems)
ordersJoin.take(10).foreach(println)
// Base directory of the retail_db data set. The original line joined two
// alternative definitions with "or", which is not valid Scala; keep exactly one
// definition active and leave the other location as a commented-out alternative.
val path = "/Users/itversity/Research/data/retail_db"
// val path = "/public/retail_db"

// (field 1 as Int, field 4 as Float) for every order_items record; the record
// is split once and both fields are read from the same array.
val orderItems = sc.textFile(path + "/order_items").
  map(orderItem => {
    val fields = orderItem.split(",")
    (fields(1).toInt, fields(4).toFloat)
  })

// Compute revenue for each order: sum the Float values per key and print the
// first 100 (key, total) pairs.
orderItems.
  reduceByKey(_ + _).
  take(100).
  foreach(println)
// Base directory of the retail_db data set. The original line joined two
// alternative definitions with "or", which is not valid Scala; keep exactly one
// definition active and leave the other location as a commented-out alternative.
val path = "/Users/itversity/Research/data/retail_db"
// val path = "/public/retail_db"

// Load the orders file found under the base directory.
val orders = sc.textFile(path + "/orders")
// orders sorted by status
orders.
map(order => {
val o = order.split(",")
(o(3), order)
}).
import scala.io.Source
// Location of the tab-separated election results file.
val fileName = "/Users/itversity/Research/data/elections/ls2014.tsv"
// Read the file line by line; the declared type makes the result explicit.
val results: Iterator[String] = Source.fromFile(fileName).getLines()
import scala.io.Source
// Location of the tab-separated election results file.
val fileName = "/Users/itversity/Research/data/elections/ls2014.tsv"
// Read the file line by line.
val results: Iterator[String] = Source.fromFile(fileName).getLines()
// Keep only the rows whose third tab-separated field is exactly "None of the Above".
val notas = results.filter { rec =>
  val fields = rec.split("\t")
  fields(2) == "None of the Above"
}
// Alternative using partition and keeping the matching half:
// val notas = results.partition(rec => rec.split("\t")(2) == "None of the Above")._1
val notaByState = notas.
map(rec => (rec.split("\t")(0), rec.split("\t")(10).toInt)).
toList.
// Base directory of the retail_db data set.
val path = "/public/retail_db"
// Load the products file found under the base directory.
val products = sc.textFile(s"$path/products")
val minPricedProductsByCategory = products.
filter(product => product.split(",")(4) != "").
map(product => {
val p = product.split(",")
(p(1).toInt, product)
}).
reduceByKey((agg, product) => {
// Base directory of the retail_db data set. The original line joined two
// alternative definitions with "or", which is not valid Scala; keep exactly one
// definition active and leave the other location as a commented-out alternative.
val path = "/Users/itversity/Research/data/retail_db"
// val path = "/public/retail_db"

// Load the products file found under the base directory.
val products = sc.textFile(path + "/products")
val productsGroupByCategory = products.
filter(product => product.split(",")(4) != "").
map(product => {
val p = product.split(",")
(p(1).toInt, product)
}).
package wordcount
import org.apache.spark.{SparkConf,SparkContext}
import org.apache.hadoop.fs._
/**
* Created by itversity on 31/05/17.
*/
object WordCount {
def main(args: Array[String]) = {
// sbt build definition for the "sparkdemo" project.
// Settings are separated by blank lines, which sbt releases before 0.13.7 require
// in .sbt files and which newer releases accept as well.
name := "sparkdemo"

version := "1.0"

scalaVersion := "2.10.5"

// %% appends the Scala binary version (here _2.10, derived from scalaVersion) to the
// artifact name, so the Spark artifact stays in sync if scalaVersion changes.
libraryDependencies += "org.apache.spark" %% "spark-core" % "1.6.3"

// Plain % is kept here: the artifact name "config" carries no Scala version suffix.
libraryDependencies += "com.typesafe" % "config" % "1.3.0"