Last active
August 29, 2015 14:05
-
-
Save helxsz/8a4882ed949e9a30c860 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"isActive": true, | |
"balance": "$2,504.16", | |
"age": 31, | |
"eyeColor": "green", | |
"name": "Mendez Gilmore", | |
"gender": "male", | |
"registered": "2014-07-20T04:47:48 -01:00", | |
"latitude": 79.452285, | |
"longitude": -163.151555 | |
}, | |
{ | |
"isActive": true, | |
"balance": "$3,791.11", | |
"age": 20, | |
"eyeColor": "blue", | |
"name": "Lynn Santana", | |
"gender": "male", | |
"registered": "2014-06-22T23:00:44 -01:00", | |
"latitude": -3.60444, | |
"longitude": -147.956637 | |
}, | |
{ | |
"isActive": true, | |
"balance": "$2,450.66", | |
"age": 31, | |
"eyeColor": "blue", | |
"name": "Kelley Shaffer", | |
"gender": "female", | |
"registered": "2014-05-17T08:36:55 -01:00", | |
"latitude": 29.1312, | |
"longitude": -36.645241 | |
}, | |
{ | |
"isActive": false, | |
"balance": "$1,794.35", | |
"age": 23, | |
"eyeColor": "brown", | |
"name": "Gardner Lamb", | |
"gender": "male", | |
"registered": "2014-03-21T09:29:33 -00:00", | |
"latitude": 18.80767, | |
"longitude": 153.42069 | |
}, | |
{ | |
"isActive": false, | |
"balance": "$1,913.56", | |
"age": 22, | |
"eyeColor": "green", | |
"name": "Lambert Daugherty", | |
"gender": "male", | |
"registered": "2014-06-12T12:22:10 -01:00", | |
"latitude": -48.750177, | |
"longitude": -172.932591 | |
}, | |
{ | |
"isActive": true, | |
"balance": "$3,338.16", | |
"age": 32, | |
"eyeColor": "green", | |
"name": "Pickett Bradshaw", | |
"gender": "male", | |
"registered": "2014-04-14T09:02:57 -01:00", | |
"latitude": -19.16996, | |
"longitude": 121.900084 | |
}, | |
{ | |
"isActive": false, | |
"balance": "$1,613.38", | |
"age": 24, | |
"eyeColor": "green", | |
"name": "Blair Mcconnell", | |
"gender": "male", | |
"registered": "2014-03-24T05:34:30 -00:00", | |
"latitude": -5.684265, | |
"longitude": -104.292739 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 | 1 | 1 | 1 | |
---|---|---|---|---|
2 | 2 | 2 | 2 | |
3 | 3 | 3 | 3 | |
4 | 4 | 4 | 4 | |
5 | 5 | 5 | 5 | |
6 | 6 | 6 | 6 | |
7 | 7 | 7 | 7 | |
8 | 8 | 8 | 8 | |
9 | 9 | 9 | 9 | |
10 | 10 | 10 | 10 | |
11 | 11 | 11 | 11 | |
12 | 12 | 12 | 12 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"facebook": { | |
"application": "Coupons", | |
"author": { | |
"avatar": "https://graph.facebook.com/100004343800786/picture", | |
"id": "100004343800786", | |
"link": "http://www.facebook.com/profile.php?id=100004343800786", | |
"name": "Flint Beastwood", | |
"type": "user" | |
}, | |
"caption": "woobox.com", | |
"created_at": "Fri, 01 Aug 2014 09:36:31 +0000", | |
"description": "I've got my FREE Steam key for GTR Evolution from Bundle Stars and PC Gamer. Go get yours at http://www.pcgamer.com!", | |
"id": "100004343800786_341086752712782", | |
"is_share": false, | |
"link": "http://woobox.com/328djz", | |
"name": "FREE GTR Evolution Steam key from Bundle Stars and PC Gamer", | |
"picture": "https://fbexternal-a.akamaihd.net/app_full_proxy.php?app=174961479209942&v=1&size=z&cksum=fb17e9efd3aa14b116341c354f32f4f6&src=http%3A%2F%2Fwoobox.com%2Foffers%2Fshareimage%2F328djz%3F53ce7b0149ef2-Week5-PCGamer-Campaign.jpg", | |
"source": "Coupons (174961479209942)", | |
"type": "link" | |
}, | |
"interaction": { | |
"author": { | |
"avatar": "https://graph.facebook.com/100004343800786/picture", | |
"id": "100004343800786", | |
"link": "http://www.facebook.com/profile.php?id=100004343800786", | |
"name": "Flint Beastwood", | |
"type": "user" | |
}, | |
"content": "I've got my FREE Steam key for GTR Evolution from Bundle Stars and PC Gamer. Go get yours at http://www.pcgamer.com!", | |
"created_at": "Fri, 01 Aug 2014 09:36:31 +0000", | |
"id": "1e4195f51a03a980e0666e90d199ebba", | |
"link": "http://www.facebook.com/100004343800786_341086752712782", | |
"received_at": 1406885879.6349001, | |
"schema": { | |
"version": 3 | |
}, | |
"source": "Coupons (174961479209942)", | |
"subtype": "link", | |
"title": "FREE GTR Evolution Steam key from Bundle Stars and PC Gamer", | |
"type": "facebook" | |
}, | |
"salience": { | |
"content": { | |
"sentiment": 0 | |
}, | |
"title": { | |
"sentiment": 0 | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://www.json-generator.com | |
// lab 1 | |
// Source CSV; every row is indexed up to column 3, so it needs at least four
// comma-separated integer fields.
val rdd = sc.textFile("/root/project/exampleCsv.csv")
// Key each row by column 0 and pair it with the sum of columns 1 and 3.
val lines = rdd.map { line =>
  val array = line.split(",")
  (array(0).toInt, array(1).toInt + array(3).toInt)
}
lines.collect().foreach(print)
// lab 2
// Same key as lab 1, but columns 1 and 3 are kept side by side as a tuple.
val lines1 = rdd.map { line =>
  val array = line.split(",")
  (array(0).toInt, (array(1).toInt, array(3).toInt))
}
// Must run on lines1: its values are (Int, Int) pairs, so _1/_2 exist.
// The values of `lines` are plain Ints and have no _1/_2 members.
lines1.mapValues(v => (v._1 * 2, v._2 * 3)).collect().foreach(print)
// lab 2.2 | |
/** One parsed row of the sample data file: three numeric columns. */
case class Record(v1:Double, v2:Double, v3:Double)
val data = sc.textFile("/root/data/kmeans_data.txt")
// The three variants below build the same Records and print them.
// They are not bound to a name: foreach(println) returns Unit, so a
// `val person = ...` here would only hold () and clash with the real
// `person` RDD defined in step 4.
// 1: index into the split array
data.map { line =>
  val items = line.split(" ")
  Record(items(0).toDouble, items(1).toDouble, items(2).toDouble)
}.collect().foreach(println)
// 2: destructure the split array with a pattern match
// (throws MatchError for a row that does not have exactly three fields)
data.map { line =>
  line.split(" ") match { case Array(v1, v2, v3) => Record(v1.toDouble, v2.toDouble, v3.toDouble) }
}.collect().foreach(println)
// 3: same as 2, using placeholder syntax for the line
data.map(_.split(" ") match { case Array(v1, v2, v3) => Record(v1.toDouble, v2.toDouble, v3.toDouble) }).collect().foreach(println)
// 4 | |
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
// Implicit conversion from an RDD of case classes to a SchemaRDD; required
// for the registerAsTable call below.
import sqlContext.createSchemaRDD
val person = data.map(_.split(" ") match { case Array(v1, v2, v3) => Record(v1.toDouble, v2.toDouble, v3.toDouble) })
person.registerAsTable("person")
val teens = sqlContext.sql("SELECT v1, v2, v3 from person where v2 > 1")
// 4.1
teens.collect().foreach(println)
// 4.2
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.regression.LabeledPoint
// The two attempts below do not compile. They are kept as comments for
// reference so that the rest of the script can be pasted and run as a whole.
// error : value v1 is not a member of org.apache.spark.sql.Row
// val training = teens.map(record => { val features = Vectors.dense(record.v1, record.v2, record.v3); val label = record.v1; LabeledPoint(label, features)} )
// error : type mismatch
// val training = teens.map(row=> { val features = Vectors.dense( row(0), row(1), row(2)); val label = row(0); LabeledPoint(label, features)} )
// solution: read every column through the typed accessor getDouble
val training = teens.map { row =>
  val features = Vectors.dense(row.getDouble(0), row.getDouble(1), row.getDouble(2))
  // The label is taken from column 0, which is also the first feature.
  val label = row.getDouble(0)
  LabeledPoint(label, features)
}
training.collect().foreach(println)
// lab 3 | |
// Nine integers spread over three partitions.
val b = sc.parallelize((1 to 9).toList, 3)
// Print every element, walking the data one partition at a time.
b.foreachPartition { partition =>
  for (value <- partition) println(s"value :$value")
}
// lab
// Six key/value pairs (with duplicates) in two partitions.
val a = sc.parallelize(Seq((1, 1), (1, 1), (1, 3), (3, 5), (3, 5), (3, 7)), 2)
// Drop duplicate pairs, double each value, and print the result.
a.distinct().mapValues(_ * 2).collect().foreach(println)
// https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala | |
import org.json4s.jackson.JsonMethods | |
import org.json4s.jackson.JsonMethods._ | |
import org.json4s.JsonAST._ | |
import org.json4s.DefaultFormats | |
// problem lab 4 | |
val a = sc.textFile("/root/project/a.json")
// NOTE(review): textFile yields one element per line, so parse(line) presumably
// needs every line of a.json to be a complete JSON document - confirm the file layout.
// Emits (gender, (name, age)) for each record.
a.map(line => parse(line)).map(json => (json \ "gender", (json \ "name", json \ "age"))).collect().foreach(println)
// problem lab 5
// One JSON document per RDD element; a single string holding several
// comma-separated objects is not one valid JSON document.
val a = sc.parallelize(
  """{"name":"Yin","age":25,"gender":"male"}""" ::
  """{"name":"Kin","age":23,"gender":"male"}""" ::
  """{"name":"xin","age":29,"gender":"female"}""" :: Nil)
a.map(line => parse(line)).map(json => (json \ "gender", (json \ "name", json \ "age"))).collect().foreach(println)
// problem lab 6
val lines = sc.textFile("/root/data/facebook.json")
// Compare against the JSON AST node directly instead of calling extract[String]:
// no implicit Formats is needed, so nothing non-serializable is captured by the
// closure, and records without facebook.application are filtered out instead of throwing.
lines.map(line => parse(line)).filter(json => (json \ "facebook" \ "application") == JString("Coupons")).collect().foreach(println)
// https://gist.github.com/cotdp/fda64b4248e43a3c8f46
// Error noted in the original script, presumably raised by the extract[String]
// version of lab 6 when an implicit DefaultFormats was captured by the closure:
// org.apache.spark.SparkException: Job aborted due to stage failure: Task not serializable: java.io.NotSerializableException: org.json4s.DefaultFormats$
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment