Last active
August 29, 2015 14:22
-
-
Save Pyppe/59056d26ef6afb269813 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[info] Running Example | |
EXTRACTED USERS: | |
(0,{"id_str":"0","name":"Petra Sievinen","testing":"petra","followers_count":1,"foobar":[1,2,3]}) | |
(2,{"id_str":"2","name":"Retweeter"}) | |
(1,{"testing":"jepjep","id_str":"1","name":"Pyppe"}) | |
(5,{"id_str":"5","name":"Quoter","testing":"aino","followers_count":333,"lang":"en"}) | |
WHOLE TWEET WITH LITE USERS: | |
{ | |
"created_at" : "Wed Jun 10 20:01:01 +0000 2015", | |
"id_str" : "608725574924808192", | |
"text" : "Testing", | |
"truncated" : false, | |
"retweet_count" : 0, | |
"favorite_count" : 0, | |
"entities" : { | |
"hashtags" : [ { | |
"text" : "lovedungeon", | |
"indices" : [ 57, 70 ] | |
} ], | |
"trends" : [ ], | |
"urls" : [ { | |
"url" : "https://t.co/abc", | |
"expanded_url" : "https://twitter.com/diipadaapa/status/1234", | |
"display_url" : "twitter.com/diipadaapa/sta…", | |
"indices" : [ 72, 95 ] | |
} ], | |
"user_mentions" : [ { | |
"screen_name" : "foo", | |
"name" : "Foo bar", | |
"id" : 102025211, | |
"id_str" : "102025211", | |
"indices" : [ 0, 13 ] | |
} ], | |
"symbols" : [ ] | |
}, | |
"favorited" : false, | |
"retweeted" : false, | |
"filter_level" : "low", | |
"lang" : "fi", | |
"timestamp_ms" : "1433966461293", | |
"user" : { | |
"id_str" : "0", | |
"name" : "Petra Sievinen" | |
}, | |
"retweeted_status" : { | |
"hip" : "hei", | |
"user" : { | |
"id_str" : "2", | |
"name" : "Retweeter" | |
}, | |
"quoted_status" : { | |
"some" : "property", | |
"user" : { | |
"id_str" : "1", | |
"name" : "Pyppe" | |
} | |
} | |
}, | |
"quoted_status" : { | |
"id_str" : "4", | |
"geo" : null, | |
"favorited" : false, | |
"retweeted" : false, | |
"possibly_sensitive" : false, | |
"filter_level" : "low", | |
"lang" : "fi", | |
"user" : { | |
"id_str" : "5", | |
"name" : "Quoter" | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"created_at": "Wed Jun 10 20:01:01 +0000 2015", | |
"id_str": "608725574924808192", | |
"text": "Testing", | |
"truncated": false, | |
"retweeted_status": { | |
"hip": "hei", | |
"user": { | |
"id_str": "2", | |
"name": "Retweeter" | |
}, | |
"quoted_status": { | |
"some": "property", | |
"user": { | |
"testing": "jepjep", | |
"id_str": "1", | |
"name": "Pyppe" | |
} | |
} | |
}, | |
"quoted_status": { | |
"id_str": "4", | |
"user": { | |
"id_str": "5", | |
"name": "Quoter", | |
"testing": "aino", | |
"followers_count": 333, | |
"lang": "en" | |
}, | |
"geo": null, | |
"favorited": false, | |
"retweeted": false, | |
"possibly_sensitive": false, | |
"filter_level": "low", | |
"lang": "fi" | |
}, | |
"retweet_count": 0, | |
"favorite_count": 0, | |
"entities": { | |
"hashtags": [ | |
{ | |
"text": "lovedungeon", | |
"indices": [ | |
57, | |
70 | |
] | |
} | |
], | |
"trends": [], | |
"urls": [ | |
{ | |
"url": "https://t.co/abc", | |
"expanded_url": "https://twitter.com/diipadaapa/status/1234", | |
"display_url": "twitter.com/diipadaapa/sta…", | |
"indices": [ | |
72, | |
95 | |
] | |
} | |
], | |
"user_mentions": [ | |
{ | |
"screen_name": "foo", | |
"name": "Foo bar", | |
"id": 102025211, | |
"id_str": "102025211", | |
"indices": [ | |
0, | |
13 | |
] | |
} | |
], | |
"symbols": [] | |
}, | |
"favorited": false, | |
"retweeted": false, | |
"filter_level": "low", | |
"lang": "fi", | |
"timestamp_ms": "1433966461293", | |
"user": { | |
"id_str": "0", | |
"name": "Petra Sievinen", | |
"testing": "petra", | |
"followers_count": 1, | |
"foobar": [1,2,3] | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object Example extends App {

  /**
   * Extracts the user objects found at the following paths of the given tweet JSON:
   *   - `__ \ "user"`
   *   - `__ \ "retweeted_status" \ "user"`
   *   - `__ \ "retweeted_status" \ "quoted_status" \ "user"`
   *   - `__ \ "quoted_status" \ "user"`
   *
   * Every user object found is replaced in the tweet by a "lite" copy that keeps
   * only the fields whose name is accepted by `userFieldFilter`.
   *
   * @param js              the tweet JSON to process
   * @param userFieldFilter predicate on user field names; a field is kept in the
   *                        lite user when the predicate returns `true` for its name
   * @return a `Try` wrapping a tuple of:
   *         1. the tweet JSON with "lite" users
   *         2. a map of the full (unfiltered) user JSONs, keyed by their `id_str`
   *
   *         Any exception thrown while processing (for instance by the
   *         `.as[String]` read of `id_str` or by `.get` on a transform result)
   *         is captured by the surrounding `Try`.
   */
  def extractUsers(js: JsObject,
                   userFieldFilter: (String => Boolean)): Try[(JsObject, Map[String, JsObject])] = Try {

    // Walks the remaining paths one by one, threading the progressively rewritten
    // tweet and the users collected so far through the recursion.
    @tailrec
    def extract(tweet: JsObject,
                users: Map[String, JsObject],
                pathsToCheck: List[JsPath]): (JsObject, Map[String, JsObject]) = {
      pathsToCheck match {
        case Nil => (tweet, users)
        case path :: tail =>
          path.asSingleJson(tweet) match {
            case user: JsObject =>
              val id = (user \ "id_str").as[String]
              val liteUser = JsObject(user.fields.filter { kv => userFieldFilter(kv._1) })
              // Remove the full user at `path`, then merge in an object that holds
              // only the lite user at that same path.
              val withLiteUser =
                tweet.transform(path.json.prune).get.
                  deepMerge(Json.obj().transform(path.json.put(liteUser)).get)
              // A user id seen again at a later path overwrites the earlier entry.
              extract(withLiteUser, users + (id -> user), tail)
            case _ =>
              // Nothing usable at this path (not a JSON object): leave the tweet as is.
              extract(tweet, users, tail)
          }
      }
    }

    extract(js, Map.empty, List(
      (__ \ "user"),
      (__ \ "retweeted_status" \ "user"),
      (__ \ "retweeted_status" \ "quoted_status" \ "user"),
      (__ \ "quoted_status" \ "user")
    ))
  }

  // NOTE(review): `exampleTweet` is not defined in this snippet; presumably it is
  // the parsed example tweet JSON -- confirm where it comes from.
  // A Set[String] is used as the field-name predicate, so only `id_str` and
  // `name` survive in the lite users.
  val (tweetWithLiteUsers, users) = extractUsers(exampleTweet, Set("id_str", "name")).get

  println("EXTRACTED USERS:")
  users.foreach(println)
  println("WHOLE TWEET WITH LITE USERS:")
  println(Json.prettyPrint(tweetWithLiteUsers))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment