Skip to content

Instantly share code, notes, and snippets.

@Pyppe
Last active August 29, 2015 14:22
Show Gist options
  • Save Pyppe/59056d26ef6afb269813 to your computer and use it in GitHub Desktop.
Save Pyppe/59056d26ef6afb269813 to your computer and use it in GitHub Desktop.
[info] Running Example
EXTRACTED USERS:
(0,{"id_str":"0","name":"Petra Sievinen","testing":"petra","followers_count":1,"foobar":[1,2,3]})
(2,{"id_str":"2","name":"Retweeter"})
(1,{"testing":"jepjep","id_str":"1","name":"Pyppe"})
(5,{"id_str":"5","name":"Quoter","testing":"aino","followers_count":333,"lang":"en"})
WHOLE TWEET WITH LITE USERS:
{
"created_at" : "Wed Jun 10 20:01:01 +0000 2015",
"id_str" : "608725574924808192",
"text" : "Testing",
"truncated" : false,
"retweet_count" : 0,
"favorite_count" : 0,
"entities" : {
"hashtags" : [ {
"text" : "lovedungeon",
"indices" : [ 57, 70 ]
} ],
"trends" : [ ],
"urls" : [ {
"url" : "https://t.co/abc",
"expanded_url" : "https://twitter.com/diipadaapa/status/1234",
"display_url" : "twitter.com/diipadaapa/sta…",
"indices" : [ 72, 95 ]
} ],
"user_mentions" : [ {
"screen_name" : "foo",
"name" : "Foo bar",
"id" : 102025211,
"id_str" : "102025211",
"indices" : [ 0, 13 ]
} ],
"symbols" : [ ]
},
"favorited" : false,
"retweeted" : false,
"filter_level" : "low",
"lang" : "fi",
"timestamp_ms" : "1433966461293",
"user" : {
"id_str" : "0",
"name" : "Petra Sievinen"
},
"retweeted_status" : {
"hip" : "hei",
"user" : {
"id_str" : "2",
"name" : "Retweeter"
},
"quoted_status" : {
"some" : "property",
"user" : {
"id_str" : "1",
"name" : "Pyppe"
}
}
},
"quoted_status" : {
"id_str" : "4",
"geo" : null,
"favorited" : false,
"retweeted" : false,
"possibly_sensitive" : false,
"filter_level" : "low",
"lang" : "fi",
"user" : {
"id_str" : "5",
"name" : "Quoter"
}
}
}
{
"created_at": "Wed Jun 10 20:01:01 +0000 2015",
"id_str": "608725574924808192",
"text": "Testing",
"truncated": false,
"retweeted_status": {
"hip": "hei",
"user": {
"id_str": "2",
"name": "Retweeter"
},
"quoted_status": {
"some": "property",
"user": {
"testing": "jepjep",
"id_str": "1",
"name": "Pyppe"
}
}
},
"quoted_status": {
"id_str": "4",
"user": {
"id_str": "5",
"name": "Quoter",
"testing": "aino",
"followers_count": 333,
"lang": "en"
},
"geo": null,
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "fi"
},
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "lovedungeon",
"indices": [
57,
70
]
}
],
"trends": [],
"urls": [
{
"url": "https://t.co/abc",
"expanded_url": "https://twitter.com/diipadaapa/status/1234",
"display_url": "twitter.com/diipadaapa/sta…",
"indices": [
72,
95
]
}
],
"user_mentions": [
{
"screen_name": "foo",
"name": "Foo bar",
"id": 102025211,
"id_str": "102025211",
"indices": [
0,
13
]
}
],
"symbols": []
},
"favorited": false,
"retweeted": false,
"filter_level": "low",
"lang": "fi",
"timestamp_ms": "1433966461293",
"user": {
"id_str": "0",
"name": "Petra Sievinen",
"testing": "petra",
"followers_count": 1,
"foobar": [1,2,3]
}
}
/**
 * Small demo: pulls the embedded user objects out of a tweet JSON, prints them,
 * and prints the tweet with the users replaced by trimmed-down copies.
 */
object Example extends App {

  /**
   * Extracts the user objects embedded in a tweet JSON and replaces each of them
   * in the tweet with a filtered ("lite") copy.
   *
   * Users are looked up at the following paths, in this order:
   *  - `__ \ "user"`
   *  - `__ \ "retweeted_status" \ "user"`
   *  - `__ \ "retweeted_status" \ "quoted_status" \ "user"`
   *  - `__ \ "quoted_status" \ "user"`
   *
   * A path that does not resolve to a JSON object is skipped and leaves the tweet untouched.
   *
   * @param js              the tweet JSON to process
   * @param userFieldFilter predicate on user field names; a field is kept in the lite user
   *                        only when the predicate returns `true` for its name
   * @return a `Try` wrapping a tuple of:
   *         1. the tweet JSON with every found user replaced by its lite version
   *         2. a map of the full (unfiltered) user JSONs keyed by their `id_str`; when the
   *            same `id_str` is found at several paths, the user from the last path wins
   *
   *         Any non-fatal exception thrown while extracting (for instance when a user's
   *         `id_str` cannot be read as a `String`) is captured as a `Failure`.
   */
  def extractUsers(js: JsObject,
                   userFieldFilter: (String => Boolean)): Try[(JsObject, Map[String, JsObject])] = Try {

    @tailrec
    def extract(tweet: JsObject,
                users: Map[String, JsObject],
                pathsToCheck: List[JsPath]): (JsObject, Map[String, JsObject]) = {
      pathsToCheck match {
        case Nil =>
          (tweet, users)

        case path :: tail =>
          path.asSingleJson(tweet) match {
            case user: JsObject =>
              val id = (user \ "id_str").as[String]
              val liteUser = JsObject(user.fields.filter { case (key, _) => userFieldFilter(key) })
              // Replace the user in two steps: drop the full user at `path`, then merge in an
              // otherwise empty object that carries only the lite user at the same path.
              val withLiteUser =
                tweet.transform(path.json.prune).get.
                  deepMerge(Json.obj().transform(path.json.put(liteUser)).get)
              extract(withLiteUser, users + (id -> user), tail)

            case _ =>
              // Nothing usable at this path; continue with the remaining ones.
              extract(tweet, users, tail)
          }
      }
    }

    extract(js, Map.empty, List(
      (__ \ "user"),
      (__ \ "retweeted_status" \ "user"),
      (__ \ "retweeted_status" \ "quoted_status" \ "user"),
      (__ \ "quoted_status" \ "user")
    ))
  }

  // The Set doubles as the `String => Boolean` filter: only `id_str` and `name` are kept.
  // NOTE(review): `exampleTweet` is not defined in this snippet; presumably it is provided
  // elsewhere. `.get` rethrows if the extraction failed.
  val (tweetWithLiteUsers, users) = extractUsers(exampleTweet, Set("id_str", "name")).get

  println("EXTRACTED USERS:")
  users foreach println
  println("WHOLE TWEET WITH LITE USERS:")
  println(Json.prettyPrint(tweetWithLiteUsers))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment