Last active
October 4, 2015 01:28
-
-
Save joewiz/048489ac436966b17b40 to your computer and use it in GitHub Desktop.
Parse and transform tweets using XQuery 3.1's JSON support
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.1"; | |
(: parse tweets using XQuery 3.1's JSON support | |
: see http://www.w3.org/TR/xpath-functions-31/#json | |
: sample JSON from https://dev.twitter.com/rest/reference/get/statuses/user_timeline | |
:) | |
let $json := json-doc('/db/user_timeline.json') | |
let $tweets := $json?* | |
return | |
<tweets>{ | |
for $tweet in $tweets | |
let $id := $tweet?id_str | |
let $created := $tweet?created_at | |
let $text := $tweet?text | |
let $user-id := $tweet?user?screen_name | |
let $url := concat('https://twitter.com/', $user-id, '/statuses/', $id) | |
return | |
<tweet> | |
<id>{$id}</id> | |
<created>{$created}</created> | |
<text>{$text}</text> | |
<user-id>{$user-id}</user-id> | |
<url>{$url}</url> | |
</tweet> | |
}</tweets> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- results of parse-tweets.xq when using sample JSON from https://dev.twitter.com/rest/reference/get/statuses/user_timeline --> | |
<tweets> | |
<tweet> | |
<id>240859602684612608</id> | |
<created>Wed Aug 29 17:12:58 +0000 2012</created> | |
<text>Introducing the Twitter Certified Products Program: https://t.co/MjJ8xAnT</text> | |
<user-id>twitterapi</user-id> | |
<url>https://twitter.com/twitterapi/statuses/240859602684612608</url> | |
</tweet> | |
<tweet> | |
<id>239413543487819778</id> | |
<created>Sat Aug 25 17:26:51 +0000 2012</created> | |
<text>We are working to resolve issues with application management & logging in to the dev portal: https://t.co/p5bOzH0k ^TS</text> | |
<user-id>twitterapi</user-id> | |
<url>https://twitter.com/twitterapi/statuses/239413543487819778</url> | |
</tweet> | |
</tweets> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.1"; | |
(: transform JSON with XQuery :) | |
import module namespace ju = "http://joewiz.org/ns/xquery/json-util" at "/db/json-util.xqm"; | |
(: see https://gist.github.com/joewiz/d986da715facaad633db for this function :) | |
let $json := json-doc('/db/user_timeline.json') | |
let $tweets := $json?* | |
let $new-tweets := | |
array { | |
for $tweet in $tweets | |
let $id := $tweet?id_str | |
let $created := $tweet?created_at | |
let $text := $tweet?text | |
let $user-id := $tweet?user?screen_name | |
let $url := concat('https://twitter.com/', $user-id, '/statuses/', $id) | |
return | |
map { | |
'id_str': $id, | |
'created_at': $created, | |
'user': map { | |
'screen_name': $user-id | |
}, | |
'url': $url | |
} | |
} | |
return | |
ju:serialize-json($new-tweets) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"created_at": "Wed Aug 29 17:12:58 +0000 2012", | |
"user": {"screen_name": "twitterapi"}, | |
"id_str": "240859602684612608", | |
"url": "https://twitter.com/twitterapi/statuses/240859602684612608" | |
}, | |
{ | |
"created_at": "Sat Aug 25 17:26:51 +0000 2012", | |
"user": {"screen_name": "twitterapi"}, | |
"id_str": "239413543487819778", | |
"url": "https://twitter.com/twitterapi/statuses/239413543487819778" | |
} | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"coordinates": null, | |
"favorited": false, | |
"truncated": false, | |
"created_at": "Wed Aug 29 17:12:58 +0000 2012", | |
"id_str": "240859602684612608", | |
"entities": { | |
"urls": [{ | |
"expanded_url": "https://dev.twitter.com/blog/twitter-certified-products", | |
"url": "https://t.co/MjJ8xAnT", | |
"indices": [ | |
52, | |
73 | |
], | |
"display_url": "dev.twitter.com/blog/twitter-c\u2026" | |
}], | |
"hashtags": [], | |
"user_mentions": [] | |
}, | |
"in_reply_to_user_id_str": null, | |
"contributors": null, | |
"text": "Introducing the Twitter Certified Products Program: https://t.co/MjJ8xAnT", | |
"retweet_count": 121, | |
"in_reply_to_status_id_str": null, | |
"id": 240859602684612608, | |
"geo": null, | |
"retweeted": false, | |
"possibly_sensitive": false, | |
"in_reply_to_user_id": null, | |
"place": null, | |
"user": { | |
"profile_sidebar_fill_color": "DDEEF6", | |
"profile_sidebar_border_color": "C0DEED", | |
"profile_background_tile": false, | |
"name": "Twitter API", | |
"profile_image_url": "http://a0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png", | |
"created_at": "Wed May 23 06:01:13 +0000 2007", | |
"location": "San Francisco, CA", | |
"follow_request_sent": false, | |
"profile_link_color": "0084B4", | |
"is_translator": false, | |
"id_str": "6253282", | |
"entities": { | |
"url": {"urls": [{ | |
"expanded_url": null, | |
"url": "http://dev.twitter.com", | |
"indices": [ | |
0, | |
22 | |
] | |
}]}, | |
"description": {"urls": []} | |
}, | |
"default_profile": true, | |
"contributors_enabled": true, | |
"favourites_count": 24, | |
"url": "http://dev.twitter.com", | |
"profile_image_url_https": "https://si0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png", | |
"utc_offset": -28800, | |
"id": 6253282, | |
"profile_use_background_image": true, | |
"listed_count": 10775, | |
"profile_text_color": "333333", | |
"lang": "en", | |
"followers_count": 1212864, | |
"protected": false, | |
"notifications": null, | |
"profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png", | |
"profile_background_color": "C0DEED", | |
"verified": true, | |
"geo_enabled": true, | |
"time_zone": "Pacific Time (US & Canada)", | |
"description": "The Real Twitter API. I tweet about API changes, service issues and happily answer questions about Twitter and our API. Don't get an answer? It's on my website.", | |
"default_profile_image": false, | |
"profile_background_image_url": "http://a0.twimg.com/images/themes/theme1/bg.png", | |
"statuses_count": 3333, | |
"friends_count": 31, | |
"following": null, | |
"show_all_inline_media": false, | |
"screen_name": "twitterapi" | |
}, | |
"in_reply_to_screen_name": null, | |
"source": "<a href=\"//sites.google.com/site/yorufukurou/\" rel=\"nofollow\">YoruFukurou<\/a>", | |
"in_reply_to_status_id": null | |
}, | |
{ | |
"coordinates": null, | |
"favorited": false, | |
"truncated": false, | |
"created_at": "Sat Aug 25 17:26:51 +0000 2012", | |
"id_str": "239413543487819778", | |
"entities": { | |
"urls": [{ | |
"expanded_url": "https://dev.twitter.com/issues/485", | |
"url": "https://t.co/p5bOzH0k", | |
"indices": [ | |
97, | |
118 | |
], | |
"display_url": "dev.twitter.com/issues/485" | |
}], | |
"hashtags": [], | |
"user_mentions": [] | |
}, | |
"in_reply_to_user_id_str": null, | |
"contributors": null, | |
"text": "We are working to resolve issues with application management & logging in to the dev portal: https://t.co/p5bOzH0k ^TS", | |
"retweet_count": 105, | |
"in_reply_to_status_id_str": null, | |
"id": 239413543487819778, | |
"geo": null, | |
"retweeted": false, | |
"possibly_sensitive": false, | |
"in_reply_to_user_id": null, | |
"place": null, | |
"user": { | |
"profile_sidebar_fill_color": "DDEEF6", | |
"profile_sidebar_border_color": "C0DEED", | |
"profile_background_tile": false, | |
"name": "Twitter API", | |
"profile_image_url": "http://a0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png", | |
"created_at": "Wed May 23 06:01:13 +0000 2007", | |
"location": "San Francisco, CA", | |
"follow_request_sent": false, | |
"profile_link_color": "0084B4", | |
"is_translator": false, | |
"id_str": "6253282", | |
"entities": { | |
"url": {"urls": [{ | |
"expanded_url": null, | |
"url": "http://dev.twitter.com", | |
"indices": [ | |
0, | |
22 | |
] | |
}]}, | |
"description": {"urls": []} | |
}, | |
"default_profile": true, | |
"contributors_enabled": true, | |
"favourites_count": 24, | |
"url": "http://dev.twitter.com", | |
"profile_image_url_https": "https://si0.twimg.com/profile_images/2284174872/7df3h38zabcvjylnyfe3_normal.png", | |
"utc_offset": -28800, | |
"id": 6253282, | |
"profile_use_background_image": true, | |
"listed_count": 10775, | |
"profile_text_color": "333333", | |
"lang": "en", | |
"followers_count": 1212864, | |
"protected": false, | |
"notifications": null, | |
"profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme1/bg.png", | |
"profile_background_color": "C0DEED", | |
"verified": true, | |
"geo_enabled": true, | |
"time_zone": "Pacific Time (US & Canada)", | |
"description": "The Real Twitter API. I tweet about API changes, service issues and happily answer questions about Twitter and our API. Don't get an answer? It's on my website.", | |
"default_profile_image": false, | |
"profile_background_image_url": "http://a0.twimg.com/images/themes/theme1/bg.png", | |
"statuses_count": 3333, | |
"friends_count": 31, | |
"following": null, | |
"show_all_inline_media": false, | |
"screen_name": "twitterapi" | |
}, | |
"in_reply_to_screen_name": null, | |
"source": "<a href=\"//sites.google.com/site/yorufukurou/\" rel=\"nofollow\">YoruFukurou<\/a>", | |
"in_reply_to_status_id": null | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment