Skip to content

Instantly share code, notes, and snippets.

@neilkod
Created November 12, 2010 15:59
Show Gist options
  • Save neilkod/674260 to your computer and use it in GitHub Desktop.
Save neilkod/674260 to your computer and use it in GitHub Desktop.
i'm having trouble getting mongodb to recognize my regular expression. i'm trying to list documents(tweets) where user.screen_name starts with an n
output
121980
--------- tweets from neilkod -------
neilkod
neilkod
neilkod
neilkod
neilkod
---------tweets from users that start with n--------
-----done------
def regexpTest():
tweets = createConnection()
theCount = tweets.count()
print theCount
print "--------- tweets from neilkod -------"
cur = tweets.find({'user.screen_name':'neilkod'},{'user.screen_name':1})
for v in cur:
print v['user']['screen_name']
print "---------tweets from users that start with n--------"
cur = tweets.find({'user.screen_name':'/^n.*/'},{'user.screen_name':1})
for v in cur:
print v['user']['screen_name']
print "-----done------"
example document:
> db.tweets.findOne()
{
"_id" : ObjectId("4cdd49080e37707a39caa92c"),
"retweeted_status" : {
"geo" : null,
"retweet_count" : null,
"in_reply_to_status_id" : null,
"text" : "Last chance to get #RavenHunt poker coin at #PubCon today: I'll be outside Salon C at 4:10. Clue No. 1 to follow again on Twitter.",
"entities" : {
"hashtags" : [
{
"text" : "RavenHunt",
"indices" : [
19,
29
]
},
{
"text" : "PubCon",
"indices" : [
44,
51
]
}
],
"urls" : [ ],
"user_mentions" : [ ]
},
"retweeted" : false,
"place" : null,
"in_reply_to_user_id" : null,
"in_reply_to_status_id_str" : null,
"coordinates" : null,
"source" : "<a href=\"http://itunes.apple.com/app/twitter/id333903271?mt=8\" rel=\"nofollow\">Twitter for iPad</a>",
"truncated" : false,
"in_reply_to_user_id_str" : null,
"id_str" : "2147676590182400",
"favorited" : false,
"created_at" : "Tue Nov 09 23:57:00 +0000 2010",
"contributors" : null,
"user" : {
"lang" : "en",
"profile_background_image_url" : "http://a1.twimg.com/profile_background_images/149836964/twitterbackground2.jpg",
"following" : null,
"notifications" : null,
"favourites_count" : 2,
"listed_count" : 20,
"profile_background_tile" : false,
"profile_background_color" : "ffffff",
"statuses_count" : 463,
"description" : "Communications Director at Raven Internet Marketing Tools. Factoid junkie, middle child, past-life journo. When you use utilize incorrectly, God kills a kitten.",
"show_all_inline_media" : false,
"profile_use_background_image" : true,
"profile_text_color" : "333333",
"contributors_enabled" : false,
"location" : "Nashville, TN",
"id_str" : "172055179",
"geo_enabled" : true,
"profile_link_color" : "3e7994",
"followers_count" : 293,
"friends_count" : 366,
"protected" : false,
"profile_image_url" : "http://a3.twimg.com/profile_images/1112291963/arienne-holland3_normal.jpg",
"verified" : false,
"created_at" : "Wed Jul 28 20:19:42 +0000 2010",
"profile_sidebar_fill_color" : "e0e0e0",
"name" : "Arienne Holland",
"follow_request_sent" : null,
"time_zone" : "Central Time (US & Canada)",
"url" : "http://raventools.com",
"screen_name" : "RavenArienne",
"id" : 172055179,
"utc_offset" : -21600,
"profile_sidebar_border_color" : "ffffff"
},
"in_reply_to_screen_name" : null,
"id" : NumberLong("2147676590182400")
},
"geo" : null,
"retweet_count" : null,
"in_reply_to_status_id" : null,
"text" : "RT @RavenArienne: Last chance to get #RavenHunt poker coin at #PubCon today: I'll be outside Salon C at 4:10. Clue No. 1 to follow again ...",
"new_id_str" : "2148451106160640",
"entities" : {
"hashtags" : [
{
"text" : "RavenHunt",
"indices" : [
37,
47
]
},
{
"text" : "PubCon",
"indices" : [
62,
69
]
}
],
"urls" : [ ],
"user_mentions" : [
{
"indices" : [
3,
16
],
"id_str" : "172055179",
"name" : "Arienne Holland",
"screen_name" : "RavenArienne",
"id" : 172055179
}
]
},
"retweeted" : false,
"place" : null,
"in_reply_to_user_id" : null,
"in_reply_to_status_id_str" : null,
"coordinates" : null,
"source" : "<a href=\"http://www.tweetdeck.com\" rel=\"nofollow\">TweetDeck</a>",
"new_id" : NumberLong("2148451106160640"),
"truncated" : true,
"in_reply_to_user_id_str" : null,
"id_str" : "2148451106160640",
"favorited" : false,
"created_at" : "Wed Nov 10 00:00:05 +0000 2010",
"contributors" : null,
"user" : {
"lang" : "en",
"profile_background_image_url" : "http://a1.twimg.com/profile_background_images/88502652/twilk_background_4bb63d5b40aea.jpg",
"following" : null,
"notifications" : null,
"favourites_count" : 13,
"listed_count" : 15,
"profile_background_tile" : true,
"profile_background_color" : "9ae4e8",
"statuses_count" : 3009,
"description" : "search engine marketing; digital marketing strategies; wine enthusiast; soccer fan; travel; education",
"show_all_inline_media" : false,
"profile_use_background_image" : true,
"profile_text_color" : "000000",
"contributors_enabled" : false,
"location" : "Portland",
"id_str" : "16107612",
"geo_enabled" : true,
"profile_link_color" : "0000ff",
"followers_count" : 569,
"friends_count" : 710,
"protected" : false,
"profile_image_url" : "http://a2.twimg.com/profile_images/1157439518/pic2_normal.jpg",
"verified" : false,
"created_at" : "Wed Sep 03 03:06:28 +0000 2008",
"profile_sidebar_fill_color" : "e0ff92",
"name" : "Trish Carey",
"follow_request_sent" : null,
"time_zone" : "Pacific Time (US & Canada)",
"url" : "http://www.facebook.com/careytn",
"screen_name" : "TrishCarey",
"id" : 16107612,
"utc_offset" : -28800,
"profile_sidebar_border_color" : "87bc44"
},
"in_reply_to_screen_name" : null,
"id" : NumberLong("2148451106160640")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment