Created
May 12, 2012 22:11
-
-
Save jm3/2669385 to your computer and use it in GitHub Desktop.
jm3-redis-stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Aggregate stats from Redis for our tweet research.
# util: pretty-print big numbers with comma separators for readability
#
# x: the value to format (the script passes Redis cardinalities).
# Returns the character string produced by format(), with "," as the
# thousands separator and scientific notation disabled, so large counts
# print as e.g. "1,234,567" rather than "1.234567e+06".
pp <- function(x) {
  format(x, big.mark = ",", scientific = FALSE)
}
# open up access to redis data
# redisConnect() is called with no arguments, so the rredis package's
# default connection settings are used.
library(rredis)
redisConnect()
# core sets + zsets: Redis key names whose cardinality is reported
sets <- c(
  "tweets:hashtags", "tweets:links", "tweets:mentions",
  "user:is_public"
)
zsets <- c("words", "user:followers", "user:num_tweets")

# 8 language sets + 121 country sets
# Each code is appended to "user:lang:" / "user:country:" to build a key.
langs <- c("DE", "EN", "ES", "FA", "FR", "NL", "PT", "RU")
countries <- c(
  "AE", "AF", "AG", "AM", "AO", "AQ", "AR", "AT", "AU", "AZ", "BA",
  "BB", "BD", "BE", "BH", "BN", "BR", "BS", "BW", "BY", "CA", "CH",
  "CL", "CN", "CO", "CR", "CU", "CY", "DE", "DK", "DO", "DZ", "EC",
  "EE", "EG", "ES", "ET", "FI", "FJ", "FK", "FR", "GB", "GE", "GH",
  "GI", "GL", "GR", "GT", "GU", "HK", "HN", "HR", "HU", "ID", "IE",
  "IL", "IN", "IR", "IT", "JM", "JO", "JP", "KE", "KH", "KP", "KR",
  "KW", "LB", "LK", "LT", "LU", "LV", "MA", "MC", "MK", "MT", "MU",
  "MW", "MX", "MY", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA",
  "PE", "PH", "PK", "PL", "PT", "PY", "QA", "RO", "RS", "RU", "RW",
  "SA", "SE", "SG", "SI", "SN", "SV", "TH", "TR", "TT", "TW", "TZ",
  "UA", "UG", "US", "UY", "VA", "VE", "VI", "VN", "XK", "ZA", "ZW"
)
# walk the list of key names and pretty-print stats for each set
# Prints one "<key> : <count>" line per key, using SCARD for plain sets.
# seq_along() is used instead of 1:length() so an empty key list is skipped
# rather than iterating over c(1, 0).
for (i in seq_along(sets)) {
  print(paste(sets[i], ":", pp(redisSCard(sets[i]))))
}
# ...and zset (sorted sets need ZCARD rather than SCARD)
for (i in seq_along(zsets)) {
  print(paste(zsets[i], ":", pp(redisZCard(zsets[i]))))
}
# emit basic cardinality for all languages...
# For each language code, look up the size of the "user:lang:<CODE>" set,
# print it, and record it so the counts end up in a data.frame.
# Slots are preallocated as NA (not as index values) so an unfilled slot can
# never be mistaken for a real count; NA takes on whatever type the Redis
# client returns for the cardinality.
lang_stats <- rep(NA, length(langs))
for (i in seq_along(langs)) {
  key <- paste0("user:lang:", langs[i])
  card <- redisSCard(key)
  lang_stats[i] <- card
  print(paste(key, pp(card)))
}
# Pair each language code with its count; the column names contain spaces,
# so they are assigned via names<- rather than as data.frame() arguments.
lang_stats <- data.frame(langs, lang_stats)
names(lang_stats) <- c("tweet language", "occurrences")
# ...and countries
# For each country code, look up the size of the "user:country:<CODE>" set,
# print it, and record it so the counts end up in a data.frame.
# Slots are preallocated as NA (not as index values) so an unfilled slot can
# never be mistaken for a real count; NA takes on whatever type the Redis
# client returns for the cardinality.
country_stats <- rep(NA, length(countries))
for (i in seq_along(countries)) {
  key <- paste0("user:country:", countries[i])
  card <- redisSCard(key)
  country_stats[i] <- card
  print(paste(key, pp(card)))
}
# Pair each country code with its count; the column names contain spaces,
# so they are assigned via names<- rather than as data.frame() arguments.
country_stats <- data.frame(countries, country_stats)
names(country_stats) <- c("tweet country", "occurrences")
# clean up the workspace
# Remove the loop temporaries (index, last key, last cardinality) left
# behind by the for-loops.
rm(i, card, key)
# After the run, stats accrue in two data.frames: lang_stats and country_stats.
# Example output:
# "tweets:hashtags : 458,640"
# "tweets:links : 270,319"
# "tweets:mentions : 1,086,466"
# "user:is_public : 1,812,923"
#
# "words : 503,999"
# "user:followers : 1,711,305"
# "user:num_tweets : 1,207,538"
#
# "user:lang:DE 9,369"
# "user:lang:EN 1,622,940"
# "user:lang:ES 62,800"
# "user:lang:FA 932"
# "user:lang:FR 166,233"
# "user:lang:NL 3,361"
# "user:lang:PT 5,109"
# "user:lang:RU 124,741"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment