Created
October 24, 2013 22:37
-
-
Save cbare/7146353 to your computer and use it in GitHub Desktop.
Read challenge participant data from Synapse for the purpose of extracting the participants' locations. The locations will be used to construct a map.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## A script to read participant data out of Synapse and | |
| ## (help) clean it up. | |
| ## | |
| ## J. Christopher Bare | |
| ## chris.bare@sagebase.org | |
| ## Oct. 24, 2013 | |
| ############################################################ | |
| ## fetch the two challenge evaluations from Synapse | |
| ev1 <- synGetEvaluation(1917695) | |
| ev2 <- synGetEvaluation(1917696) | |
| ## list the participants of each evaluation (limit=10000 per request) | |
| par1 <- synGetParticipants(ev1@properties$id, limit=10000) | |
| par2 <- synGetParticipants(ev2@properties$id, limit=10000) | |
| ## user ids of the first evaluation's participants | |
| user_ids_1 <- sapply(par1@results, function(p) p$userId) | |
| ## check if all participants for both challenges are the same: | |
| ## ids present in the first evaluation but absent from the second | |
| ## note that the only discrepancy is me :) | |
| setdiff(user_ids_1, sapply(par2@results, function(p) p$userId)) | |
| synGetUserProfile(377358) | |
| ## get user profile objects, one per participant id | |
| users <- lapply(user_ids_1, synGetUserProfile) | |
| ## helper for empty slots: a zero-length value (e.g. NULL) becomes NA, | |
| ## anything else is passed through unchanged | |
| nonempty <- function(x) { | |
|   if (length(x) > 0) x else NA | |
| } | |
| ## build a data.frame with one row per user profile | |
| ## every field goes through nonempty(): data.frame() silently drops NULL | |
| ## arguments, so a profile lacking any one field would produce a row with | |
| ## fewer columns and make rbind() fail | |
| user_data <- do.call(rbind, lapply(users, function(user) { | |
|   data.frame( | |
|     id=nonempty(user$ownerId), | |
|     displayName=nonempty(user$displayName), | |
|     firstName=nonempty(user$firstName), | |
|     lastName=nonempty(user$lastName), | |
|     email=nonempty(user$email), | |
|     position=nonempty(user$position), | |
|     industry=nonempty(user$industry), | |
|     organization=nonempty(user$company), | |
|     location=nonempty(user$location), | |
|     team=nonempty(user$teamName), | |
|     ## keep text as character so rows combine without factor-level handling | |
|     stringsAsFactors=FALSE | |
|   ) | |
| })) | |
| ## write the collected profile data to CSV without a row-name column | |
| ## (FALSE spelled out: F is an ordinary variable that can be reassigned) | |
| write.csv(user_data, file='tox_challenge_user_data.csv', row.names=FALSE) | |
| ## insert lots of manual scrubbing in the text editor here | |
| ## write.csv(participants, file='Desktop/tox_challenge_participants_scrubbed.csv', row.names=F) | |
| ## we now have nicely regularized locations | |
| ## load the scrubbed participant table, keeping text columns as character | |
| ## (TRUE/FALSE spelled out: T and F are ordinary reassignable variables) | |
| participants <- read.csv('Desktop/tox_challenge_participants_scrubbed.csv', header=TRUE, stringsAsFactors=FALSE) | |
| ## split into country, city and, for US locations, state | |
| ## the last comma-separated piece is taken as the country, the first (when | |
| ## there is more than one piece) as the city, and the middle piece of a | |
| ## three-part location ending in 'USA' as the state | |
| ## as.character() guards against a column read as non-character (e.g. all NA) | |
| split_locations <- strsplit(as.character(participants$Location.inferred), ", *") | |
| ## vapply() guarantees exactly one string per row; a blank location splits | |
| ## to character(0), which previously made sapply() return a list | |
| participants$country <- vapply(split_locations, function(loc) { | |
|   if (length(loc) > 0) loc[length(loc)] else NA_character_ | |
| }, character(1)) | |
| participants$city <- vapply(split_locations, function(loc) { | |
|   if (length(loc) > 1) loc[1] else NA_character_ | |
| }, character(1)) | |
| participants$state <- vapply(split_locations, function(loc) { | |
|   if (length(loc) == 3 && loc[3] == 'USA') loc[2] else NA_character_ | |
| }, character(1)) | |
| ## what an international crew we have! | |
| table(participants$country, useNA='ifany') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment