Last active
August 29, 2015 14:02
-
-
Save MarkEdmondson1234/490f28c56dd0b7c43776 to your computer and use it in GitHub Desktop.
A couple of functions used to process Tweets after fetching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### tweetdata.r
### Fetch Twitter data and Data Transformations
### Requires you have authenticated in auth.R
### 15th June 2014
### Mark Edmondson @HoloMarkeD
###
### processTweets(): derive per-tweet features from a tweet data.frame and
### aggregate the estimated "time spent" for use in plots.
###
### Arguments:
###   tweetDF        data.frame of tweets (e.g. the df$tweetDF data.frame from the
###                  Twitter API). The columns read here are `text`, `created`,
###                  `replyToSN` and `retweeted`.
###   timeToTweet    time credited to a tweet whose replyToSN is NA
###   timeToReply    time credited to a tweet whose replyToSN is not NA
###   timeToRetweet  time credited to a retweet (overrides the two above)
###                  NOTE(review): the unit of these three values is not stated
###                  anywhere in this file - confirm with the plotting code.
###
### Returns a list of data.frames:
###   df1              tweetDF plus the columns URL, URL_B, week, hour, day, month,
###                    year, ymd, ym, yw, daypart and timespent
###   tweetTD          timespent summed by year-week (yw) and daypart
###   tweetTDm         timespent summed by year-month (ym) and daypart
###   tweetHour        timespent summed by year-week (yw) and hour
###   tweetLinks       timespent summed by year-week (yw) and URL_B
###   tweetLinksMonth  timespent summed by year-month (ym) and URL_B
processTweets <- function(tweetDF,
                          timeToTweet=1,
                          timeToReply=2,
                          timeToRetweet=0.5){
  ### library() stops with a clear error when a package is missing; require()
  ### only returns FALSE and the failure would surface later as a confusing
  ### "could not find function" error.
  library(lubridate)
  library(reshape2)
  library(stringr)

  ### Fail early, naming the problem, instead of deep inside ifelse()/melt().
  requiredCols <- c("text", "created", "replyToSN", "retweeted")
  missingCols <- setdiff(requiredCols, names(tweetDF))
  if (length(missingCols) > 0) {
    stop("tweetDF is missing required column(s): ",
         paste(missingCols, collapse=", "), call.=FALSE)
  }

  ### First http(s) link in the tweet text, plus a flag for "contains a link".
  urlPattern <- "https?\\://[\\.\\/a-zA-Z0-9]+"
  tweetDF$URL <- str_extract(tweetDF$text, urlPattern)
  tweetDF$URL_B <- str_detect(tweetDF$text, urlPattern)

  ### Calendar parts. week/month are zero-padded strings so that the pasted
  ### keys below sort chronologically as text.
  hourOfDay <- hour(tweetDF$created)
  tweetDF$week <- sprintf("%02d", week(tweetDF$created))
  tweetDF$hour <- as.factor(hourOfDay)
  tweetDF$day <- day(tweetDF$created)
  tweetDF$month <- sprintf("%02d", month(tweetDF$created))
  tweetDF$year <- year(tweetDF$created)
  ### The day is padded here as well so ymd is a proper YYYY-MM-DD string
  ### (previously e.g. "2014-06-5"); the `day` column itself stays numeric.
  tweetDF$ymd <- paste(tweetDF$year, tweetDF$month,
                       sprintf("%02d", tweetDF$day), sep="-")
  tweetDF$ym <- paste(tweetDF$year, tweetDF$month, sep="-")
  tweetDF$yw <- paste(tweetDF$year, tweetDF$week, sep="_")

  ### Six-hour buckets: [0,6) early, [6,12) morning, [12,18) afternoon,
  ### [18,24) evening. Anything outside (or NA) becomes NA.
  tweetDF$daypart <- cut(hourOfDay,
                         breaks=c(-Inf, 6, 12, 18, 24),
                         labels=c("early","morning","afternoon","evening"),
                         right=FALSE,
                         ordered_result=TRUE)

  ### Time credited per tweet: reply vs. original tweet, then retweets
  ### override either of those.
  tweetDF$timespent <- ifelse(is.na(tweetDF$replyToSN), timeToTweet, timeToReply)
  tweetDF$timespent <- ifelse(tweetDF$retweeted, timeToRetweet, tweetDF$timespent)

  ### Sum timespent for every (period, group) combination present in the data
  ### and return the rows ordered by period. id.vars/measure.vars are given
  ### explicitly so melt() does not have to guess them from the column types.
  sumTimespentBy <- function(period, group){
    long <- melt(tweetDF[, c(period, group, "timespent")],
                 id.vars=c(period, group),
                 measure.vars="timespent")
    wide <- dcast(long,
                  stats::as.formula(paste(period, "+", group, "~ variable")),
                  fun.aggregate=sum)
    wide[order(wide[[period]]), ]
  }

  ### output is a list of dataframes for use in plots
  list(df1=tweetDF,
       tweetTD=sumTimespentBy("yw", "daypart"),
       tweetTDm=sumTimespentBy("ym", "daypart"),
       tweetHour=sumTimespentBy("yw", "hour"),
       tweetLinks=sumTimespentBy("yw", "URL_B"),
       tweetLinksMonth=sumTimespentBy("ym", "URL_B"))
}
### processTwitterExport(): adapt a Twitter export so it can be fed into
### processTweets(), giving output similar to the API route.
###
### Arguments:
###   export  data.frame of the export. The columns read here are `timestamp`,
###           `text`, `in_reply_to_user_id` and `retweeted_status_id`.
###
### Returns the list produced by processTweets() (called with its default
### time weights).
processTwitterExport <- function(export){
  ### Treat empty strings like missing values. When the export is read with the
  ### id columns as character, "no reply"/"no retweet" arrives as "" rather
  ### than NA and would otherwise be counted as a reply/retweet.
  blankToNA <- function(x){
    x[!is.na(x) & as.character(x) == ""] <- NA
    x
  }

  ### Parse the timestamp into a real date-time. The previous strftime() call
  ### *formatted* the value back to a character string via the session's local
  ### time zone, so `created` ended up as text and could be distorted around
  ### local daylight-saving changes. UTC keeps the wall-clock values as written.
  ## Original author's note: there is no time in timestamps from
  ## 2010-11-04 21:46:36 in Twitter API response - just dates
  export$created <- as.POSIXct(as.character(export$timestamp), tz="UTC")

  export$text <- as.character(export$text)
  export$replyToSN <- blankToNA(export$in_reply_to_user_id)
  export$retweeted <- !is.na(blankToNA(export$retweeted_status_id))

  processTweets(export)
}
### Demo of processTweets(): process the example tweet data.frame `tweetDF`
### that is expected to exist in the workspace already. Freshly fetched data
### could be used instead, e.g. via processFetchTweets("holomarked",n=3200)
### (not defined in this file).
MarkTweetProcessed <- processTweets(tweetDF = tweetDF)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment