-
-
Save KWMalik/3881665 to your computer and use it in GitHub Desktop.
Practising some data shaping in R for #compdata week 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compdata week 1 practice
# Script reads a NodeXL Twitter search for the #compdata hashtag that has been uploaded to a Google Spreadsheet
# Data is reshaped using subsetting to get a slice of rows/columns fitting a certain condition
# Read the vertices list (gid=1) as CSV from the published Google Spreadsheet.
# The column headers are in row 2 of the sheet, so the first line is skipped
# and the next one is used as the header.
vertices <- read.csv(
  "https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=1&output=csv",
  header = TRUE,
  skip = 1
)
# See the number of rows (one per vertex)
nrow(vertices)
# Read the edges list (gid=0) as CSV from the published Google Spreadsheet.
# The column headers are in row 2 of the sheet, so the first line is skipped
# and the next one is used as the header.
edges <- read.csv(
  "https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=0&output=csv",
  header = TRUE,
  skip = 1
)
# Look at the structure of the data
str(edges)
# Note on $Relationship: on R < 4.0 read.csv converts strings to factors, so
# it shows as a Factor with levels such as "Followed", "Mentions"; on R >= 4.0
# the default is stringsAsFactors = FALSE and it shows as chr instead.
# What are all the distinct values in $Relationship (works for both types)
table(edges$Relationship)
# How many rows have a $Tweet that starts with
# 'I just signed up for Computing for Data Analysis'
# (the pattern is anchored with ^, so only a match at the start counts)
iJust <- grepl("^I just signed up for Computing for Data Analysis", edges$Tweet)
table(iJust)
# Want a subset of the first two columns of edges ($Vertex.1 and $Vertex.2)
# restricted to rows where $Relationship is 'Followed'.
# Build the logical row mask for the 'Followed' subset. %in% is used instead
# of == because == yields NA for a missing Relationship, and an NA in a
# logical row index adds an all-NA row to the result.
followed <- edges$Relationship %in% "Followed"
# Now make a new data.frame from the first two columns of edges, keeping only
# the rows selected by the mask
edgeList <- edges[followed, 1:2]
str(edgeList)
# The two steps above (build the mask, then subset) can be combined into one:
edgeList <- edges[edges$Relationship %in% "Followed", 1:2]
# Look at the new data
str(edgeList)
# Now look at the most frequent occurrences of $Vertex.1 values in edges.
# table() gives a frequency table; wrapping it in data.frame() yields one row
# per distinct value with its count in the Freq column.
topInVert1 <- data.frame(table(edges$Vertex.1))
# Reorder the rows so the highest counts come first
topInVert1 <- topInVert1[order(-topInVert1$Freq), ]
# Print the top 10 results. head() is used rather than [1:10, ] so that fewer
# than 10 distinct values does not pad the output with NA rows.
head(topInVert1, 10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment