KWMalik · October 12, 2012 21:37
diff --git a/gistfile1.rebol b/gistfile1.rebol
 # compdata week 1 practice
 # Script reads a NodeXL Twitter search for the #compdata hashtag that has
 # been uploaded to a Google Spreadsheet.
 # Data is reshaped using subsetting to get a slice of rows/columns fitting
 # a certain condition.

 # Read the vertices list as CSV from the Google Spreadsheet.
 # skip = 1 drops the first line so the header is taken from row 2.
 vertices <- read.csv(
   "https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=1&output=csv",
   header = TRUE,
   skip = 1
 )

 # see number of rows
 nrow(vertices)


 # Read the edges list as CSV from the Google Spreadsheet.
 # skip = 1 drops the first line so the header is taken from row 2.
 edges <- read.csv(
   "https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=0&output=csv",
   header = TRUE,
   skip = 1
 )

 # look at the structure of the data (column names and types)
 str(edges)

 # str() output noted when this was written:
 #   $ Relationship : Factor w/ 4 levels "Followed","Mentions"
 # NOTE(review): on R >= 4.0 read.csv() keeps strings as character by
 # default, so str() may report chr instead of Factor; table() below counts
 # the distinct values either way.
 # How many rows are there for each value of $Relationship?
 table(edges$Relationship)

 # How many rows have a $Tweet that starts with
 # 'I just signed up for Computing for Data Analysis'?
 # The pattern is anchored with ^, so only matches at the start are counted.
 iJust <- grepl("^I just signed up for Computing for Data Analysis", edges$Tweet)
 table(iJust)

 # Goal: keep the first two columns of edges (described in this script as
 # $Vertex.1 and $Vertex.2) for the rows where $Relationship is 'Followed'.

 # Step 1: build a logical mask with one entry per row of edges.
 # %in% is used rather than == because it gives FALSE (never NA) when
 # $Relationship is missing; an NA in a logical row index would add an
 # all-NA row to the result.
 followed <- edges$Relationship %in% "Followed"

 # Step 2: make a new data.frame, selecting rows by the mask and the first
 # two columns by position.
 edgeList <- edges[followed, 1:2]
 str(edgeList)

 # The two steps above can be combined into a single expression
 edgeList <- edges[edges$Relationship %in% "Followed", 1:2]

 # look at the new data
 str(edgeList)

 # Now look at the most frequent occurrences of $Vertex.1 values from edges

 # table() counts each distinct value; data.frame() turns the counts into a
 # data frame whose Freq column holds the count
 topInVert1 <- data.frame(table(edges$Vertex.1))

 # reorder the rows by descending frequency (negating Freq reverses order())
 topInVert1 <- topInVert1[order(-topInVert1$Freq), ]

 # print the top 10 results
 # head() returns only the rows that exist, whereas indexing with 1:10 would
 # pad the output with NA rows when there are fewer than 10 distinct values
 head(topInVert1, 10)
	# compdata week 1 practice
	# Script reads a NodeXL Twitter search for the #compdata hashtag that has
	# been uploaded to a Google Spreadsheet.
	# Data is reshaped using subsetting to get a slice of rows/columns fitting
	# a certain condition.

	# Read the vertices list as CSV from the Google Spreadsheet.
	# skip = 1 drops the first line so the header is taken from row 2.
	vertices <- read.csv(
		"https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=1&output=csv",
		header = TRUE,
		skip = 1
	)

	# see number of rows
	nrow(vertices)


	# Read the edges list as CSV from the Google Spreadsheet.
	# skip = 1 drops the first line so the header is taken from row 2.
	edges <- read.csv(
		"https://docs.google.com/spreadsheet/pub?key=0AqGkLMU9sHmLdHJ1Y0Jsb0R4MjdXM2M1WExXU21FVWc&single=true&gid=0&output=csv",
		header = TRUE,
		skip = 1
	)

	# look at the structure of the data (column names and types)
	str(edges)

	# str() output noted when this was written:
	#   $ Relationship : Factor w/ 4 levels "Followed","Mentions"
	# NOTE(review): on R >= 4.0 read.csv() keeps strings as character by
	# default, so str() may report chr instead of Factor; table() below counts
	# the distinct values either way.
	# How many rows are there for each value of $Relationship?
	table(edges$Relationship)

	# How many rows have a $Tweet that starts with
	# 'I just signed up for Computing for Data Analysis'?
	# The pattern is anchored with ^, so only matches at the start are counted.
	iJust <- grepl("^I just signed up for Computing for Data Analysis", edges$Tweet)
	table(iJust)

	# Goal: keep the first two columns of edges (described in this script as
	# $Vertex.1 and $Vertex.2) for the rows where $Relationship is 'Followed'.

	# Step 1: build a logical mask with one entry per row of edges.
	# %in% is used rather than == because it gives FALSE (never NA) when
	# $Relationship is missing; an NA in a logical row index would add an
	# all-NA row to the result.
	followed <- edges$Relationship %in% "Followed"

	# Step 2: make a new data.frame, selecting rows by the mask and the first
	# two columns by position.
	edgeList <- edges[followed, 1:2]
	str(edgeList)

	# The two steps above can be combined into a single expression
	edgeList <- edges[edges$Relationship %in% "Followed", 1:2]

	# look at the new data
	str(edgeList)

	# Now look at the most frequent occurrences of $Vertex.1 values from edges

	# table() counts each distinct value; data.frame() turns the counts into a
	# data frame whose Freq column holds the count
	topInVert1 <- data.frame(table(edges$Vertex.1))

	# reorder the rows by descending frequency (negating Freq reverses order())
	topInVert1 <- topInVert1[order(-topInVert1$Freq), ]

	# print the top 10 results
	# head() returns only the rows that exist, whereas indexing with 1:10 would
	# pad the output with NA rows when there are fewer than 10 distinct values
	head(topInVert1, 10)