Skip to content

Instantly share code, notes, and snippets.

@chengjun
Created July 7, 2014 10:00
Show Gist options
  • Save chengjun/07fe07b05ea36fe8fbdd to your computer and use it in GitHub Desktop.
Save chengjun/07fe07b05ea36fe8fbdd to your computer and use it in GitHub Desktop.
# ---- Load data ----
# NOTE: the working directory is machine-specific; adjust it before running.
setwd("F:/digg/")
# Argument names are spelled out in full (header, na.strings) and TRUE/FALSE
# are used instead of T/F, which are ordinary variables that can be reassigned.
ft <- read.csv("./final_front_zero_mean_time.csv", header = TRUE,
               na.strings = "NA", stringsAsFactors = TRUE)
dat <- read.csv("./digg_votes_threshold.csv", header = TRUE,
                stringsAsFactors = FALSE)
# Columns of dat: storyid, user, time, threshold.
# One day is treated as one session.
# Turn each vote timestamp (presumably Unix epoch seconds -- confirm against
# the CSV) into an integer day index. 14394 is the number of days from
# 1970-01-01 to 2009-05-30, so day index 1 falls on 2009-05-31.
time <- dat$time
time <- as.POSIXct(time, origin = "1970-01-01")
time <- as.numeric(as.Date(time)) - 14394
# Extract the votes that fall on day index `i`.
#
# Reads two globals: `dat` (the vote table) and `time` (one day index per row
# of `dat`). Only the first three columns of `dat` are kept, and the result is
# ordered by user and then by time within each user.
#
# i: day index to select.
# Returns a data frame with the matching rows (zero rows if none match).
get_daily_data <- function(i) {
  rows_on_day <- which(time == i)
  day_votes <- dat[rows_on_day, 1:3]
  # Sort by user first, then by time within each user.
  sort_idx <- with(day_votes, order(user, time))
  day_votes[sort_idx, ]
}
# Build the click stream (edge list) of a single user.
#
# Reads two globals: `data` (the votes of the current day) and `unique_user`
# (the users present in `data`). The user's story ids, in the row order of
# `data`, are wrapped between the artificial nodes "source" and "sink", and
# every pair of consecutive entries becomes one edge.
#
# i: position of the user inside `unique_user`.
# Returns a two-column character matrix: column 1 is the origin of each
# transition, column 2 its destination.
get_individual_stream <- function(i) {
  user_rows <- which(data$user == unique_user[i])
  path <- c("source", data$storyid[user_rows], "sink")
  # embed() puts the later element first, so swap the columns to get
  # (from, to) order.
  lagged <- embed(path, 2)
  lagged[, c(2, 1)]
}
# Stack the click streams of all users into one edge list.
#
# unique_user: vector of users to process. NOTE: get_individual_stream() is
#   called with a position and looks that position up in the *global*
#   `unique_user`, so this argument must be that same vector.
# Returns a two-column character matrix (from, to) with one row per
# transition; a 0 x 2 matrix when there are no users.
get_daily_clickstream <- function(unique_user) {
  # 1:length(x) would iterate over c(1, 0) for empty input and fabricate
  # source -> sink rows, so handle the empty case explicitly.
  if (length(unique_user) == 0) {
    return(matrix(character(0), nrow = 0, ncol = 2))
  }
  streams <- lapply(seq_along(unique_user), get_individual_stream)
  do.call(rbind, streams)
}
# Draw the click-stream network with igraph.
#
# net: edge list whose first two columns are the origin and destination of
#   each edge and whose third column is the edge weight.
# Side effects: attaches igraph, resets the global RNG seed to 34 (so the
# layout is reproducible) and draws on the active graphics device.
plot_clickstream <- function(net) {
  # library() stops with a clear error when igraph is not installed;
  # require() would only warn and the next call would fail obscurely.
  library(igraph)
  g <- graph.data.frame(net, directed = TRUE)
  E(g)$weight <- net[, 3]
  # Drop self-loops; simplify() by default also merges parallel edges
  # (how weights are combined follows igraph's edge.attr.comb option).
  g <- simplify(g, remove.loops = TRUE)
  set.seed(34)
  l <- layout.fruchterman.reingold(g)
  # Node size grows with the log of the degree; +1 avoids log(0).
  V(g)$size <- log(centralization.degree(g)$res + 1)
  # Edge width is the weight above the minimum, scaled down by 1000.
  E(g)$width <- (E(g)$weight - min(E(g)$weight)) / 1000
  # Label only the artificial "source" and "sink" nodes.
  node_name <- V(g)$name
  node_name[!node_name %in% c("source", "sink")] <- ""
  plot(g, vertex.label = node_name,
       edge.curved = FALSE, vertex.frame.color = "#FFFFFF",
       vertex.label.cex = 1.2, edge.arrow.size = 0.1, layout = l)
}
# ---- Build and plot the click-stream network for one day ----
day_id <- 10
# Votes of the chosen day with duplicated rows removed. `data` and
# `unique_user` are globals read by get_individual_stream().
data <- unique(get_daily_data(day_id))
unique_user <- unique(data$user)
# Edge list of all transitions; data.frame() names the two unnamed matrix
# columns X1 (from) and X2 (to).
net <- data.frame(get_daily_clickstream(unique_user))
dim(net)
# Count how often each (from, to) pair occurs.
net$weight <- 1
net <- aggregate(weight ~ X1 + X2, data = net, FUN = sum)
dim(net)
plot_clickstream(net)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment