Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created July 21, 2017 17:33
Show Gist options
  • Save explodecomputer/3cddb1e157923db2f8ac062b0f56db8c to your computer and use it in GitHub Desktop.
Save explodecomputer/3cddb1e157923db2f8ac062b0f56db8c to your computer and use it in GitHub Desktop.
subreddit algebra
# Load the saved workspace; it is expected to provide `z_score_matrix`.
load(file = "z_score_matrix.RData")

# Column names are split on "|" and only the first piece is kept as the label.
nom <- do.call(
  rbind,
  strsplit(colnames(z_score_matrix), split = "\\|")
)
colnames(z_score_matrix) <- nom[, 1]

# Squared correlation between every pair of columns, each pair computed on
# the observations that are complete for that pair.
R <- cor(z_score_matrix, use = "pairwise.complete.obs")^2
#' Rank the entries of a similarity matrix against a signed mix of its rows
#'
#' Builds a query vector by adding or subtracting the rows of `mat` named in
#' `cursubs`, scores every column of `mat` against that vector with
#' `cosine()`, and returns the best-scoring entries with the query names
#' themselves removed.
#'
#' `cosine()` is not defined in this file and must already be available when
#' the function is called. NOTE(review): presumably `lsa::cosine`, which
#' accepts a vector `x` and a matrix `y` and returns one score per column of
#' `y` -- confirm.
#'
#' @param cursubs Character vector of row names of `mat`. Matched exactly
#'   (case and whitespace included) when looking up rows.
#' @param curops Character vector, same length as `cursubs`. `"+"` adds the
#'   corresponding row to the query vector; any other value subtracts it.
#' @param numret Maximum number of entries to return.
#' @param mat Numeric matrix with row and column names. Defaults to the
#'   global `R`.
#' @return The scores returned by `cosine()`, sorted from highest to lowest,
#'   without the entries named in `cursubs`, truncated to `numret` elements.
findrelsubreddit <- function(cursubs, curops, numret = 20, mat = R) {
  stopifnot(
    length(cursubs) > 0,
    length(curops) == length(cursubs),
    !anyNA(curops)
  )

  # Resolve every name up front: an unknown name would otherwise select zero
  # rows and silently collapse the query vector to length 0.
  rows <- match(cursubs, rownames(mat))
  if (anyNA(rows)) {
    stop(
      "Not found in rownames(mat): ",
      paste0("\"", cursubs[is.na(rows)], "\"", collapse = ", "),
      call. = FALSE
    )
  }

  curvec <- 0
  for (i in seq_along(cursubs)) {
    if (curops[i] == "+") {
      curvec <- curvec + mat[rows[i], ]
    } else {
      curvec <- curvec - mat[rows[i], ]
    }
  }

  # Score against `mat` (the matrix the query vector came from), not the
  # global `R`, so a caller-supplied matrix is honoured.
  scores <- cosine(x = curvec, y = mat)
  ranked <- scores[order(scores, decreasing = TRUE)]

  # Drop the query entries with a logical mask. `x[-which(cond)]` returns an
  # empty vector when nothing matches. Both sides are lower-cased so that
  # mixed-case names are actually excluded.
  entry_names <- names(ranked)
  if (!is.null(entry_names)) {
    ranked <- ranked[!(tolower(entry_names) %in% tolower(cursubs))]
  }

  # `ranked` is in descending order (NA scores last), so the most similar
  # entries are at the head.
  head(ranked, numret)
}
# Example query: a single trait, added ("+"), scored against the default
# matrix. The trailing space in the name is kept because rows are looked up
# by exact string match.
findrelsubreddit(
  cursubs = "Age of smoking initiation ",
  curops = "+"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment