Last active
November 20, 2023 23:29
-
-
Save epijim/8524938 to your computer and use it in GitHub Desktop.
scrape facebook from R. Based off
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################### | |
## ## | |
## Setup ## | |
## ## | |
############################################################################################### | |
# One-off package installation (all from CRAN); uncomment as needed.
# install.packages("Rfacebook")
# install.packages("Rook")
# install.packages("igraph")

# Directory that holds the saved OAuth token and receives every output file.
# The path is specific to the original author's machine, so only switch to it
# when it exists; otherwise keep running from the current working directory.
project_dir <- "//fs-home/home$/jab254/Desktop/Facebook R"
if (file.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Project directory not found; staying in ", getwd())
}

# Attach order is kept as in the original script, since later packages can
# mask names exported by earlier ones.
library(Rfacebook)  # fbOAuth, getUsers, getFriends, getNetwork, getPage
library(Rook)
library(igraph)     # graph construction, layout and plotting
library(gtools)     # quantcut
library(reshape)    # presumably provides rbind.fill (via plyr) - TODO confirm
library(lubridate)  # mdy
library(ggplot2)
library(scales)     # date_format
# Register an app at https://developers.facebook.com/apps and paste its
# credentials below to create the OAuth token (only needed once):
# fb_oauth <- fbOAuth(app_id="APP ID", app_secret="APP SECRET")
# Keep the token on disk so that later sessions can reuse it:
# save(fb_oauth, file="fb_oauth")

# Restore the previously saved token; this brings `fb_oauth` back into scope.
load(file = "fb_oauth")

# getUsers() returns public information about one or more Facebook users.
me <- getUsers(users = "me", token = fb_oauth)
me$name  # my name

# Friend list as returned by getFriends() in its simplified form.
my_friends <- getFriends(token = fb_oauth, simplify = TRUE)
head(my_friends, n = 10)  # first 10 friends - ID is the order they joined FB
nrow(my_friends)          # number of friends
############################################################################################### | |
## ## | |
## Download info ## | |
## ## | |
############################################################################################### | |
# Too many friends in one request gets rejected by the FB API, so the ids are
# queried in batches of at most `batch_size`. The batches are derived from the
# actual number of friends, so the code works for any list length (the earlier
# fixed 1:80, 81:160, ... slices assumed more than 480 friends).
batch_size <- 80
friend_ids <- my_friends$id
batch_index <- ceiling(seq_along(friend_ids) / batch_size)
friend_batches <- split(friend_ids, batch_index)

# One getUsers() call per batch; each call yields a data frame.
friend_info_batches <- lapply(friend_batches, getUsers,
                              token = fb_oauth, private_info = TRUE)

# rbind.fill() stacks the batches and pads columns missing from a batch.
my_friends_info <- do.call(rbind.fill, unname(friend_info_batches))

# SAVE IT! Once as an R object and once as a tab-separated text file.
save(my_friends_info, file = "my_friends_info.Rda")
write.table(my_friends_info, "my_friends_info.txt", sep = "\t")
############################################################################################### | |
## ## | |
## Take a gander ## | |
## ## | |
############################################################################################### | |
# Frequency tables for a few profile fields.
# These stay top-level expressions so they auto-print exactly as before.
table(my_friends_info[["relationship_status"]])
table(my_friends_info[["gender"]])
table(my_friends_info[["location"]])
table(my_friends_info[["hometown"]])
# Age histogram
# Parse the birthday strings as month/day/year dates. Despite the column name,
# `year` holds the full parsed date, not just the year.
# NOTE(review): birthdays without a year presumably parse to NA - confirm.
my_friends_info$year <- mdy(my_friends_info$birthday)
x <- my_friends_info$year

# Write the histogram of birth dates to bornhist.pdf.
pdf("bornhist.pdf")
h <- hist(x, breaks = 15, freq = TRUE,
          col = "blue", xlab = "Year born (according to FB)",
          main = "")
dev.off()
# Not surprisingly it's the least mature/youngest people I know on FB with
# these statuses...
# which() discards the NA comparisons, so friends without a status are skipped.
complicated_rows <- which(
  my_friends_info$relationship_status == "It's complicated"
)
print(my_friends_info[complicated_rows, "name"])

open_rows <- which(
  my_friends_info$relationship_status == "In an open relationship"
)
print(my_friends_info[open_rows, "name"])
############################################################################################### | |
## ## | |
## prepare net ## | |
## ## | |
############################################################################################### | |
# Download who is friends with whom (restricted to my own friends), as an
# adjacency matrix.
my_network <- getNetwork(token = fb_oauth, format = "adj.matrix")

# Friends whose row in the matrix is all zero, i.e. connected to me alone.
singletons <- rowSums(my_network) == 0

# Build the graph from everybody else.
connected <- !singletons
my_graph <- graph.adjacency(my_network[connected, connected])

# Tidy the graph with igraph's simplify().
# NOTE(review): per the igraph docs its defaults drop self-loops and collapse
# multiple edges - confirm against the installed version.
my_graph_simple <- simplify(my_graph)

# Layout used by the numbered plots (DrL force-directed layout).
layout <- layout.drl(my_graph_simple, options = list(simmer.attraction = 0))

# Edge styling: faint, semi-transparent and very thin.
E(my_graph_simple)$color <- rgb(0.5, 0.5, 0, alpha = 0.15)
E(my_graph_simple)$width <- 0.0001
# Plot 0 - let igraph choose the layout, labels hidden.
# (The stray trailing comma that passed an empty argument has been removed.)
pdf("0_auto.pdf")
plot(my_graph_simple, layout = layout.auto,
     vertex.label = NA)
dev.off()

# Plot 1 - precomputed layout, with names (tiny labels).
pdf("1_default.pdf")
plot(my_graph_simple, vertex.size = 2,
     # vertex.label = NA,
     vertex.label.cex = 0.2,
     edge.arrow.size = 0, edge.curved = TRUE, layout = layout)
dev.off()

# Plot 2 - precomputed layout, no names.
pdf("2_default.pdf")
plot(my_graph_simple, vertex.size = 2,
     vertex.label = NA,
     vertex.label.cex = 0.2,
     edge.arrow.size = 0, edge.curved = TRUE, layout = layout)
dev.off()
# Let's colour based on connectedness.
# Betweenness is the number of "shortest paths" going through a particular
# individual.
hc4 <- heat.colors(10)
g.bet <- betweenness(my_graph_simple)

# Bin the scores into deciles. The earlier cut(g.bet, quantile(g.bet)) line was
# dropped: its result was overwritten straight away, and cut() stops with
# "'breaks' are not unique" whenever quantiles tie (e.g. many zero scores).
vcolors <- quantcut(g.bet, q = seq(0, 1, by = 0.1))

# Indexing by a factor uses its integer codes, so each bin picks one colour.
vcolors2 <- hc4[vcolors]
# Plot 3 - vertices coloured by betweenness bin, no names.
pdf("3_coloured.pdf")
plot(my_graph_simple,
     layout = layout,
     vertex.color = vcolors2, vertex.size = 2,
     vertex.label = NA, vertex.label.cex = 2,
     edge.arrow.size = 0, edge.curved = TRUE)
dev.off()

# Plot 4 - same colouring, with names.
pdf("4_coloured_withnames.pdf")
plot(my_graph_simple,
     layout = layout,
     vertex.color = vcolors2, vertex.size = 2,
     # vertex.label = NA,
     vertex.label.cex = 0.5,
     edge.arrow.size = 0, edge.curved = TRUE)
dev.off()
############################################################################################### | |
## ## | |
## A facebook page ## | |
## ## | |
############################################################################################### | |
# Fetch up to 5000 posts from three public pages.
# Our MCR social events page
mcrents <- getPage(page = "jesusmcrents", token = fb_oauth, n = 5000)
# The pub on my street
brewhouse <- getPage(page = "TheCambridgeBrewHouse", token = fb_oauth, n = 5000)
# A brilliant cartoon
archer <- getPage(page = "ArcherFX", token = fb_oauth, n = 5000)

# Most-liked post of each page (the row with the largest likes_count).
# The figures quoted below are the author's observations when the script was
# written.
mcrents[which.max(mcrents$likes_count), ]      # 3 likes and 0 comments
brewhouse[which.max(brewhouse$likes_count), ]  # 16 likes and 2 comments
archer[which.max(archer$likes_count), ]        # 73,013 likes, 2146 comments
# and 9315 shares and was this video
# https://www.facebook.com/photo.php?v=10200194370090771
# Convert Facebook timestamp strings to R date-times.
#
# datestring: character vector shaped like "2013-01-02T03:04:05+0000". The
#   "+0000" suffix is part of the format string and is matched literally.
# Returns a POSIXct vector in the GMT time zone; strings that do not match the
#   format come back as NA.
#
# The value is returned directly. It used to be assigned to an unused local
# called `date`, which made the function return invisibly.
format.facebook.date <- function(datestring) {
  as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
}
# Average one per-post metric over calendar months.
#
# metric: name of the metric without its "_count" suffix, e.g. "likes" reads
#   the column "likes_count".
# data: data frame of posts holding the "<metric>_count" column and a `month`
#   column of "YYYY-MM" strings. Defaults to the global `archer`, so existing
#   one-argument calls keep working.
# Returns a data frame with one row per month and the columns
#   month  - Date, pinned to the 15th of the month for plotting,
#   x      - mean of the metric over that month's posts,
#   metric - the metric name, repeated on every row.
aggregate.metric <- function(metric, data = archer) {
  count_col <- paste0(metric, "_count")
  if (!count_col %in% names(data)) {
    stop("no column named '", count_col, "' in data", call. = FALSE)
  }
  m <- aggregate(data[[count_col]], by = list(month = data$month), FUN = mean)
  # "YYYY-MM" is not a complete date; append a day so as.Date() can parse it.
  m$month <- as.Date(paste0(m$month, "-15"))
  m$metric <- metric
  m
}
# Create a data frame with average metric counts per month.
archer$datetime <- format.facebook.date(archer$created_time)
archer$month <- format(archer$datetime, "%Y-%m")  # "YYYY-MM" grouping key
df.list <- lapply(c("likes", "comments", "shares"), aggregate.metric)
df <- do.call(rbind, df.list)

# Visualize the evolution of each metric.
# Plot last - archer page
# NOTE(review): newer ggplot2 releases may expect date_breaks = "1 year"
# instead of breaks = "years" - confirm against the installed version.
archer_plot <- ggplot(df, aes(x = month, y = x, group = metric)) +
  geom_line(aes(color = metric)) +
  scale_x_date(breaks = "years", labels = date_format("%Y")) +
  scale_y_log10("Average count per post",
                breaks = c(10, 100, 1000, 10000, 50000)) +
  theme_bw() +
  theme(axis.title.x = element_blank())

# A ggplot object is only drawn when printed. Auto-printing does not happen
# when the script is run through source(), so print explicitly; otherwise the
# PDF would be left empty.
pdf("archer.pdf")
print(archer_plot)
dev.off()
######################################################################### | |
# Pull photos from Facebook:
# download the small profile picture of each friend into photos/<id>.jpg.
# NOTE(review): friends.id and friends.pic are not defined in the visible part
# of this script - presumably the id and picture-URL columns of
# my_friends_info; confirm before running.
if (!file.exists("photos")) {
  dir.create("photos")
}
# seq_along() makes the loop a no-op for an empty id vector, where
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(friends.id)) {
  download.file(as.character(friends.pic[i]),
                destfile = paste0("photos/", friends.id[i], ".jpg"),
                mode = "wb")  # binary mode keeps the image bytes intact
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment