Skip to content

Instantly share code, notes, and snippets.

@mdhorine
Created May 30, 2018 22:17
Show Gist options
  • Save mdhorine/c5872f26c5877cb185646a575d456fa2 to your computer and use it in GitHub Desktop.
Save mdhorine/c5872f26c5877cb185646a575d456fa2 to your computer and use it in GitHub Desktop.
SNA - Assignment 1
###################################
## Social Network Analysis ##
## Module 1 Assignment ##
## Author: mhorine ##
###################################
# Load the igraph library
library(igraph)
# Load the edgelist and examine the data
el=read.csv(file.choose(),header=TRUE) # read an edgelist .csv file that you select
head(el) #shows some top rows of the edgelist to check the data
# Treat node labels as characters, not numbers
el[,1]=as.character(el[,1]) #node names in the first column of el
el[,2]=as.character(el[,2]) #node names in the second column of el
# Transform el data to matrix, then edgelist graph (initially just connections)
m <- as.matrix(el[, c(1:2)])
g <- graph.edgelist(m, directed=FALSE)
# Add type as an attribute of the connections
E(g)$type <- as.character(el[,3])
table(E(g)$type)
# Question 6 - Number of nodes who were partners
q6 <- subgraph.edges(graph=g, E(g)[type=="Partners"], delete.vertices = TRUE)
q6 # Shows 183 nodes, 240 connections
# Question 7 - Number of edges in network who were sharing computers
q7 <- subgraph.edges(graph=g, E(g)[type=="Computer"], delete.vertices = TRUE)
q7 # Shows 39 nodes, 23 edges
# Question 8 - Diameter for network of students doing homework at same time
q8 <- subgraph.edges(graph=g, E(g)[type=="Time"], delete.vertices = TRUE)
diameter(q8) # Shows diameter of 12
# Question 9 - Which network has the highest density
edge_density(q6) # 0.01
edge_density(q7) # 0.03 - Highest density
edge_density(q8) # 0.02
# Question 10 - Which network has the shortest average path
average.path.length(q6, directed = FALSE) # 1.32
average.path.length(q7, directed = FALSE) # 1.18 - Shortest average path
average.path.length(q8, directed = FALSE) # 5.22
# Question 11 - Which network has the highest number of components
count_components(q6) # 50 - Highest number of components
count_components(q7) # 17
count_components(q8) # 22
# Note: For this and the following questions, we need to add the node attributes to the graphs
# Start by importing the node data
nodes <- readRDS("Asgn1-Nodes.RData")
colnames(nodes) # Name, Sex, Grade, Education, id
# Import the information from the node data to a new attribute on the vector
V(g)$Name <- as.character(nodes$Name[match(V(g)$name,nodes$id)])
V(g)$Sex <- as.character(nodes$Sex[match(V(g)$name,nodes$id)])
V(g)$Grade <- as.numeric(nodes$Grade[match(V(g)$name,nodes$id)]) # note numeric
V(g)$Education <- as.character(nodes$Education[match(V(g)$name,nodes$id)])
# Recreate the subgraphs now that we've added in node attributes - and name them better this time
Partners <- subgraph.edges(graph=g, E(g)[type=="Partners"], delete.vertices = TRUE)
Computer <- subgraph.edges(graph=g, E(g)[type=="Computer"], delete.vertices = TRUE)
Time <- subgraph.edges(graph=g, E(g)[type=="Time"], delete.vertices = TRUE)
# Add degree as an attribute to each of the subgraphs
V(Partners)$Degree <- degree(Partners)
V(Computer)$Degree <- degree(Computer)
V(Time)$Degree <- degree(Time)
# Question 12 - Which student has the highest degree of those sharing a computer
V(Computer)$Name[V(Computer)$Degree==max(degree(Computer))] # Uless (3)
# Question 13 - Which student has the highest degree of those that are partners
V(Partners)$Name[V(Partners)$Degree==max(degree(Partners))] # Chayden, Kaleaha, Margrit, Nyzaiah (4)
# Questions 14-16 - Match the degree distribution
degree(Partners) %>% table() %>% barplot(,xlab="degree", ylab="number of nodes") # Q14
degree(Computer) %>% table() %>% barplot(,xlab="degree", ylab="number of nodes") # Q15
degree(Time) %>% table() %>% barplot(,xlab="degree", ylab="number of nodes") # Q16
# Question 17 - Which distribution represents power-law distribution the least?
# From wikipedia: In statistics, a power law is a functional relationship between two quantities,
# where a relative change in one quantity results in a proportional relative change in the other
# quantity, independent of the initial size of those quantities: one quantity varies as a power of
# another.
# Answer is Partners, since it doesn't show the long-tail
# Question 18 - Name of the student with the highest betweenness of those working at the same time
V(Time)$Name[betweenness(Time)==max(betweenness(Time, directed = FALSE))]
# Question 19 - Which network has the highest centralisation
centr_degree(Partners)$centralization # 0.008
centr_degree(Computer)$centralization # 0.048
centr_degree(Time)$centralization # 0.049
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment