Created
May 30, 2018 22:17
-
-
Save mdhorine/c5872f26c5877cb185646a575d456fa2 to your computer and use it in GitHub Desktop.
SNA - Assignment 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################### | |
## Social Network Analysis ## | |
## Module 1 Assignment ## | |
## Author: mhorine ## | |
################################### | |
# Load the igraph library | |
library(igraph) | |
# Load the edgelist and examine the data | |
el=read.csv(file.choose(),header=TRUE) # read an edgelist .csv file that you select | |
head(el) #shows some top rows of the edgelist to check the data | |
# Treat node labels as characters, not numbers | |
el[,1]=as.character(el[,1]) #node names in the first column of el | |
el[,2]=as.character(el[,2]) #node names in the second column of el | |
# Transform el data to matrix, then edgelist graph (initially just connections) | |
m <- as.matrix(el[, c(1:2)]) | |
g <- graph.edgelist(m, directed=FALSE) | |
# Add type as an attribute of the connections | |
E(g)$type <- as.character(el[,3]) | |
table(E(g)$type) | |
# Question 6 - Number of nodes who were partners | |
q6 <- subgraph.edges(graph=g, E(g)[type=="Partners"], delete.vertices = TRUE) | |
q6 # Shows 183 nodes, 240 connections | |
# Question 7 - Number of edges in network who were sharing computers | |
q7 <- subgraph.edges(graph=g, E(g)[type=="Computer"], delete.vertices = TRUE) | |
q7 # Shows 39 nodes, 23 edges | |
# Question 8 - Diameter for network of students doing homework at same time | |
q8 <- subgraph.edges(graph=g, E(g)[type=="Time"], delete.vertices = TRUE) | |
diameter(q8) # Shows diameter of 12 | |
# Question 9 - Which network has the highest density | |
edge_density(q6) # 0.01 | |
edge_density(q7) # 0.03 - Highest density | |
edge_density(q8) # 0.02 | |
# Question 10 - Which network has the shortest average path | |
average.path.length(q6, directed = FALSE) # 1.32 | |
average.path.length(q7, directed = FALSE) # 1.18 - Shortest average path | |
average.path.length(q8, directed = FALSE) # 5.22 | |
# Question 11 - Which network has the highest number of components | |
count_components(q6) # 50 - Highest number of components | |
count_components(q7) # 17 | |
count_components(q8) # 22 | |
# Note: For this and the following questions, we need to add the node attributes to the graphs | |
# Start by importing the node data | |
nodes <- readRDS("Asgn1-Nodes.RData") | |
colnames(nodes) # Name, Sex, Grade, Education, id | |
# Import the information from the node data to a new attribute on the vector | |
V(g)$Name <- as.character(nodes$Name[match(V(g)$name,nodes$id)]) | |
V(g)$Sex <- as.character(nodes$Sex[match(V(g)$name,nodes$id)]) | |
V(g)$Grade <- as.numeric(nodes$Grade[match(V(g)$name,nodes$id)]) # note numeric | |
V(g)$Education <- as.character(nodes$Education[match(V(g)$name,nodes$id)]) | |
# Recreate the subgraphs now that we've added in node attributes - and name them better this time | |
Partners <- subgraph.edges(graph=g, E(g)[type=="Partners"], delete.vertices = TRUE) | |
Computer <- subgraph.edges(graph=g, E(g)[type=="Computer"], delete.vertices = TRUE) | |
Time <- subgraph.edges(graph=g, E(g)[type=="Time"], delete.vertices = TRUE) | |
# Add degree as an attribute to each of the subgraphs | |
V(Partners)$Degree <- degree(Partners) | |
V(Computer)$Degree <- degree(Computer) | |
V(Time)$Degree <- degree(Time) | |
# Question 12 - Which student has the highest degree of those sharing a computer | |
V(Computer)$Name[V(Computer)$Degree==max(degree(Computer))] # Uless (3) | |
# Question 13 - Which student has the highest degree of those that are partners | |
V(Partners)$Name[V(Partners)$Degree==max(degree(Partners))] # Chayden, Kaleaha, Margrit, Nyzaiah (4) | |
# Questions 14-16 - Match the degree distribution | |
degree(Partners) %>% table() %>% barplot(,xlab="degree", ylab="number of nodes") # Q14 | |
degree(Computer) %>% table() %>% barplot(,xlab="degree", ylab="number of nodes") # Q15 | |
degree(Time) %>% table() %>% barplot(,xlab="degree", ylab="number of nodes") # Q16 | |
# Question 17 - Which distribution represents power-law distribution the least? | |
# From wikipedia: In statistics, a power law is a functional relationship between two quantities, | |
# where a relative change in one quantity results in a proportional relative change in the other | |
# quantity, independent of the initial size of those quantities: one quantity varies as a power of | |
# another. | |
# Answer is Partners, since it doesn't show the long-tail | |
# Question 18 - Name of the student with the highest betweenness of those working at the same time | |
V(Time)$Name[betweenness(Time)==max(betweenness(Time, directed = FALSE))] | |
# Question 19 - Which network has the highest centralisation | |
centr_degree(Partners)$centralization # 0.008 | |
centr_degree(Computer)$centralization # 0.048 | |
centr_degree(Time)$centralization # 0.049 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment