Last active
August 29, 2015 14:23
-
-
Save jalapic/6d175ffe87463d1f332e to your computer and use it in GitHub Desktop.
Markov Chains in DiagrammeR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Markov Chain Diagrams Using DiagrammeR | |
### Introduction and Sample Data | |
#' Imagine a sequence of behaviors like below, where each letter (A,I,O,R,S,X,Y) refers to | |
#' a distinct behavior. | |
#' AOXXYXXXXXXYXXYXXXXXYXXXXXYXSXXXXAXAOOOXAAAOYXXXXXXSXXXXSXXYXYXXYXXYXXXXXXXXXYXXAAAAAAOAA | |
#' AOAAAOAAAAAOAAAAAAAAAAAOAAAOAAAOOAAAOAAAAAOOIAOAOAOIAOOOAAARSAAOOOAAAAOAAAOOAOOOAOAAAISAA | |
#' ARAOOAAAOASAAAAAAOOOAAAAOASOAAAOARSRSOAAOOOAOAAOAAOAAOOAAAAAASAOIASAAOAAOOARASOOARSAOOAAA | |
#' AOAAAAOAOAAAAAAASAAAAOIARSARARAOAAAOOOAAOAAOOAAOOAOAAOIAOOOAOOAAAAAAROSIOAAAOOAAAAOAOOAAO | |
#' OAAAOOAOAAASOOAOAAAOAOSASOOAOOAROOAROSAOSASOAROOAAAOSOAOOSOAAOSOOAOAAAOOOAOOASOAOSAAAASIO | |
#' OAOOAAOOAOORSIAOOOAASAOOOAOSOAOARSIXXYXXXXXXXXXXXXXSXXSXXXXXXSYXXXYXXXXXSXXXXXXXXXXXXXXSX | |
#' YXXYXXXXXS | |
#' The basic aim is to visualize this sequence in a Markov Chain diagram. Specifcally, we | |
#' want to show those behavioral transitions that occur at a higher frequency than chance, and | |
#' we wish to denote behaviors that occur more frequently in some way (e.g. make those nodes | |
#' bigger, thicker or a different color - or all 3 of these). Likewise, the edges should | |
#' reflect the weight or likelihood that a behavioral sequence transition occurs. | |
#' The following are some raw data. | |
#' | |
#' Firstly, this is the frequency of each behavior above: | |
ty<-structure(c(218, 10, 139, 15, 37, 108, 16), .Names = c("A", "I", "O", "R", "S", "X", "Y")) | |
ty | |
#' A I O R S X Y | |
#' 218 10 139 15 37 108 16 | |
#' Secondly, here is a dataframe showing the transition rate between two behaviors and the | |
#' associated p-value of that transition. | |
#' | |
tx1<- | |
structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, | |
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, | |
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, | |
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("A", "I", | |
"O", "R", "S", "X", "Y"), class = "factor"), Var2 = structure(c(1L, | |
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, | |
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, | |
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L | |
), .Label = c("A", "I", "O", "R", "S", "X", "Y"), class = "factor"), | |
value = c(0.541284403669725, 0.6, 0.517985611510791, 0.266666666666667, | |
0.351351351351351, 0.037037037037037, 0, 0.00458715596330275, | |
0, 0.0359712230215827, 0, 0.108108108108108, 0, 0, 0.334862385321101, | |
0.2, 0.352517985611511, 0.266666666666667, 0.297297297297297, | |
0, 0, 0.0596330275229358, 0, 0.00719424460431655, 0, 0.027027027027027, | |
0, 0, 0.055045871559633, 0.1, 0.0647482014388489, 0.466666666666667, | |
0, 0.0833333333333333, 0, 0.00458715596330275, 0.1, 0.0143884892086331, | |
0, 0.189189189189189, 0.75, 1, 0, 0, 0.00719424460431655, | |
0, 0.027027027027027, 0.12962962962963, 0), p = c(0, 0, 0, | |
0, 0, 0, 0.01, 0.016, 0.665, 0.081, 0.595, 0.589, 0.134, | |
0.582, 0.09, 0, 0.009, 0.005, 0, 0, 0.021, 0.003, 0.596, | |
0.044, 0.515, 0.083, 0.054, 0.501, 0, 0, 0.013, 0.141, 0.091, | |
0.039, 0.228, 0, 0.155, 0, 0.08, 0, 0, 0, 0.001, 0.584, 0.019, | |
0.501, 0.273, 0.614, 0.487)), .Names = c("Var1", "Var2", | |
"value", "p"), row.names = c(NA, -49L), class = "data.frame") | |
head(tx1,10) | |
#' Var1 Var2 value p | |
#' 1 A A 0.541284404 0.000 | |
#' 2 I A 0.600000000 0.000 | |
#' 3 O A 0.517985612 0.000 | |
#' 4 R A 0.266666667 0.000 | |
#' 5 S A 0.351351351 0.000 | |
#' 6 X A 0.037037037 0.000 | |
#' 7 Y A 0.000000000 0.010 | |
#' 8 A I 0.004587156 0.016 | |
#' 9 I I 0.000000000 0.665 | |
#' 10 O I 0.035971223 0.081 | |
#' Var1 = first behavior, Var2 = second behavior | |
#' value = transition probability | |
#' p = probability that transition probability is significant | |
##################### | |
##### PLOTTING | |
#' Some general points/issues: | |
#' | |
#' 1. I have utilized 'x' and 'y' fixed plotting of neato. I want to show all nodes | |
#' in a series of plots comparing Markov Chains of different sequences. Therefore | |
#' it is better to have all nodes in the same position for all diagrams. | |
#' I have tried to pick coordinates that eliminate line crossings, but.... | |
#' | |
#' 2. Self-loops e.g. A->A and X->X cause the biggest issue of clarity. Is there a way | |
#' of altering how they are positioned? | |
#' | |
#' 3. I don't seem to be able to scale node size by some data. (not shown below) | |
#' | |
#' 4. I don't seem to be able to scale nodes by two attributes (e.g. color and penwidth) | |
#' | |
#' 5. I can't get render_graph() to work on my machine. I used a workaround (below). | |
#-------------------------------------------------------------------------------------------------------------------------------------------# | |
library(dplyr) | |
#### EXAMPLE 1 - SCALE EDGES | |
#' I scale edges by transition probability (value in tx1) | |
# Create a node data frame | |
nodes <- | |
create_nodes(nodes = names(ty), | |
type = "letter", | |
x=c(1.75,0.25,2.9,3.25,0.6,1,2.5), | |
y=c(3,3,2,3,2,1,1)) | |
# Create an edges data frame | |
edgesx <- tx1 %>% filter(value!=0 & p<.005) | |
edges <- create_edges(from = edgesx$Var1, | |
to = edgesx$Var2, | |
label = '', | |
relationship = "letter_to_letter", | |
data = edgesx$value) | |
edges <- scale_edges(edges_df = edges, | |
to_scale = edges$data, | |
edge_attr = "penwidth", | |
range = c(1, 3)) | |
edges | |
graph <- | |
create_graph(nodes_df = nodes, | |
edges_df = edges, | |
graph_attrs = "layout = neato", | |
node_attrs = c("fontname = Helvetica", | |
"style = filled"), | |
edge_attrs = c("color = gray20", | |
"arrowsize = 0.5")) | |
# View the graph in the RStudio Viewer | |
#render_graph(graph) #for some reason this doesn't produce a viz in the viewer on my machine. There is no error message, it just doesn't show. | |
grViz(print(render_graph(graph, output = "DOT"))) #this works | |
#-------------------------------------------------------------------------------------------------------------------------------------------# | |
#### EXAMPLE 2 - SCALE NODES AND EDGES | |
#' I scale edges by transition probability (value in tx1) | |
#' I scale nodes by frequency of occurrence (ty) - penwidth | |
# Create a node data frame | |
nodes <- | |
create_nodes(nodes = names(ty), | |
type = "letter", | |
x=c(1.75,0.25,2.9,3.25,0.6,1,2.5), | |
y=c(3,3,2,3,2,1,1), | |
data=ty) | |
nodes <- scale_nodes(nodes_df = nodes, | |
to_scale = nodes$data, | |
node_attr = "penwidth", | |
range = c(2, 5)) | |
# Create an edges data frame | |
edgesx <- tx1 %>% filter(value!=0 & p<.005) | |
edges <- create_edges(from = edgesx$Var1, | |
to = edgesx$Var2, | |
label = '', | |
relationship = "letter_to_letter", | |
data = edgesx$value) | |
edges <- scale_edges(edges_df = edges, | |
to_scale = edges$data, | |
edge_attr = "penwidth", | |
range = c(1, 5)) | |
edges | |
graph <- | |
create_graph(nodes_df = nodes, | |
edges_df = edges, | |
graph_attrs = "layout = neato", | |
node_attrs = c("fontname = Helvetica", | |
"style = filled"), | |
edge_attrs = c("color = gray20", | |
"arrowsize = 0.5")) | |
# View the graph in the RStudio Viewer | |
#render_graph(graph) #for some reason this doesn't produce a viz in the viewer on my machine. There is no error message, it just doesn't show. | |
grViz(print(render_graph(graph, output = "DOT"))) #this works | |
#-------------------------------------------------------------------------------------------------------------------------------------------# | |
#### EXAMPLE 3 - NODE AND EDGE SCALING | |
#' here I scale the edgewith based on transition probablity (value in tx1) | |
#' I scale the node based on color (but I do this manually - I couldn't work the | |
#' diagrammer way of doing this) | |
#' I also try to scale node by penwidth but it doesn't work. | |
# Create a node data frame | |
nodes <- | |
create_nodes(nodes = names(ty), | |
type = "letter", | |
shape= "square", | |
x=c(1.75,0.25,2.9,3.25,0.6,1,2.5), | |
y=c(3,3,2,3,2,1,1), | |
data=ty) | |
nodes$color<-ifelse(nodes$data>100, "Red", "SandyBrown") | |
nodes <- scale_nodes(nodes_df = nodes, | |
to_scale = nodes$data, | |
node_attr = "penwidth", | |
range = c(2, 5)) | |
# Create an edges data frame | |
edgesx <- tx1 %>% filter(value!=0 & p<.005) | |
edges <- create_edges(from = edgesx$Var1, | |
to = edgesx$Var2, | |
label = '', | |
relationship = "letter_to_letter", | |
data = edgesx$value) | |
edges <- scale_edges(edges_df = edges, | |
to_scale = edges$data, | |
edge_attr = "penwidth", | |
range = c(1, 5)) | |
edges | |
graph <- | |
create_graph(nodes_df = nodes, | |
edges_df = edges, | |
graph_attrs = "layout = neato", | |
node_attrs = c("fontname = Helvetica", | |
"style = filled"), | |
edge_attrs = c("color = gray20", | |
"arrowsize = 0.5")) | |
# View the graph in the RStudio Viewer | |
#render_graph(graph) #for some reason this doesn't produce a viz in the viewer on my machine. There is no error message, it just doesn't show. | |
grViz(print(render_graph(graph, output = "DOT"))) #this works |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment