Last active
December 27, 2015 22:09
-
-
Save epijim/7397223 to your computer and use it in GitHub Desktop.
Parallel Sets. Based off a comment by Aaron Rendahl on CrossValidated.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
parallelset <- function(..., freq, col="gray", border=0, layer, | |
alpha=0.5, gap.width=0.05) { | |
p <- data.frame(..., freq, col, border, alpha, stringsAsFactors=FALSE) | |
n <- nrow(p) | |
if(missing(layer)) { layer <- 1:n } | |
p$layer <- layer | |
np <- ncol(p) - 5 | |
d <- p[ , 1:np, drop=FALSE] | |
p <- p[ , -c(1:np), drop=FALSE] | |
p$freq <- with(p, freq/sum(freq)) | |
col <- col2rgb(p$col, alpha=TRUE) | |
if(!identical(alpha, FALSE)) { col["alpha", ] <- p$alpha*256 } | |
p$col <- apply(col, 2, function(x) do.call(rgb, c(as.list(x), maxColorValue = 256))) | |
getp <- function(i, d, f, w=gap.width) { | |
a <- c(i, (1:ncol(d))[-i]) | |
o <- do.call(order, d[a]) | |
x <- c(0, cumsum(f[o])) * (1-w) | |
x <- cbind(x[-length(x)], x[-1]) | |
gap <- cumsum( c(0L, diff(as.numeric(d[o,i])) != 0) ) | |
gap <- gap / max(gap) * w | |
(x + gap)[order(o),] | |
} | |
dd <- lapply(seq_along(d), getp, d=d, f=p$freq) | |
par(mar = c(0, 0, 2, 0) + 0.1, xpd=TRUE ) | |
plot(NULL, type="n",xlim=c(0, 1), ylim=c(np, 1), | |
xaxt="n", yaxt="n", xaxs="i", yaxs="i", xlab='', ylab='', frame=FALSE) | |
for(i in rev(order(p$layer)) ) { | |
for(j in 1:(np-1) ) | |
polygon(c(dd[[j]][i,], rev(dd[[j+1]][i,])), c(j, j, j+1, j+1), | |
col=p$col[i], border=p$border[i]) | |
} | |
text(0, seq_along(dd), labels=names(d), adj=c(0,-2), font=2) | |
for(j in seq_along(dd)) { | |
ax <- lapply(split(dd[[j]], d[,j]), range) | |
for(k in seq_along(ax)) { | |
lines(ax[[k]], c(j, j)) | |
text(ax[[k]][1], j, labels=names(ax)[k], adj=c(0, -0.25)) | |
} | |
} | |
} | |
# end of function, now, subset your data to the categories | |
# DataFrame, read in a table from a csv, | |
library(foreign) | |
# strings as factors saves having to redefine | |
table <- read.csv("DATA.csv", stringsAsFactors = TRUE, header=T ) | |
# Add in colours for first defining group, assuming Category 1 yes/no | |
table <- within(table, { | |
color <- ifelse(Category1=="Yes","#008888","#330066") | |
}) | |
str(table) | |
with(table, parallelset(Category1, Category2, Category3, freq=Frequency, col=color, alpha=0.2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment