leeper · December 26, 2015 13:29
diff --git a/slopegraph.r b/slopegraph.r
 # R Function to Draw Edward Tufte-style Slopeplots

 slopegraph <-
 function(
 df,
 xlim = c(.5,ncol(df)+.5),
 ylim = c(min(df)-diff(range(df))/100,max(df)+diff(range(df))/100),
 main = NULL,
 bty = 'n',
 yaxt = 'n',
 xaxt = 'n',
 xlab = '',
 ylab = '',
 add.before = NULL, # an expression to add something between drawing the blank canvas and adding the plot content (i.e., behind the slopegraph)
 add.after = NULL, # an expression to add something after adding the plot content
 labels = names(df),
 labpos.left = 2,
 labpos.right = 4,
 col.lines = par('fg'),
 col.lab = par('fg'),
 col.num = par('fg'),
 col.xaxt = par('fg'),
 offset.x = .1, # THIS DOESN'T SEEM TO WORK???
 offset.lab = .1,
 binval = 1.5, # threshold at which to force binning of labels and values (multiplier of the height of an "m")
 cex.lab = 1,
 cex.num = 1,
 font.lab = 1,
 font.num = 1,
 lty = par("lty"),
 lwd = par("lwd"),
 mai = NULL,
 ...)
 {
    if(ncol(df) < 2)
        stop('`df` must have at least two columns')
    # draw margins
    if(is.null(mai))
        par(mai=c(1, 0, if(is.null(main)) 0 else 1, 0))
    else
        par(mai=mai)
    
    plot(NA, y=NULL, xlim=xlim, ylim=ylim, main=main,
         bty=bty, yaxt=yaxt, xaxt=xaxt, xlab=xlab, ylab=ylab, ...)
    # optional expression
    if(!is.null(add.before))
        eval(add.before)
    
    # x-axis
    axis(1, 1:ncol(df), labels=labels, col=col.xaxt, col.ticks=col.xaxt)
    
    # height and width of 'm' on plotting device
    h <- strheight('m')
    w <- strwidth('m')
    
    # function for finding consecutive indices
    # from: http://stackoverflow.com/a/16118320/2338862
    seqle <- function(x,incr=1) { 
        if(!is.numeric(x)) x <- as.numeric(x) 
        n <- length(x)  
        y <- x[-1L] != x[-n] + incr 
        i <- c(which(y|is.na(y)),n) 
        list(lengths = diff(c(0L,i)),
             values = x[head(c(0L,i)+1L,-1L)]) 
    } 
    
    overlaps <- function(coldf, cat='rownames'){
        # conditionally remove exactly duplicated values
        if(any(duplicated(coldf[,1]))){
            u <- unique(coldf[,1])
            out <- cbind.data.frame(t(sapply(u, function(i)
                c(paste(rownames(coldf)[coldf[,1]==i],collapse='\n'),i))))
            rownames(out) <- out[,1]
            out[,1] <- NULL
            names(out) <- names(coldf)
            out[,1] <- as.numeric(as.character(out[,1]))
            coldf <- out[order(out[,1]),,drop=FALSE]
        }
        # function to fix overlaps
        overlaps <- which(abs(diff(coldf[,1]))<(binval*h))
        if(length(overlaps)){
            runs <- seqle(overlaps) # use seqle function
            overlaps2 <- mapply(function(i,j) seq(i,length.out=j+1), runs$values, runs$lengths)
            oldlabs <- coldf[-unique(c(overlaps,overlaps+1)),,drop=FALSE]
            newlabs <- data.frame(sapply(overlaps2, function(i) mean(coldf[i,1])))
            names(newlabs) <- names(coldf)
            if(cat=='rownames'){
                rownames(newlabs) <- 
                    sapply(overlaps2, function(i) paste(rownames(coldf)[rev(i)],collapse='\n'))
            } else if(cat=='values'){
                rownames(oldlabs) <- oldlabs[,1]
                rownames(newlabs) <-
                    sapply(overlaps2, function(i) paste(as.character(coldf[rev(i),1]),collapse='\n'))
            }
            return(rbind(oldlabs,newlabs))
        } else
            return(coldf)
    }
    
    # left-side labels
    l <- overlaps(df[order(df[,1]),1,drop=FALSE])
    text(1-offset.lab, l[,1],
         col=col.lab, rownames(l), pos=labpos.left, cex=cex.lab, font=font.lab)
    
    # right-side labels
    r <- overlaps(df[order(df[,ncol(df)]),ncol(df),drop=FALSE])
    text(ncol(df)+offset.lab, r[,1], 
         col=col.lab, rownames(r), pos=labpos.right, cex=cex.lab, font=font.lab)
    
    # numeric value labels
    valslist <- lapply(seq_along(df), function(i) overlaps(df[order(df[,i]),i,drop=FALSE], cat='values'))
    for(i in 1:length(valslist)){
        text(rep(i,nrow(valslist[[i]])), valslist[[i]][,1], rownames(valslist[[i]]),
            col=col.num, cex=cex.num, font=font.num)
    }
    
    # draw lines
    offset.x <- .1 # small offset for `segments`
    col.lines <- rep(col.lines, length.out=nrow(df))
    lty <- rep(lty, length.out=nrow(df))
    lwd <- rep(lwd, length.out=nrow(df))
    for(i in 1:nrow(df)){
        mapply(function(x1,y1,x2,y2,...){
            ysloped <- (y2-y1)*offset.x
            segments(x1+offset.x, if(y1==y2) y1 else (y1+ysloped),
                     x2-offset.x, if(y1==y2) y2 else (y2-ysloped),
                     col=col.lines[i],
                     lty=lty[i],
                     lwd=lwd[i]
                    )},
               1:(length(df[i,])-1), # x1-positions
               df[i,][-length(df[i,])], # y1-positions
               2:(length(df[i,])), # x2-positions
               df[i,][-1] # y2-positions
               )
    }
    
    # optional expression
    if(!is.null(add.after))
        eval(add.after)
    
    # return invisibly
    invisible(NULL)
 }


 # EXAMPLES

 ## Tufte's Cancer Graph (to the correct scale)
 cancer <- read.csv('tufte-cancer-survival-data.csv')
 rownames(cancer) <- cancer[,1]
 cancer <- cancer [,-1]
 pdf('tufte-cancer-survival-plot.pdf',height=16, width=12, family='Palatino')
 slopegraph(cancer, col.line='gray', xlim=c(-.5,5.5), labels=c('5 Year','10 Year','15 Year','20 Year'), binval=2.5)
 dev.off()


 ## Tufte's GNP Graph
 gnp <- read.csv('tufte-gnp-data.csv')
 gnp[,3] <- NULL
 pdf('tufte-gnp-plot.pdf',height=12, width=8, family='Palatino')
 slopegraph(gnp, col.line='gray', labels=c('1970','1979'), 
    main='Current Receipts of Goverment as a Percentage of Gross Domestic Product', binval=3.75)
 dev.off()
diff --git a/tufte-cancer-survival-data.csv b/tufte-cancer-survival-data.csv
diff --git a/tufte-gnp-data.csv b/tufte-gnp-data.csv
 Year1970,Year1979,
 Sweden,46.9,57.4
 Netherlands,44.0,55.8
 Norway,43.5,52.2,
 Britain,40.7,39.0
 France,39.0,43.4
 Germany,37.5,42.9
 Belgium,35.2,43.2
 Canada,35.2,35.8
 Finland,34.9,38.2
 Italy,30.4,35.7
 US,30.3,32.5
 Greece,26.8,30.6
 Switzerland,26.5,33.2
 Spain,22.5,27.1
 Japan,20.7,26.6
	# R Function to Draw Edward Tufte-style Slopeplots

	slopegraph <-
	function(
	df,
	xlim = c(.5,ncol(df)+.5),
	ylim = c(min(df)-diff(range(df))/100,max(df)+diff(range(df))/100),
	main = NULL,
	bty = 'n',
	yaxt = 'n',
	xaxt = 'n',
	xlab = '',
	ylab = '',
	add.before = NULL, # an expression to add something between drawing the blank canvas and adding the plot content (i.e., behind the slopegraph)
	add.after = NULL, # an expression to add something after adding the plot content
	labels = names(df),
	labpos.left = 2,
	labpos.right = 4,
	col.lines = par('fg'),
	col.lab = par('fg'),
	col.num = par('fg'),
	col.xaxt = par('fg'),
	offset.x = .1, # THIS DOESN'T SEEM TO WORK???
	offset.lab = .1,
	binval = 1.5, # threshold at which to force binning of labels and values (multiplier of the height of an "m")
	cex.lab = 1,
	cex.num = 1,
	font.lab = 1,
	font.num = 1,
	lty = par("lty"),
	lwd = par("lwd"),
	mai = NULL,
	...)
	{
	if(ncol(df) < 2)
	stop('`df` must have at least two columns')
	# draw margins
	if(is.null(mai))
	par(mai=c(1, 0, if(is.null(main)) 0 else 1, 0))
	else
	par(mai=mai)

	plot(NA, y=NULL, xlim=xlim, ylim=ylim, main=main,
	bty=bty, yaxt=yaxt, xaxt=xaxt, xlab=xlab, ylab=ylab, ...)
	# optional expression
	if(!is.null(add.before))
	eval(add.before)

	# x-axis
	axis(1, 1:ncol(df), labels=labels, col=col.xaxt, col.ticks=col.xaxt)

	# height and width of 'm' on plotting device
	h <- strheight('m')
	w <- strwidth('m')

	# function for finding consecutive indices
	# from: http://stackoverflow.com/a/16118320/2338862
	seqle <- function(x,incr=1) {
	if(!is.numeric(x)) x <- as.numeric(x)
	n <- length(x)
	y <- x[-1L] != x[-n] + incr
	i <- c(which(y\|is.na(y)),n)
	list(lengths = diff(c(0L,i)),
	values = x[head(c(0L,i)+1L,-1L)])
	}

	overlaps <- function(coldf, cat='rownames'){
	# conditionally remove exactly duplicated values
	if(any(duplicated(coldf[,1]))){
	u <- unique(coldf[,1])
	out <- cbind.data.frame(t(sapply(u, function(i)
	c(paste(rownames(coldf)[coldf[,1]==i],collapse='\n'),i))))
	rownames(out) <- out[,1]
	out[,1] <- NULL
	names(out) <- names(coldf)
	out[,1] <- as.numeric(as.character(out[,1]))
	coldf <- out[order(out[,1]),,drop=FALSE]
	}
	# function to fix overlaps
	overlaps <- which(abs(diff(coldf[,1]))<(binval*h))
	if(length(overlaps)){
	runs <- seqle(overlaps) # use seqle function
	overlaps2 <- mapply(function(i,j) seq(i,length.out=j+1), runs$values, runs$lengths)
	oldlabs <- coldf[-unique(c(overlaps,overlaps+1)),,drop=FALSE]
	newlabs <- data.frame(sapply(overlaps2, function(i) mean(coldf[i,1])))
	names(newlabs) <- names(coldf)
	if(cat=='rownames'){
	rownames(newlabs) <-
	sapply(overlaps2, function(i) paste(rownames(coldf)[rev(i)],collapse='\n'))
	} else if(cat=='values'){
	rownames(oldlabs) <- oldlabs[,1]
	rownames(newlabs) <-
	sapply(overlaps2, function(i) paste(as.character(coldf[rev(i),1]),collapse='\n'))
	}
	return(rbind(oldlabs,newlabs))
	} else
	return(coldf)
	}

	# left-side labels
	l <- overlaps(df[order(df[,1]),1,drop=FALSE])
	text(1-offset.lab, l[,1],
	col=col.lab, rownames(l), pos=labpos.left, cex=cex.lab, font=font.lab)

	# right-side labels
	r <- overlaps(df[order(df[,ncol(df)]),ncol(df),drop=FALSE])
	text(ncol(df)+offset.lab, r[,1],
	col=col.lab, rownames(r), pos=labpos.right, cex=cex.lab, font=font.lab)

	# numeric value labels
	valslist <- lapply(seq_along(df), function(i) overlaps(df[order(df[,i]),i,drop=FALSE], cat='values'))
	for(i in 1:length(valslist)){
	text(rep(i,nrow(valslist[[i]])), valslist[[i]][,1], rownames(valslist[[i]]),
	col=col.num, cex=cex.num, font=font.num)
	}

	# draw lines
	offset.x <- .1 # small offset for `segments`
	col.lines <- rep(col.lines, length.out=nrow(df))
	lty <- rep(lty, length.out=nrow(df))
	lwd <- rep(lwd, length.out=nrow(df))
	for(i in 1:nrow(df)){
	mapply(function(x1,y1,x2,y2,...){
	ysloped <- (y2-y1)*offset.x
	segments(x1+offset.x, if(y1==y2) y1 else (y1+ysloped),
	x2-offset.x, if(y1==y2) y2 else (y2-ysloped),
	col=col.lines[i],
	lty=lty[i],
	lwd=lwd[i]
	)},
	1:(length(df[i,])-1), # x1-positions
	df[i,][-length(df[i,])], # y1-positions
	2:(length(df[i,])), # x2-positions
	df[i,][-1] # y2-positions
	)
	}

	# optional expression
	if(!is.null(add.after))
	eval(add.after)

	# return invisibly
	invisible(NULL)
	}


	# EXAMPLES

	## Tufte's Cancer Graph (to the correct scale)
	cancer <- read.csv('tufte-cancer-survival-data.csv')
	rownames(cancer) <- cancer[,1]
	cancer <- cancer [,-1]
	pdf('tufte-cancer-survival-plot.pdf',height=16, width=12, family='Palatino')
	slopegraph(cancer, col.line='gray', xlim=c(-.5,5.5), labels=c('5 Year','10 Year','15 Year','20 Year'), binval=2.5)
	dev.off()


	## Tufte's GNP Graph
	gnp <- read.csv('tufte-gnp-data.csv')
	gnp[,3] <- NULL
	pdf('tufte-gnp-plot.pdf',height=12, width=8, family='Palatino')
	slopegraph(gnp, col.line='gray', labels=c('1970','1979'),
	main='Current Receipts of Goverment as a Percentage of Gross Domestic Product', binval=3.75)
	dev.off()
	Year 5	Year 10	Year 15	Year 20
Prostate	99.0	95.0	87.0	81.0
Thyroid	96.0	96.0	94.0	95.0
Testis	95.0	94.0	91.0	88.0
Melanomas	89.0	87.0	84.0	83.0
Breast	86.0	78.0	71.0	75.0
Hodgkin's	85.0	80.0	74.0	67.0
Uterus	84.0	83.0	81.0	79.0
Urinary	82.0	76.0	70.0	68.0
Cervix	71.0	64.0	63.0	60.0
Larynx	69.0	57.0	46.0	38.0
Rectum	63.0	55.0	52.0	49.0
Kidney	62.0	54.0	50.0	47.0
Colon	62.0	55.0	54.0	52.0
Non-Hodgkin's	58.0	46.0	38.0	34.0
Oral	57.0	46.0	38.0	33.0
Ovary	55.0	49.0	50.0	50.0
Leukemia	43.0	32.0	30.0	26.0
Brain	32.0	29.0	28.0	26.0
Multiple myeloma	30.0	13.0	7.0	5.0
Stomach	24.0	19.0	19.0	15.0
Lung	15.0	11.0	7.0	6.0
Esophagus	14.0	8.0	8.0	5.0
Liver	8.0	6.0	6.0	8.0
Pancreas	4.0	3.0	3.0	3.0
	Year1970,Year1979,
	Sweden,46.9,57.4
	Netherlands,44.0,55.8
	Norway,43.5,52.2,
	Britain,40.7,39.0
	France,39.0,43.4
	Germany,37.5,42.9
	Belgium,35.2,43.2
	Canada,35.2,35.8
	Finland,34.9,38.2
	Italy,30.4,35.7
	US,30.3,32.5
	Greece,26.8,30.6
	Switzerland,26.5,33.2
	Spain,22.5,27.1
	Japan,20.7,26.6