# Taken from: http://janhove.github.io/teaching/2016/11/21/what-correlations-look-like
# source: http://janhove.github.io/RCode/plot_r.R
#
# plot_r
#
# Given a correlation coefficient and a sample size,
# this function draws 16 scatterplots
# whose shapes differ greatly
# but all of which are consistent with the correlation coefficient.
#
# [email protected]
# http://janhove.github.io
#
# Last change: 19 November 2016
# Modified by: M. Friendly: add data ellipses
plot_r <- function(r = 0.6, n = 50, showdata = FALSE, ellipses = TRUE) {

  # Function for computing y vector.
  # Largely copy-pasted from
  # http://stats.stackexchange.com/questions/15011/generate-a-random-variable-with-a-defined-correlation-to-an-existing-variable/15040#15040
  compute.y <- function(x, y, r) {
    theta <- acos(r)
    X <- cbind(x, y)                               # matrix
    Xctr <- scale(X, center = TRUE, scale = FALSE) # centered columns (mean 0)
    Id <- diag(n)                                  # identity matrix
    Q <- qr.Q(qr(Xctr[, 1, drop = FALSE]))         # QR decomposition, just matrix Q
    P <- tcrossprod(Q)                             # = Q Q'; projection onto space defined by x1
    x2o <- (Id - P) %*% Xctr[, 2]                  # x2ctr made orthogonal to x1ctr
    Xc2 <- cbind(Xctr[, 1], x2o)                   # bind to matrix
    Y <- Xc2 %*% diag(1 / sqrt(colSums(Xc2 ^ 2)))  # scale columns to length 1
    y <- Y[, 2] + (1 / tan(theta)) * Y[, 1]        # final new vector
    return(y)
  }
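  # Note (added comment, not part of the original code): compute.y() returns a
  # vector whose sample correlation with x is exactly r, since the result is
  # cot(theta) times a unit vector along x plus an orthogonal unit vector, and
  # cos(acos(r)) = r. It uses n from the enclosing call, so x and y must both
  # have length n; for example, cor(x, compute.y(x, rnorm(n), 0.42)) evaluates
  # to 0.42 up to floating-point error.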
  # Compute the boundary points of an ellipse with the given center,
  # shape (covariance) matrix and radius
  ell <- function(center, shape, radius, segments = 60) {
    angles <- (0:segments) * 2 * pi / segments
    circle <- radius * cbind(cos(angles), sin(angles))
    Q <- chol(shape, pivot = TRUE)
    order <- order(attr(Q, "pivot"))
    t(c(center) + t(circle %*% Q[, order]))
  }

  # Draw a data ellipse for (x, y) at the given coverage level
  ellipse <- function(x, y, level = 0.5, lwd = 1, col = "red") {
    center <- colMeans(cbind(x, y))
    shape <- var(cbind(x, y))
    radius <- sqrt(2 * qf(level, 2, length(x) - 1))
    ellip <- ell(center, shape, radius)
    lines(ellip, lwd = lwd, col = col)
  }
  # Graph settings
  op <- par(no.readonly = TRUE)
  par(
    las = 1,
    mfrow = c(4, 4),
    xaxt = "n",
    yaxt = "n",
    mar = c(1, 1.5, 2, 1.5),
    oma = c(0, 0, 3.5, 0),
    cex.main = 1.1
  )

  # Case 1: Textbook case with normal x distribution and normally distributed residuals
  x <- rnorm(n)           # specify x distribution
  y <- rnorm(n)           # specify y distribution
  y <- compute.y(x, y, r) # recompute y to fit with x and r
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(1) Normal x, normal residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df1 <- data.frame(x, y) # store x and y to data frame
  # Case 2: Textbook case with uniform x distribution and normally distributed residuals
  x <- runif(n, 0, 1)
  y <- rnorm(n)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(2) Uniform x, normal residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df2 <- data.frame(x, y)

  # Case 3: Skewed x distribution (positive): a couple of outliers could have high leverage.
  x <- rlnorm(n, 5)
  y <- rnorm(n)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(3) +-skewed x, normal residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df3 <- data.frame(x, y)
  # Case 4: Skewed x distribution (negative): same as 3, but mirrored.
  x <- rlnorm(n, 5) * -1 + 5000
  y <- rnorm(n)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(4) --skewed x, normal residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df4 <- data.frame(x, y)
  # Case 5: Skewed residual distribution (positive), similar to 3-4
  x <- rnorm(n)
  y <- rlnorm(n, 5)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(5) Normal x, +-skewed residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df5 <- data.frame(x, y)

  # Case 6: Skewed residual distribution (negative), same as 5
  x <- rnorm(n)
  y <- rlnorm(n, 5)
  y <- y * -1
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(6) Normal x, --skewed residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df6 <- data.frame(x, y)

  # Case 7: Variance increases with x. The correlation coefficient under/oversells predictive power depending on scenario.
  # Also, significance may be affected.
  x <- rnorm(n)
  x <- sort(x, decreasing = FALSE) + abs(min(x))
  variance <- 10 * x               # spread increases with x
  y <- rnorm(n, 0, sqrt(variance)) # error term has larger variance with x
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(7) Increasing spread", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df7 <- data.frame(x, y)
  # Case 8: Same as 7, but variance decreases with x
  x <- rnorm(n)
  x <- x + abs(min(x))
  a <- 10 * (max(x)) # calculate intercept of the x-standard deviation function
  variance <- a - 10 * x
  y <- rnorm(n, 0, sqrt(variance))
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(8) Decreasing spread", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df8 <- data.frame(x, y)

  # Case 9: Nonlinearity (quadratic trend)
  x <- rnorm(n)
  y <- x ^ 2 + rnorm(n, sd = 0.2)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(9) Quadratic trend", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df9 <- data.frame(x, y)

  # Case 10: Sinusoid relationship
  x <- runif(n, -2 * pi, 2 * pi)
  y <- sin(x) + rnorm(n, sd = 0.2)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(10) Sinusoid relationship", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df10 <- data.frame(x, y)
  # Case 11: A single positive outlier can skew the results.
  x <- rnorm(n - 1)
  x <- c(sort(x), 10)
  y <- c(rnorm(n - 1), 15)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(11) A single positive outlier", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df11 <- data.frame(x, y)

  # Regression line without the outlier
  group1 <- x[1:(n - 1)]
  y1 <- y[1:(n - 1)]
  xseg1 <- seq(
    from = min(group1),
    to = max(group1),
    length.out = 50
  )
  pred1 <- predict(lm(y1 ~ group1), newdata = data.frame(group1 = xseg1))
  lines(xseg1, pred1, lty = 2, col = "firebrick2")

  # Case 12: A single negative outlier - same as 11 but the other way.
  x <- rnorm(n - 1)
  x <- c(sort(x), 10)
  y <- c(rnorm(n - 1), -15)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(12) A single negative outlier", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df12 <- data.frame(x, y)

  # Regression line without the outlier
  group1 <- x[1:(n - 1)]
  y1 <- y[1:(n - 1)]
  xseg1 <- seq(
    from = min(group1),
    to = max(group1),
    length.out = 50
  )
  pred1 <- predict(lm(y1 ~ group1), newdata = data.frame(group1 = xseg1))
  lines(xseg1, pred1, lty = 2, col = "firebrick2")
  # Case 13: Bimodal y: actually two groups (e.g. control and experimental).
  # Better to model them in a multiple regression model.
  x <- rnorm(n)
  y <- c(rnorm(floor(n / 2), mean = -3), rnorm(ceiling(n / 2), 3))
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(13) Bimodal residuals", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df13 <- data.frame(x, y)

  # Case 14: Two groups
  # Create two groups within each of which
  # the residual XY relationship is contrary to the overall relationship.
  # This will often produce examples of Simpson's paradox.
  x <- c(rnorm(floor(n / 2), -3), rnorm(ceiling(n / 2), 3))
  y <- -sign(r) * 10 * x + rnorm(n, sd = 0.5) +
    c(rep(0, floor(n / 2)), rep(3 * sign(r), ceiling(n / 2)))
  # Symbols / colours for each group
  colour <- c(rep("blue", floor(n / 2)), rep("red", ceiling(n / 2)))
  symbol <- c(rep(1, floor(n / 2)), rep(4, ceiling(n / 2)))
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    pch = symbol,
    main = paste("(14) Two groups", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df14 <- data.frame(x, y)

  # Also add regression lines within each group
  # Divide up the y variable
  group1 <- x[colour == "blue"]
  group2 <- x[colour == "red"]
  y1 <- y[colour == "blue"]
  y2 <- y[colour == "red"]
  xseg1 <- seq(
    from = min(group1),
    to = max(group1),
    length.out = 50
  )
  pred1 <- predict(lm(y1 ~ group1), newdata = data.frame(group1 = xseg1))
  lines(xseg1, pred1, lty = 2, col = "firebrick2")
  xseg2 <- seq(
    from = min(group2),
    to = max(group2),
    length.out = 50
  )
  pred2 <- predict(lm(y2 ~ group2), newdata = data.frame(group2 = xseg2))
  lines(xseg2, pred2, lty = 2, col = "firebrick2")
  # Case 15: Sampling at the extremes - this would overestimate the correlation coefficient based on all data.
  x <- sort(rnorm(6 * n, sd = 3))
  x1 <- x[1:floor((n / 2))]
  x2 <- x[floor(5.5 * n + 1):(6 * n)]
  x <- c(x1, x2)
  y <- rnorm(n)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(15) Sampling at the extremes", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df15 <- data.frame(x, y)

  # Case 16: Lumpy x/y data, as one would observe for questionnaire items
  x <- sample(1:5, n, replace = TRUE, prob = sample(c(5, 4, 3, 2, 1))) # unequal proportions for each answer, just for kicks
  y <- sample(1:7, n, replace = TRUE)
  y <- compute.y(x, y, r)
  plot(
    x,
    y,
    ylab = "",
    xlab = "",
    main = paste("(16) Coarse data", sep = "")
  )
  abline(lm(y ~ x), col = "dodgerblue3")
  if (ellipses) ellipse(x, y)
  df16 <- data.frame(x, y)
  # Overall title
  title(
    paste("All correlations: r(", n, ") = ", r, sep = ""),
    outer = TRUE,
    cex.main = 2
  )
  par(op)

  # Collect the data behind each panel in a data-frame-like structure (one row per plot)
  df <- list(plot = 1:16, data = list(df1, df2, df3, df4,
                                      df5, df6, df7, df8,
                                      df9, df10, df11, df12,
                                      df13, df14, df15, df16))
  df <- structure(df, class = c("tbl_df", "data.frame"), row.names = 1:16)

  # Return numeric output depending on the 'showdata' setting
  if (!(showdata %in% c(NULL, TRUE, FALSE, 1:16, "all"))) {
    warning("'showdata' must be TRUE, FALSE, 'all', or a number between 1 and 16.")
  } else if (showdata %in% c(TRUE, "all")) {
    return(df)
  } else if (showdata %in% 1:16) {
    return(df$data[[showdata]])
  }
}
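
# Example usage (added illustration, not part of the original gist; the seed and
# argument values below are arbitrary, and set.seed() is included only to make
# the simulated panels reproducible). Uncomment to run:
# set.seed(2016)
# plot_r()                                # defaults: r = 0.6, n = 50
# plot_r(r = 0.3, n = 200)                # weaker correlation, larger sample
# plot_r(r = 0.6, n = 50, showdata = 11)  # also return the data behind panel 11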