Created
July 25, 2012 19:50
-
-
Save stevenworthington/5205d0dbf20ba4c9d2b8 to your computer and use it in GitHub Desktop.
Permutation test for group differences using 3D coordinate data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# =============================================================================== | |
# Name : centroid_perm | |
# Original author : Steven Worthington ([email protected]) | |
# Affiliation : IQSS, Harvard University | |
# Date (mm/dd/yyyy) : 06/14/2012 | |
# Version : v0.8 | |
# Aim : exact permutation test for group differences | |
# =============================================================================== | |
# Goal: | |
# To demonstrate that the points (i.e., the trabecular orientation) | |
# of each species are occupying (or not) a different area on the sphere. | |
# values in both datasets are in radians (required for the Haversine formula) | |
# A = chimps, B = humans, C = fossils | |
# dataset #1 | |
pts1 <- data.frame( | |
Long = c(1.0727,1.5966,0.8448,1.1049,0.7834,0.8565,1.8754,1.4727,1.6216, | |
1.5203,0.7419,0.3658,1.6343,1.3046,1.5075), | |
Lat = c(1.3990,1.2087,1.4028,1.3570,1.3266,1.3806,1.4814,1.3943,1.4372, | |
1.3573,1.4939,1.4475,1.4156,1.2779,1.4175), | |
lvl = c( rep('A', 6), rep('B', 6), rep('C', 3) ) | |
) | |
# dataset #1 with humans and fossils lumped together | |
pts11 <- data.frame( | |
Long = c(1.0727,1.5966,0.8448,1.1049,0.7834,0.8565,1.8754,1.4727,1.6216, | |
1.5203,0.7419,0.3658,1.6343,1.3046,1.5075), | |
Lat = c(1.3990,1.2087,1.4028,1.3570,1.3266,1.3806,1.4814,1.3943,1.4372, | |
1.3573,1.4939,1.4475,1.4156,1.2779,1.4175), | |
lvl = c( rep('A', 6), rep('B', 6), rep('B', 3) ) | |
) | |
# dataset #2 | |
pts2 <- data.frame( | |
Long = c(2.5952,1.3934,1.8102,1.7794,5.0217,1.4840,6.1798,1.1547,1.6422, | |
5.1650,0.4375,0.5192,0.8254,6.027,1.9699), | |
Lat = c(1.4742,1.2668,1.2611,1.4101,1.5240,1.3420,1.5194,1.4619,1.5554, | |
1.5452,1.4370,1.4864,1.5296,1.4265,1.4940), | |
lvl = c( rep('A', 6), rep('B', 6), rep('C', 3)) | |
) | |
# dataset #2 with humans and fossils lumped together | |
pts22 <- data.frame( | |
Long = c(2.5952,1.3934,1.8102,1.7794,5.0217,1.4840,6.1798,1.1547,1.6422, | |
5.1650,0.4375,0.5192,0.8254,6.027,1.9699), | |
Lat = c(1.4742,1.2668,1.2611,1.4101,1.5240,1.3420,1.5194,1.4619,1.5554, | |
1.5452,1.4370,1.4864,1.5296,1.4265,1.4940), | |
lvl = c( rep('A', 6), rep('B', 6), rep('B', 3)) | |
) | |
# utility functions | |
# --------------------------------------------------- | |
# rad <- function(x) x * (pi / 180) # convert degrees into radians | |
# deg <- function(x) x * (180 / pi) # convert radians into degrees | |
# --------------------------------------------------- | |
long_antepodes <- function(points) { | |
# convert longitudinal radians larger than pi to their antepodal values | |
points$Long <- sapply(points$Long, function(x) { if (x > pi) { x - pi } else { x } }) | |
return(points) | |
} | |
# --------------------------------------------------- | |
LatLong2cartesian <- function(long, lat) { | |
# convert lat/long to cartesian (x,y,z) coordinates | |
r <- 1 | |
x <- r * cos(lat) * cos(long) | |
y <- r * cos(lat) * sin(long) | |
z <- r * sin(lat) | |
return(data.frame(x = x, y = y, z = z)) | |
} | |
# --------------------------------------------------- | |
xyz_surface <- function(coords_ave) { | |
# project centroids onto the surface of the sphere | |
x_ave <- coords_ave[1] | |
y_ave <- coords_ave[2] | |
z_ave <- coords_ave[3] | |
L <- sqrt( x_ave^2 + y_ave^2 + z_ave^2) | |
x <- x_ave / L | |
y <- y_ave / L | |
z <- z_ave / L | |
return(data.frame(x = x, y = y, z = z)) | |
} | |
# --------------------------------------------------- | |
cartesian2LatLong <- function(coordsSum) { | |
# convert cartesian (x,y,z) coordinates to lat/long | |
X <- coordsSum[1] | |
Y <- coordsSum[2] | |
Z <- coordsSum[3] | |
Long <- atan2(Y, X) | |
Hyp <- sqrt(X^2 + Y^2) | |
Lat <- atan2(Z, Hyp) | |
# r <- sqrt(X^2 + Y^2 + Z^2) | |
# Lat2 <- asin(Z / r) | |
return(c(Long, Lat)) | |
} | |
# --------------------------------------------------- | |
hf <- function(longs, lats, cent_long, cent_lat) { | |
# function to calculate distance between 2 points on a sphere | |
r <- 1 # sphere radius | |
delta.long <- (longs - cent_long) | |
delta.lat <- (lats - cent_lat) | |
b <- sin(delta.lat/2)^2 + cos(lats) * cos(cent_lat) * sin(delta.long/2)^2 | |
p <- 2 * asin(pmin(1, sqrt(b))) | |
dis <- r * p | |
return(dis) | |
} | |
# --------------------------------------------------- | |
upermn <- function(x) { | |
# function to calculate number of unique permutations | |
duplicates <- as.numeric(table(x)) | |
factorial(length(x)) / prod(factorial(duplicates)) | |
} | |
# --------------------------------------------------- | |
uniqueperm <- function(group1, group2, group1n, group2n) { | |
# function to calculate all unique permutations of pts$lvl | |
totaln <- sum(group1n, group2n) | |
x <- rep(group1, totaln) # vector of pts$lvl for group 1 | |
position <- combn(totaln, group2n) # all possible ways to place group2 lvls among group1 lvls | |
perms <- as.data.frame( apply(position, 2, function(p) replace(x, p, group2)) ) # all perms | |
return(perms) # returns a df of factors, with each column a unique permutation of lvls | |
} | |
# --------------------------------------------------- | |
dens <- function (nullDist, obs) { | |
# function to plot kernal density | |
d <- density(nullDist, na.rm = TRUE) | |
r <- dist( range(nullDist) ) | |
plot(d, type = "n", main = "Null distribution and observed value", | |
xlim = c( min(nullDist)-(r/10), | |
pmax( obs+(obs/10), max(nullDist)+(r/10) ) ) ) | |
polygon(d, col = "cornflowerblue", border = NA) | |
rug(nullDist, col = "darkred", lwd = 0.7) | |
} | |
# --------------------------------------------------- | |
# ----------------------------------------------------------------------- | |
latlong_centroids <- function(LL, print = FALSE){ | |
# function to calculate centroids of lat/long coordinates | |
# convert longitudinal radians larger than pi to their antepodal values | |
LL <- long_antepodes(LL) | |
# rows are x,y,z, and each column corresponds to a row in pts | |
coords <- mapply(LatLong2cartesian, long = LL$Long, lat = LL$Lat) | |
coords <- matrix(unlist(coords), nrow = nrow(coords), ncol = ncol(coords)) | |
if (print == TRUE) { | |
cat("\n") | |
cat("x, y, z Cartesian coordinates:", "\n") | |
print(coords) | |
} | |
# calculate centroids as row means | |
coordsCentroids <- data.frame(A = rowMeans(coords[, LL$lvl == "A"]), | |
B = rowMeans(coords[, LL$lvl == "B"]), | |
C = rowMeans(coords[, LL$lvl == "C"]) ) | |
if (print == TRUE) { | |
cat("\n") | |
cat("x, y, z Cartesian centroids within sphere:", "\n") | |
print(coordsCentroids) | |
} | |
surfaceCentroids <- sapply(coordsCentroids, xyz_surface) | |
surfaceCentroids <- as.data.frame( matrix(unlist(surfaceCentroids), | |
nrow = nrow(surfaceCentroids), ncol = ncol(surfaceCentroids)) ) | |
if (print == TRUE) { | |
cat("\n") | |
cat("x, y, z Cartesian centroids on sphere surface:", "\n") | |
print(surfaceCentroids) | |
} | |
centroids <- t( sapply(surfaceCentroids, cartesian2LatLong) ) | |
centroids <- data.frame(Long = centroids[, 1], Lat = centroids[, 2], | |
lvl = colnames(coordsCentroids)) | |
if (print == TRUE) { | |
cat("\n") | |
cat("Lat/Long centroids:", "\n") | |
print(centroids) | |
} | |
return(centroids) | |
} | |
# ----------------------------------------------------------------------- | |
# ----------------------------------------------------------------------- | |
centroid_perm <- function(pt, comparison = c("AB", "AC", "BC")) { | |
# function for performing permutation test on group centroid differences | |
if (!is.data.frame(pt)) | |
stop(deparse(substitute(pt)), " is not a data frame.") | |
if (any(pt$Long | pt$Lat) > pi*2) | |
stop(deparse(substitute(pt)), " does not contain Lat/Long in radians.") | |
comparison <- match.arg(comparison) | |
if (comparison == "AB") { | |
points <- pt[pt$lvl %in% c("A", "B"), ] # subset pt by groups of interest | |
calc.diff <- function(c){ | |
centDistAB <- hf(c$Long[c$lvl == "A"], c$Lat[c$lvl == "A"], | |
c$Long[c$lvl == "B"], c$Lat[c$lvl == "B"]) | |
return(centDistAB) | |
} | |
} | |
else if (comparison == "AC") { | |
points <- pt[pt$lvl %in% c("A", "C"), ] # subset pt by groups of interest | |
calc.diff <- function(c){ | |
centDistAC <- hf(c$Long[c$lvl == "A"], c$Lat[c$lvl == "A"], | |
c$Long[c$lvl == "C"], c$Lat[c$lvl == "C"]) | |
return(centDistAC) | |
} | |
} | |
else if (comparison == "BC") { | |
points <- pt[pt$lvl %in% c("B", "C"), ] # subset pt by groups of interest | |
calc.diff <- function(c){ | |
centDistBC <- hf(c$Long[c$lvl == "B"], c$Lat[c$lvl == "B"], | |
c$Long[c$lvl == "C"], c$Lat[c$lvl == "C"]) | |
return(centDistBC) | |
} | |
} | |
# for observed (unpermuted) groups | |
centroids <- latlong_centroids(points, print = TRUE) # calculate Lat/Long centroids | |
t.obs <- calc.diff(centroids) # calculate distance between centroids | |
# calculate all unique permutations for null distribution | |
lvl_points <- droplevels(points) # drop redundent factor level | |
perms <- uniqueperm(levels(lvl_points$lvl)[1], levels(lvl_points$lvl)[2], | |
table(lvl_points$lvl)[1], table(lvl_points$lvl)[2]) | |
# permute group assignments accross coordinates | |
perm.vals <- vector(mode = "numeric", length = ncol(perms)) | |
for (i in seq_along(perms)){ | |
points$lvl <- perms[, i] | |
centroids <- latlong_centroids(points) # recalculate centroids for each group assignment | |
perm.vals[i] <- calc.diff(centroids) | |
} | |
perm.ecdf <- ecdf(perm.vals) # cumulative distribution function of null hypothesis | |
p.val <- 1 - perm.ecdf(t.obs) # p-value | |
# pdf plot | |
dens(perm.vals, t.obs) | |
arrows(t.obs, 4, t.obs, 0, col = "darkred", lwd = 1.5) | |
text(t.obs, 6.5, labels = paste("Data set:", deparse(substitute(pt))), cex = 0.8) | |
text(t.obs, 6, labels = paste("Comparison:", comparison), cex = 0.8) | |
text(t.obs, 5.5, labels = paste("Permutations:", upermn(lvl_points$lvl)), cex = 0.8) | |
text(t.obs, 5, labels = paste("P-value =", round(p.val, 4)), cex = 0.8) | |
# the p-value and other info | |
cat("\n") | |
cat("Data set:", deparse(substitute(pt)), "\n") | |
cat("Comparison:", comparison, "\n") | |
cat("Permutations:", upermn(lvl_points$lvl), "\n") | |
cat("P-value =", p.val, "\n") | |
return(p.val) | |
} | |
# ------------------------------------------------------------------ | |
# ------------------------------------------------------------------ | |
# function calls: | |
pts1_ab <- centroid_perm(pts1, "AB") | |
pts1_ac <- centroid_perm(pts1, "AC") | |
pts1_bc <- centroid_perm(pts1, "BC") | |
# | |
pts2_ab <- centroid_perm(pts2, "AB") | |
pts2_ac <- centroid_perm(pts2, "AC") | |
pts2_bc <- centroid_perm(pts2, "BC") | |
# | |
pts11_ab <- centroid_perm(pts11, "AB") # humans and fossils merged | |
pts22_ab <- centroid_perm(pts22, "AB") # humans and fossils merged | |
p_values <- data.frame(comparison = c("AB", "AC", "BC", "new_AB"), | |
pts1 = c(pts1_ab, pts1_ac, pts1_bc, pts11_ab), | |
pts2 = c(pts2_ab, pts2_ac, pts2_bc, pts22_ab)) | |
p_values # ~2 minutes | |
# adjust p-values for multiple comparisons | |
p_values$pts1_adj <- p.adjust(p = p_values$pts1, method = "holm") # sequential Bonferroni | |
p_values$pts2_adj <- p.adjust(p = p_values$pts2, method = "holm") # sequential Bonferroni | |
# ------------------------------------------------------------------ | |
# ------------------------------------------------------------------ | |
dens_bw<- function (nullDist, obs, p) { | |
# function to plot kernal density in black and white | |
d <- density(nullDist, na.rm = TRUE) | |
r <- dist( range(nullDist) ) | |
plot(d, main = "Null distribution and observed value", | |
xlim = c( min(nullDist)-(r/10), | |
pmax( obs+(obs/10), max(nullDist)+(r/10) ) ) ) | |
rug(nullDist, col = "gray30", lwd = 0.1) | |
arrows(obs, 4, obs, 0, lwd = 1.2) | |
text(obs, 5, labels = paste("P-value =", round(p, 4)), cex = 0.8) | |
} | |
# need to run the analysis outside of the centroid_perm() function | |
# to get values for perm.vals and t.obs | |
dens_bw(nullDist = perm.vals, obs = t.obs, p = 0.0043956) | |
# ------------------------------------------------------------------ | |
# =================================================================== | |
# Plotting the points in 3D | |
# =================================================================== | |
# data for pts2 | |
# ------------------------------------------ | |
# Cartesian coordinates | |
coords <- structure(c(-0.0824040452325179, 0.05011425118923, 0.995338201395415, | |
0.0528229766618636, 0.294638046695378, 0.954147868297211, -0.0722679466402008, | |
0.296077263247749, 0.952426164107293, -0.033136207288388, 0.15653684561824, | |
0.987116106509479, -0.01423971677629, 0.0445592704815626, 0.998905251703224, | |
0.0196611658175792, 0.22595158795114, 0.973940100037498, -0.0510993918414077, | |
0.00530182936886244, 0.99867949951863, 0.0439281880452844, 0.0994078687663381, | |
0.994076651935047, -0.00109837695599064, 0.0153564877827651, | |
0.999881478901895, -0.0111924388220814, 0.0230164764392326, 0.999672431912342, | |
0.120833273964134, 0.0565173503067401, 0.991062616093158, 0.0731873209047366, | |
0.0418265579026938, 0.996440743402637, 0.0279340451173512, 0.030263287830029, | |
0.99915155133398, -0.139103121978486, 0.0364368128034391, 0.989607336335259, | |
-0.0298131604990723, 0.0706913445215261, 0.997052611084689), .Dim = c(3L, | |
15L)) | |
# Cartesian centroids | |
coordsCentroids <- structure(list(A = c(-0.0215939622429923, 0.177979544197217, | |
0.97697894867502), B = c(0.0290930958824458, 0.0402377617611054, | |
0.996635570293951), C = c(-0.0469940791200691, 0.0457971483849981, | |
0.995270499584642)), .Names = c("A", "B", "C"), row.names = c(NA, | |
-3L), class = "data.frame") | |
# Cartesian centroids projected to sphere surface | |
surfaceCentroids <- structure(list(V1 = c(-0.0217397719917556, 0.179181322376338, | |
0.983575841521747), V2 = c(0.0291551465469009, 0.0403235821173697, | |
0.998761225796763), V3 = c(-0.0471151038906367, 0.0459150906764468, | |
0.997833639157123)), .Names = c("V1", "V2", "V3"), row.names = c(NA, | |
-3L), class = "data.frame") | |
# lat/long coordinates for dataset #2 | |
pts2 <- data.frame( | |
Long = c(2.5952,1.3934,1.8102,1.7794,5.0217,1.4840,6.1798,1.1547,1.6422, | |
5.1650,0.4375,0.5192,0.8254,6.027,1.9699), | |
Lat = c(1.4742,1.2668,1.2611,1.4101,1.5240,1.3420,1.5194,1.4619,1.5554, | |
1.5452,1.4370,1.4864,1.5296,1.4265,1.4940), | |
lvl = c( rep('A', 6), rep('B', 6), rep('C', 3)) | |
) | |
# lat/long centroids | |
LatLong_centroids <- structure(list(Long = c(1.69153452750487, 0.944780830082602, | |
2.36909295471935), Lat = c(1.38930629806737, 1.52101620931788, | |
1.50496102499399), lvl = structure(1:3, .Label = c("A", "B", | |
"C"), class = "factor")), .Names = c("Long", "Lat", "lvl"), row.names = c("V1", | |
"V2", "V3"), class = "data.frame") | |
# 3D plots | |
# ------------------------------------------ | |
library(rgl) | |
# rgl.open() | |
# Cartesian coordinates | |
open3d() | |
plot3d(coords[1, ], coords[2, ], coords[3, ], col = as.numeric(pts2$lvl), size = 10) | |
text3d(coordsCentroids[1, ], coordsCentroids[2, ], coordsCentroids[3, ], | |
texts = c("A", "B", "C"), col = c(1, 2, 3), adj = 0) | |
text3d(surfaceCentroids[1, ], surfaceCentroids[2, ], surfaceCentroids[3, ], | |
texts = c("a", "b", "c"), col = c(1, 2, 3), adj = 0) | |
# Lat/Long plot on sphere | |
library(sm) | |
sm.sphere(lat = pts2$Lat * (180 / pi), long = pts2$Long * (180 / pi), | |
theta = 30, phi = 120, pch = as.numeric(pts2$lvl), col = as.numeric(pts2$lvl)) | |
sm.sphere(lat = LatLong_centroids$Lat * (180 / pi), long = LatLong_centroids$Long * (180 / pi), | |
theta = 30, phi = 120, pch = c(1,2,3), col = c(1,2,3), add = TRUE) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment