Skip to content

Instantly share code, notes, and snippets.

@dsparks
Created April 11, 2011 21:46
Show Gist options
  • Save dsparks/914428 to your computer and use it in GitHub Desktop.
Save dsparks/914428 to your computer and use it in GitHub Desktop.
Pairwise distance CPG, polarization, or segregation measure
require(cluster)
Polarizer <- function(group, data){
if((mean(is.na(group)) > 0.99) | length(group) <= 1){
Output <- c(NA, NA)
} else {
SameGroup <- try(as.matrix(daisy(data.frame(group))) == 0)
diag(SameGroup) <- NA
SameGroup[lower.tri(SameGroup)] <- NA
PairwiseDistance <- as.matrix(dist(data))
PairwiseDistance[lower.tri(PairwiseDistance)] <- NA
WithinDistances <- PairwiseDistance[SameGroup == 1]
BetweenDistances <- PairwiseDistance[SameGroup != 1]
WithinMean <- mean(WithinDistances, na.rm = T)
BetweenMean <- mean(BetweenDistances, na.rm = T)
WithinSE <- sd(WithinDistances, na.rm = T) / sqrt(sum(!is.na(WithinDistances)))
BetweenSE <- sd(BetweenDistances, na.rm = T) / sqrt(sum(!is.na(BetweenDistances)))
MeanRatio <- BetweenMean / WithinMean
LogMeanRatio <- log(MeanRatio, base = 10)
RatioSE <- MeanRatio * sqrt((WithinSE / WithinMean) ^ 2 + (BetweenSE / BetweenMean) ^ 2)
# This returns a logged Estimate, but an unlogged Std. Err:
Output <- c(LogMeanRatio, RatioSE)
}
names(Output) <- c("Coefficient", "SE")
return(Output)
}
Polarizer <- function(group, data){
SameGroup <- as.matrix(dist(as.numeric(as.factor(group)))) == 0
diag(SameGroup) <- NA
SameGroup[lower.tri(SameGroup)] <- NA
PairwiseDistance <- as.matrix(dist(data))
PairwiseDistance[lower.tri(PairwiseDistance)] <- NA
PairwiseDistance <- PairwiseDistance / max(PairwiseDistance, na.rm = T)
WithinDistances <- PairwiseDistance[SameGroup == 1]
BetweenDistances <- PairwiseDistance[SameGroup != 1]
WithinMean <- mean(WithinDistances, na.rm = T)
BetweenMean <- mean(BetweenDistances, na.rm = T)
WithinSE <- sd(WithinDistances, na.rm = T) / sqrt(sum(!is.na(WithinDistances)))
BetweenSE <- sd(BetweenDistances, na.rm = T) / sqrt(sum(!is.na(BetweenDistances)))
MeanDifference <- BetweenMean - WithinMean
MeanDifferenceSE <- sqrt(WithinSE ^ 2 + BetweenSE ^ 2)
Output <- c(MeanDifference, MeanDifferenceSE)
names(Output) <- c("Coefficient", "SE")
return(Output)
}
# Normalized and logistic'd:
Polarizer <- function(group, data){
Normalize <- function(x){(x - mean(x, na.rm = T)) / sd(x, na.rm = T)}
Logistic <- function(x){exp(x) / (1 + exp(x))}
SameGroup <- as.matrix(dist(as.numeric(as.factor(group)))) == 0
diag(SameGroup) <- NA
SameGroup[lower.tri(SameGroup)] <- NA
PairwiseDistance <- as.matrix(dist(data))
PairwiseDistance[lower.tri(PairwiseDistance)] <- NA
PairwiseDistance <- Normalize(PairwiseDistance)
PairwiseDistance <- Logistic(PairwiseDistance)
WithinDistances <- PairwiseDistance[SameGroup == 1]
BetweenDistances <- PairwiseDistance[SameGroup != 1]
WithinMean <- mean(WithinDistances, na.rm = T)
BetweenMean <- mean(BetweenDistances, na.rm = T)
WithinSE <- sd(WithinDistances, na.rm = T) / sqrt(sum(!is.na(WithinDistances)))
BetweenSE <- sd(BetweenDistances, na.rm = T) / sqrt(sum(!is.na(BetweenDistances)))
MeanDifference <- BetweenMean - WithinMean
MeanDifferenceSE <- sqrt(WithinSE ^ 2 + BetweenSE ^ 2)
Output <- c(MeanDifference, MeanDifferenceSE)
names(Output) <- c("Coefficient", "SE")
return(Output)
}
# Some sample data:
N <- 100
Groups <- sort(sample(LETTERS[1:3], N, replace = T))
Locations <- matrix(rnorm(3*100), ncol = 3)
Locations <- Locations[order(Locations[, 1]), ]
plot(Locations, type = "n")
text(Locations[, 1:2], Groups, cex = exp(Locations[, 3]) / (1+exp(Locations[, 3])))
# Test:
Polarizer(Groups, Locations)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment