# This approach to assessing multifunctionality is based on the idea that the sites
# best at providing multiple functions will have not only a high mean value across
# functions (approach 3 in Byrnes et al.) but also low variability in the function
# delivered across functions (i.e. a low coefficient of variation).
# It assumes you have read the Byrnes et al. paper (http://arxiv.org/abs/…).
# I use Byrnes' multifunc package to illustrate it.
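# As a quick toy illustration of the idea (hypothetical numbers, not part of the
# analysis below): two sites can deliver the same mean function while differing
# strongly in how evenly the individual functions are delivered.
siteA <- c(0.6, 0.6, 0.6) #moderate but even delivery of three functions
siteB <- c(1.0, 0.7, 0.1) #same mean, very uneven delivery
mean(siteA) == mean(siteB) #TRUE, both 0.6
sd(siteA)/mean(siteA) #CV = 0, so siteA should rank best
sd(siteB)/mean(siteB) #CV ~ 0.76, so siteB is penalized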
library(devtools)
install_github("jebyrnes/multifunc") #"user/repo" syntax for current devtools versions
library(multifunc)
library(plyr) #for colwise(), used in getMeanCV() below
library(reshape2) #for melt(), used at the end
library(ggplot2)
#load data and clean selected variables as per their example
data(all_biodepth)
allVars<-qw(biomassY3, root3, N.g.m2, light3, N.Soil, wood3, cotton3)
germany<-subset(all_biodepth, all_biodepth$location=="Germany")
vars<-whichVars(germany, allVars)
#re-normalize N.Soil so that everything is on the same sign-scale (i.e. the maximum level of a function is the "best" level)
germany$N.Soil<- -1*germany$N.Soil + max(germany$N.Soil, na.rm=TRUE)
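#quick check (not in the original gist): after the flip the old maximum maps to 0,
#so higher values now mean "better", consistent with the other functions
range(germany$N.Soil, na.rm = TRUE) #minimum is now 0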
#Average method from the multifunc package (approach 3 in Byrnes et al.)
germany<-cbind(germany, getStdAndMeanFunctions(germany, vars))
head(germany)
ggplot(aes(x=Diversity, y=meanFunction),data=germany)+geom_point(size=3)+
  theme_bw(base_size=15)+
  stat_smooth(method="lm", colour="black", size=2)
#a modified Byrnes function that also calculates the CV
#first I define two functions needed later on
#coefficient of variation
coef_var <- function(x, na.rm = TRUE) {
  ifelse(sd(x, na.rm = na.rm) == 0 & mean(x, na.rm = na.rm) == 0, 0,
         sd(x, na.rm = na.rm)/mean(x, na.rm = na.rm))
}
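#quick sanity checks (hypothetical inputs, not part of the analysis):
coef_var(c(2, 2, 2)) #0: no variation
coef_var(c(0, 0, 0)) #0: the guarded all-zero case (plain sd/mean would give NaN)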
#and a function for the distance from a point to a line, borrowed from:
## Credits:
## Theory by Paul Bourke http://local.wasp.uwa.edu.au/~pbourke/geometry/pointline/
## Based in part on C code by Damian Coventry Tuesday, 16 July 2002
## Based on VBA code by Brandon Crosby 9-6-05 (2 dimensions)
## With grateful thanks for answering our needs!
## This is an R (http://www.r-project.org) implementation by Gregoire Thomas 7/11/08
## I.Bartomeus added the intersection point to the output
distancePointLine <- function(x, y, slope, intercept) {
  ## x, y is the point to test.
  ## slope, intercept define the line to check the distance to.
  ##
  ## Returns the distance from the line.
  ##
  ## Returns 9999 on 0 denominator conditions.
  x1 <- x-10
  x2 <- x+10
  y1 <- x1*slope+intercept
  y2 <- x2*slope+intercept
  distancePointSegment(x,y, x1,y1, x2,y2)
}
distancePointSegment <- function(px, py, x1, y1, x2, y2, return_point = TRUE) {
  ## px,py is the point to test.
  ## x1,y1,x2,y2 is the segment to check the distance to.
  ##
  ## Returns the distance from the line, or, if the intersecting point on the line
  ## nearest the tested point is outside the endpoints of the segment, the distance
  ## to the nearest endpoint.
  ##
  ## Returns 9999 on 0 denominator conditions.
  lineMagnitude <- function(x1, y1, x2, y2) sqrt((x2-x1)^2+(y2-y1)^2)
  ans <- NULL
  ix <- iy <- 0 # intersecting point
  lineMag <- lineMagnitude(x1, y1, x2, y2)
  if( lineMag < 0.00000001) {
    warning("short segment")
    return(9999)
  }
  u <- (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
  u <- u / (lineMag * lineMag)
  if((u < 0.00001) || (u > 1)) {
    ## closest point does not fall within the segment, take the shorter distance
    ## to an endpoint (NB: in this branch ix and iy hold distances, not coordinates,
    ## so the "point" slots of the returned vector are not meaningful)
    ix <- lineMagnitude(px, py, x1, y1)
    iy <- lineMagnitude(px, py, x2, y2)
    if(ix > iy) ans <- iy
    else ans <- ix
  } else {
    ## intersecting point is on the segment, use the formula
    ix <- x1 + u * (x2 - x1)
    iy <- y1 + u * (y2 - y1)
    ans <- lineMagnitude(px, py, ix, iy)
  }
  if(return_point == TRUE){
    Out <- c(ix = ix, iy = iy, ans = ans)
  }else{
    Out <- ans
  }
  Out
}
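#Quick geometric sanity check (hypothetical point, not part of the analysis):
#projecting (0, 0) onto the line y = 1 - x should give the intersection point
#(0.5, 0.5) at distance sqrt(0.5) ~ 0.707
distancePointLine(x = 0, y = 0, slope = -1, intercept = 1)
# ix  iy  ans
# 0.5 0.5 0.7071068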
#now I tweak Byrnes' function to also get the CV and to calculate a mean function modified by the CV
getMeanCV <- function(data, vars, standardizeFunction=standardizeUnitScale){
  ret<-colwise(standardizeFunction)(data[,which(names(data) %in% vars)])
  names(ret)<-paste(names(ret), ".std", sep="")
  ret$meanFunction<-rowSums(ret)/ncol(ret)
  # get the CV across the standardized functions only (excluding meanFunction)
  ret$CVFunction<- apply(ret[,grep("\\.std$", names(ret))], MARGIN= 1, coef_var)
  # See the relationship between mean and CV
  plot(ret$meanFunction ~ ret$CVFunction, xlim = c(0,1), ylim = c(0,1))
  abline(coef= c(1, -1)) #both axes need to be constrained between 0 and 1.
  # My rationale is that the best sites will be in the upper left corner,
  # so let's rank them along the 45° line.
  # First we find, for each data point, its closest point on the drawn line
  p<- matrix(ncol = 3, nrow = nrow(ret))
  for(i in 1:nrow(ret)){
    p[i,] <- distancePointLine(x= ret$CVFunction[i], y = ret$meanFunction[i],
                               slope = -1, intercept = 1)
  }
  colnames(p) <- c("x","y","distToLine")
  ret <- cbind(ret,p[,c(1,2)])
  #plot the projected points
  points(ret$x,ret$y, col = "red")
  #then I calculate the distance from each projected point p to the worst point
  #(i.e. the site with maximum CV and minimum mean function)
  minimum <- c(max(ret$CVFunction), min(ret$meanFunction))
  #to avoid depending on the observed worst site, maybe it is better to use c(1, 0) as the minimum?
  #minimum <- c(1, 0)
  dis <- c()
  for(i in 1:nrow(ret)){
    points_ <- c(ret$x[i], ret$y[i]) #the projected point, as described above
    dis[i] <- sqrt(sum((points_ - minimum) ^ 2))
  }
  ret$MeanCV <- dis #store the new mean modified by CV
  #x and y could be removed from ret before returning.
  #the plots could also be removed, but they help visually
  return(ret)
}
#now, a site with a moderate mean function but a low CV (i.e. most functions are fulfilled)
#is ranked better than a site with the same mean function
#but with high values for some functions and low values for others.
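#A tiny synthetic demo of this ranking property (made-up data; f1-f3 are
#hypothetical functions): site 1 delivers all functions evenly, site 2 unevenly
demo <- data.frame(f1 = c(0.6, 1.0), f2 = c(0.6, 0.7), f3 = c(0.6, 0.1))
getMeanCV(demo, vars = c("f1", "f2", "f3"))
#after standardization site 1 has a lower CVFunction and a higher MeanCV than site 2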
#Use this method on the germany example
#(note that germany already contains the .std columns and meanFunction from above,
#so germany2 ends up with some duplicated column names; hence the selection by position later)
germany2 <-cbind(germany, getMeanCV(germany, vars))
head(germany2)
#see how the calculated measures correlate
plot(germany2$meanFunction ~ germany2$MeanCV)
plot(germany2$CVFunction ~ germany2$MeanCV)
#See the result
ggplot(aes(x=Diversity, y=MeanCV),data=germany2)+geom_point(size=3)+
  theme_bw(base_size=15)+
  stat_smooth(method="lm", colour="black", size=2)
#To visualize all functions
colnames(germany2)
germanymelted <- melt(germany2[,c(8,129:134,141,144)], id.vars = "Diversity")
ggplot(aes(x=Diversity, y=value),data=germanymelted)+geom_point(size=3)+
  facet_grid(~variable) + theme_bw(base_size=15)+
  stat_smooth(method="lm", colour="black", size=2) +
  xlab("\nSpecies Richness") +
  ylab("Standardized value of function\n")
#Note that the modified mean value is artificial, so absolute values should not be compared, only slopes.
#In this case it doesn't change the output much, I guess because most functions are correlated with each other?