Last active
February 18, 2020 03:47
-
-
Save willpearse/1bdfaff2eb8a93080f159d7a77993d96 to your computer and use it in GitHub Desktop.
MADcomm broken (fixable?) functions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @export | |
.reed.2017a <- function(...) { | |
data <-read.csv("http://pasta.lternet.edu/package/data/eml/knb-lter-sbc/17/30/a7899f2e57ea29a240be2c00cce7a0d4", as.is=TRUE) | |
names(data) <- tolower(names(data)) | |
data$count[data$count < 0] <- 0 | |
data$taxon_species[data$taxon_species == -99999] <- NA | |
data$taxon_genus[data$taxon_genus == -99999] <- NA | |
data$species <- with(data, paste(taxon_genus, taxon_species, sep="_")) | |
data$site <- with(data, paste(site, transect, sep="_")) | |
data$site_year <- with(data, paste(site, year, sep="_")) | |
data <- with(data, tapply(count, list(site_year, species), sum, na.rm = TRUE)) | |
data[is.na(data)] <- 0 | |
temp <- strsplit(rownames(data), "_") | |
year <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,3] | |
name <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,1] | |
return(.matrix.melt(data, | |
data.frame(units="#"), | |
data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
data.frame(species=colnames(data), taxonomy=NA))) | |
} | |
#' @export | |
.reed.2017b <- function(...) { | |
data <-read.csv("https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/19/23/5daf0da45925ba9014872c6bc9f6c8bb") | |
names(data) <- tolower(names(data)) | |
data$count[data$count < 0] <- 0 | |
data$taxon_species[data$taxon_species == -99999] <- NA | |
data$species <- with(data, paste(taxon_genus, taxon_species, sep="_")) | |
data$site <- with(data, paste(site, transect, sep="_")) | |
data$site_year <- with(data, paste(site, year, sep="_")) | |
data <- with(data, tapply(count, list(site_year, species), sum, na.rm = TRUE)) | |
data[is.na(data)] <- 0 | |
temp <- strsplit(rownames(data), "_") | |
year <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,3] | |
name <- matrix(unlist(temp), ncol=3, byrow=TRUE)[,1] | |
return(.matrix.melt(data, | |
data.frame(units="#"), | |
data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
data.frame(species=colnames(data), taxonomy=NA))) | |
} | |
#' @export | |
.rodriguezBuritica.2013 <- function(...){ | |
data <- read.csv(suppdata("E094-083","SMCover.csv",from = "esa_archives")) | |
species.data <- read.csv(suppdata("E094-083","Species.csv",from = "esa_archives")) | |
species.data$ReportedName <- sub(" ", "_", species.data$ReportedName) | |
species.data$AcceptedName <- sub(" ", "_", species.data$AcceptedName) | |
data$species <- species.data$AcceptedName[match(data$Code, species.data$Code)] | |
data$plot_year <- with(data, paste(Plot, Year, sep = "_")) | |
transformed.data <- with(data, tapply(Cover, list(plot_year, species), sum, na.rm=TRUE)) | |
transformed.data[is.na(transformed.data)] <- 0 | |
temp <- strsplit(rownames(transformed.data), "_") | |
year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2] | |
name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1] | |
return(.matrix.melt(transformed.data, | |
data.frame(units="#"), | |
data.frame(id=rownames(transformed.data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
data.frame(species=colnames(transformed.data), taxonomy=NA))) | |
} | |
#' @export | |
.ross.2014 <- function(...){ | |
data <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-and.3136.5&entityid=88e40dc185bd3f00e7464398b61f40fc", header = TRUE) | |
species <- data$SCI_NAME | |
data$id <- rep(paste(data$BIOGEOGRAPHY,data$DATE)) | |
site.metadata <- data[!duplicated(data$id),] | |
site.metadata <- with(site.metadata, | |
data.frame(id=id, year=DATE, name=BIOGEOGRAPHY, lat=NA,long=NA, address=NA,area=NA) | |
) | |
site <- rep(paste(data$BIOGEOGRAPHY,data$DATE), 856) | |
abundance <- as.vector(data$INDIVIDUALS) | |
abundance[is.na(abundance)] <- 0 | |
return(.df.melt(species, site, abundance, | |
study.metadata=data.frame(units="#"), | |
site.metadata, | |
species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
)) | |
} | |
#' @export | |
.truxa.2015 <- function(...){ | |
data <- as.data.frame(read_xlsx(suppdata("10.5061/dryad.fg8f6/1", "Appendix_3.xlsx"), skip=1)) #use skip to skip any rows that you don't want/aren't useful | |
comm <- data[,-1:-3] #get rid of columns you don't want | |
rownames(comm) <- data$Species #name the rows what you want | |
comm <- t(comm) #t=transpose, flip the rows and columns | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
data.frame(id=rownames(comm),year="2006-2008", | |
name=c("Danube non-flooded", "Danude flooded", "Leitha non-flooded", "Leitha flooded", "Morava non-flooded", "Morava flooded"), | |
lat=c("16\u00BA41'24", "16\u00BA42'20", "16\u00BA51'32", "16\u00BA53'26", "16\u00BA53'22"), | |
long=c("48\u00BA08'41", "48\u00BA07'53", "48\u00BA00'19", "48\u00BA03'28", "48\u00BA17'00", "48\u00BA17'96"), | |
address="Eastern Austria",area="na"), | |
data.frame(species=colnames(comm),taxonomy="Lepidoptera"))) | |
} | |
#' @export | |
.schmitt.2012 <- function(...){ | |
addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-sbc/46/3/4ded739e78e50552837cf100f251f7ab" | |
addr <- sub("^https","http",addr) | |
data <-read.csv(addr,header=F, skip=1, sep=",", quote='"', | |
col.names=c("YEAR", "MONTH", "DATE", "SITE", "DEPTH", "REP", | |
"SP_CODE", "COUNT", "COMMENTS", "Common_Name", | |
"taxon_GROUP", "SURVEY", "taxon_PHYLUM", | |
"taxon_CLASS", "taxon_ORDER", "taxon_FAMILY", | |
"taxon_GENUS", "taxon_SPECIES"), check.names=TRUE) | |
data$species <- with(data, paste(taxon_GENUS, taxon_SPECIES, sep="_")) | |
data$site.year.depth <- with(data, paste(SITE, YEAR, DEPTH, sep="_")) | |
site.id <- unique(data$site.year.depth) | |
year <- data$YEAR[!duplicated(data$site.year.depth)] | |
name <- data$SITE[!duplicated(data$site.year.depth)] | |
return(.df.melt(data$species, data$site.year.depth, data$COUNT, | |
data.frame(units="#"), | |
data.frame(id=site.id, year, name, lat=NA, long=NA, address="Santa Cruz Island, CA, USA", area=NA), | |
data.frame(species=unique(data$species), taxonomy="Pycnopodia"))) | |
} | |
#' @export | |
.sandau.2017 <- function(...){ | |
tmp.file <- tempfile() | |
download.file("https://www.datadryad.org/bitstream/handle/10255/dryad.129944/BB_all_4_SimilMatrices_Dryad.xlsx?sequence=1", tmp.file) | |
data <- read.xls(tmp.file, sheet=2) | |
lookup <- read.xls(suppdata("10.5061/dryad.44bm6", "BB_all_4_SimilMatrices_Dryad.xlsx"), sheet=1, skip=5, header=FALSE, as.is=TRUE)[-1:-8,] | |
lookup[,2] <- .sanitize.text(lookup[,2]) | |
lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_")) | |
lookup <- setNames(lookup[,2], lookup[,1]) | |
names(data)[names(data) %in% names(lookup)] <- lookup[names(data)[names(data) %in% names(lookup)]] | |
site_year <- with(data, paste(data$PlotID, Year, sep="_")) | |
data <- cbind(site_year, data) | |
comm.mat <-data[-1:-11] | |
#This sets the row names to the unique plot_year identifier | |
rownames(comm.mat) <-data[,1] | |
site.metadata <- data[!duplicated(data$site_year),] | |
return(.matrix.melt(comm.mat, | |
data.frame(units="%", treatment=""), | |
data.frame(id=site.metadata$site_year, name=site.metadata$PlotID, year=site.metadata$Year, lat=NA, long=NA, address="Grandcour", treatment=site.metadata$Treat, area="20 x 20 m"), | |
data.frame(species=unique(lookup, taxonomy="Plantae")))) | |
} | |
#' @export | |
.russo.2015 <- function(...){ | |
species <- read.xls(suppdata("10.5061/dryad.6cr82", "DataforDryad_netmaludome.xlsx"), header=FALSE, as.is=TRUE, nrow=2)[2:1,] | |
species <- unname(apply(as.matrix(species), 2, paste, collapse="_"))[-1] | |
data <- read.xls(suppdata("10.5061/dryad.6cr82", "DataforDryad_netmaludome.xlsx"), as.is=TRUE, skip=3) | |
comm <- as.matrix(data[,-1]) | |
colnames(comm) <- species; rownames(comm) <- data[,1] | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
data.frame(id=rownames(comm), name=colnames(comm), year="2008-2013", lat=NA, long=NA, address=NA, area="New York state, USA"), | |
data.frame(species=colnames(comm), taxonomy=NA) | |
)) | |
} | |
# YEAR UNKNOWN | |
#' @export | |
.mcknight.2000 <- function(...){ | |
data <- read.csv(file="https://pasta.lternet.edu/package/data/eml/knb-lter-mcm/12/3/7f8537c0f0f80a255551ad61d9d512dc",header=TRUE) | |
species <- unique(data$Species) | |
data$id <- rep(paste(data$Location,data$Date)) | |
site.metadata <- data[!duplicated(data$id),] | |
site.metadata <- with(site.metadata, | |
data.frame(id=id, year=Date, name=Location, lat=NA,long=NA, address="antarctica",area=NA) | |
) | |
site <- rep(paste(data$Location,data$Date), 27) | |
abundance <- as.vector(data[,10]) | |
abundance[is.na(abundance)] <- 0 | |
return(.df.melt(species, site, abundance, | |
study.metadata=data.frame(units="#"), | |
site.metadata, | |
species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
)) | |
} | |
#' @export | |
.mcmahon.2017 <- function(...){ | |
abun <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=da11cbc268d91fef78c78bd2813adbf6", header = TRUE) | |
site_meta1 <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=a508f609c7d45f1c10604a4722acfd04", header = TRUE) | |
site_meta2 <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=d35b86dbfcf7bf6eab90a2fd5539809c", header = TRUE) | |
org_meta <- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.349.2&entityid=5c558e387eadadf707a3f84742b0d3e1", header = TRUE) | |
colnames(abun)[1] <- "OTU" | |
data_meta1 <- merge(abun, site_meta1, by = "Sample_Name") | |
site_data <- merge(data_meta1, site_meta2, by = "Sample_Name") | |
data <- merge(org_meta, site_data, by = "OTU") | |
comm <- with(data, tapply(value, list(paste(Lake,Collection_Date,sep="_", OTU), length))) | |
site.names <- sapply(strsplit(rownames(comm), "_"), function(x) x[1]) | |
years <- sapply(strsplit(rownames(comm), "_"), function(x) x[2]) | |
comm[is.na(comm)] <- 0 | |
unique <- data[!duplicated(data$OTU),] | |
colnames(unique)[1] <- 'Species' | |
unique <- unique[,-9:-20] | |
return(.matrix.melt(comm, | |
data.frame(units="p/a"), | |
data.frame(id=rownames(comm),years,site.names,lat=NA,long=NA,address="North of Minocqua, Wisconsin USA",area="Depth"), | |
data.frame(species=colnames(comm),taxonomy=unique, ))) | |
} | |
#' @export | |
.miller.2013 <- function(...){ | |
data<- read.csv(file = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-and.2739.7&entityid=1743caa458ea7bb640833d884576f51c", header = TRUE) | |
species <- data$ENTITY | |
data$id <- rep(paste(data$TRAPID,data$YEAR)) | |
site.metadata <- data[!duplicated(data$id),] | |
site.metadata <- with(site.metadata, | |
data.frame(id=id, year=YEAR, name=TRAPID, lat=NA,long=NA, address="Willamette National Forest Oregon USA",area=NA) | |
) | |
site <- rep(paste(data$TRAPID,data$YEAR), 17663) | |
abundance <- as.vector(data$NO_INDIV) | |
abundance[is.na(abundance)] <- 0 | |
return(.df.melt(species, site, abundance, | |
study.metadata=data.frame(units="#"), | |
site.metadata, | |
species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
)) | |
} | |
#' @export | |
.myster.2010 <- function(...){ | |
addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-luq/100/246250/f718e683c7c425207c7d1f7adeddf85f" | |
addr <- sub("^https","http", addr) | |
data <-read.csv(addr, header=F, skip=1, sep=",", col.names=c("date", "plot", "species", "percent.cover"), check.names=TRUE) | |
data$date <- format(as.Date(data$date, format="%d/%m/%Y"),"%Y") | |
data$plot.year <- with(data, paste(plot, date, sep="_")) | |
site.id <- unique(data$plot.year) | |
year <- data$date[!duplicated(data$plot.year)] | |
name <- data$plot[!duplicated(data$plot.year)] | |
return(.df.melt(data$species, data$plot.year, data$percent.cover, | |
data.frame(units="area"), | |
data.frame(id=site.id, year, name, lat="-65.8257", long="18.3382", address="Luquillo Experimental Forest, Puerto Rico, USA", area="2mX5m"), | |
data.frame(species=unique(data$species), taxonomy="Plantae"))) | |
} | |
#' @export | |
.nichols.2006 <- function(...) { | |
addr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-ntl/61/3/wgnhs_macrophyte_aquaplt2" | |
addr <- sub("^https","http",addr) | |
abundanceData <-read.csv(addr, header=F, skip=1, sep=",", quote='"', | |
col.names=c("mwbc", "lake_unique", "lakename", | |
"county", "county_id", "month", "year4", | |
"spcode", "aqstano", "visual_abundance"), | |
check.names=TRUE) | |
specAddr <- "https://pasta.lternet.edu/package/data/eml/knb-lter-ntl/61/3/wgnhs_macrophyte_pltname" | |
specAddr <- sub("^https","http",specAddr) | |
specData <-read.csv(specAddr, header=F, skip=1, sep=",", quote='"', | |
col.names=c("spcode", "spec_no", "scientific_name", | |
"common_name", "lifeform", "spec_category", | |
"genus"), check.names=TRUE) | |
abundanceData$site.year <- with(abundanceData, paste(lakename, year4, sep=">")) | |
abundanceData$species <- specData$scientific_name[match(abundanceData$spcode, specData$spcode)] | |
data <- with(abundanceData, tapply(visual_abundance, list(site.year, species), sum, na.rm = TRUE)) | |
data[is.na(data)] <- 0 | |
temp <- unlist(strsplit(rownames(data), ">", fixed=T)) | |
name <- temp[seq(1,length(temp), 2)] | |
year <- temp[seq(2,length(temp), 2)] | |
return(.matrix.melt(data, | |
data.frame(units="#"), | |
data.frame(id=rownames(data), year, name, lat=NA, long=NA, address=NA, area=NA), | |
data.frame(species=colnames(data), taxonomy=NA))) | |
} | |
#' @export | |
.lorite.2017<-function(...){ | |
expdata<-read.delim("https://doi.org/10.1371/journal.pone.0182414.s003", nrows=410) | |
lookup <- read.delim("https://doi.org/10.1371/journal.pone.0182414.s003", skip=414, nrows=34,as.is = TRUE,header = FALSE) | |
lookup<-lookup[,1:2] | |
expdata$new.site<-paste(expdata$Site,expdata$transect,expdata$quadrat,sep="_") | |
names(expdata)[7:40]<-lookup[,2] | |
comm<-cbind(id=expdata[,41],expdata[,7:40]) | |
#needs meta data, loc: scattered through paper/tables but existant. | |
return(.matrix.melt(comm, | |
data.frame(units="percent"), | |
data.frame(id=comm$id,year=NA), | |
data.frame(species=lookup[,2],taxonomy=NA) | |
)) | |
} | |
#' @export | |
.kaspari.2016 <- function(...) { | |
addr <- "https://pasta.lternet.edu/package/data/eml/msb-tempbiodev/1111170/1/cfd3a55deef52e3a93469057053f5404" | |
addr <- sub("^https", "http", addr) | |
data <-read.csv(addr, header=F, skip=1, sep=",", | |
col.names=c("location", "distance", "direction", | |
"plotcode", "taxon", "abundance"), | |
check.names=TRUE) | |
return(.df.melt(data$taxon, data$plotcode, data$abundance, | |
data.frame(units="#"), | |
data.frame(id=unique(data$plotcode), year="2016", name=unique(data$plotcode), lat=NA, long=NA, address=NA, area=NA), | |
data.frame(species=unique(data$taxon), taxonomy="Arthropoda"))) | |
} | |
#' @export | |
.johnson.2017 <- function(...){ | |
datam<-read.csv(suppdata("10.5061/dryad.cb13r","Species_x_SiteMatrix.csv"), as.is=TRUE) | |
sitedataA<-read.csv(suppdata("10.5061/dryad.cb13r","RawSoilData.csv"),as.is = TRUE) | |
sitedataB<-read.csv(suppdata("10.5061/dryad.cb13r","VacantLot_DemolitionDate.csv"),as.is = TRUE) | |
sppdata<-read.csv(suppdata("10.5061/dryad.cb13r","Species_x_TraitsMatrix.csv"),as.is = TRUE) | |
comm<-datam[,-(1:2)] | |
sitedataB <- rbind(sitedataB, sitedataB) | |
sitedataB$new.code <- paste(sitedataB$Code, rep(c("BF","RG"), each=nrow(sitedataB)/2), sep=".") | |
sitedataA$new.code <- paste(sitedataA$LotID, rep(c("BF","RG"), each=nrow(sitedataA)/2), sep=".") | |
sitedata<-merge(sitedataA,sitedataB,by="new.code",all.x=TRUE,all.y = TRUE) | |
names(sitedata)[c(1,27)] <- c("id","address") | |
sitedata$lat <- NA;sitedata$long <-NA; sitedata$area <- NA | |
sitedata$year <- "2012-2013" | |
sitedata$name <- sitedata$id | |
names(sppdata)[1:2] <- c("species","taxonomy") | |
return(.matrix.melt(comm, | |
data.frame(units="percent"), | |
sitedata, | |
sppdata) | |
) | |
} | |
#' @export | |
.hollibaugh.2017 <- function(...){ | |
data <- read.csv(file = "https://pasta.lternet.edu/package/data/eml/knb-lter-pal/114/2/3ab81d869107c4b3a7f0fb76fed55ed4", header = TRUE) | |
names(data)[7:8] <- c("latitude","longitude") | |
taxon <- rep(c("Eub","AOB","Archaea","Cren","AOA", "AOB","Eub","AOB","Archaea","Cren","AOA","AOB"), nrow(data)) | |
data$id <- paste(data$Station,data$Datetime.GMT) | |
site.metadata <- data[!duplicated(data$id),] | |
site.metadata <- with(site.metadata, | |
data.frame(id=id, year=Datetime.GMT, name=Station, lat=latitude, long=longitude, address=NA, area=NA) | |
) | |
site <- rep(paste(data$Station,data$Datetime.GMT), 12) | |
abundance <- unname(unlist(data[,10:21])) | |
return(.df.melt(taxon, site , abundance, | |
study.metadata=data.frame(units="#"), | |
site.metadata, | |
species.metadata=data.frame(species=unique(taxon), taxonomy=NA))) | |
} | |
#' @export | |
.harrower.2017<-function(...){ | |
birddata<-read.csv(suppdata("10.5061/dryad.365dr", "bird_data.csv"),as.is = TRUE) | |
envdata<-read.csv(suppdata("10.5061/dryad.365dr","envr_data.csv"),as.is=TRUE) | |
envdata$name<-paste(envdata$block,envdata$transect,sep="_") | |
birddata$id<-paste(birddata$block,birddata$transect,birddata$year,sep="_") | |
birddata$name<-paste(birddata$block,birddata$transect,sep="_") | |
birddata$lat<-"50o39'59\" N" | |
birddata$long<-"120o19'09\" W" | |
birddata$address<- "Lac du Bois Provincial Park near Kamloops, British Columbia, Canada" | |
birddata$area<-"20ha" | |
birddata$binom<-paste(birddata$genus,birddata$species,sep=".") | |
comm <- with(birddata, tapply(binom, list(binom, site), length)) | |
comm[is.na(comm)] <- 0 | |
comm<-t(comm) | |
birdsub<-birddata[!duplicated(birddata$site),] | |
envsub<-envdata[,c(3,8)] | |
envtest<-merge(birdsub,envsub,by="name") | |
envtest<-envtest[,-c(6:11,17)] | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
envtest, | |
data.frame(species=birddata$binom, taxonomy=NA) | |
) | |
) | |
} | |
#' @export | |
.franklin.2018 <- function(...) { | |
data <- read.xls("CopyofWESTCOSPPCOVER.xlsx", as.is=TRUE) | |
ground_data <- read.xls("WEST CO GROUND COVER.xlsx") | |
data$R4_SPP <- NULL | |
colnames(data) <- colnames(ground_data) | |
combined.data <- rbind(data, ground_data) | |
combined.data$year <- NA | |
for(i in seq_len(nrow(combined.data))){ | |
t <- as.numeric(regexpr("[0-9]{4}", combined.data$SITE_ID[i]))[1] | |
combined.data$year[i] <- substr(combined.data$SITE_ID[i], t, t+4) | |
} | |
metadata <- read.xls("WEST CO SAGEBRUSH PLOTS.xlsx", as.is=TRUE) | |
combined.data$SITE_ID <- gsub(" ", "", combined.data$SITE_ID) | |
combined.data$lat <- metadata$LATITUDE[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
combined.data$long <- metadata$LONGITUDE[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
combined.data$elevation.ft <- metadata$Elev..ft.[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
combined.data$aspect <- metadata$Aspect[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
combined.data$pct.slope <- metadata$Pct_Slope[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
combined.data$project <- metadata$PROJECT[match(combined.data$SITE_ID, metadata$SITE_ID)] | |
combined.data$COVER_PERCENT <- as.numeric(combined.data$COVER_PERCENT) | |
combined.data$COVER_PERCENT[is.na(combined.data$COVER_PERCENT)] <- 0 | |
return(.df.melt(combined.data$NAME, | |
combined.data$SITE_ID, | |
combined.data$COVER_PERCENT, | |
data.frame(units="area"), | |
data.frame(id=unique(combined.data$SITE_ID), | |
year=combined.data$year[!duplicated(combined.data$SITE_ID)], | |
name=unique(combined.data$SITE_ID), | |
lat=combined.data$lat[!duplicated(combined.data$SITE_ID)], | |
long=combined.data$long[!duplicated(combined.data$SITE_ID)], | |
address=NA, | |
area="0.1ha", | |
elevation.ft=combined.data$elevation.ft[!duplicated(combined.data$SITE_ID)], | |
aspect=combined.data$aspect[!duplicated(combined.data$SITE_ID)], | |
pct.slope=combined.data$pct.slope[!duplicated(combined.data$SITE_ID)], | |
project=combined.data$project[!duplicated(combined.data$SITE_ID)]), | |
data.frame(species=unique(combined.data$NAME), | |
taxonomy=NA, | |
other="Plant study; Percent cover of species and ground"))) | |
} | |
#' @export | |
.ellison.2017 <- function(...){ | |
data <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-hfr.97.23&entityid=a840ed1f4c891cd7e6abe660aecb797a", header=TRUE) | |
species <- data$species | |
data$id <- rep(paste(data$plot,data$date)) | |
site.metadata <- data[!duplicated(data$id),] | |
site.metadata <- with(site.metadata, | |
data.frame(id=id, year=date, name=plot, lat=NA,long=NA, address="North of West Point, New York, USA",area=NA) | |
) | |
site <- rep(paste(data$plot,data$date), 3120) | |
abundance <- as.vector(data$no.ants) | |
abundance[is.na(abundance)] <- 0 | |
return(.df.melt(species, site, abundance, | |
study.metadata=data.frame(units="#"), | |
site.metadata, | |
species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
)) | |
} | |
#' @export | |
.collins.2018 <- function(...) { | |
# The species in this dataset are not named; Generic identifiers are given (e.g. 'sp1') | |
# Species codes were added due to people not wanting scott to publish their data. | |
data <- read.csv("https://pasta.lternet.edu/package/data/eml/edi/15/5/f69c8fe563067164191d61b6e33eff03", as.is=TRUE) | |
names(data) <- tolower(names(data)) | |
metadata <- read.csv("https://pasta.lternet.edu/package/data/eml/edi/15/5/8284876afe3a1cb0a919d37e1164357f", as.is=TRUE) | |
names(metadata) <- tolower(names(metadata)) | |
data$site_year <- with(data, paste(data$sitesubplot, experiment_year, sep="_")) | |
data$latitude <- metadata$lat[match(data$site_project_comm, metadata$site_project_comm)] | |
data$longitude <- metadata$long[match(data$site_project_comm, metadata$site_project_comm)] | |
data$address <- metadata$location[match(data$site_project_comm, metadata$site_project_comm)] | |
data$area <- metadata$plot_size[match(data$site_project_comm, metadata$site_project_comm)] | |
return(.df.melt(data$species, | |
data$site_year, | |
data$relcover, | |
data.frame(units="%"), | |
data.frame(id=unique(data$site_year), | |
year=data$experiment_year[!duplicated(data$site_year)], | |
name=data$sitesubplot[!duplicated(data$site_year)], | |
lat=data$latitude[!duplicated(data$site_year)], | |
long=data$longitude[!duplicated(data$site_year)], | |
address=data$address[!duplicated(data$site_year)], | |
area=data$area[!duplicated(data$site_year)]), | |
data.frame(species=unique(data$species), taxonomy="Plantae"))) | |
} | |
#' @export | |
.coblentz.2015 <- function(...){ | |
# This won't work on Windows OS. I might be wrong but I think that it has | |
# something to do with the spaces in the file name. | |
data <- read.xls(suppdata("10.5061/dryad.j2c13", "Invert Community Data 2012 RAW.xlsx"), stringsAsFactors=FALSE) | |
colnames(data) <- with(data, paste(colnames(data), data[3,], sep="_")) | |
data <- data[-1:-3,] | |
species <- data[,1] | |
data <- data[,-1] | |
data <- sapply(data, as.numeric) | |
rownames(data) <- species | |
return(.matrix.melt(data)) | |
} | |
#' @export | |
.chamailleJammes.2016 <- function(...){ | |
data <- read.csv(suppdata("10.1371/journal.pone.0153639", 1), stringsAsFactors=FALSE) | |
year <- (1992:2005)[-6] # Study excluded the year of 1997 | |
data <- aggregate(. ~ WATERHOLE, data = data, FUN=sum) | |
species <- colnames(data) | |
data <- reshape(data, varying = list(names(data)[2:ncol(data)]), v.names = "Count", | |
idvar = "WATERHOLE", times = c("ELEPHANT", "GIRAFFE", "IMPALA","KUDU", | |
"ROAN", "SABLE", "WILDEBEEST", "ZEBRA"), timevar = "species", direction = "long") | |
rownames(data) <- NULL | |
id <- unique(data$WATERHOLE) | |
year <- rep(year, each=length(id)) | |
temp <- paste(data$WATERHOLE, year, sep="_") | |
return(.df.melt(data$species, | |
data$WATERHOLE, | |
data$Count, | |
data.frame(units="#"), | |
data.frame(id=, lat="18", long="26", address="Hwange National Park, Zimbabwe, Africa", area=NA), | |
data.frame(species=unique(data$species, taxonomy="Mammalia")))) | |
} | |
.brant.2018 <- function(...){ | |
tmp.file <- tempfile() | |
download.file("https://zenodo.org/record/1198846/files/template_MosquitoDataBrant77.xlsx", tmp.file) | |
DailyHLC <- read.xls(tmp.file, sheet=4, as.is=TRUE, skip=9) | |
lookup <- read.xls(tmp.file, sheet=3, as.is=TRUE) | |
lookup[,2] <- .sanitize.text(lookup[,2]) | |
#lookup[,2] <- sapply(strsplit(lookup[,2], " "), function(x) paste(x[1:2],collapse="_")) | |
lookup <- setNames(lookup[,2], lookup[,1]) | |
names(DailyHLC) <- gsub("_count", "", names(DailyHLC), fixed=TRUE) | |
names(lookup) <- gsub(".", "_", names(lookup), fixed=TRUE) | |
names(DailyHLC)[names(DailyHLC) %in% names(lookup)] <- lookup[names(DailyHLC)[names(DailyHLC) %in% names(lookup)]] | |
DailyHLC$site_year <- with(DailyHLC, paste(field_name, Location, Date, sep="_")) | |
#community matrix | |
comm <- as.matrix(DailyHLC[,c(-1:-7,-ncol(DailyHLC))]) | |
rownames(comm) <- DailyHLC$site_year | |
site.metadata <- DailyHLC[,1:7] | |
species.meta <- data.frame(species=colnames(comm), taxonomy="Insecta") | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
data.frame(id=DailyHLC$site_year, name=site.metadata$Location, year=site.metadata$Date, lat="4.6353 to 4.9654", long="116.9542 to 117.8004", address="SAFE project, Borneo", area="attracted to humans"), | |
species.meta)) | |
} | |
.lightfoot.2016 <- function(...) { | |
data <- read.table("http://sev.lternet.edu/sites/default/files/data/sev-106/sev106_hopperdynamics_20150826.txt", header=T, sep=",") | |
data$month.year <- format(as.Date(data$DATE, format="%m/%d/%Y"),"%m/%Y") | |
spec_codes <- c("ACPI","AGDE","AMCO","ARCO","ARPS","AUEL","AUFE","BOAR", | |
"BRMA","CIPA","COCR","COOC","COTE","DABI","ERSI","HATR", | |
"HERU","HEVI","HICA","LAAZ","LEWH","MEAR","MEAZ","MEBO", | |
"MEGL","MELA","MEOC","METE","OPOB","PAPA","PHQU","PHRO", | |
"PSDE","PSTE","SCNI","SYMO","TRCA","TRFO","TRKI","TRPA", | |
"TRPI","XACO","XAMO") | |
species <- c("Acantherus piperatus","Ageneotettix deorum", | |
"Amphitornus coloradus","Arphia conspersa", | |
"Arphia pseudonietana","Aulocara elliotti", | |
"Aulocara femoratum","Bootettix argentatus", | |
"Brachystola magna","Cibolacris parviceps", | |
"Cordillacris crenulata","Cordillacris occipitalis", | |
"Conozoa texana","Dactylotum bicolor", | |
"Eritettix simplex","Hadtrotettix trifasciatus", | |
"Heliaula rufa","Hesperotettix viridis", | |
"Hippopedon capito","Lactista azteca","Leprus wheeleri", | |
"Melanoplus aridus","Melanoplus arizonae", | |
"Melanoplus bowditchi","Melanoplus gladstoni", | |
"Melanoplus lakinus","Melanoplus occidentalis", | |
"Mermeria texana","Opeia obscura","Paropomala pallida", | |
"Phlibostroma quadrimaculatum","Phrynotettix robustus", | |
"Psoloessa delicatula","Psoloessa texana", | |
"Schistocerca nitens","Syrbula montezuma", | |
"Trimerotropis californicus","Tropidolophus formosus", | |
"Trachyrhachis kiowa","Trimerotropis pallidipennis", | |
"Trimerotropis pistrinaria","Xanthippus corallipes", | |
"Xanthippus montanus") | |
metadata <- data.frame(spec_codes, species) | |
data$SPECIES <- metadata$species[match(data$SPECIES, metadata$spec_codes)] | |
data <- with(data, tapply(CNT, list(site_year, SPECIES), sum, na.rm=TRUE)) | |
data$site_year <- with(data, paste(SITE, year, sep="_")) | |
temp <- strsplit(rownames(data), "_") | |
year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2] | |
year <- format(as.Date(data$DATE, format="%m/%Y"),"%Y") | |
name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1] | |
#needs some "burned" info?... | |
return(.df.melt(data$species, data$SITE, data$CNT, | |
data.frame(units="#"), | |
data.frame(id=unique(data$plot_year), year, name, lat=NA, long=NA, address="Sevilleta National Wildlife Refuge, New Mexico", area=NA), | |
data.frame(species=unique(data$species), taxonomy="Orthoptera"))) | |
} | |
.fia.2018 <- function(...){ | |
.get.fia <- function(state, var, select){ | |
t.zip <- tempfile() | |
download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip) | |
unzip(t.zip) | |
data <- fread(paste0(state,"_",var,".csv"), select=select) | |
unlink(paste0(state,"_",var,".csv")) | |
return(data) | |
} | |
states <- c("AK","AL","AZ","AR","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","ME","MD", | |
"MA","MI","MN","MS","MO","MT","NC","NE","NH","NV","NM","NJ","NY","ND","OH","OK","OR","PA","RI", | |
"SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY","VI","PR") | |
data <- vector("list", length(states)) | |
for(i in seq_along(states)){ | |
#Download/read in data | |
tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR")) | |
cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID")) | |
plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN")) | |
#Subset everything, remove sites with multiple/ambiguous codings, merge | |
tree <- tree[tree$DIA > 1.96,] | |
cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),] | |
data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN") | |
data[[i]]$state <- states[i] | |
} | |
data <- rbindlist(data) | |
t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN)) | |
data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)]) | |
uniq.site <- as.data.frame(unique(data[, 15:16])) | |
sample.sites <- as.data.frame(uniq.site %>% group_by(state) %>% sample_n(size = 30)) | |
data <- merge(sample.sites, data, by="site.id") | |
data$site.id <- paste0(data$site.id, "_", data$INVYR) | |
fia.spp <- read.csv("FIA_SppList.csv") #currently in raw_data folder | |
fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES)) | |
data <- merge(data, fia.spp, by.x="SPCD", by.y="V1") | |
data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV, | |
data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA) | |
names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter") | |
comm <- t(as.matrix(with(data, table(species,site.id)))) | |
dia <- aggregate(diameter~species, data, mean) | |
dia.count <- aggregate(diameter~species, data, length) | |
dia$diameter.n <- dia.count$diameter | |
site.df <- data[!duplicated(data$site.id),] | |
site.df <- site.df[,2:8] | |
sites <- rownames(comm) | |
site.df <- site.df[match(sites, site.df$site.id), ] | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat, | |
long=site.df$long, address=NA, area=NA, | |
elevation=site.df$elev, class=site.df$forestclass), | |
data.frame(species=dia$species, taxonomy=NA, diameter=dia$diameter))) | |
} | |
.tomasovych.2010a <- function(...){ | |
species <- read.xls(suppdata("10.5061/dryad.1225", "abundances-S California 1975.xls"), skip=1, header=TRUE) | |
species.clean <- species[,-1] | |
comm <- t(as.matrix(species.clean)) | |
rownames(comm) <- species$X | |
rownames(comm) | |
} | |
.mendonca.2018 <- function(...){ | |
# need to fix the years | |
tmp <- tempfile() | |
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp) | |
data <- read.csv(.unzip("CERRADO_SM_Capture.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
data <- data[!is.na(data$Individuals_captured),] | |
data$Year_finsh <- as.numeric(data$Year_finish) | |
data <- data[!is.na(data$Year_finish),] | |
ids <- paste(data$id, data$Year_finish) | |
#ids <- ids[-c(1513:1536)] | |
# lat/long data | |
tmp2 <- tempfile() | |
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp2) | |
ll_data <- read.csv(.unzip("CERRADO_SM_Study_Site.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
ll_data <- ll_data[,c(1,7,8)] | |
ll_data$id <- unique(ids) | |
names(ll_data) <- c("id", "lat", "long") | |
ll_data$year <- ll_data$id; ll_data$name <- ll_data$id | |
ll_data$address <- "Cerrado ecosystem: Brazil, Boliva, Paraguay"; ll_data$area <- "live_trap" | |
return(.df.melt(data$Actual_species_name, | |
ids, | |
data$Individuals_captured, | |
data.frame(units = "#"), | |
ll_data, | |
data.frame(species = unique(data$Actual_species_name), taxonomy = "Animalia") | |
) | |
) | |
} | |
# Error in data.frame(id = rownames(data), year = years, name = | |
# names, lat = NA, : arguments imply differing number of rows: 20, | |
# 24, 1 | |
.sepulveda.2016 <- function(...){ | |
tmp <- tempfile() | |
download.file("http://journals.plos.org/plosone/article/file?type=supplementary&id=info:doi/10.1371/journal.pone.0157910.s001", tmp) | |
data <- read.xls(tmp, 1, skip=1, fileEncoding="Latin1") | |
data <- data[1:20,] | |
years <- colnames(data)[2:25] | |
names(data) <- c("species", paste(rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4), names(data)[2:25], sep="_")) | |
d2 <- t(data) | |
names <- rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4) | |
return(.matrix.melt(data, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = years, name= names, lat= NA, long= NA, address="Southwestern Chilean coast", area = NA), | |
data.frame(species=colnames(data), taxonomy = NA) | |
) | |
) | |
} | |
# Metadata woes | |
.bried.2017 <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.151171/Dryad.data.xlsx?sequence=1", tmp) | |
data <- read.xls(tmp, 1) | |
n <- paste(data$Latitude, data$Longitude, sep = "_") | |
comm <- data[,-c(1:4)] | |
comm$Region <- n | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2017, name = , lat= , long = , address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Insecta") | |
) | |
) | |
} | |
# datasets on chiclids - each function downloads a community | |
# dataset for a different region | |
# Kigoma town | |
.britton.2017.a <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148126/BrittonEtAl2017_KigomaTown.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Kigoma deforested | |
.britton.2017.b <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148127/BrittonEtAl2017_KigomaDeforested.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Kalilani village | |
.britton.2017.c <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148128/BrittonEtAl2017_KalilaniVillage.csv?sequence=1", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Jakobsen's beach | |
.britton.2017.d <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148129/BrittonEtAl2017_Jakobsen%27sBeach.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Gombe stream | |
.britton.2017.e <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148130/BrittonEtAl2017_GombeNP.csv?sequence=1", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Mahale mountain 1 | |
.britton.2017.f <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148131/BrittonEtAl2017_MahaleNPS1.csv?sequence=1", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Mahale mountain 2 | |
.britton.2017.g <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148132/BrittonEtAl2017_MahaleNPS2.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
.drew.2015<-function(...){ | |
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.93108/Supplemental%201.csv?sequence=1",as.is = TRUE) | |
expdata$binom<-paste(expdata$Genus,expdata$species,sep=".") | |
comm<-t(expdata[,4:6]) | |
colnames(comm)<-expdata$binom | |
#meta data is basically not a thing, so this may need to be scrapped after all | |
return(.matrix.melt(comm, | |
data.frame(units="p/a"), | |
sitedata, | |
data.frame(speccies=expdata$binom,taxonomy=NA) | |
) | |
) | |
} | |
.osuri.2016<-function(...){ | |
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.109139/Osuri_Sanakran_2016_JAE_plot_data.csv?sequence=2",as.is = TRUE) | |
comm <- with(expdata, tapply(species, list(species, site.name), length)) | |
comm[is.na(comm)] <- 0 | |
comm<-t(comm) | |
#meta data is limited, what could be easily found is in the expdata data frame | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
sitedata, | |
data.frame(species=expdata$species,taxonomy=NA) | |
) | |
) | |
} | |
.helmus.2013 <- function(...){ | |
library(pez) # This isn't how we declare packages in 'real' | |
# packages for the time being this is sufficient | |
data(laja) | |
return(.matrix.melt(invert.sites)) | |
} | |
.jain.2017 <- function(...){ | |
species <- read.xls(suppdata("10.5061/dryad.177q4", "Jain_etal_2016_Butterfly%20abundance%20across%20sites_22Dec2016.xlsx"), skip=5, header=TRUE, as.is=TRUE) | |
species.clean <- species[,c(-1:-15,-38)] | |
comm <- t(as.matrix(species.clean)) | |
colnames(comm) <- species$Scientific.name | |
return(.matrix.melt(comm, | |
data.frame(units="#", treatment=NA), | |
data.frame(id=rownames(comm), year=site.metadata$Date, name=site.metadata$SiteCombo, lat=NA, long=NA, address = "British Columbia", area=site.metadata$HaSurveyed), | |
data.frame(species=colnames(comm), taxonomy=NA))) | |
} | |
.lightfoot.2016 <- function(...) { | |
data <- read.table("http://sev.lternet.edu/sites/default/files/data/sev-106/sev106_hopperdynamics_20150826.txt", header=T, sep=",") | |
data$month.year <- format(as.Date(data$DATE, format="%m/%d/%Y"),"%m/%Y") | |
spec_codes <- c("ACPI","AGDE","AMCO","ARCO","ARPS","AUEL","AUFE","BOAR", | |
"BRMA","CIPA","COCR","COOC","COTE","DABI","ERSI","HATR", | |
"HERU","HEVI","HICA","LAAZ","LEWH","MEAR","MEAZ","MEBO", | |
"MEGL","MELA","MEOC","METE","OPOB","PAPA","PHQU","PHRO", | |
"PSDE","PSTE","SCNI","SYMO","TRCA","TRFO","TRKI","TRPA", | |
"TRPI","XACO","XAMO") | |
species <- c("Acantherus piperatus","Ageneotettix deorum", | |
"Amphitornus coloradus","Arphia conspersa", | |
"Arphia pseudonietana","Aulocara elliotti", | |
"Aulocara femoratum","Bootettix argentatus", | |
"Brachystola magna","Cibolacris parviceps", | |
"Cordillacris crenulata","Cordillacris occipitalis", | |
"Conozoa texana","Dactylotum bicolor", | |
"Eritettix simplex","Hadtrotettix trifasciatus", | |
"Heliaula rufa","Hesperotettix viridis", | |
"Hippopedon capito","Lactista azteca","Leprus wheeleri", | |
"Melanoplus aridus","Melanoplus arizonae", | |
"Melanoplus bowditchi","Melanoplus gladstoni", | |
"Melanoplus lakinus","Melanoplus occidentalis", | |
"Mermeria texana","Opeia obscura","Paropomala pallida", | |
"Phlibostroma quadrimaculatum","Phrynotettix robustus", | |
"Psoloessa delicatula","Psoloessa texana", | |
"Schistocerca nitens","Syrbula montezuma", | |
"Trimerotropis californicus","Tropidolophus formosus", | |
"Trachyrhachis kiowa","Trimerotropis pallidipennis", | |
"Trimerotropis pistrinaria","Xanthippus corallipes", | |
"Xanthippus montanus") | |
metadata <- data.frame(spec_codes, species) | |
data$SPECIES <- metadata$species[match(data$SPECIES, metadata$spec_codes)] | |
data <- with(data, tapply(CNT, list(site_year, SPECIES), sum, na.rm=TRUE)) | |
data$site_year <- with(data, paste(SITE, year, sep="_")) | |
temp <- strsplit(rownames(data), "_") | |
year <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,2] | |
year <- format(as.Date(data$DATE, format="%m/%Y"),"%Y") | |
name <- matrix(unlist(temp), ncol=2, byrow=TRUE)[,1] | |
#needs some "burned" info?... | |
return(.df.melt(data$species, data$SITE, data$CNT, | |
data.frame(units="#"), | |
data.frame(id=unique(data$plot_year), year, name, lat=NA, long=NA, address="Sevilleta National Wildlife Refuge, New Mexico", area=NA), | |
data.frame(species=unique(data$species), taxonomy="Orthoptera"))) | |
} | |
.fia.2018 <- function(...){ | |
.get.fia <- function(state, var, select){ | |
t.zip <- tempfile() | |
download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip) | |
unzip(t.zip) | |
data <- fread(paste0(state,"_",var,".csv"), select=select) | |
unlink(paste0(state,"_",var,".csv")) | |
return(data) | |
} | |
states <- c("AK","AL","AZ","AR","CA","CO","CT","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","ME","MD", | |
"MA","MI","MN","MS","MO","MT","NC","NE","NH","NV","NM","NJ","NY","ND","OH","OK","OR","PA","RI", | |
"SC","SD","TN","TX","UT","VA","VT","WA","WI","WV","WY","VI","PR") | |
data <- vector("list", length(states)) | |
for(i in seq_along(states)){ | |
#Download/read in data | |
tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR")) | |
cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID")) | |
plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN")) | |
#Subset everything, remove sites with multiple/ambiguous codings, merge | |
tree <- tree[tree$DIA > 1.96,] | |
cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),] | |
data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN") | |
data[[i]]$state <- states[i] | |
} | |
data <- rbindlist(data) | |
t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN)) | |
data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)]) | |
uniq.site <- as.data.frame(unique(data[, 15:16])) | |
sample.sites <- as.data.frame(uniq.site %>% group_by(state) %>% sample_n(size = 30)) | |
data <- merge(sample.sites, data, by="site.id") | |
data$site.id <- paste0(data$site.id, "_", data$INVYR) | |
fia.spp <- read.csv("FIA_SppList.csv") #currently in raw_data folder | |
fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES)) | |
data <- merge(data, fia.spp, by.x="SPCD", by.y="V1") | |
data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV, | |
data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA) | |
names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter") | |
comm <- t(as.matrix(with(data, table(species,site.id)))) | |
dia <- aggregate(diameter~species, data, mean) | |
dia.count <- aggregate(diameter~species, data, length) | |
dia$diameter.n <- dia.count$diameter | |
site.df <- data[!duplicated(data$site.id),] | |
site.df <- site.df[,2:8] | |
sites <- rownames(comm) | |
site.df <- site.df[match(sites, site.df$site.id), ] | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat, | |
long=site.df$long, address=NA, area=NA, | |
elevation=site.df$elev, class=site.df$forestclass), | |
data.frame(species=dia$species, taxonomy=NA, diameter=dia$diameter))) | |
} | |
.tomasovych.2010a <- function(...){ | |
species <- read.xls(suppdata("10.5061/dryad.1225", "abundances-S California 1975.xls"), skip=1, header=TRUE) | |
species.clean <- species[,-1] | |
comm <- t(as.matrix(species.clean)) | |
rownames(comm) <- species$X | |
rownames(comm) | |
} | |
.mendonca.2018 <- function(...){ | |
# need to fix the years | |
tmp <- tempfile() | |
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp) | |
data <- read.csv(.unzip("CERRADO_SM_Capture.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
data <- data[!is.na(data$Individuals_captured),] | |
data$Year_finsh <- as.numeric(data$Year_finish) | |
data <- data[!is.na(data$Year_finish),] | |
ids <- paste(data$id, data$Year_finish) | |
#ids <- ids[-c(1513:1536)] | |
# lat/long data | |
tmp2 <- tempfile() | |
download.file("https://esajournals.onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1002%2Fecy.2367&attachmentId=2208200269", tmp2) | |
ll_data <- read.csv(.unzip("CERRADO_SM_Study_Site.csv", tmp), as.is=TRUE, fileEncoding = "Latin1") | |
ll_data <- ll_data[,c(1,7,8)] | |
ll_data$id <- unique(ids) | |
names(ll_data) <- c("id", "lat", "long") | |
ll_data$year <- ll_data$id; ll_data$name <- ll_data$id | |
ll_data$address <- "Cerrado ecosystem: Brazil, Boliva, Paraguay"; ll_data$area <- "live_trap" | |
return(.df.melt(data$Actual_species_name, | |
ids, | |
data$Individuals_captured, | |
data.frame(units = "#"), | |
ll_data, | |
data.frame(species = unique(data$Actual_species_name), taxonomy = "Animalia") | |
) | |
) | |
} | |
# Error in data.frame(id = rownames(data), year = years, name = | |
# names, lat = NA, : arguments imply differing number of rows: 20, | |
# 24, 1 | |
.sepulveda.2016 <- function(...){ | |
tmp <- tempfile() | |
download.file("http://journals.plos.org/plosone/article/file?type=supplementary&id=info:doi/10.1371/journal.pone.0157910.s001", tmp) | |
data <- read.xls(tmp, 1, skip=1, fileEncoding="Latin1") | |
data <- data[1:20,] | |
years <- colnames(data)[2:25] | |
names(data) <- c("species", paste(rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4), names(data)[2:25], sep="_")) | |
d2 <- t(data) | |
names <- rep(c("Cocholgue", "Hualpen", "Llico", "Mehuin", "La Mision", "Maicolpue"),each=4) | |
return(.matrix.melt(data, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = years, name= names, lat= NA, long= NA, address="Southwestern Chilean coast", area = NA), | |
data.frame(species=colnames(data), taxonomy = NA) | |
) | |
) | |
} | |
# Metadata woes | |
.bried.2017 <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.151171/Dryad.data.xlsx?sequence=1", tmp) | |
data <- read.xls(tmp, 1) | |
n <- paste(data$Latitude, data$Longitude, sep = "_") | |
comm <- data[,-c(1:4)] | |
comm$Region <- n | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2017, name = , lat= , long = , address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Insecta") | |
) | |
) | |
} | |
# datasets on chiclids - each function downloads a community | |
# dataset for a different region | |
# Kigoma town | |
.britton.2017.a <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148126/BrittonEtAl2017_KigomaTown.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Kigoma deforested | |
.britton.2017.b <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148127/BrittonEtAl2017_KigomaDeforested.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Kalilani village | |
.britton.2017.c <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148128/BrittonEtAl2017_KalilaniVillage.csv?sequence=1", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Jakobsen's beach | |
.britton.2017.d <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148129/BrittonEtAl2017_Jakobsen%27sBeach.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Gombe stream | |
.britton.2017.e <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148130/BrittonEtAl2017_GombeNP.csv?sequence=1", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Mahale mountain 1 | |
.britton.2017.f <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148131/BrittonEtAl2017_MahaleNPS1.csv?sequence=1", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
# Mahale mountain 2 | |
.britton.2017.g <- function(...){ | |
tmp <- tempfile() | |
download.file("https://datadryad.org/bitstream/handle/10255/dryad.148132/BrittonEtAl2017_MahaleNPS2.csv?sequence=3", tmp) | |
data <- read.csv(tmp, skip=1) | |
data <- data[-c(1,2),] | |
names(data)[1] <- "species" | |
comm <- t(data) | |
return(.matrix.melt(comm, | |
data.frame(units = "#"), | |
data.frame(id = rownames(data), year = 2016, name = , lat = NA, long = NA, address = "", area = NA), | |
data.frame(species = colnames(data), taxonomy = "Chiclidae") | |
)) | |
} | |
.drew.2015<-function(...){ | |
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.93108/Supplemental%201.csv?sequence=1",as.is = TRUE) | |
expdata$binom<-paste(expdata$Genus,expdata$species,sep=".") | |
comm<-t(expdata[,4:6]) | |
colnames(comm)<-expdata$binom | |
#meta data is basically not a thing, so this may need to be scrapped after all | |
return(.matrix.melt(comm, | |
data.frame(units="p/a"), | |
sitedata, | |
data.frame(speccies=expdata$binom,taxonomy=NA) | |
) | |
) | |
} | |
.osuri.2016<-function(...){ | |
expdata<-read.csv("https://datadryad.org/bitstream/handle/10255/dryad.109139/Osuri_Sanakran_2016_JAE_plot_data.csv?sequence=2",as.is = TRUE) | |
comm <- with(expdata, tapply(species, list(species, site.name), length)) | |
comm[is.na(comm)] <- 0 | |
comm<-t(comm) | |
#meta data is limited, what could be easily found is in the expdata data frame | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
sitedata, | |
data.frame(species=expdata$species,taxonomy=NA) | |
) | |
) | |
} | |
.helmus.2013 <- function(...){ | |
library(pez) # This isn't how we declare packages in 'real' | |
# packages for the time being this is sufficient | |
data(laja) | |
return(.matrix.melt(invert.sites)) | |
} | |
.jain.2017 <- function(...){ | |
species <- read.xls(suppdata("10.5061/dryad.177q4", "Jain_etal_2016_Butterfly%20abundance%20across%20sites_22Dec2016.xlsx"), skip=5, header=TRUE, as.is=TRUE) | |
species.clean <- species[,c(-1:-15,-38)] | |
comm <- t(as.matrix(species.clean)) | |
colnames(comm) <- species$Scientific.name | |
return(.matrix.melt(comm, | |
data.frame(units="#", treatment=NA), | |
data.frame(id=rownames(comm), year=site.metadata$Date, name=site.metadata$SiteCombo, lat=NA, long=NA, address = "British Columbia", area=site.metadata$HaSurveyed), | |
data.frame(species=colnames(comm), taxonomy=NA))) | |
} | |
## Error in data.frame(id = id, year = YEAR, name = Waterbody_Name, lat = lat, : arguments imply differing number of rows: 7556, 20027, 1 | |
## does not yet work | |
.rypel.2018 <- function(...){ | |
tmp.file <- tempfile() | |
download.file("https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=829ef0e4eea5e6392b19e595aa775832", tmp.file) | |
abun <- read.csv(tmp.file, header=TRUE) | |
taxon_inf <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=490295acdaf716c90b58a5a089ab9847",header=TRUE) | |
location <- read.csv(file="https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.356.3&entityid=3c23c7e39d30f047fe6b229d85df2a88",header=TRUE) | |
abun <- merge(abun, taxon_inf, by = "taxon_id") | |
data <- merge(abun, location, by = "WBIC") | |
species <- data$taxon_name | |
lat <- data$Latitude | |
long <- data$Longitude | |
data$id <- rep(paste(data$Waterbody_Name,data$YEAR)) | |
site.metadata <- data[!duplicated(data$id),] | |
site.metadata <- with(site.metadata, | |
data.frame(id=id, year=YEAR, name=Waterbody_Name, lat=lat,long=long, address="Wisconsin USA",area=NA) | |
) | |
site <- rep(paste(data$Waterbody_Name,data$Year), 7556) | |
data$site.id <- paste(data$Waterbody_Name,data$Year) | |
comm <- with(data, tapply(N, list(site.id, taxon_name), sum)) | |
comm[is.na(comm)] <- 0 | |
return(.df.melt(species, site, comm, | |
study.metadata=data.frame(units="#"), | |
site.metadata, | |
species.metadata=data.frame(species=unique(species), taxonomy=NA) | |
)) | |
} | |
################################ | |
# ARGON FUNCTIONS ############## | |
# - WORKING BUT NOT DATA RELEASE | |
################################ | |
if(FALSE){ | |
#' @export | |
.branstetter.2018 <- function(...) { | |
data <- read.csv("TableA3.csv") | |
metadata <- read.csv("TableA2.csv") | |
rownames(data) <- data[,1] | |
data[,1] <- NULL | |
colnames(data) <- gsub(".", "-", colnames(data), fixed=TRUE) | |
data <- t(data) | |
rownames(data) <- paste(rownames(data), year, sep="_") | |
metadata$year <- format(as.Date(metadata$datecollected, format="%d-%b-%Y"),"%Y") | |
year <- metadata$year[!duplicated(metadata$site)] | |
name <- unique(metadata$site) | |
lat <- metadata$latitude[!duplicated(metadata$site)] | |
long <- metadata$longitude[!duplicated(metadata$site)] | |
return(.matrix.melt(data, | |
data.frame(units="#"), | |
data.frame(id=rownames(data), year, name, lat, long, | |
address=NA, area=NA), | |
data.frame(species=colnames(data), taxonomy="Hymenoptera"))) | |
} | |
.cobb.2016 <- function(...) { | |
data <- read.xls("COMPLETE Dataset as of 4_recovery2.xlsx") | |
data$name <- with(data, paste("study.area", Study.Area, "site", Site, sep="_")) | |
data$month.year <- with(data, paste(Month, Year, sep="-")) | |
data$site.year <- with(data, paste(name, month.year, sep="_")) | |
metadata <- data[,c(1:11, 148, 149, 150)] | |
metadata$Longitude <- gsub("\342\200\223", "-", metadata$Longitude) | |
data[,1] <- data$site.year | |
data[,c(2:11, 148, 149, 150)] <- NULL | |
data <- aggregate(.~Sample.., data=data, FUN=sum) | |
rownames(data) <- data[,1] | |
data[,1] <- NULL | |
rownames <- rownames(data) | |
data <- apply(data, 2, as.numeric) | |
rownames(data) <- rownames | |
name <- metadata$name[match(rownames(data), metadata$site.year)] | |
year <- metadata$Year[match(rownames(data), metadata$site.year)] | |
lat <- metadata$Latitude[match(rownames(data), metadata$site.year)] | |
long <- metadata$Longitude[match(rownames(data), metadata$site.year)] | |
veg.type <- metadata$Veg.type[match(rownames(data), metadata$site.year)] | |
burned <- metadata$Burn[match(rownames(data), metadata$site.year)] | |
monsoon <- metadata$Monsoon[match(rownames(data), metadata$site.year)] | |
return(.matrix.melt(data, | |
data.frame(units="STD.#"), | |
data.frame(id=rownames(data), | |
year, | |
name, | |
lat, | |
long, | |
address=NA, | |
area=NA, | |
veg.type, | |
burned, | |
monsoon), | |
data.frame(species=colnames(data), taxonomy="Arthropoda"))) | |
} | |
.mooney.2018 <- function(...) { | |
#will need to loop through and do this for each year (sheet) in the dataset. | |
data <- read.xls("Insect Abundance Population Summaries.xlsx", sheet="#") | |
data <- data[which(data$Response == "Total"),] | |
#remove all rows that contain only NA values | |
data <- data[ ,!apply(data, 2, function(x) all(is.na(x)))] | |
data <- melt(data, id=c("Population", "Response")) | |
} | |
.dyer.2017 <- function(...) { | |
# Location is not always GPS coordinates in this dataset. Some are descriptions or titles of the locations. | |
# Some of the values in data are blank. These do not mean that the value is zero but that the data is not complete. (Lee has the code to complete it). | |
data <- read.xls("SWRS_plots_updated_nov_3_2017.xlsx") | |
data<-data[,1:24] | |
data$year <- format(as.Date(data$Date..D.M.Y., format="%Y-%m-%d"),"%Y") | |
data$plot.year <- with(data, paste(X.number, year, sep=".")) | |
return(.df.melt(data$plant.sp, | |
data$plot.year, | |
data$Leaf.area..cm.2., | |
data.frame(units="area"), | |
data.frame(id=unique(data$plot.year), | |
year=data$year[!duplicated(data$plot.year)], | |
name=data$X.number[!duplicated(data$plot.year)], | |
lat=NA, | |
long=NA, | |
address=NA, | |
area="cm2"), | |
data.frame(species=unique(data$plant.sp), taxonomy="Plantae"))) | |
} | |
} | |
#' @export | |
.fia.2018 <- function(...){ | |
.get.fia <- function(state, var, select){ | |
t.zip <- tempfile() | |
download.file(paste0("https://apps.fs.usda.gov/fia/datamart/CSV/",state,"_",var,".zip"), t.zip) | |
unzip(t.zip) | |
data <- fread(paste0(state,"_",var,".csv"), select=select) | |
unlink(paste0(state,"_",var,".csv")) | |
return(data) | |
} | |
states <- c("AL", "AK") #c("AL","AK","AZ","CA","CO","FL","GA","HI","KS","MD","MA","MI","NH","NM","ND","OK","TN","TX","UT","VA","WA","WI","WY") | |
data <- vector("list", length(states)) | |
for(i in seq_along(states)){ | |
#Download/read in data | |
tree <- .get.fia(states[i], "TREE", c("CN","PLT_CN","PLOT","SPCD","DIA","INVYR")) | |
cond <- .get.fia(states[i], "COND", c("PLT_CN","PLOT","STDAGE","FORTYPCD","CONDID")) | |
plot <- .get.fia(states[i], "PLOT", c("PLOT","LAT","LON","ELEV", "CN")) | |
#Subset everything, remove sites with multiple/ambiguous codings, merge | |
tree <- tree[tree$DIA > 1.96,] | |
cond <- cond[cond$PLT_CN %in% as.integer64(names(Filter(function(x) x==1, table(cond$PLT_CN)))),] | |
data[[i]] <- merge(tree, merge(cond, plot, by.x="PLT_CN", by.y="CN"), by.x="PLT_CN", by.y="PLT_CN") | |
data[[i]]$state <- states[i] | |
} | |
data <- rbindlist(data) | |
t <- setNames(seq_along(unique(data$PLT_CN)), unique(data$PLT_CN)) | |
data$site.id <- paste0(data$state, "_", t[as.character(data$PLT_CN)]) | |
data$site.id <- paste0(data$site.id, "_", data$INVYR) | |
rndata <- with(data, ave(data, state, FUN=function(x) {sample.int(length(x))})) | |
fia.spp <- read.csv("FIA_SppList.csv") #currently in the pglmm raw data folder | |
fia.spp <- data.table(fia.spp$SPCD, paste0(fia.spp$GENUS, "_", fia.spp$SPECIES)) | |
data <- merge(data, fia.spp, by.x="SPCD", by.y="V1") | |
data <- data.frame(data$V2, data$site.id, data$LAT, data$LON, data$ELEV, | |
data$STDAGE, data$FORTYPCD, data$CONDID, data$DIA) | |
names(data) <- c("species", "site.id", "lat", "long", "elev", "stdage", "forestclass", "condclass", "diameter") | |
comm <- t(as.matrix(with(data, table(species,site.id)))) | |
# To get mean diameter of each species at each site: | |
dia <- aggregate(diameter~species, data, mean) | |
# To get count of diameters : | |
dia.count <- aggregate(diameter~species, data, length) | |
# data frame with diameter mean and count per species-site combination | |
dia$diameter.n <- dia.count$diameter | |
site.df <- data[!duplicated(data$site.id),] | |
site.df <- site.df[,2:8] | |
#site.df$site.id <- as.character(site.df$site.id); dia$species <- as.character(dia$species) | |
sites <- rownames(comm) | |
site.df <- site.df[match(sites, site.df$site.id), ] | |
return(.matrix.melt(comm, | |
data.frame(units="#"), | |
data.frame(id=site.df$site.id, name=NA, year=NA, lat=site.df$lat, | |
long=site.df$long, address=NA, area=NA, | |
elevation=site.df$elev, class=site.df$forestclass), | |
data.frame(species=dia$species, taxonomy=NA))) | |
} | |
#' @export | |
.heidi.2018 <- function(...) { | |
data <- read.xls("Heidi_Species_Cover_2017_Final_121817.xlsx", sheet=2, stringsAsFactors=FALSE) | |
metadata <- read.xls("SiteSpeciesList_argon.xlsx", fileEncoding="latin1", stringsAsFactors=FALSE) | |
data$geo <- metadata$Lat[match(data$Site, metadata$Site.Name)] | |
data$lat <- NA | |
data$long <- NA | |
temp <- strsplit(data$geo, split=",") | |
data$lat[1:471] <- matrix(unlist(temp[1:471]), ncol=2, byrow=TRUE)[,1] | |
data$long[1:471] <- matrix(unlist(temp[1:471]), ncol=2, byrow=TRUE)[,2] | |
data$Date <- format(as.Date(data$Date, format="%Y-%m-%d"),"%Y") | |
data$site_plot <- with(data, paste(Site, Plot, Date, sep="_")) | |
site.id <- unique(data$site_plot) | |
year <- data$Date[!duplicated(data$site_plot)] | |
name <- data$Site[!duplicated(data$site_plot)] | |
return(.df.melt(data$Species.Ground.Cover, | |
data$site_plot, | |
data$Count, | |
data.frame(units="#"), | |
data.frame(id=unique(data$site_plot), year, name, lat=data$lat[!duplicated(data$site_plot)], long=data$long[!duplicated(data$site_plot)], address=NA, area=NA), | |
data.frame(species=unique(data$Species.Ground.Cover), taxonomy=NA))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment