-
-
Save kgturner/6643334 to your computer and use it in GitHub Desktop.
#Extract climate data from WorldClim.org tiles for several locations and make data table | |
#Kathryn Turner Sept 16, 2013 | |
#download and unzip all relevant WorldClim geoTIFF files into a single directory. | |
#I used the highest resolution (~1km2), but should work for other resolutions too. | |
#load packages: raster, rgdal, foreach | |
library(rgdal) | |
library(raster) | |
library(foreach) | |
#Read names of all files in directory into a list | |
#from http://stackoverflow.com/questions/5319839/read-multiple-csv-files-into-separate-data-frames | |
filenames <- list.files(path="~/data_directory/") | |
#Load all geoTIFF files | |
for(i in filenames){ | |
filepath <- file.path("~/data_directory/",i) | |
assign(i, raster(filepath)) | |
} | |
#check that all files loaded properly by raster | |
#from http://stackoverflow.com/questions/15387727/use-object-names-as-list-names-in-r | |
list <- mget(filenames, envir=globalenv()) | |
for(i in list){ | |
if (hasValues(i)==FALSE){ | |
print(i,"hasValues error") | |
} | |
if (inMemory(i)==TRUE){ | |
print(i, "inMemory error") | |
} | |
else{ | |
print("All checked out!") | |
} | |
} | |
#load table of latitude and longitude for locations of interest | |
pop <- read.table("pop.txt", header=T, sep="\t") | |
row.names(pop) <- pop$Pop | |
pop$Pop <- as.factor(pop$Pop) | |
> head(pop) | |
Latitude Longitude Pop | |
CA001 49.01794 -123.08185 CA001 | |
CA008 49.01208 -118.64571 CA008 | |
CA009 49.29610 -118.47383 CA009 | |
CA010 49.32018 -118.37044 CA010 | |
US014 40.12227 -101.28028 US014 | |
BG001 43.38943 28.45741 BG001 | |
####NOTE: WorldClim data, even at highest res, is averaged over 1 km2. | |
#If your location is too close to a coast (i.e. less than 1 km2), | |
#there will not be any information (nothing but NAs) in this data set. | |
#Therefore, it may be necessary to approximate some locations by moving | |
#them inland. I did this using Google Earth. | |
#load location coordinates as SpatialPoints | |
for(i in pop$Pop){ | |
assign(i,SpatialPoints(as.matrix(t(c(pop[i,2], pop[i,1]))))) | |
} | |
#check that SpatialPoints load correctly from geoTIFFs | |
poplist <- mget(levels(pop$Pop), envir=globalenv()) | |
tiffvector <- unlist(list) | |
#Optional quality check step. For smaller datasets, will tell you which population locations should be adjusted, | |
#in other words, which rows are all NA. See Note above, line 51. Or check after extracting data, see line | |
foreach(p=poplist, .combine='rbind') %:% | |
foreach(t=tiffvector, .combine='cbind') %do%{ | |
is.na(extract(t,p)) | |
} #may take a while | |
#make climate data table | |
climate <- foreach(p=poplist, .combine='rbind') %:% | |
foreach(t=tiffvector, .combine='cbind') %do%{ | |
myValue<-extract(t, p) | |
} #may take a while | |
#tidy table | |
popnames <- sort(as.character(pop$Pop)) | |
clim <- as.data.frame(climate, row.names=popnames) | |
colnames(clim) <- filenames | |
#To check for populations that need to be adjusted/rows that are all NAs (See note, line 51) | |
#find rows that are all NAs, these are likely populations too close to large bodies of water | |
movepops <- clim[rowSums(is.na(clim)) == ncol(clim),] | |
#adjust population coordinates in pop df and re-run from line 63 | |
> head(clim) | |
alt_07.tif alt_11.tif alt_12.tif alt_15.tif alt_16.tif alt_17.tif bio1_07.tif bio1_11.tif bio1_12.tif bio1_15.tif bio1_16.tif bio1_17.tif bio10_07.tif | |
BG001 NA NA NA NA 61 NA NA NA NA NA 121 NA NA | |
CA001 NA 24 NA NA NA NA NA 100 NA NA NA NA NA | |
CA008 NA NA 1308 NA NA NA NA NA 39 NA NA NA NA | |
CA009 NA NA 599 NA NA NA NA NA 73 NA NA NA NA | |
CA010 NA NA 935 NA NA NA NA NA 57 NA NA NA NA | |
GR001 NA NA NA NA 1 NA NA NA NA NA 159 NA NA | |
#write table | |
write.table(clim, file="bioclimdata.txt") | |
#load table | |
clim <- read.table("bioclimdata.txt", header=TRUE) | |
#now you have table of values and also a lot of NAs. So see "squishr" gist at https://gist.github.com/kgturner/6644150! |
Bug fixed (rows were named incorrectly in the final data frame).
pop data frame row names must be renamed to match population names in order for following for loop to work. Fixed.
Hi,
I am trying to extract some data from Worldclim.org.
I downloaded and unzip all relevant WorldClim geoTIFF files into a single directory and installed all packages.
However, when I try load all geoTIFF files, I get this error message:
Error in .local(.Object, ...) :
The selected file is an ESRI BIL header file, but to
open ESRI BIL datasets, the data file should be selected
instead of the .hdr file. Please try again selecting
the data file (often with the extension .bil) corresponding
to the header file: ~\prec1.hdr
Any ideas of what could be happening?
Thanks in advance.
Hi, Sorry for the delay, just saw this comment. This was originally written to work with Worldclim version 1, geoTIFF files, downloaded by tile (for example, the geoTIFF bioclim link here: http://www.worldclim.org/tiles.php?Zone=12). I'm not sure it will work with generic format (BIL) or ESRI files, or with Worldclim version 2. If I figure how to make it work with those, I will update.
There appears to be a bug in this code. Specifically, at least one location (CA008) is somehow being associated with data from the wrong tile. Working on it...