Last active
December 18, 2015 08:19
-
-
Save benmarwick/5753446 to your computer and use it in GitHub Desktop.
ARCHY 499 SP13 R snippet for plotting basic geoarchaeological data from our google data sheet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ############################################################ | |
| # Analysis of camsizer data on sediment samples | |
| # | |
| # clear the workspace | |
| rm(list = ls()) | |
| # See the bottom to import data if you're coming back | |
| # after having done all this previously, if this is the first | |
| # run, carry on from here! | |
| # | |
| # Tell R where to look for the .xls data files, change this | |
| # for your own computer | |
| setwd("C:/Users/marwick/UW-Dropbox/Dropbox/Marwick SP13/all_camsizser_xls") | |
| # | |
| # make a list of all xls files in working directory | |
| CamsizerFiles <- list.files(pattern="xls$") | |
| # check the list | |
| CamsizerFiles | |
| # make a list that includes the data from each of these xls files | |
| # this make take a minute or two... | |
| install.packages("gdata") # you may need to install Perl also | |
| library(gdata); library(plyr) | |
| CamsizerList <- llply(CamsizerFiles, read.xls, .progress = "text") | |
| # check that this read all the files in the list, if TRUE | |
| # all is well | |
| identical(length(CamsizerFiles), length(CamsizerList)) | |
| # find out how many files in list, should be the same as | |
| # the number of samples run through the machine | |
| length(CamsizerList) | |
| # loop through files to get vectors for all particle size | |
| # distributions from the 'retained' column for each sample | |
| install.packages("stringr") | |
| library(stringr) | |
| retained <- data.frame(sapply(seq(1:length(CamsizerList)), function(x) as.numeric(str_extract(CamsizerList[[x]][,3][c(16:66)], "[.0-9]+")))) | |
| # col names for the samples | |
| n <- 2 # number of replicate samples | |
| # vector of names taken from lab notebook | |
| IDs <- c(0.05, 0.25, 0.43, 0.65, 0.85, 1.05, 1.25, 1.50, # Tuesday 21 May, CG, 10-25 | |
| 0.1, 0.3, 0.5, 0.7, 0.9, 1.1, 1.3, 1.55, # Tuesday 21 May, RS, 26-41 | |
| 1.7, 1.75, 1.9, 1.95, 2.1, 2.15, 2.35, 2.4, # Friday 31 May CG, 42-57 (no 58, it's a triplicate) | |
| 0.15, 0.34, 0.54, 0.75, 0.95, 1.15, # Tuesday 4 June, SB, 59-70 | |
| 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, # Wednesday 5 June, VD 71-82 | |
| 1.4, 1.6, 1.8, 2.0, 2.2, 2.45, 1.45, 1.65, # Thursday 6 June, VFD, 83-106 | |
| 1.85, 2.05, 2.3, 2.5) # Thursday 6 June, VFD, 83-106 | |
| names(retained) <- rep(IDs, each = n, len = (ncol(retained))) | |
| # get averages of consecutive runs to get a per-sample | |
| # average values for 'retained', each col is a sample | |
| # takes averages of all cols with the same name | |
| retained.av <- data.frame(t(apply(retained, 1, tapply, names(retained), mean)) ) | |
| # get vector of size classes to use as plot axis labels | |
| SizeClasses <- as.numeric(str_extract(CamsizerList[[1]][,1][c(16:66)], "[.0-9]+")) | |
| # combine size classes with sample particle size data | |
| retained.av.t <- data.frame(t(retained.av)) | |
| names(retained.av.t) <- SizeClasses | |
| retained.av.t$sampleID <- as.numeric(str_extract(rownames(retained.av.t), "[.0-9]+")) | |
| # reshape for plotting... | |
| install.packages("reshape2") | |
| library(reshape2) | |
| retained.av.m <- melt(retained.av.t, id.vars = "sampleID") | |
| retained.av.m$variable <- as.numeric(as.character(retained.av.m$variable)) * 1000 | |
| # | |
| # plot particle size distributions by sample | |
| install.packages("ggplot2") | |
| library(ggplot2) | |
| # smoothed lines, but with linear x-axis scale, notice the bunching up at the LHS | |
| ggplot(retained.av.m, aes(variable, value, group = sampleID, colour = as.factor(sampleID)) )+ | |
| geom_smooth(span = 0.2, se = FALSE ) + | |
| xlab("Particle Size (um)") + | |
| ylab("percent retained") | |
| # not smoothed lines but with a log scale on the x-axis | |
| ggplot(retained.av.m, aes(variable, value, group = sampleID, colour = as.factor(sampleID)) )+ | |
| geom_line() + | |
| scale_x_log10() + | |
| xlab("Particle Size (um)") + | |
| ylab("percent retained") | |
| # | |
| # If you want to use this plot (or any of the others), as | |
| # in to inlcude in your lab report, the best way to | |
| # export it is to go to the plot window in RStudio | |
| # and click the little 'Export' button | |
| # and then 'save plot as image'. Don't copy-paste as this | |
| # captures a very low-res image. | |
| # | |
| # A more fancy plot ... notice the different way the legend is made.... | |
| xaxbreaks <- c(4000, 2000, 1000, 500, 250, 125, 63, 31, 16, 8, 4, 2, 0.5, 0.12, 0.04) | |
| # | |
| ggplot(retained.av.m, aes(group = sampleID)) + | |
| scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) + | |
| geom_rect(aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.05) + | |
| geom_rect(aes(xmin=4, xmax=63, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.03) + | |
| geom_rect(aes(xmin=63, xmax=Inf, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.001) + | |
| geom_vline(xintercept = c(63, 4), colour = 'grey40', size = 1) + | |
| xlab("Particle Size (um)") + | |
| geom_line(aes(x=variable, y = value, colour = sampleID)) + | |
| geom_text(aes(x=2,y=max(retained.av.m$value),label = "Clay")) + | |
| geom_text(aes(x=40,y=max(retained.av.m$value),label = "Silt")) + | |
| geom_text(aes(x=1900,y=max(retained.av.m$value),label = "Sand")) + | |
| coord_cartesian(xlim = c(30,4000)) + | |
| ylab("percent retained") | |
| # | |
| # If you just want one sample in the plot | |
| # choose your sample here, first see the avaiable | |
| # sample names you can select | |
| unique(retained.av.m$sampleID) | |
| # | |
| # Now take one of those sample names and put them in place | |
| # of 10 here | |
| i <- subset(retained.av.m, retained.av.m$sampleID=="0.1") | |
| # | |
| ggplot(i, aes(variable, value)) + | |
| scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) + | |
| geom_rect(aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.05) + | |
| geom_rect(aes(xmin=4, xmax=63, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.03) + | |
| geom_rect(aes(xmin=63, xmax=Inf, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.001) + | |
| geom_vline(xintercept = c(63, 4), colour = 'grey40', size = 1) + | |
| xlab("Particle Size (um)") + | |
| geom_line(aes(x=variable, y = value)) + | |
| geom_text(aes(x=2,y=max(retained.av.m$value),label = "Clay")) + | |
| geom_text(aes(x=40,y=max(retained.av.m$value),label = "Silt")) + | |
| geom_text(aes(x=1900,y=max(retained.av.m$value),label = "Sand")) + | |
| coord_cartesian(xlim = c(30,4000)) + | |
| ylab("percent retained") | |
| # | |
| # Now we will install grain size stats package, to numerically | |
| # summarise the particle size data | |
| install.packages("G2Sd") | |
| library("G2Sd") | |
| # | |
| # Now we need to get our data in shape for the statistical | |
| # analysis: rownames as sieve aperture sizes and samples as | |
| # columns | |
| # transpose... | |
| retained.av.t.t <- data.frame(t(retained.av.t)) | |
| # put sample names as column names | |
| names(retained.av.t.t) <- retained.av.t$sampleID | |
| # remove sample names from data (in last row) | |
| retained.av.t.t <- retained.av.t.t[-nrow(retained.av.t.t),] | |
| # convert size classes to proper units (um) to make rownames | |
| rownames(retained.av.t.t) <- sapply(SizeClasses, function(x) x*1000) | |
| # now do the calculations to get summary statistics on the | |
| # particle size distributions... If modes = TRUE then | |
| # the function will wait for | |
| # you to click on the modal point of each plot and then | |
| # press ESC to move to the next plot. Once you've picked | |
| # the modes of all plots it will finish. | |
| dat_granstat <- as.data.frame(t(granstat(retained.av.t.t, statistic="arithmetic", aggr=TRUE, modes=FALSE))) | |
| # | |
| # Now you've got the summary statistics which you might want | |
| # to present in a table in your report. You should look at | |
| # these to pick out samples that represent extremes, ie. | |
| # the highest and lowest means, st. devs, etc. | |
| # | |
| # subset for plotting | |
| dat_granstat_sub <- dat_granstat[,c(1:4)] | |
| # convert all columns to numeric | |
| dat_granstat_sub <- data.frame(apply(dat_granstat_sub, 2, as.numeric, as.character)) | |
| # set colnames for labelling the plot | |
| colnames(dat_granstat_sub) <- c("Mean (um)", "standard \ndeviation (um)", "skewness \n(um)", "kurtosis (um)") | |
| depths <- retained.av.t$sampleID | |
| # now plot these... | |
| install.packages("analogue", repos="http://R-Forge.R-project.org") | |
| library(analogue) | |
| Stratiplot(x = dat_granstat_sub, | |
| y = depths, | |
| type = c("h", "l"), | |
| ylab = "sample", | |
| varTypes = "absolute") | |
| # use a different style... | |
| # setup for plotting | |
| install.packages("rioja") | |
| library(rioja) | |
| #### plain plot | |
| strat.plot(dat_granstat_sub, | |
| yvar = depths, | |
| y.rev = TRUE, | |
| ylabel = "Depth below surface (m)", | |
| col.line = "grey", # try others that you like | |
| lwd.line = 2 # ditto | |
| ) | |
| ######################################################################## | |
| # Now combine camsizer data with pipette data # | |
| # connect to google sheet | |
| require(RCurl) | |
| options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE)) | |
| goog <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFhPRHYteGlZb0pqVWxFbWt4cWIzcGc&output=csv" | |
| library(RCurl) | |
| pipette <- read.csv(textConnection(getURL(goog)), stringsAsFactors = FALSE) | |
| # percent retained for camsiser size classes | |
| perc <- as.vector(na.omit(pipette[,124])) / 100 | |
| cam <- sweep(retained.av.t, MARGIN=1, perc, `*`) | |
| # to check rowSums(cam) should be similar to perc * 100 | |
| # percent retained for pipette size classes | |
| # just get the cols with pipette data and drop rows of NA | |
| pipette <- na.omit(pipette[,146:151]) | |
| # edit colnames | |
| names(pipette) <- as.numeric(str_extract(names(pipette), "[.0-9]+")) | |
| # combine camsizer and pipette | |
| comb <- cbind( pipette, cam[,-ncol(cam)] ) | |
| rowSums(comb) | |
| # plot | |
| # | |
| # That's the end of the analysis and visualisation. | |
| ############################################################ | |
| # To save all the data objects you've made here, | |
| # change the directory to some location on your computer, | |
| # note that R uses forward slashes instead of backslashes | |
| save.image("F:/My Documents/My UW/Teaching/.../camsizer_data.Rdata") | |
| # | |
| # The if you need to re-load the data objects you just run | |
| # this line, with the appropriate directory (ie. same as above) | |
| # | |
| load("F:/My Documents/My UW/Teaching/..../camsizer_data.Rdata") | |
| # | |
| # | |
| ########################################################### | |
| # For my future reference, below is a VBA macro I used to convert | |
| # all those xle files that the camsizer produces into xls files | |
| # This has to be run from within Excel as a module in a sheet | |
| ############################################################ | |
| Sub OpenAllFilesInFolder() | |
| Dim path As Variant | |
| Dim excelfile As Variant | |
| path = "F:\My Documents\My UW\Teaching\...\" | |
| excelfile = Dir(path & "*.xle") | |
| Do While excelfile <> "" | |
| Workbooks.Open Filename:=path & excelfile | |
| excelfile = Dir | |
| ActiveWorkbook.SaveAs Filename:="F:\My Documents\My UW\Teaching\... \" & excelfile & ".xls", _ | |
| FileFormat:=56, Password:="", WriteResPassword:="", _ | |
| ReadOnlyRecommended:=False, CreateBackup:=False | |
| ActiveWindow.Close | |
| Loop | |
| End Sub | |
| ############################################################ | |
| ############################################################ | |
| # R script for plotting of basic geoarchaeological data | |
| # pH, EC, Mag Sus, LOI | |
| #### get data | |
| # this block will connect to the internet to access our google sheet | |
| # then download it directly into a dataframe ready to work on | |
| # If this is the first time, a little preparation is necessary: | |
| # In google sheet: File -> Publish to web -> publish all sheets; | |
| # Get link to published data -> CSV, all sheets, copy-paste link here: | |
| googsheet <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFhPRHYteGlZb0pqVWxFbWt4cWIzcGc&single=true&gid=0&output=csv" | |
| install.packages("RCurl") # only need to do this once per computer | |
| require(RCurl) | |
| options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE)) | |
| myCsv <- getURL(googsheet) | |
| data <- read.csv(textConnection(myCsv), stringsAsFactors = FALSE) | |
| # extract just data for plotting: pH, SOM, CaCO3, MS-LF, MS-FD | |
| plotting_data <- na.omit(data[,c('Sample.ID', | |
| 'Average.pH', | |
| 'mean.EC..automatically.calcuated.', | |
| 'LOI.percent.organic.material..Average', | |
| 'LOI.Percent.carbonate.content..Average', | |
| 'MS.LF.reading.average.mass.susceptibility', | |
| 'MS.FD' | |
| )]) | |
| # rename the columns of the plotting data with shorter names and measurement units | |
| names(plotting_data) <- c("depth (m)", "pH", "EC", "Soil organic \nmatter \n(% mass)", "Carbonates \n(% mass)", "Magnetic \nsusceptibility \n(SI units)", "Frequency \ndependency \n(%)") | |
| # replace missing data in google sheet with 0 so it can be handled by the plotting function | |
| plotting_data[plotting_data == '#DIV/0!'] <- 0 | |
| # convert all the numbers to numbers (some columns are in character type) | |
| plotting_data <- as.data.frame(sapply(plotting_data, as.numeric, as.character)) | |
| # create vector for depth of samples | |
| depth <- na.omit(as.numeric(plotting_data$`depth (m)`)) | |
| # create data frame of eveything except depth | |
| plotting_data <- plotting_data[,-(names(plotting_data) == 'depth (m)')] | |
| # setup for plotting | |
| install.packages("rioja") | |
| library(rioja) | |
| #### plain plot | |
| x <- strat.plot(plotting_data, | |
| yvar = depth, | |
| y.rev = TRUE, | |
| ylabel = "Depth below surface (m)", | |
| col.line = "black", # try others that you like | |
| lwd.line = 1 # ditto | |
| ) | |
| dev.off() # clear that plot | |
| #### slightly more fancy plot: with dendrogram showing groups of similar samples | |
| ?chclust # to get information how the clustering is done | |
| # add a dendrogram from constrained cluster analysis | |
| # coniss is a stratigraphically constrained cluster analysis by | |
| # method of incremental sum of squares | |
| diss <- dist(plotting_data) | |
| clust <- chclust(diss, method = "coniss") | |
| # broken stick model to suggest significant zones, 3? | |
| bstick(clust) | |
| # plot with clusters | |
| x <- strat.plot(plotting_data, | |
| yvar = depth, | |
| y.rev = TRUE, | |
| ylabel = "Depth below surface (m)", | |
| col.line = "grey", # try others that you like | |
| lwd.line = 2, # ditto | |
| clust = clust, | |
| ) | |
| # add lines to indicate clusters, leave out if it looks odd | |
| addClustZone(x, clust, 6, col = "pink") | |
| dev.off() # clear that plot | |
| #### even more fancy plot: with dendrogram and Detrended Correspondence Analysis | |
| ?decorana # get information about DCA | |
| # make first half of the plot, just the data | |
| x <- strat.plot(plotting_data, | |
| yvar = depth, | |
| y.rev = TRUE, | |
| ylabel = "Depth below surface (m)", | |
| col.line = "grey", # try others that you like | |
| lwd.line = 2, # ditto | |
| xRight = 0.7) # shift to the right to make room for the next bit... | |
| # calculate curves for first two DCA components of our data | |
| dca <- decorana(abs(plotting_data), iweigh = 0) # | |
| sc <- scores(dca, display = "sites", choices = 1:2) | |
| # add second part of the plot, the DCA curves | |
| x <- strat.plot(sc, | |
| yvar = depth, | |
| y.rev = TRUE, | |
| ylabel = "Depth below surface (m)", | |
| col.line = "grey", # try others that you like | |
| lwd.line = 2, # ditto | |
| clust = clust, | |
| clust.width=0.08, | |
| add=TRUE, | |
| xLeft = 0.7, xRight = 0.99, | |
| y.axis=FALSE) | |
| dev.off() # clear that plot | |
| #### biplots: for plotting two of the variables to explore their relationship | |
| library(scales) | |
| library(ggplot2) | |
| library(stringr) | |
| # change what's in the "": lookback up to line 25 for the proper names | |
| x <- as.numeric(as.character(plotting_data [,which(colnames(plotting_data)=="Carbonates \n(% mass)")])) | |
| y <- as.numeric(as.character(plotting_data [,which(colnames(plotting_data )=="Soil organic \nmatter \n(% mass)")])) | |
| # | |
| # | |
| # change the xlab and ylab below | |
| tmp <- data.frame(cbind(x,y)) | |
| ggplot(tmp, aes(x, y, label=depth)) + # make the basic plot object | |
| scale_x_continuous(limit=c(min(tmp$x),max(tmp$x)), | |
| breaks=round(fivenum(tmp$x),1)) + # set the locations of the x-axis labels | |
| # change x label to match the data you've selected: | |
| xlab(paste("Carbonates (% mass, r = ", round(cor(x,y),4),", p-value = ", | |
| round(anova(lm(y~x))$'Pr(>F)'[1],4),")",sep="")) + # paste in the r and p values with the axis label | |
| scale_y_continuous(limit=c(min(tmp$y),max(tmp$y)), | |
| breaks=round(fivenum(tmp$y),1)) + # set the locations of the y-axis labels | |
| # change y label to match the data you've selected: | |
| ylab("Soil Organic Matter (% mass)") + | |
| geom_text() + # specify that the data points are the sample names | |
| geom_rug(size=0.1) + # specify that we want the rug plot | |
| geom_smooth(method="lm", se=FALSE, colour="grey80", aes(group=1)) + | |
| # specify a line of best fit calculated by a linear model | |
| # se is the standard error area, change to TRUE to display it | |
| theme(panel.background = element_blank(), # suppress default background | |
| panel.grid.major = element_blank(), # suppress default major gridlines | |
| panel.grid.minor = element_blank(), # suppress default minor gridlines | |
| axis.ticks = element_blank(), # suppress tick marks | |
| axis.title.x=element_text(size=12), # increase axis title size slightly | |
| axis.title.y=element_text(size=12, angle=90), # increase axis title size slightly and rotate | |
| axis.text.x=element_text(size=12), # increase size of numbers on x-axis | |
| axis.text.y=element_text(size=12), # increase size of numbers on y-axis | |
| aspect.ratio=1) # force the plot to be square | |
| ## | |
| # | |
| # You may end up making a lot of charts like this to explore your data. But do not put them all in your lab report. | |
| #### saving plots for use in other documents | |
| # if the plot function starts with ggplot, you can use this: | |
| # commonly recommended image resolution is 600 dpi | |
| width <- 17 | |
| dpi <- 600 | |
| ggsave( | |
| "filename.png", | |
| units = "cm", | |
| width = width, | |
| height = width, # for a square plot | |
| dpi = dpi) | |
| # find the location of this file on your computer | |
| getwd() | |
| # best method: save in vector graphics format | |
| svg('filename.svg') | |
| # make plot, run the relevant lines above (no plot will appear) | |
| # open in Inkscape, tweak, then 'Export Bitmap' | |
| dev.off() | |
| # to find the location of the file you just made: | |
| getwd() | |
| # quick and dirty method: | |
| png('filename.png', h = 4500, w = 4500*1.681, res = 600) # this looks good on my screen | |
| # you might fiddle with the h and w numbers to make it good on yours | |
| # make plot, run the relevant lines above (no plot will appear) | |
| dev.off() | |
| # to find the location of the file you just made: | |
| getwd() | |
| ############################################################### | |
| ############################################################### | |
| ####### working with Carbon Isotope Data from UW IsoLab ####### | |
| # Get the txt file that they emailed you and save it somewhere handy | |
| # Open Excel or similar spreadsheet program | |
| # In the spreadsheet program, go to File -> Open | |
| # find the txt file (you may need to change 'Excel Files' to | |
| # 'All Files' in the Open File dialog) and open it | |
| # choose 'delimited' and 'tab' as the separator | |
| # inspect the file in the spreadsheet program to see | |
| # that it looks ok | |
| # Save the file in the spreadsheet program as a CSV file | |
| # We'll use this CSV file in R | |
| # This line will pop up a 'file open' window | |
| carb <- read.csv(file.choose(), header=TRUE) | |
| # find the location of the CSV file, click on it | |
| # and click open | |
| # Let's just extract the sample names and the dC13VPDB values | |
| # by getting on the rows and columns with those data | |
| carb1 <- data.frame(Sample.ID = as.character(carb[44:74,3]), | |
| dC13VPDB = as.numeric(as.character(carb[44:74,'X.18']))) | |
| # Extract the depths from the sample IDs for plotting | |
| # first, remove text characters | |
| tmp <- sub("GHM_", "", as.character(carb1$Sample.ID)) | |
| # second, remove "_" to convert all to centimeters | |
| tmp1 <- sub("_", "", tmp) | |
| # third, conver to numeric | |
| tmp2 <- as.numeric(as.character(tmp1)) | |
| # fourth convert to meters | |
| tmp3 <- tmp2 / 100 | |
| # put back into table | |
| carb1$Sample.ID <- tmp3 | |
| # plot each data point to inspect within-sample variation | |
| require(ggplot2) # if you get an error here, see these | |
| # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html | |
| # http://www.statmethods.net/interface/packages.html | |
| # http://www.youtube.com/watch?v=u1r5XTqrCTQ | |
| ggplot(carb1, aes( dC13VPDB, Sample.ID)) + | |
| geom_point() + | |
| scale_y_reverse() + | |
| theme(axis.text.x = element_text(size = 10)) + | |
| theme(axis.text.y = element_text(size = 10)) + | |
| ylab("Depth below surface (m)") + | |
| xlab(bquote(delta^13*C[SOM]~"\u2030"~VPDB)) + | |
| coord_equal(ratio=2) | |
| # we can see that the replicates are very similar | |
| # and there does seem to be some trend in the data | |
| # Compute the mean value for replicates of each sample | |
| carb1.mean <- aggregate(dC13VPDB ~ Sample.ID, data = carb1, FUN = mean) | |
| # plot mean values | |
| require(ggplot2) | |
| ggplot(carb1.mean, aes( dC13VPDB, Sample.ID)) + | |
| geom_point() + | |
| scale_y_reverse() + | |
| theme(axis.text.x = element_text(size = 10)) + | |
| theme(axis.text.y = element_text(size = 10)) + | |
| ylab("Depth below surface (m)") + | |
| xlab(bquote(delta^13*C[SOM]~"\u2030"~VPDB)) + | |
| coord_equal(ratio=2) | |
| #### get other geoarch data for GMH | |
| # this block will connect to the internet to access our google sheet | |
| # then download it directly into a dataframe ready to work on | |
| # If this is the first time, a little preparation is necessary: | |
| # In google sheet: File -> Publish to web -> publish all sheets; | |
| # Get link to published data -> CSV, all sheets, copy-paste link here: | |
| googsheet <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFhPRHYteGlZb0pqVWxFbWt4cWIzcGc&single=true&gid=0&output=csv" | |
| require(RCurl) | |
| # if you get an error here see these | |
| # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html | |
| # http://www.statmethods.net/interface/packages.html | |
| # http://www.youtube.com/watch?v=u1r5XTqrCTQ | |
| options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE)) | |
| myCsv <- getURL(googsheet) | |
| data <- read.csv(textConnection(myCsv), stringsAsFactors = FALSE) | |
| # compute interpolated ages to use as the y-axis | |
| # interpolate age from loess line with depth | |
| dates <- c(0, as.numeric(na.omit(data$C14.dates.calibrated.median)), 7000) | |
| depths <- c(0, as.numeric(na.omit(data$date.depth)) ,2.5) | |
| sequ <- seq(0, 2.5, 0.01) | |
| age.lo <- loess(dates ~ depths) | |
| age.pr <- predict(age.lo, newdata = data.frame(depths = sequ)) | |
| plot(age.pr) | |
| age.pr <- data.frame(age = as.numeric(age.pr), depths = sequ) | |
| # merge interpolated ages back in with geoarch data | |
| data1 <- merge(age.pr, data, by.x = 'depths', by.y = 'Sample.ID') | |
| # extract just data for plotting: pH, SOM, CaCO3, MS-LF, MS-FD | |
| plotting_data <- (data1[,c('depths', 'age', | |
| 'Average.pH', | |
| 'mean.EC..automatically.calcuated.', | |
| 'LOI.percent.organic.material..Average', | |
| 'LOI.Percent.carbonate.content..Average', | |
| 'MS.LF.reading.average.mass.susceptibility', | |
| 'MS.FD', | |
| 'mean.dC13VPDB.of.duplicate.samples' | |
| )])[1:48,] | |
| # rename the columns of the plotting data with shorter names and measurement units | |
| names(plotting_data) <- c("depth (m)", "age", "pH", "EC", "Soil organic \nmatter \n(% mass)", "Carbonates \n(% mass)", "Magnetic \nsusceptibility \n(SI units)", "Frequency \ndependency \n(%)", "dC13VPDB") | |
| # replace missing data in google sheet with 0 so it can be handled by the plotting function | |
| plotting_data[plotting_data == '#DIV/0!'] <- 0 | |
| # convert all the numbers to numbers (some columns are in character type) | |
| plotting_data <- as.data.frame(sapply(plotting_data, as.numeric, as.character)) | |
| # create vector for depth of samples | |
| depth <- na.omit(as.numeric(plotting_data$`depth (m)`)) | |
| # create vector for age of samples | |
| age <- as.numeric(na.omit(as.numeric(plotting_data$age)) / 1000) | |
| # create data frame of eveything except depth | |
| plotting_data <- plotting_data[,-(names(plotting_data) == 'depth (m)')] | |
| # setup for plotting | |
| library(rioja) # if you get an error here, see these | |
| # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html | |
| # http://www.statmethods.net/interface/packages.html | |
| # http://www.youtube.com/watch?v=u1r5XTqrCTQ | |
| #### plain plot by age (best one to use) | |
| x <- strat.plot(plotting_data[1:length(age),-1], | |
| yvar = age, | |
| y.rev = TRUE, | |
| ylabel = "calibrated years BP x 1000", | |
| col.line = "black", # try others that you like | |
| lwd.line = 1, # ditto | |
| cex.xlabel = 0.75 | |
| ) | |
| dev.off() # clear that plot | |
| #### plain plot by depth | |
| x <- strat.plot(plotting_data[1:length(age),-1], | |
| yvar = depth, | |
| y.rev = TRUE, | |
| ylabel = "depth below surface (m)", | |
| col.line = "black", # try others that you like | |
| lwd.line = 1, # ditto | |
| cex.xlabel = 0.75 | |
| ) | |
| dev.off() # clear that plot | |
| #### slightly more fancy plot: with dendrogram showing groups of similar samples | |
| ?chclust # to get information how the clustering is done | |
| # add a dendrogram from constrained cluster analysis | |
| # coniss is a stratigraphically constrained cluster analysis by | |
| # method of incremental sum of squares | |
| diss <- dist(plotting_data[1:length(age),-1]) | |
| clust <- chclust(diss, method = "coniss") | |
| # broken stick model to suggest significant zones, 3? | |
| bstick(clust) | |
| # plot with clusters | |
| x <- strat.plot(plotting_data[1:length(age),-1], | |
| yvar = age, | |
| y.rev = TRUE, | |
| ylabel = "calibrated years BP x 1000", | |
| col.line = "black", # try others that you like | |
| lwd.line = 1, # ditto | |
| cex.xlabel = 0.75, | |
| clust = clust, | |
| ) | |
| # add lines to indicate clusters, leave out if it looks odd | |
| addClustZone(x, clust, 5, col = "red") | |
| dev.off() # clear that plot | |
| ############### end of working with carbon isotope data ############################ | |
| #################################################################################### | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment