benmarwick · December 18, 2015 08:19
diff --git a/SP13_ARCHY_499_Geoarch.R b/SP13_ARCHY_499_Geoarch.R
 ############################################################
 # Analysis of camsizer data on sediment samples
 #
 # clear the workspace
 rm(list = ls())
 #  See the bottom to import data if you're coming back 
 # after having done all this previously, if this is the first 
 # run, carry on from here!
 #
 # Tell R where to look for the .xls data files, change this 
 # for your own computer
 setwd("C:/Users/marwick/UW-Dropbox/Dropbox/Marwick SP13/all_camsizser_xls")
 #
 # make a list of all xls files in working directory
 CamsizerFiles <- list.files(pattern="xls$") 
 # check the list
 CamsizerFiles 
 # make a list that includes the data from each of these xls files
 # this make take a minute or two...
 install.packages("gdata") # you may need to install Perl also
 library(gdata); library(plyr)
 CamsizerList <- llply(CamsizerFiles, read.xls, .progress = "text")
 # check that this read all the files in the list, if TRUE
 # all is well
 identical(length(CamsizerFiles), length(CamsizerList))
 # find out how many files in list, should be the same as
 # the number of samples run through the machine
 length(CamsizerList) 
 # loop through files to get vectors for all particle size
 # distributions from the 'retained' column for each sample
 install.packages("stringr")
 library(stringr)
 retained <- data.frame(sapply(seq(1:length(CamsizerList)), function(x) as.numeric(str_extract(CamsizerList[[x]][,3][c(16:66)], "[.0-9]+"))))
 # col names for the samples 
 n <- 2 # number of replicate samples
 # vector of names taken from lab notebook
 IDs <- c(0.05, 0.25, 0.43, 0.65, 0.85, 1.05, 1.25, 1.50,   # Tuesday 21 May, CG, 10-25
         0.1,  0.3,  0.5,  0.7,  0.9,  1.1,  1.3,  1.55,   # Tuesday 21 May, RS, 26-41
         1.7,  1.75, 1.9,  1.95, 2.1,  2.15, 2.35, 2.4,    # Friday 31 May CG, 42-57 (no 58, it's a triplicate)
         0.15, 0.34, 0.54, 0.75, 0.95, 1.15,               # Tuesday 4 June, SB, 59-70
         0.2,  0.4,  0.6,  0.8,  1.0,  1.2,                # Wednesday 5 June, VD 71-82
         1.4,  1.6,  1.8,  2.0,  2.2,  2.45, 1.45, 1.65,   # Thursday 6 June, VFD, 83-106
         1.85, 2.05, 2.3,  2.5)                            # Thursday 6 June, VFD, 83-106


 names(retained) <- rep(IDs, each = n, len = (ncol(retained)))
 # get averages of consecutive runs to get a per-sample
 # average values for 'retained', each col is a sample
 # takes averages of all cols with the same name
 retained.av <- data.frame(t(apply(retained, 1, tapply, names(retained), mean)) )

 # get vector of size classes to use as plot axis labels
 SizeClasses <- as.numeric(str_extract(CamsizerList[[1]][,1][c(16:66)], "[.0-9]+"))
 # combine size classes with sample particle size data
 retained.av.t <- data.frame(t(retained.av))
 names(retained.av.t) <- SizeClasses
 retained.av.t$sampleID <- as.numeric(str_extract(rownames(retained.av.t), "[.0-9]+")) 

 # reshape for plotting...
 install.packages("reshape2")
 library(reshape2)
 retained.av.m <- melt(retained.av.t, id.vars = "sampleID")
 retained.av.m$variable <-  as.numeric(as.character(retained.av.m$variable)) * 1000

 #
 # plot particle size distributions by sample
 install.packages("ggplot2")
 library(ggplot2)

 # smoothed lines, but with linear x-axis scale, notice the bunching up at the LHS
 ggplot(retained.av.m, aes(variable, value, group = sampleID, colour = as.factor(sampleID)) )+ 
  geom_smooth(span = 0.2, se = FALSE ) +
  xlab("Particle Size (um)") + 
  ylab("percent retained")


 # not smoothed lines but with a log scale on the x-axis
 ggplot(retained.av.m, aes(variable, value, group = sampleID, colour = as.factor(sampleID)) )+ 
  geom_line() +
  scale_x_log10() + 
  xlab("Particle Size (um)") + 
  ylab("percent retained")
 #
 # If you want to use this plot (or any of the others), as 
 # in to inlcude in your lab report, the best way to 
 # export it is to go to the plot  window in RStudio 
 # and click the little 'Export' button
 # and then 'save plot as image'. Don't copy-paste as this
 # captures a very low-res image.
 #
 # A more fancy plot ... notice the different way the legend is made....
 xaxbreaks <-   c(4000, 2000, 1000, 500, 250, 125, 63, 31, 16, 8, 4, 2, 0.5, 0.12, 0.04)
 #
 ggplot(retained.av.m, aes(group = sampleID)) + 
  scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) +
  geom_rect(aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.05) +
  geom_rect(aes(xmin=4, xmax=63, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.03) +
  geom_rect(aes(xmin=63, xmax=Inf, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.001) +
  geom_vline(xintercept = c(63, 4), colour = 'grey40', size = 1) +
  xlab("Particle Size (um)") +   
  geom_line(aes(x=variable, y = value, colour = sampleID)) +
  geom_text(aes(x=2,y=max(retained.av.m$value),label = "Clay")) +
  geom_text(aes(x=40,y=max(retained.av.m$value),label = "Silt")) +
  geom_text(aes(x=1900,y=max(retained.av.m$value),label = "Sand")) +
  coord_cartesian(xlim = c(30,4000)) +
  ylab("percent retained")
 #
 # If you just want one sample in the plot
 # choose your sample here, first see the avaiable 
 # sample names you can select
 unique(retained.av.m$sampleID)
 #
 # Now take one of those sample names and put them in place
 # of 10 here
 i <- subset(retained.av.m, retained.av.m$sampleID=="0.1")
 #
 ggplot(i, aes(variable, value)) + 
  scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) +
  geom_rect(aes(xmin=0, xmax=4, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.05) +
  geom_rect(aes(xmin=4, xmax=63, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.03) +
  geom_rect(aes(xmin=63, xmax=Inf, ymin=-Inf, ymax=Inf), fill="white", alpha = 0.001) +
  geom_vline(xintercept = c(63, 4), colour = 'grey40', size = 1) +
  xlab("Particle Size (um)") +   
  geom_line(aes(x=variable, y = value)) +
  geom_text(aes(x=2,y=max(retained.av.m$value),label = "Clay")) +
  geom_text(aes(x=40,y=max(retained.av.m$value),label = "Silt")) +
  geom_text(aes(x=1900,y=max(retained.av.m$value),label = "Sand")) +
  coord_cartesian(xlim = c(30,4000)) +
  ylab("percent retained")
 #
 # Now we will install grain size stats package, to numerically
 # summarise the particle size data
 install.packages("G2Sd")
 library("G2Sd")
 #
 # Now we need to get our data in shape for the statistical 
 # analysis: rownames as sieve aperture sizes and samples as 
 # columns
 # transpose...

 retained.av.t.t <- data.frame(t(retained.av.t))

 # put sample names as column names
 names(retained.av.t.t) <- retained.av.t$sampleID

 # remove sample names from data (in last row)
 retained.av.t.t <- retained.av.t.t[-nrow(retained.av.t.t),]
 # convert size classes to proper units (um) to make rownames
 rownames(retained.av.t.t) <- sapply(SizeClasses, function(x) x*1000)
 # now do the calculations to get summary statistics on the 
 # particle size distributions... If modes = TRUE then
 # the function will wait for
 # you to click on the modal point of each plot and then
 # press ESC to move to the next plot. Once you've picked
 # the modes of all plots it will finish. 
 dat_granstat <- as.data.frame(t(granstat(retained.av.t.t, statistic="arithmetic", aggr=TRUE, modes=FALSE)))
 #
 # Now you've got the summary statistics which you might want
 # to present in a table in your report. You should look at 
 # these to pick out samples that represent extremes, ie.
 # the highest and lowest means, st. devs, etc.  
 #
 # subset for plotting
 dat_granstat_sub <- dat_granstat[,c(1:4)]
 # convert all columns to numeric
 dat_granstat_sub <- data.frame(apply(dat_granstat_sub, 2, as.numeric, as.character))
 # set colnames for labelling the plot 
 colnames(dat_granstat_sub) <- c("Mean (um)", "standard \ndeviation (um)", "skewness \n(um)", "kurtosis (um)")

 depths <- retained.av.t$sampleID
 # now plot these...
 install.packages("analogue", repos="http://R-Forge.R-project.org")
 library(analogue)
 Stratiplot(x = dat_granstat_sub,
           y = depths,
           type = c("h", "l"), 
           ylab = "sample",
           varTypes = "absolute")

 # use a different style...
 # setup for plotting
 install.packages("rioja")
 library(rioja)

 #### plain plot 

 strat.plot(dat_granstat_sub,
           yvar = depths, 
           y.rev = TRUE, 
           ylabel = "Depth below surface (m)", 
           col.line = "grey", # try others that you like
           lwd.line = 2       # ditto
 )


 ########################################################################
 # Now combine camsizer data with pipette data #

 # connect to google sheet
 require(RCurl)
 options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))
 goog <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFhPRHYteGlZb0pqVWxFbWt4cWIzcGc&output=csv"
 library(RCurl)
 pipette <- read.csv(textConnection(getURL(goog)), stringsAsFactors = FALSE)

 # percent retained for camsiser size classes
 perc <- as.vector(na.omit(pipette[,124])) / 100
 cam <- sweep(retained.av.t, MARGIN=1, perc, `*`) 
 # to check rowSums(cam) should be similar to perc * 100

 # percent retained for pipette size classes

 # just get the cols with pipette data and drop rows of NA
 pipette <- na.omit(pipette[,146:151])
 # edit colnames
 names(pipette) <- as.numeric(str_extract(names(pipette), "[.0-9]+")) 
 # combine camsizer and pipette
 comb <- cbind( pipette, cam[,-ncol(cam)] )
 rowSums(comb)


 # plot



 #
 # That's the end of the analysis and visualisation. 
 ############################################################
 # To save all the data objects you've made here, 
 # change the directory to some location on your computer, 
 # note that R uses forward slashes instead of backslashes
 save.image("F:/My Documents/My UW/Teaching/.../camsizer_data.Rdata")
 #
 # The if you need to re-load the data objects you just run
 # this line, with the appropriate directory (ie. same as above)
 #
 load("F:/My Documents/My UW/Teaching/..../camsizer_data.Rdata")
 #
 #
 ###########################################################
 # For my future reference, below is a VBA macro I used to convert
 # all those xle files that the camsizer produces into xls files
 # This has to be run from within Excel as a module in a sheet
 ############################################################  
 Sub OpenAllFilesInFolder()
 Dim path As Variant
 Dim excelfile As Variant
 path = "F:\My Documents\My UW\Teaching\...\"     
 excelfile = Dir(path & "*.xle")
 Do While excelfile <> ""
 Workbooks.Open Filename:=path & excelfile
 excelfile = Dir
 ActiveWorkbook.SaveAs Filename:="F:\My Documents\My UW\Teaching\... \" & excelfile & ".xls", _
 FileFormat:=56, Password:="", WriteResPassword:="", _
 ReadOnlyRecommended:=False, CreateBackup:=False
 ActiveWindow.Close
 Loop
 End Sub
 ############################################################

 ############################################################
 # R script for plotting of basic geoarchaeological data
 # pH, EC, Mag Sus, LOI

 #### get data
 # this block will connect to the internet to access our google sheet
 # then download it directly into a dataframe ready to work on
 # If this is the first time, a little preparation is necessary:
 # In google sheet: File -> Publish to web -> publish all sheets; 
 # Get link to published data -> CSV, all sheets, copy-paste link here:
 googsheet <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFhPRHYteGlZb0pqVWxFbWt4cWIzcGc&single=true&gid=0&output=csv"
 install.packages("RCurl") # only need to do this once per computer
 require(RCurl)
 options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))
 myCsv <- getURL(googsheet)
 data <- read.csv(textConnection(myCsv), stringsAsFactors = FALSE)

 # extract just data for plotting: pH, SOM, CaCO3, MS-LF, MS-FD
 plotting_data <- na.omit(data[,c('Sample.ID',
                                 'Average.pH', 
                                 'mean.EC..automatically.calcuated.',
                                 'LOI.percent.organic.material..Average', 
                                 'LOI.Percent.carbonate.content..Average',
                                 'MS.LF.reading.average.mass.susceptibility',
                                 'MS.FD'
                                 )])
 # rename the columns of the plotting data with shorter names and measurement units
 names(plotting_data) <- c("depth (m)", "pH", "EC", "Soil organic \nmatter \n(% mass)", "Carbonates \n(% mass)", "Magnetic \nsusceptibility \n(SI units)", "Frequency \ndependency \n(%)")
 # replace missing data in google sheet with 0 so it can be handled by the plotting function
 plotting_data[plotting_data == '#DIV/0!']  <- 0      
 # convert all the numbers to numbers (some columns are in character type)
 plotting_data <- as.data.frame(sapply(plotting_data, as.numeric, as.character))
 # create vector for depth of samples
 depth <- na.omit(as.numeric(plotting_data$`depth (m)`))
 # create data frame of eveything except depth
 plotting_data <- plotting_data[,-(names(plotting_data) == 'depth (m)')]

 # setup for plotting
 install.packages("rioja")
 library(rioja)

 #### plain plot 

 x <- strat.plot(plotting_data,
           yvar = depth, 
           y.rev = TRUE, 
           ylabel = "Depth below surface (m)", 
           col.line = "black", # try others that you like
           lwd.line = 1       # ditto
           )
 dev.off() # clear that plot

 #### slightly more fancy plot: with dendrogram showing groups of similar samples
 ?chclust # to get information how the clustering is done
 # add a dendrogram from constrained cluster analysis
 # coniss is a stratigraphically constrained cluster analysis by 
 # method of incremental sum of squares
 diss <- dist(plotting_data)
 clust <- chclust(diss, method = "coniss")
 # broken stick model to suggest significant zones, 3?
 bstick(clust)
 # plot with clusters
 x <- strat.plot(plotting_data,
                yvar = depth, 
                y.rev = TRUE, 
                ylabel = "Depth below surface (m)", 
                col.line = "grey", # try others that you like
                lwd.line = 2,      # ditto
                clust = clust, 
                ) 
 # add lines to indicate clusters, leave out if it looks odd
 addClustZone(x, clust, 6, col = "pink")
 dev.off() # clear that plot

 #### even more fancy plot: with dendrogram and Detrended Correspondence Analysis
 ?decorana # get information about DCA
 # make first half of the plot, just the data
 x <- strat.plot(plotting_data,
                yvar = depth, 
                y.rev = TRUE, 
                ylabel = "Depth below surface (m)", 
                col.line = "grey", # try others that you like
                lwd.line = 2,      # ditto 
                xRight = 0.7)      # shift to the right to make room for the next bit...

 # calculate curves for first two DCA components of our data
 dca <- decorana(abs(plotting_data), iweigh = 0) # 
 sc <- scores(dca, display = "sites", choices = 1:2)
 # add second part of the plot, the DCA curves
 x <- strat.plot(sc,
                yvar = depth, 
                y.rev = TRUE, 
                ylabel = "Depth below surface (m)", 
                col.line = "grey", # try others that you like
                lwd.line = 2,      # ditto
                clust = clust, 
                clust.width=0.08, 
                add=TRUE, 
                xLeft = 0.7, xRight = 0.99,
                y.axis=FALSE)
 dev.off() # clear that plot


 #### biplots: for plotting two of the variables to explore their relationship
 library(scales)
 library(ggplot2)
 library(stringr)
 # change what's in the "": lookback up to line 25 for the proper names
 x <- as.numeric(as.character(plotting_data [,which(colnames(plotting_data)=="Carbonates \n(% mass)")]))
 y <- as.numeric(as.character(plotting_data [,which(colnames(plotting_data )=="Soil organic \nmatter \n(% mass)")]))
 #
 #
 # change the xlab and ylab below
 tmp <- data.frame(cbind(x,y))
 ggplot(tmp, aes(x, y, label=depth)) +                            # make the basic plot object
  scale_x_continuous(limit=c(min(tmp$x),max(tmp$x)), 
                     breaks=round(fivenum(tmp$x),1)) +           # set the locations of the x-axis labels
  # change x label to match the data you've selected:
  xlab(paste("Carbonates (% mass, r = ", round(cor(x,y),4),", p-value = ", 
             round(anova(lm(y~x))$'Pr(>F)'[1],4),")",sep="")) +  # paste in the r and p values with the axis label
  scale_y_continuous(limit=c(min(tmp$y),max(tmp$y)),
                     breaks=round(fivenum(tmp$y),1)) +           # set the locations of the y-axis labels
  # change y label to match the data you've selected:
  ylab("Soil Organic Matter (% mass)") +                   
  geom_text() +                 # specify that the data points are the sample names
  geom_rug(size=0.1) +         # specify that we want the rug plot
  geom_smooth(method="lm", se=FALSE, colour="grey80", aes(group=1)) + 
  # specify a line of best fit calculated by a linear model
  # se is the standard error area, change to TRUE to display it
  theme(panel.background = element_blank(),          # suppress default background
       panel.grid.major = element_blank(),              # suppress default major gridlines
       panel.grid.minor = element_blank(),              # suppress default minor gridlines
       axis.ticks = element_blank(),                    # suppress tick marks
       axis.title.x=element_text(size=12),              # increase axis title size slightly  
       axis.title.y=element_text(size=12, angle=90),    # increase axis title size slightly and rotate
       axis.text.x=element_text(size=12),               # increase size of numbers on x-axis
       axis.text.y=element_text(size=12),               # increase size of numbers on y-axis
       aspect.ratio=1)                                  # force the plot to be square

 ##
 #
 # You may end up making a lot of charts like this to explore your data. But do not put them all in your lab report.


 #### saving plots for use in other documents

 # if the plot function starts with ggplot, you can use this:
 # commonly recommended image resolution is 600 dpi
 width <- 17
 dpi <- 600
 ggsave(
  "filename.png",
  units = "cm",
  width = width,
  height = width, # for a square plot
  dpi = dpi)
 # find the location of this file on your computer
 getwd()

 # best method: save in vector graphics format
 svg('filename.svg')
 # make plot,  run the relevant lines above (no plot will appear)
 # open in Inkscape, tweak, then 'Export Bitmap'
 dev.off()
 # to find the location of the file you just made:
 getwd()

 # quick and dirty method: 
 png('filename.png', h = 4500, w = 4500*1.681, res = 600) # this looks good on my screen
 # you might fiddle with the h and w numbers to make it good on yours 
 # make plot, run the relevant lines above (no plot will appear)
 dev.off()
 # to find the location of the file you just made:
 getwd()


 ###############################################################
 ###############################################################
 ####### working with Carbon Isotope Data from UW IsoLab #######
 # Get the txt file that they emailed you and save it somewhere handy
 # Open Excel or similar spreadsheet program
 # In the spreadsheet program, go to File -> Open
 # find the txt file (you may need to change 'Excel Files' to 
 # 'All Files' in the Open File dialog) and open it
 # choose 'delimited' and 'tab' as the separator
 # inspect the file in the spreadsheet program to see
 # that it looks ok
 # Save the file in the spreadsheet program as a CSV file
 # We'll use this CSV file in R

 # This line will pop up a 'file open' window
 carb <-  read.csv(file.choose(), header=TRUE)
 # find the location of the CSV file, click on it
 # and click open

 # Let's just extract the sample names and the dC13VPDB values
 # by getting on the rows and columns with those data
 carb1 <- data.frame(Sample.ID = as.character(carb[44:74,3]),
                    dC13VPDB = as.numeric(as.character(carb[44:74,'X.18'])))

 # Extract the depths from the sample IDs for plotting
 # first, remove text characters
 tmp <- sub("GHM_", "", as.character(carb1$Sample.ID))
 # second, remove "_" to convert all to centimeters
 tmp1 <- sub("_", "", tmp)
 # third, conver to numeric
 tmp2 <- as.numeric(as.character(tmp1))
 # fourth convert to meters
 tmp3 <- tmp2 / 100
 # put back into table
 carb1$Sample.ID <- tmp3 

 # plot each data point to inspect within-sample variation
 require(ggplot2) # if you get an error here, see these
 # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html
 # http://www.statmethods.net/interface/packages.html
 # http://www.youtube.com/watch?v=u1r5XTqrCTQ
 ggplot(carb1, aes( dC13VPDB, Sample.ID)) +
  geom_point() + 
  scale_y_reverse() +
  theme(axis.text.x = element_text(size = 10)) + 
  theme(axis.text.y = element_text(size = 10)) +
  ylab("Depth below surface (m)") +
  xlab(bquote(delta^13*C[SOM]~"\u2030"~VPDB)) + 
  coord_equal(ratio=2) 

 # we can see that the replicates are very similar
 # and there does seem to be some trend in the data

 # Compute the mean value for replicates of each sample
 carb1.mean <- aggregate(dC13VPDB ~ Sample.ID, data = carb1, FUN = mean)

 # plot mean values
 require(ggplot2)
 ggplot(carb1.mean, aes( dC13VPDB, Sample.ID)) +
  geom_point() + 
  scale_y_reverse() +
  theme(axis.text.x = element_text(size = 10)) + 
  theme(axis.text.y = element_text(size = 10)) +
  ylab("Depth below surface (m)") +
  xlab(bquote(delta^13*C[SOM]~"\u2030"~VPDB)) + 
  coord_equal(ratio=2)


 #### get other geoarch data for GMH
 # this block will connect to the internet to access our google sheet
 # then download it directly into a dataframe ready to work on
 # If this is the first time, a little preparation is necessary:
 # In google sheet: File -> Publish to web -> publish all sheets; 
 # Get link to published data -> CSV, all sheets, copy-paste link here:
 googsheet <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldFhPRHYteGlZb0pqVWxFbWt4cWIzcGc&single=true&gid=0&output=csv"
 require(RCurl)
 # if you get an error here see these
 # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html
 # http://www.statmethods.net/interface/packages.html
 # http://www.youtube.com/watch?v=u1r5XTqrCTQ
 options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))
 myCsv <- getURL(googsheet)
 data <- read.csv(textConnection(myCsv), stringsAsFactors = FALSE)

 # compute interpolated ages to use as the y-axis
 # interpolate age from loess line with depth
 dates <- c(0, as.numeric(na.omit(data$C14.dates.calibrated.median)), 7000)
 depths <- c(0, as.numeric(na.omit(data$date.depth)) ,2.5)
 sequ <- seq(0, 2.5, 0.01)
 age.lo <- loess(dates ~ depths)
 age.pr <- predict(age.lo, newdata = data.frame(depths = sequ))
 plot(age.pr)
 age.pr <- data.frame(age = as.numeric(age.pr), depths = sequ)
 # merge interpolated ages back in with geoarch data
 data1 <- merge(age.pr, data, by.x = 'depths', by.y = 'Sample.ID')

 # extract just data for plotting: pH, SOM, CaCO3, MS-LF, MS-FD
 plotting_data <- (data1[,c('depths', 'age',
                           'Average.pH', 
                           'mean.EC..automatically.calcuated.',
                           'LOI.percent.organic.material..Average', 
                           'LOI.Percent.carbonate.content..Average',
                           'MS.LF.reading.average.mass.susceptibility',
                           'MS.FD', 
                           'mean.dC13VPDB.of.duplicate.samples' 
                           
 )])[1:48,]
 # rename the columns of the plotting data with shorter names and measurement units
 names(plotting_data) <- c("depth (m)", "age", "pH", "EC", "Soil organic \nmatter \n(% mass)", "Carbonates \n(% mass)", "Magnetic \nsusceptibility \n(SI units)", "Frequency \ndependency \n(%)", "dC13VPDB")
 # replace missing data in google sheet with 0 so it can be handled by the plotting function
 plotting_data[plotting_data == '#DIV/0!']  <- 0      
 # convert all the numbers to numbers (some columns are in character type)
 plotting_data <- as.data.frame(sapply(plotting_data, as.numeric, as.character))
 # create vector for depth of samples
 depth <- na.omit(as.numeric(plotting_data$`depth (m)`))
 # create vector for age of samples
 age <- as.numeric(na.omit(as.numeric(plotting_data$age)) / 1000)
 # create data frame of eveything except depth
 plotting_data <- plotting_data[,-(names(plotting_data) == 'depth (m)')]

 # setup for plotting
 library(rioja) # if you get an error here, see these
 # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html
 # http://www.statmethods.net/interface/packages.html
 # http://www.youtube.com/watch?v=u1r5XTqrCTQ

 #### plain plot by age (best one to use)
 x <- strat.plot(plotting_data[1:length(age),-1],
                yvar = age, 
                y.rev = TRUE, 
                ylabel = "calibrated years BP x 1000", 
                col.line = "black", # try others that you like
                lwd.line = 1,       # ditto
                cex.xlabel = 0.75
 )     
 dev.off() # clear that plot

 #### plain plot by depth
 x <- strat.plot(plotting_data[1:length(age),-1],
                yvar = depth, 
                y.rev = TRUE, 
                ylabel = "depth below surface (m)", 
                col.line = "black", # try others that you like
                lwd.line = 1,       # ditto
                cex.xlabel = 0.75
 )     
 dev.off() # clear that plot

 #### slightly more fancy plot: with dendrogram showing groups of similar samples
 ?chclust # to get information how the clustering is done
 # add a dendrogram from constrained cluster analysis
 # coniss is a stratigraphically constrained cluster analysis by 
 # method of incremental sum of squares
 diss <- dist(plotting_data[1:length(age),-1])
 clust <- chclust(diss, method = "coniss")
 # broken stick model to suggest significant zones, 3?
 bstick(clust)
 # plot with clusters
 x <- strat.plot(plotting_data[1:length(age),-1],
                yvar = age, 
                y.rev = TRUE, 
                ylabel = "calibrated years BP x 1000", 
                col.line = "black", # try others that you like
                lwd.line = 1,       # ditto
                cex.xlabel = 0.75,
                clust = clust, 
 ) 
 # add lines to indicate clusters, leave out if it looks odd
 addClustZone(x, clust, 5, col = "red")
 dev.off() # clear that plot


 ############### end of working with carbon isotope data ############################
 ####################################################################################
No results found