Last active
January 16, 2020 00:34
-
-
Save benmarwick/7151525 to your computer and use it in GitHub Desktop.
Code for working with Horiba LPSA output and Mag Sus data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########### working with the Horiba LA-950 data ###################### | |
# First step is to convert ngb files to txt, since ngb is not a format | |
# that R can read. | |
# 1. Open the LA-950 for windows program (I have a copy of the software you can have) | |
# 2. Go to the 'File List View' in the left panel, select all | |
# the files and right-click on them and select 'delete', then | |
# choose 'delete from file view' (not delete from computer) | |
# 3. Go to 'File List Utility' on the drop-down menus and click 'Import' | |
# 4. Click the checkbox 'include sub folders' then click 'change folder' | |
# and select the folder that contains other folders of ngb files | |
# 5 Select all the files in the 'Add/Update File List' window, then click 'Add/Update' | |
# Those files should appear in the 'File List View' to the left | |
# 6. Here's the painful bit... select each file, one by one and... | |
# you may be able to skip this if your graphs stop at 2000 um (because that's the sieve size)
# On the main toolbar at the top | |
# of the screen select "Graph" then "Scale" and make | |
# sure it is scaled up to 3000. Make sure the 'reCalculation' | |
# box is checked also. After the graph is fixed, go | |
# to the main file menu and 'File' then 'Save As' | |
# for each ngb file. Go through each file, one by one | |
# and update them. Then look through the 'File List View', | |
# select all the non-converted files and right-click on | |
# one of the selected ones to delete them all. It's a real nuisance... | |
# 7. click on 'Export' in the main menu bar, then click | |
# 'Conversion', change 'Data Type' to Comma and then click the 'SetUp'
# button. Using the little arrow buttons, move everything from | |
# the RIGHT side to the LEFT side, except for 'Result File Name' and | |
# click 'OK'. Then click 'OK' on the 'Data Conversion' window. | |
# 8. In the 'File List View', select all the files you want to convert to txt | |
# Then click on 'Export' in the 'File List View' panel | |
# 9. Choose 'Unit File' then 'OK', this will make a single text file from | |
# all the selected ngb files | |
# Now we're ready to get these files into R! | |
## Get the txt file (exported from the Horiba software) into R
# NOTE(review): hard-coded absolute path -- edit this to match your machine
data <- read.csv("C:\\Users\\marwick\\UW-Dropbox\\Dropbox\\PL1_LPSA_data_WI14\\PL1_LPSA_data_WI14.txt", header = FALSE, stringsAsFactors = FALSE)
# Delete the first row and the first 22 columns to keep only sample
# names, size classes and sample data. Column 25 and the very last
# column are also dropped because they are empty.
data1 <- data[-1, -c(1:22, 25, ncol(data))]
# fill first column with numbers and give it a name, this
# will be useful later
# data1[,1] <- c(1:nrow(data1))
# names(data1)[1]<-c("num")
# Convert a few errant characters to numbers (anything non-numeric
# becomes NA, with a coercion warning).
data1[, 3:ncol(data1)] <- as.numeric(as.character(unlist(data1[, 3:ncol(data1)])))
# Reshape to long form. Every third column starting at 3 holds size
# classes and every third column starting at 5 holds measurements --
# presumably this layout comes from the Horiba export; confirm against
# your own file.
data1_l <- reshape(data1, idvar = 1:2,
                   varying = list(size = colnames(data1[seq(from = 3, to = ncol(data1), by = 3)]),
                                  meas = colnames(data1[seq(from = 5, to = ncol(data1), by = 3)])),
                   direction = "long")
# Extract just measurements, size classes and sample labels.
# NOTE(review): the V24/V26/V27/V303 column names depend on the exact
# export layout -- verify them against your own converted file.
size <- as.numeric(na.omit(unique(data1_l$V26)))
measurements <- data1_l[, c(which(colnames(data1_l) == "V24"),
                            which(colnames(data1_l) == "V27"):which(colnames(data1_l) == "V303"))]
names(measurements) <- c("sample.id", size)
require(reshape2)
data2 <- melt(measurements, id.vars = 'sample.id')
data2$variable <- as.numeric(as.character(data2$variable))
data2$value <- as.numeric(as.character(data2$value))
# plot to have a quick look, all samples overlaid
# No - this takes a very long time for a few hundred files...
# xaxbreaks <- c(4000, 2000, 1000, 500, 250, 125, 63, 31, 16, 8, 4, 2, 0.5, 0.12, 0.04)
require(ggplot2) # if you get an error here, see these
# http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html
# http://www.statmethods.net/interface/packages.html
# http://www.youtube.com/watch?v=u1r5XTqrCTQ
# ggplot(data2, aes(group = sample.id, variable, value)) +
#   geom_line() +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
#   scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) +
#   geom_vline(xintercept = c(62, 4), colour = 'grey40', size = 1) +
#   xlab("Particle Size (um)") +
#   geom_text(aes(x=2,y=max(data2$value),label = "Clay")) +
#   geom_text(aes(x=30,y=max(data2$value),label = "Silt")) +
#   geom_text(aes(x=1900,y=max(data2$value),label = "Sand"))
# inspect for differences between replicates | |
# ggplot(data2, aes(sample.id, value)) + | |
# geom_boxplot() + | |
# scale_y_log10() | |
# any significant difference between the replicates? | |
# require(coin) # if you get an error here, see these | |
# http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html | |
# http://www.statmethods.net/interface/packages.html | |
# http://www.youtube.com/watch?v=u1r5XTqrCTQ | |
# oneway_test(value ~ as.factor(sample.id), data = data2, | |
# distribution = approximate(B = 99)) | |
# # compute mean values for each sub-sample | |
# # replace the pattern of digit-rs with nothing to group replicates together | |
# measurements1 <- measurements | |
# measurements1$sample.id <- gsub("[[:digit:]]-rs", "", measurements1$sample.id) | |
# # get averages of multiple runs on the same sub-sample | |
# measurements1_means <- aggregate(. ~ sample.id,data = measurements1, mean) | |
# | |
# # reshape for plotting
# require(reshape2) # if you get an error here, see these | |
# # http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html | |
# # http://www.statmethods.net/interface/packages.html | |
# # http://www.youtube.com/watch?v=u1r5XTqrCTQ | |
# data3 <- melt(measurements1_means, id.vars = 'sample.id' ) | |
# data3$variable <- as.numeric(as.character(data3$variable)) | |
# data3$value <- as.numeric(as.character(data3$value)) | |
# | |
# # plot means of each sub-sample | |
# xaxbreaks <- c(4000, 2000, 1000, 500, 250, 125, 63, 31, 16, 8, 4, 2, 0.5, 0.12, 0.04) | |
# | |
# library(ggplot2) | |
# # ggplot(data3, aes(group = sample.id, variable, value)) + | |
# # geom_line() + | |
# # theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
# # scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) + | |
# # geom_vline(xintercept = c(62, 4), colour = 'grey40', size = 1) + | |
# # xlab("Particle Size (um)") + | |
# # geom_text(aes(x=2,y=max(data2$value),label = "Clay")) + | |
# # geom_text(aes(x=30,y=max(data2$value),label = "Silt")) + | |
# # geom_text(aes(x=1900,y=max(data2$value),label = "Sand")) | |
# | |
# compute mean values for each sample | |
# replace the pattern of digit-digit-rs with nothing to group replicates together | |
# Compute mean values for each sample: strip the sub-sample/run suffix
# from each row ID so replicate runs share one label, then average them.
measurements2 <- measurements
############ ALERT #########
############ ALERT ######### ############ ALERT #########
# The regex on the next line is dependent on
# the sample labelling scheme -- pay attention here! The goal is to remove the
# sub-sample and machine run numbers from the row ID so we are just left
# with the bag label
# for MKII
# measurements2$sample.id <- gsub("[[:digit:]]-[[:digit:]]-rs", "", measurements2$sample.id)
# for PL1
measurements2$sample.id <- gsub("-[[:digit:]]-[[:digit:]]", "", measurements2$sample.id)
# Average the multiple runs that now share a sample.id.
# NOTE(review): the original computed this with aggregate() and then
# immediately overwrote the result with a data.table version (after an
# install.packages() call from R-Forge on every run). Keep the base-R
# aggregate() as the single source of truth; the data.table route is
# left below as a faster alternative for large data sets -- install
# data.table once from the console, not inside this script.
measurements2_means <- aggregate(. ~ sample.id, data = measurements2, mean)
# library(data.table)
# DT <- data.table(measurements2)
# measurements2_means <- DT[, lapply(.SD, mean), by = sample.id]
# reshape for plotting
require(reshape2)
data4 <- melt(measurements2_means, id.vars = 'sample.id')
data4$variable <- as.numeric(as.character(data4$variable))
data4$value <- as.numeric(as.character(data4$value))
# plot means of each sample
# x-axis tick positions: particle sizes in um, shown on a log scale
xaxbreaks <- c(4000, 2000, 1000, 500, 250, 125, 63, 31, 16, 8, 4, 2, 0.5, 0.12, 0.04)
library(ggplot2)
# all samples overlaid in a single panel:
# ggplot(data4, aes(group = sample.id, variable, value)) +
#   geom_line() +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
#   scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) +
#   geom_vline(xintercept = c(62, 4), colour = 'grey40', size = 1) +
#   xlab("Particle Size (um)") +
#   geom_text(aes(x=2,y=max(data2$value),label = "Clay")) +
#   geom_text(aes(x=30,y=max(data2$value),label = "Silt")) +
#   geom_text(aes(x=1900,y=max(data2$value),label = "Sand"))
# plot by facets (one sample per panel); the vertical lines at 62 and
# 4 um mark the sand/silt and silt/clay boundaries
# NOTE(review): label y-positions use max(data2$value) (the raw
# measurements) while the data plotted are data4 (the means) --
# presumably deliberate so labels sit above every curve; confirm.
ggplot(data4, aes(group = sample.id, variable, value)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) +
  geom_vline(xintercept = c(62, 4), colour = 'grey40', size = 1) +
  xlab("Particle Size (um)") +
  geom_text(aes(x = 2, y = max(data2$value), label = "Clay")) +
  geom_text(aes(x = 30, y = max(data2$value), label = "Silt")) +
  geom_text(aes(x = 1900, y = max(data2$value), label = "Sand")) +
  facet_wrap(~ sample.id)
# # how to plot just one sample at a time... | |
# samp <- "MKII-2012-SW-sec-S-1.00-" # change this to get a different sample | |
# data5 <- data4[ data4$sample.id == samp, ] | |
# | |
# library(ggplot2) | |
# ggplot(data5, aes(group = sample.id, variable, value)) + | |
# geom_line() + | |
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
# scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) + | |
# geom_vline(xintercept = c(62, 4), colour = 'grey40', size = 1) + | |
# xlab("Particle Size (um)") + ylab("Percent mass") + | |
# geom_text(aes(x=2,y=max(data2$value),label = "Clay")) + | |
# geom_text(aes(x=30,y=max(data2$value),label = "Silt")) + | |
# geom_text(aes(x=1900,y=max(data2$value),label = "Sand")) | |
# | |
# # how to plot just a few samples at a time... | |
# | |
# # change what's in the c() to get different samples | |
# samps <- c("MKII-2012-SW-sec-S-1.00-", "MKII-2012-SW-sec-S-4.20-") | |
# data6 <- data4[ data4$sample.id %in% samps, ] | |
# library(ggplot2) | |
# ggplot(data6, aes(colour = sample.id, variable, value)) + | |
# geom_line(size = 1) + | |
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
# scale_x_log10(breaks = xaxbreaks, labels = xaxbreaks) + | |
# geom_vline(xintercept = c(62, 4), colour = 'grey40', size = 1) + | |
# xlab("Particle Size (um)") + ylab("Percent mass") + | |
# geom_text(aes(x=2,y=max(data2$value),label = "Clay")) + | |
# geom_text(aes(x=30,y=max(data2$value),label = "Silt")) + | |
# geom_text(aes(x=1900,y=max(data2$value),label = "Sand")) + | |
# theme_minimal() | |
# compute summary stats per sample
require(G2Sd)
# G2Sd wants samples as columns and size classes as rows, so transpose
# the means table
stat <- data.frame(t(measurements2_means), stringsAsFactors = FALSE)
# the first row of the transpose holds the sample names; keep them for later
tmp <- as.vector(as.matrix(stat[1, 1:ncol(stat)])[1, ])
# delete the row holding the sample-name text
stat <- data.frame(stat[-1, ])
# create a column of size classes to order the rows by
stat$size <- as.numeric(size)
# order rows by descending size class
stat <- stat[order(-as.numeric(stat$size)), ]
# convert everything to numeric
stat <- data.frame(apply(stat, 2, function(x) as.numeric(as.character(x))))
# delete size column, not needed anymore
stat <- stat[, -which(names(stat) == 'size'), drop = FALSE]
# put the sample names back on as column names
colnames(stat) <- tmp
# add a final size class of zero
stat <- rbind(stat, c(rep(0, ncol(stat))))
# add size classes as row names
rownames(stat) <- round(c(rev(size), 0), 5)
# Here we can compute sand-silt-clay %s
# using Wentworth classes...
# sand: 2000 - 62.5 um
# silt: 62.5 - 4 um
# clay: < 4 um
# add a size column to subset by
stat$size <- as.numeric(as.character(rownames(stat)))
# NOTE: each colSums() below also sums the 'size' column itself, which
# creates a spurious 'size' element; it ends up as the last row of
# three_classes and is dropped a few lines further down.
sand <- colSums(stat[stat$size >= 62.5 & stat$size < 2000, ])
silt <- colSums(stat[stat$size >= 4 & stat$size < 62.5, ])
clay <- colSums(stat[stat$size >= 0 & stat$size < 4, ])
# combine into one data frame (rows = samples, cols = texture classes)
three_classes <- data.frame(CLAY = clay,
                            SILT = silt,
                            SAND = sand)
# drop the spurious 'size' row produced by summing the size column
three_classes <- three_classes[-nrow(three_classes), ]
# THIS IS SENSITIVE TO SAMPLE LABELLING - TAKE CARE!
# all we want here is the bag number, so keep only the last three
# characters of each row name
row.names(three_classes) <- substr(row.names(three_classes),
                                   nchar(row.names(three_classes)) - 2,
                                   nchar(row.names(three_classes)))
# remove the size column from stat, it has served its purpose
stat <- stat[, !names(stat) == 'size']
# plot samples in ternary plot
# see here for more details:
# http://cran.r-project.org/web/packages/soiltexture/vignettes/soiltexture_vignette.pdf
require(soiltexture)
# Clear the plotting area. Guarded: a bare dev.off() stops the script
# with an error when no graphics device is open (dev.cur() == 1 means
# only the null device exists).
if (dev.cur() > 1) dev.off()
# draw triangle with samples plotted
# Display the USDA texture triangle:
geo <- TT.plot(class.sys = "USDA.TT",
               cex.main = 0.75, # these are font sizes, adjust as you like
               cex.axis = 0.75,
               cex.lab = 0.75)
# Overlay each sample's label at its clay/silt/sand position
TT.text(
  tri.data = three_classes,
  geo = geo,
  labels = row.names(three_classes),
  font = 2,
  col = "blue",
  tri.sum.tst = FALSE, # don't insist the three classes sum to exactly 100
  cex = 0.75 # size of point labels, feel free to adjust
)
# get age values to use on y-axis of stratigraphic plot
# depths <- as.numeric(row.names(three_classes))
# V_interpolator <- Vectorize(interpolator)
# ages <- c(0, unlist(V_interpolator(depths-1)))
# Stratigraphic plot: lower-case the class names and order the columns
# sand-silt-clay for plotting
names(three_classes) <- tolower(names(three_classes))
three_classes <- three_classes[c("sand", "silt", "clay")]
if (dev.cur() > 1) dev.off() # clear plotting area (guarded against "no device open")
par(oma = c(2, 2, 2, 2)) # widen the outer margins
require(rioja)
# if we have ages
# strat.plot(three_classes,
#            yvar = ages,
#            y.rev = TRUE,
#            ylabel = "Cal. years BP (x 1000)",
#            col.line = "black", # try others that you like
#            lwd.line = 1,
#            scale.percent=TRUE)
#
# dev.off() # clear plotting area
# par(oma=c(2,2,2,2))
#
# strat.plot(three_classes,
#            yvar = as.numeric(row.names(three_classes))-1,
#            y.rev = TRUE,
#            ylabel = "Depth below surface (m)",
#            col.line = "black", # try others that you like
#            lwd.line = 1,
#            scale.percent=TRUE)
# when unsure about depths or ages, plot against the row index
strat.plot(three_classes,
           yvar = 1:nrow(three_classes),
           y.rev = TRUE,
           ylabel = "Depth below surface (m)",
           col.line = "black", # try others that you like
           lwd.line = 1,
           scale.percent = TRUE)
# If the sample IDs are not in order, put them in order:
# strip all non-digits from the row names to recover the numeric ID
three_classes$ID <- as.numeric(gsub("\\D", "", row.names(three_classes)))
library(dplyr)
# sort by ID
three_classes <- arrange(three_classes, ID)
# put the ID back as the row name, then drop the ID column
row.names(three_classes) <- three_classes$ID
three_classes <- three_classes[, !(names(three_classes) %in% "ID")]
# now plot with sample IDs in order
strat.plot(three_classes,
           yvar = as.numeric(gsub("\\D", "", row.names(three_classes))),
           y.rev = TRUE,
           ylabel = "Depth below surface (m)",
           col.line = "black", # try others that you like
           lwd.line = 1,
           scale.percent = TRUE)
#### slightly more fancy plot: with dendrogram showing groups of similar samples
# ?chclust # to get information how the clustering is done
# Add a dendrogram from constrained cluster analysis.
# coniss is a stratigraphically constrained cluster analysis by
# method of incremental sum of squares
diss <- dist(three_classes)
clust <- chclust(diss, method = "coniss")
# broken stick model to suggest significant zones, 3?
bstick(clust)
# Plot with clusters and ages on the y-axis.
# NOTE(review): 'ages' is only created by the commented-out
# interpolation code above, so running this version as-is stopped the
# script with "object 'ages' not found". It is left commented out --
# uncomment once 'ages' has been computed.
# dev.off() # clear plotting area
# par(oma=c(2,2,2,2))
# x <- strat.plot(three_classes,
#                 yvar = ages,
#                 y.rev = TRUE,
#                 ylabel = "Cal. years BP (x 1000)",
#                 col.line = "black", # try others that you like
#                 lwd.line = 1, # ditto
#                 clust = clust)
# # add lines to indicate clusters, leave out if it looks odd
# addClustZone(x, clust, 3, col = "red")
# dev.off() # clear that plot
# or if no dates, plot against the numeric sample IDs
if (dev.cur() > 1) dev.off() # clear plotting area (guarded)
par(oma = c(2, 2, 2, 2))
x <- strat.plot(three_classes,
                yvar = as.numeric(gsub("\\D", "", row.names(three_classes))),
                y.rev = TRUE,
                ylabel = "Sample ID",
                col.line = "black", # try others that you like
                lwd.line = 1,
                scale.percent = TRUE,
                clust = clust)
# add lines to indicate clusters, leave out if it looks odd
addClustZone(x, clust, 5, col = "red")
dev.off() # clear that plot
# Now compute mean, sd and kurtosis of the grain size distributions.
# G2Sd::granstat() only accepts these size classes:
gransize <- c(25000, 20000, 16000, 12500, 10000, 8000,
              6300, 5000, 4000, 2500, 2000, 1600, 1250, 1000, 800,
              630, 500, 400, 315, 250, 200, 160, 125, 100, 80,
              63, 50, 40, 0)
# Interpolate from the measured classes to the allowed sizes.
# revsize = measured size classes from finest to coarsest, plus a final zero
revsize <- c(rev(size), 0)
# Fit a loess curve for each sample (each column of stat).
# NOTE(review): the original also passed 'data = stat' here, but stat
# has no 'revsize' column, so the formula resolved 'revsize' from the
# calling environment anyway; the misleading data argument is dropped.
loes <- lapply(1:ncol(stat), function(x) loess(stat[, x] ~ revsize))
# predict values at the allowed sizes (only those below 3000 um)
predi <- lapply(loes, function(i) predict(i, data.frame(revsize = gransize[gransize < 3000])))
# draw quick diagnostic plots of the interpolated distributions
if (dev.cur() > 1) dev.off() # clear previous plot (guarded)
# set up a grid of plots
par(mfrow = c(2, 3))
sapply(predi, function(i) plot(i))
# collect the per-sample predictions into one table
interp_table <- data.frame(predi)
# row names are the allowed size classes
row.names(interp_table) <- gransize[gransize < 3000]
# remove column of sizes
# interp_table <- interp_table[, -which(names(interp_table) == 'revsize'), drop=FALSE]
# loess can overshoot below zero: clamp negative predictions to zero
interp_table[interp_table < 0] <- 0
# give column names back for sample IDs
colnames(interp_table) <- tmp
# Compute particle size stats.
# granstat() makes a table of commonly used grain size stats; we want
# mean, sd and skewness. Note that when modes = TRUE you must click the
# modal point on each plot and press escape to move to the next sample;
# modes = FALSE here so no interaction is needed.
# Definitions of the terms used can be found here
# http://cran.r-project.org/web/packages/G2Sd/G2Sd.pdf or by
# typing ?granstat at the R prompt
stats <- as.data.frame(t(granstat(interp_table, statistic = "all", aggr = TRUE, modes = FALSE)))
# keep just the columns we need
stats <- stats[, c('mean.arith.um',
                   'sd.arith.um',
                   'skewness.arith.um',
                   'kurtosis.arith.um')]
# convert to a data frame of numeric type
stats <- data.frame(apply(stats, 2, function(x) as.numeric(as.character(x))))
# give sensible row names
# rownames(stats) <- as.numeric(unlist(regmatches( colnames(psd_t),gregexpr("[[:digit:]]+\\.*[[:digit:]]*", colnames(psd_t)))))
# | |
require(rioja)
# ?chclust # to get information how the clustering is done
# Add a dendrogram from constrained cluster analysis.
# coniss is a stratigraphically constrained cluster analysis by
# method of incremental sum of squares
diss <- dist(stats)
clust <- chclust(diss, method = "coniss")
# broken stick model to suggest significant zones, 3?
# bstick(clust)
# Plot with clusters.
# NOTE(review): the original passed 'yvar = ages', but 'ages' is only
# defined by commented-out code earlier in the script, so the call
# stopped with "object 'ages' not found". Plot against the sample index
# instead; swap 'ages' (and the ylabel) back in once ages exist. A
# stray trailing comma in the original call is also removed.
# par(ask = T) # prompt for user
x <- strat.plot(stats,
                yvar = 1:nrow(stats),
                y.rev = TRUE,
                ylabel = "Sample index",
                col.line = "black", # try others that you like
                lwd.line = 1, # ditto
                clust = clust)
# add lines to indicate clusters, leave out if it looks odd
addClustZone(x, clust, 3, col = "red")
# Now save the output of the statistical analysis as a csv file to open
# in Excel for easy copy-pasting. Note that the output is very
# extensive and you must not include all of it in your lab report --
# be selective about what you take from this table.
write.csv(stats, file = "grain_size_stats.csv")
# find the folder where this new file was created
getwd()
###############################################################################
############ Interpolate age from depth using local fitting (aka loess) #######
# Assuming that we have a data frame called 'dates' that has a numeric
# variable 'Depth.m' (depth in meters) and another called 'cal.median'
# (an age determination in kya)...
span <- 0.45 # this has a big influence on the shape of the curve, experiment with it!
cal.date.lo <- loess(cal.median ~ Depth.m, dates, span = span)
# predict ages on a regular 1 cm depth grid from 0 to 5 m
depth_grid <- seq(0, 5, 0.01)
cal.date.pr <- predict(cal.date.lo, data.frame(Depth.m = depth_grid))
plot(cal.date.pr) # inspect to see that it looks right
# Keep the real depths alongside the predicted ages.
# FIX(review): the original derived 'depth' from names(cal.date.pr),
# which are the 1-based indices of the prediction vector ("1".."501"),
# not depths -- so interpolator() returned the age one grid step
# shallower than the depth asked for.
cal.date.pr <- data.frame(age = unname(cal.date.pr), depth = depth_grid)
# Return the loess-estimated age (in ky BP) at depth i (in meters).
# Depths are matched to the nearest centimetre on the grid; the integer
# round() comparison avoids floating-point equality problems with seq().
interpolator <- function(i){
  cal.date.pr[round(cal.date.pr$depth * 100) == round(i * 100), ]$age
}
# Examples of use (replace the number in brackets with the depth you're
# interested in)
interpolator(0.4) # where 0.4 is the depth in meters
interpolator(1.0) # where 1.0 is the depth in meters
# and so on; note that you can't get the age of a depth below the lowest date
max(cal.date.pr$depth) # depth of lowest date, in meters
interpolator(max(cal.date.pr$depth)) # age at max depth
############### End of working with Horiba LPSA data ##########################
###############################################################################
############## Working with Magnetic susceptibility data ######################
# get data from google sheet
# connect to google sheet
require(RCurl) # if you get an error here, see these
# http://www.dummies.com/how-to/content/how-to-install-load-and-unload-packages-in-r.html
# http://www.statmethods.net/interface/packages.html
# http://www.youtube.com/watch?v=u1r5XTqrCTQ
# Point curl at the CA bundle shipped with RCurl and relax peer
# verification so getURL() can reach Google over https
options(RCurlOptions = list(capath = system.file("CurlSSL", "cacert.pem", package = "RCurl"), ssl.verifypeer = FALSE))
# in google spreadsheet, go to file-> publish to web -> get link to publish to web -> get csv file
goog <- "https://docs.google.com/spreadsheet/pub?key=0As7CmPqGXTzldG1qZ2Z6UHJnZ2lyWUlFOGFpRVdTWHc&single=true&gid=0&output=csv"
data <- read.csv(textConnection(getURL(goog)), stringsAsFactors = FALSE)
# First, analyse low frequency mass susceptibility:
# normalise each reading to a 10 g equivalent sample mass
data$LF1 <- with(data, LF.1st.round * (10 / Mass.g.of.LF.1st.round))
data$LF2 <- with(data, LF.2nd.round * (10 / Mass.g.of.LF.2nd.round))
data$LF3 <- with(data, LF.3rd..round * (10 / Mass.g.of.LF.3rd.round))
# reshape for plotting
require(reshape2) # if you get an error here, see the links above
data_melt <- melt(data, id.vars = "Sample.ID", measure.vars = c("LF1", "LF2", "LF3"))
# plot all measurements
require(ggplot2) # if you get an error here, see the links above
ggplot(data_melt, aes(Sample.ID, value, colour = variable)) +
  geom_point()
# compute averages for the three replicates
data_means <- aggregate(. ~ Sample.ID, data = data_melt, mean)
# plot mean value per sample
ggplot(data_means, aes(Sample.ID, value)) +
  geom_point() +
  stat_smooth(se = TRUE, span = 0.15)
# change point detection
require(changepoint) # if you get an error here, see the links above
data_change <- cpt.mean(data_means$value, method = 'BinSeg')
# data_change <- cpt.meanvar(data_means$value,test.stat='Normal',method='PELT', penalty="BIC")
# plot results
if (dev.cur() > 1) dev.off() # guarded: dev.off() errors if no device is open
# (a stray extra comma in the original plot() call is removed here)
plot(data_change, type = 'l', cpt.col = 'red', xlab = 'Depth below surface (m)',
     ylab = 'Average MS', cpt.width = 2,
     main = "Changepoints in Magnetic Susceptibility values", xaxt = 'n')
axis(1, at = round(seq(0, nrow(data_means), length.out = 10), 2),
     labels = round(seq(0, max(data_means$Sample.ID) - 1, length.out = 10), 2))
# what are the depths of the change points? Remember that 'Sample.ID' is
# the actual depth below the surface plus 1 m.
data_means[slot(data_change, "cpts"), ]
# Now look at frequency dependency, first the high-frequency (HF) data:
# normalise each reading to a 10 g equivalent sample mass, as for LF
data$HF1 <- with(data, HF.1st.round * (10 / Mass.g.of.HF.1st.round))
data$HF2 <- with(data, HF.2nd.round * (10 / Mass.g.of.HF.2nd.round))
data$HF3 <- with(data, HF.3rd..round * (10 / Mass.g.of.HF.3rd.round))
# reshape for plotting
require(reshape2)
data_melt1 <- melt(data, id.vars = "Sample.ID", measure.vars = c("HF1", "HF2", "HF3"))
# plot all measurements
require(ggplot2)
ggplot(data_melt1, aes(Sample.ID, value, colour = variable)) +
  geom_point()
# compute averages for the three replicates
data_means1 <- aggregate(. ~ Sample.ID, data = data_melt1, mean)
# plot mean value per sample
ggplot(data_means1, aes(Sample.ID, value)) +
  geom_point() +
  stat_smooth(se = TRUE, span = 0.15)
# compute mass specific dual frequency dependent susceptibility
data$LF_mean <- rowMeans(data[, c("LF1", "LF2", "LF3")])
data$HF_mean <- rowMeans(data[, c("HF1", "HF2", "HF3")])
# percent frequency dependence
data$FD <- with(data, (100 * ((LF_mean - HF_mean) / LF_mean)))
# mass-specific frequency dependence
# NOTE(review): '/ 10 / 16' divides by 160 overall; the original
# comment says 16 is the approx mean sample mass (g) -- confirm the
# intended divisor before relying on the absolute values
data$msFD <- with(data, (LF_mean - HF_mean) / 10 / 16) # approx mean sample mass
ggplot(data, aes(Sample.ID, msFD)) +
  geom_point() +
  stat_smooth(se = TRUE, span = 0.15) +
  ylab("mass specific Frequency dependency")
# plot FD against LF
ggplot(data, aes(LF_mean, msFD)) +
  geom_point() +
  ylab("Frequency dependency") +
  xlab("Magnetic susceptibility") +
  theme_minimal(base_size = 18)
###############################################################################
############## End of working with Magnetic susceptibility data ###############
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment