-
-
Save benmarwick/1525543 to your computer and use it in GitHub Desktop.
R code related to graphical analysis of a sieved soil sample.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load Data | |
grainData <- read.csv('grainSize.csv', check.names=F, na.strings='--' ) | |
# Calculate Derived Sample Values | |
grainData[['Phi Diameter']] <- -log2( grainData[['Grain Diameter']] ) | |
totalWeight <- sum( grainData[['Sample Weight']] ) | |
grainData[["Percent Retained"]] <- grainData[['Sample Weight']] / totalWeight | |
grainData[["Cumulative Percent"]] <- cumsum( grainData[["Percent Retained"]] ) | |
grainData[['Percent Finer']] <- 1 - grainData[['Cumulative Percent']] | |
# Create Grain Diameter vs. Percent Finer plot | |
library(ggplot2) | |
# qplot() is the basic plotting function in ggplot2. We pass the name | |
# of the X variable and then the name of the Y variable-- since we are | |
# also passing the grainData data frame, the X any Y values are interpreted | |
# to be the names of columns in grainData. Otherwise the environment would | |
# be searched for variables matching X and Y. In this case, the names must | |
# be enclosed by backtick quotes-- " ` " as the column names contain spaces. | |
diamVFiner <- qplot( `Grain Diameter`, `Percent Finer`, data = grainData, | |
xlab = 'Grain Diameter (mm)', ylab = 'Cumulative Weight Passed (%)' ) | |
# qplot() returns an "object" which we have stored as diamVFiner. The real | |
# power of ggplot2 now comes into play as we wish to modify the plot by | |
# adding things such as logarithmic and percentage scales. This is as | |
# simple as adding objects together: | |
# By default the object produced by qplot() contains a scatterplot. Lines | |
# connecting the points may be added using a geom_line() object: | |
diamVFiner <- diamVFiner + geom_line() | |
# A scale object may be added to affect the way in which plot axes are | |
# scaled: | |
diamVFiner <- diamVFiner + scale_x_log10() + scale_y_continuous(formatter = "percent") | |
# The final plot is output to the current plotting device by running it through the | |
# print() function: | |
dev.new() | |
print( diamVFiner ) | |
# Notice that ggplot automatically discards data points where one of the | |
# coordinates is "NA"-- such as the wash weight (the material that passed | |
# all the sieves). The wash really doesn't have a well-defined diameter | |
# so NA is an appropriate value for it. | |
# If you don't like the grey background, try adding theme_bw(): | |
dev.new() | |
print( diamVFiner + theme_bw() ) | |
# Add some interpolated data, such as the location of d10, d50 and d90: | |
diamStats <- with( grainData, approx( `Percent Finer`, log10(`Grain Diameter`), | |
xout = c( 0.1, 0.5, 0.9 ), method = 'linear' )) | |
diamStats <- as.data.frame( diamStats ) | |
names( diamStats ) <- c( 'Percent Finer', 'Grain Diameter' ) | |
diamStats[['label']] <- c( 'd[10]', 'd[50]', 'd[90]' ) | |
# The transformation to log10() and then reverse transformation using | |
# 10 ^ is done to correct for the effects of round-off error which | |
# would cause the interpolated points to no longer lie exactly on the | |
# lines connecting the data after ggplot applies scale_x_log10() | |
diamStats[['Grain Diameter']] <- 10 ^ diamStats[['Grain Diameter']] | |
diamVFiner <- diamVFiner + geom_point( data = diamStats, color = 'red' ) + | |
geom_text( aes( x = 0.95 * `Grain Diameter`, | |
y = 1.005 * `Percent Finer`, | |
label = label ), data = diamStats, | |
hjust = 1, vjust = 0, parse = TRUE ) + | |
theme_bw() | |
dev.new() | |
print( diamVFiner ) | |
# For the Cumulative Probability Curve, we plot Cumulative Percent | |
# versus phi diameter and use a probability scale for the Y axis: | |
# | |
# Note: | |
# scale_y_probit basically runs all the data values through the | |
# quantile function for the normal distribution, qnorm(), and | |
# uses the transformed values as plotting coordinates while | |
# retaining the untransformed values for use as labels. | |
# Therefore it is equivalent to: | |
# | |
# qplot( `Phi Diameter`, qnorm( grainData[['Cumulative Percent']] ), | |
# data = grainData ) | |
# | |
# Except the above won't have nice percentage-based labels. | |
cumProbPlot <- qplot( `Phi Diameter`, `Cumulative Percent`, data = grainData, | |
xlab = expression(paste("Phi Diameter ", -log[2](d),)), | |
ylab = 'Cumulative Weight Retained (%)' ) + | |
geom_line() + | |
scale_y_probit( formatter = 'percent' ) + | |
theme_bw() | |
# Add interpolated data, such as the location of phi84, phi75, ect. | |
phiDiams <- with( grainData, approx( qnorm(`Cumulative Percent`), `Phi Diameter`, | |
xout = qnorm(c( 0.05, 0.16, 0.25, 0.50, 0.75, 0.84, 0.95 )), method = 'linear' )) | |
phiDiams <- as.data.frame( phiDiams ) | |
names( phiDiams ) <- c( 'Cumulative Percent', 'Phi Diameter' ) | |
phiDiams[['label']] <- paste( 'phi[', c(5,16,25,50,75,84,95), ']', sep = '' ) | |
# Again the qnorm() pnorm() business is to eliminate round-off | |
# error so that the points line up with the lines on the graph. | |
# | |
# Question: Which method actually contains the "error": | |
# | |
# - Interpolating Phi Diameter from directly from Cumulative Percent. | |
# | |
# - Interpolating Phi Diameter from qnorm( Cumulative Percent )-- | |
# what you would be doing if you plotted the points on | |
# probability paper and tried to read the diameters off. | |
phiDiams[['Cumulative Percent']] <- pnorm( phiDiams[['Cumulative Percent']] ) | |
cumProbPlot <- cumProbPlot + | |
geom_point( data = phiDiams, color = 'red' ) + | |
geom_text( aes(label = label ), data = phiDiams, | |
hjust = 1, vjust = 0, parse = TRUE ) | |
dev.new() | |
print( cumProbPlot ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sieve Number | Grain Diameter | Sample Weight | |
---|---|---|---|
5/16 | 8.000 | 24 | |
3 1/2 | 5.600 | 3 | |
5 | 4.000 | 19 | |
7 | 2.800 | 19.6 | |
10 | 2 | 27.2 | |
18 | 1.000 | 84.7 | |
35 | 0.500 | 90.1 | |
45 | 0.355 | 115.5 | |
60 | 0.250 | 93.5 | |
wash | -- | 19.3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment