benmarwick · December 28, 2011 00:16
diff --git a/grainAnalysis.R b/grainAnalysis.R
 # Load Data
 grainData <- read.csv('grainSize.csv', check.names=F, na.strings='--' )

 # Calculate Derived Sample Values
 grainData[['Phi Diameter']] <- -log2( grainData[['Grain Diameter']] )
 totalWeight <- sum( grainData[['Sample Weight']] )
 grainData[["Percent Retained"]] <- grainData[['Sample Weight']] / totalWeight
 grainData[["Cumulative Percent"]] <- cumsum( grainData[["Percent Retained"]] )
 grainData[['Percent Finer']] <- 1 - grainData[['Cumulative Percent']]

 # Create Grain Diameter vs. Percent Finer plot

 library(ggplot2)

 # qplot() is the basic plotting function in ggplot2.  We pass the name
 # of the X variable and then the name of the Y variable-- since we are
 # also passing the grainData data frame, the X any Y values are interpreted
 # to be the names of columns in grainData.  Otherwise the environment would
 # be searched for variables matching X and Y. In this case, the names must 
 # be enclosed by backtick quotes-- " ` " as the column names contain spaces.

 diamVFiner <- qplot( `Grain Diameter`, `Percent Finer`, data = grainData,
  xlab = 'Grain Diameter (mm)', ylab = 'Cumulative Weight Passed (%)' ) 

 # qplot() returns an "object" which we have stored as diamVFiner. The real
 # power of ggplot2 now comes into play as we wish to modify the plot by
 # adding things such as logarithmic and percentage scales.  This is as
 # simple as adding objects together:

 # By default the object produced by qplot() contains a scatterplot. Lines
 # connecting the points may be added using a geom_line() object:

 diamVFiner <- diamVFiner + geom_line()

 # A scale object may be added to affect the way in which plot axes are
 # scaled:

 diamVFiner <- diamVFiner + scale_x_log10() + scale_y_continuous(formatter = "percent")

 # The final plot is output to the current plotting device by running it through the
 # print() function:

 dev.new()
 print( diamVFiner )

 # Notice that ggplot automatically discards data points where one of the 
 # coordinates is "NA"-- such as the wash weight (the material that passed
 # all the sieves).  The wash really doesn't have a well-defined diameter
 # so NA is an appropriate value for it.

 # If you don't like the grey background, try adding theme_bw():
 dev.new()
 print( diamVFiner + theme_bw() )


 # Add some interpolated data, such as the location of d10, d50 and d90:


 diamStats <- with( grainData, approx( `Percent Finer`, log10(`Grain Diameter`),
  xout = c( 0.1, 0.5, 0.9 ), method = 'linear' ))
 diamStats <- as.data.frame( diamStats )
 names( diamStats ) <- c( 'Percent Finer', 'Grain Diameter' )
 diamStats[['label']] <- c( 'd[10]', 'd[50]', 'd[90]' )

 # The transformation to log10() and then reverse transformation using
 # 10 ^ is done to correct for the effects of round-off error which
 # would cause the interpolated points to no longer lie exactly on the 
 # lines connecting the data after ggplot applies scale_x_log10()
 diamStats[['Grain Diameter']] <- 10 ^ diamStats[['Grain Diameter']]

 diamVFiner <- diamVFiner + geom_point( data = diamStats, color = 'red' ) +
  geom_text( aes( x = 0.95 * `Grain Diameter`,
    y = 1.005 * `Percent Finer`,
    label = label ), data = diamStats,
    hjust = 1, vjust = 0, parse = TRUE ) +
  theme_bw()

 dev.new()
 print( diamVFiner )

 # For the Cumulative Probability Curve, we plot Cumulative Percent
 # versus phi diameter and use a probability scale for the Y axis:
 #
 # Note:
 # scale_y_probit basically runs all the data values through the
 # quantile function for the normal distribution, qnorm(), and 
 # uses the transformed values as plotting coordinates while 
 # retaining the untransformed values for use as labels. 
 # Therefore it is equivalent to:
 #
 #  qplot( `Phi Diameter`, qnorm( grainData[['Cumulative Percent']] ),
 #    data = grainData )
 #
 # Except the above won't have nice percentage-based labels.

 cumProbPlot <- qplot( `Phi Diameter`, `Cumulative Percent`, data = grainData,
  xlab = expression(paste("Phi Diameter  ", -log[2](d),)),
  ylab = 'Cumulative Weight Retained (%)' ) +
  geom_line() +
  scale_y_probit( formatter = 'percent' ) +
  theme_bw()

 # Add interpolated data, such as the location of phi84, phi75, ect.

 phiDiams <- with( grainData, approx( qnorm(`Cumulative Percent`), `Phi Diameter`,
  xout = qnorm(c( 0.05, 0.16, 0.25, 0.50, 0.75, 0.84, 0.95 )), method = 'linear' ))
 phiDiams <- as.data.frame( phiDiams )
 names( phiDiams ) <- c( 'Cumulative Percent', 'Phi Diameter' )
 phiDiams[['label']] <- paste( 'phi[', c(5,16,25,50,75,84,95), ']', sep = '' )

 # Again the qnorm() pnorm() business is to eliminate round-off
 # error so that the points line up with the lines on the graph.
 #
 # Question: Which method actually contains the "error":
 #
 #   - Interpolating Phi Diameter from directly from Cumulative Percent.
 #
 #   - Interpolating Phi Diameter from qnorm( Cumulative Percent )--
 #     what you would be doing if you plotted the points on
 #     probability paper and tried to read the diameters off.

 phiDiams[['Cumulative Percent']] <- pnorm( phiDiams[['Cumulative Percent']] )

 cumProbPlot <- cumProbPlot +
  geom_point( data = phiDiams, color = 'red' ) +
  geom_text( aes(label = label ), data = phiDiams,
    hjust = 1, vjust = 0, parse = TRUE )

 dev.new()
 print( cumProbPlot )
diff --git a/grainSize.csv b/grainSize.csv
	# Load Data
	grainData <- read.csv('grainSize.csv', check.names=F, na.strings='--' )

	# Calculate Derived Sample Values
	grainData[['Phi Diameter']] <- -log2( grainData[['Grain Diameter']] )
	totalWeight <- sum( grainData[['Sample Weight']] )
	grainData[["Percent Retained"]] <- grainData[['Sample Weight']] / totalWeight
	grainData[["Cumulative Percent"]] <- cumsum( grainData[["Percent Retained"]] )
	grainData[['Percent Finer']] <- 1 - grainData[['Cumulative Percent']]

	# Create Grain Diameter vs. Percent Finer plot

	library(ggplot2)

	# qplot() is the basic plotting function in ggplot2. We pass the name
	# of the X variable and then the name of the Y variable-- since we are
	# also passing the grainData data frame, the X any Y values are interpreted
	# to be the names of columns in grainData. Otherwise the environment would
	# be searched for variables matching X and Y. In this case, the names must
	# be enclosed by backtick quotes-- " ` " as the column names contain spaces.

	diamVFiner <- qplot( `Grain Diameter`, `Percent Finer`, data = grainData,
	xlab = 'Grain Diameter (mm)', ylab = 'Cumulative Weight Passed (%)' )

	# qplot() returns an "object" which we have stored as diamVFiner. The real
	# power of ggplot2 now comes into play as we wish to modify the plot by
	# adding things such as logarithmic and percentage scales. This is as
	# simple as adding objects together:

	# By default the object produced by qplot() contains a scatterplot. Lines
	# connecting the points may be added using a geom_line() object:

	diamVFiner <- diamVFiner + geom_line()

	# A scale object may be added to affect the way in which plot axes are
	# scaled:

	diamVFiner <- diamVFiner + scale_x_log10() + scale_y_continuous(formatter = "percent")

	# The final plot is output to the current plotting device by running it through the
	# print() function:

	dev.new()
	print( diamVFiner )

	# Notice that ggplot automatically discards data points where one of the
	# coordinates is "NA"-- such as the wash weight (the material that passed
	# all the sieves). The wash really doesn't have a well-defined diameter
	# so NA is an appropriate value for it.

	# If you don't like the grey background, try adding theme_bw():
	dev.new()
	print( diamVFiner + theme_bw() )


	# Add some interpolated data, such as the location of d10, d50 and d90:


	diamStats <- with( grainData, approx( `Percent Finer`, log10(`Grain Diameter`),
	xout = c( 0.1, 0.5, 0.9 ), method = 'linear' ))
	diamStats <- as.data.frame( diamStats )
	names( diamStats ) <- c( 'Percent Finer', 'Grain Diameter' )
	diamStats[['label']] <- c( 'd[10]', 'd[50]', 'd[90]' )

	# The transformation to log10() and then reverse transformation using
	# 10 ^ is done to correct for the effects of round-off error which
	# would cause the interpolated points to no longer lie exactly on the
	# lines connecting the data after ggplot applies scale_x_log10()
	diamStats[['Grain Diameter']] <- 10 ^ diamStats[['Grain Diameter']]

	diamVFiner <- diamVFiner + geom_point( data = diamStats, color = 'red' ) +
	geom_text( aes( x = 0.95 * `Grain Diameter`,
	y = 1.005 * `Percent Finer`,
	label = label ), data = diamStats,
	hjust = 1, vjust = 0, parse = TRUE ) +
	theme_bw()

	dev.new()
	print( diamVFiner )

	# For the Cumulative Probability Curve, we plot Cumulative Percent
	# versus phi diameter and use a probability scale for the Y axis:
	#
	# Note:
	# scale_y_probit basically runs all the data values through the
	# quantile function for the normal distribution, qnorm(), and
	# uses the transformed values as plotting coordinates while
	# retaining the untransformed values for use as labels.
	# Therefore it is equivalent to:
	#
	# qplot( `Phi Diameter`, qnorm( grainData[['Cumulative Percent']] ),
	# data = grainData )
	#
	# Except the above won't have nice percentage-based labels.

	cumProbPlot <- qplot( `Phi Diameter`, `Cumulative Percent`, data = grainData,
	xlab = expression(paste("Phi Diameter ", -log[2](d),)),
	ylab = 'Cumulative Weight Retained (%)' ) +
	geom_line() +
	scale_y_probit( formatter = 'percent' ) +
	theme_bw()

	# Add interpolated data, such as the location of phi84, phi75, ect.

	phiDiams <- with( grainData, approx( qnorm(`Cumulative Percent`), `Phi Diameter`,
	xout = qnorm(c( 0.05, 0.16, 0.25, 0.50, 0.75, 0.84, 0.95 )), method = 'linear' ))
	phiDiams <- as.data.frame( phiDiams )
	names( phiDiams ) <- c( 'Cumulative Percent', 'Phi Diameter' )
	phiDiams[['label']] <- paste( 'phi[', c(5,16,25,50,75,84,95), ']', sep = '' )

	# Again the qnorm() pnorm() business is to eliminate round-off
	# error so that the points line up with the lines on the graph.
	#
	# Question: Which method actually contains the "error":
	#
	# - Interpolating Phi Diameter from directly from Cumulative Percent.
	#
	# - Interpolating Phi Diameter from qnorm( Cumulative Percent )--
	# what you would be doing if you plotted the points on
	# probability paper and tried to read the diameters off.

	phiDiams[['Cumulative Percent']] <- pnorm( phiDiams[['Cumulative Percent']] )

	cumProbPlot <- cumProbPlot +
	geom_point( data = phiDiams, color = 'red' ) +
	geom_text( aes(label = label ), data = phiDiams,
	hjust = 1, vjust = 0, parse = TRUE )

	dev.new()
	print( cumProbPlot )
Sieve Number	Grain Diameter	Sample Weight
5/16	8.000	24
3 1/2	5.600	3
5	4.000	19
7	2.800	19.6
10	2	27.2
18	1.000	84.7
35	0.500	90.1
45	0.355	115.5
60	0.250	93.5
wash	--	19.3