Skip to content

Instantly share code, notes, and snippets.

@jlisic
Last active October 28, 2015 15:27
Show Gist options
  • Save jlisic/a3b11359a2c052c485aa to your computer and use it in GitHub Desktop.
Save jlisic/a3b11359a2c052c485aa to your computer and use it in GitHub Desktop.
A tool to get CDL state level accuracies from R.
#Copyright (c) 2015, Jonathan Lisic
#All rights reserved.
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the FreeBSD Project.
#
# this is a quick function to get cdl accuracy information
cdlAccuracy <- function( state, year ) {
require(RCurl)
require(XML)
html <- getURL(
sprintf("http://www.nass.usda.gov/Research_and_Science/Cropland/metadata/metadata_%s%02d.htm",state,year %% 100)
)
htmlParsed <- htmlParse( html, asText=TRUE)
getTable <- xpathSApply( htmlParsed, "//pre", xmlValue)[2]
x <- strsplit( getTable, "\\r\\n")[[1]]
# init
accuracyTable <- c()
#handling for 2010 and later
if (year >= 2007) {
for( i in 13:300 ) {
if( nchar( x[i] ) == 0 ) break
parseString <- substr(x[i],25,10000)
parseString <- strsplit( gsub("%","",parseString), "[ ]+" )[[1]]
cropNumber <- parseString[2]
if( length( grep( "n/a", cropNumber ) ) == 0 ) {
cropNumber <- as.numeric( cropNumber)
} else {
cropNumber <- NA
}
producerAccuracy <- parseString[4]
if( length( grep( "n/a", producerAccuracy ) ) == 0 ) {
producerAccuracy <- as.numeric( producerAccuracy)
} else {
producerAccuracy <- NA
}
Kappa <- parseString[6]
if( length( grep( "n/a", Kappa ) ) == 0 ) {
Kappa <- as.numeric( Kappa)
} else {
Kappa <- NA
}
userAccuracy <- parseString[7]
if( length( grep( "n/a", userAccuracy ) ) == 0 ) {
userAccuracy <- as.numeric( userAccuracy)
} else {
userAccuracy <- NA
}
accuracyTable <- rbind( accuracyTable, c( cropNumber, producerAccuracy, Kappa, userAccuracy) )
}
accuracyTable <- cbind( accuracyTable, 99 )
} else {
for( i in 1:length(x) ) {
if( length(grep("ANALYSIS DISTRICT AD",x[i])) > 0) {
district <- as.numeric(substr(x[i], 21, 23))
next
}
# 2005 - 2006
if( year %in% c(2005,2006) ) {
lineStart <- substr(x[i],1,3)
if( length( grep( "[0-9]", lineStart ) ) > 0 ) {
parseString <- substr(x[i],32,10000)
parseString <- strsplit( gsub("%","",parseString), "[ ]+" )[[1]]
cropNumber <- as.numeric(lineStart)
producerAccuracy <- as.numeric(parseString[3])
userAccuracy <- 100 - as.numeric(parseString[4])
Kappa <- as.numeric(parseString[5])
accuracyTable <- rbind( accuracyTable, c( cropNumber, producerAccuracy, Kappa, userAccuracy, district) )
}
# before 2005
} else {
lineStart <- substr(x[i],1,3)
if( length( grep( "[0-9]", lineStart ) ) > 0 ) {
parseString <- substr(x[i],32,10000)
parseString <- strsplit( gsub("%","",parseString), "[ ]+" )[[1]]
cropNumber <- as.numeric(lineStart)
producerAccuracy <- as.numeric(parseString[4])
userAccuracy <- 100 - as.numeric(parseString[5])
Kappa <- as.numeric(parseString[6])
accuracyTable <- rbind( accuracyTable, c( cropNumber, producerAccuracy, Kappa, userAccuracy, district) )
}
}
}
}
colnames(accuracyTable) <- c('crop', 'producer', 'kappa', 'user','district')
return(accuracyTable)
}
# example
#x <- cdlAccuracy("in", 2003)
#print(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment