Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Created November 6, 2016 17:22
Show Gist options
  • Save BioSciEconomist/0d3edf04338b7e326b1c3cbf525416b1 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/0d3edf04338b7e326b1c3cbf525416b1 to your computer and use it in GitHub Desktop.
Visualizing Agricultural Subsidies
# -------------------------------------------------------------
# | PROGRAM NAME: Bubble_Crops
# | DATE: 12-4-2010
# | CREATED BY: Matt Bogard
# | PROJECT FILE: http://econometricsense.blogspot.com/2010/12/visualizing-agricultural-subsidies-by.html
# |-------------------------------------------------------------
# | PURPOSE: Initially to create bubble charts to demonstrate
# | allocation
# | of farm subsidies to producers by KY county, but expanded to
# | include
# | numerous other visualization tools available using R
# |
# |-------------------------------------------------------------
# | COMMENTS:
# |
# | 1: Final data set for analysis is read in in the section
# | labeled 'basic statistical analysis'
# | 2: In analysis section data for acres planted and subsidies
# | are rescaled for
# | maps generated later in the program
# | 3: Code for bubble charts adapted from:
# | http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/
# | 4: Code for spatial analysis and mapping adapted from
# | Harvard Applied Spatial Statistics workshop at:
# |
# |http://www.people.fas.harvard.edu/~zhukov/spatial.html
# | 5:
# |
# |
# |-------------------------------------------------------------
# | DATA USED:
# |
# | 1) various data sets downloaded from USDA NASS
# |http://www.nass.usda.gov/# |
# | 2) data copied and pasted and formatted in Excel from EWG
# |subsidy database: http://www.nass.usda.gov/
# | 3) spatial data frame data used in the Harvard Applied
# |Spatial Statistics workshop at:
# |
# | http://www.people.fas.harvard.edu/~zhukov/spatial.html
# | 4) final data set located in project file:
# |crops_and_subsidies.csv
# |
# |-------------------------------------------------------------
# | CONTENTS:
# |
# | PART 1: get data
# | PART 2: merge data sets
# | PART 3: recode and aggregate data sets
# | PART 4: basic statistical analysis
# | PART 5: density plots
# | PART 6: bubble charts
# | PART 7: spatial analysis- Maps
# |
# |
# |-------------------------------------------------------------
# | UPDATES:
# |
# |
# |
# -------------------------------------------------------------
# Point the session at the folder that holds the project's CSV inputs.
setwd("/Users/wkuuser/Desktop/R Data Sets")
# Empty the workspace so nothing from an earlier run leaks into this one,
# then list it to confirm that no objects remain.
rm(list = ls())
ls()
# *-------------------------------------------------------------
# |
# |
# |
# | get data
# |
# |
# |
# *-------------------------------------------------------------
# *-------------------------------------------------------------
# | get farm # data
# *-------------------------------------------------------------
# rename() below comes from the reshape package; attach it before first use so
# this section does not depend on an earlier session having loaded it.
library(reshape)
# Read the farm-operations file; ".", "NA", "" and "?" are treated as missing.
farms <- read.csv(
  "KyFarms.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(farms)
# get only the most recently available year (2007)
farms07 <- farms[farms$Year == "2007", ]
dim(farms07) # n=120
print(farms07)
names(farms07)
# keep only the variables needed, convert 'Value' to a numeric 'Operations'
# column, and rename the county code so it can serve as the common merge key
farms07 <- farms07[c("Value", "County.Code")]
# as.character() first, so that a factor column is converted by its labels
# rather than by its internal level codes
farms07 <- transform(
  farms07,
  Operations = as.numeric(as.character(farms07$Value))
)
farms07 <- rename(farms07, c(County.Code = "CoFips"))
dim(farms07)
names(farms07)
farms07
# *-------------------------------------------------------------
# | get corn data
# *-------------------------------------------------------------
# Read the corn file; ".", "NA", "" and "?" are treated as missing.
corn <- read.csv(
  "CR08.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(corn)
# subset - get only KY counties data
kycorn <- corn[corn$State == "Kentucky", ]
dim(kycorn) # n=90
print(kycorn$County)
names(kycorn)
# keep and rename only relevant variables
# rename() comes from the reshape package; attach it before use so this
# section does not depend on an earlier session having loaded it.
library(reshape)
kycorn <- kycorn[
  c("County", "Yield", "Harvested", "CoFips", "Planted.All.Purposes")
]
names(kycorn)
# prefix the crop-specific columns so they stay distinguishable after merging
kycorn <- rename(
  kycorn,
  c(
    Yield = "cornYield",
    Harvested = "cornHarvested",
    Planted.All.Purposes = "cornPlanted"
  )
)
names(kycorn)
kycorn
# *-------------------------------------------------------------
# | get soybean data
# *-------------------------------------------------------------
# Load SB08.csv, mapping ".", "NA", "" and "?" to missing values.
soybeans <- read.csv(
  "SB08.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(soybeans)
# restrict the rows to Kentucky
kysoybeans <- soybeans[soybeans$State == "Kentucky", ]
dim(kysoybeans) # n=79
print(kysoybeans$County)
names(kysoybeans)
# retain just the columns of interest and give them soybean-specific names
library(reshape)
kysoybeans <- kysoybeans[
  c("County", "Yield", "Harvested", "Planted.All.Purposes")
]
names(kysoybeans)
kysoybeans <- rename(
  kysoybeans,
  c(
    Yield = "soybeanYield",
    Harvested = "soybeanHarvested",
    Planted.All.Purposes = "soybeanPlanted"
  )
)
names(kysoybeans)
kysoybeans
# *-------------------------------------------------------------
# | get wheat data
# *-------------------------------------------------------------
# Load AW08.csv, mapping ".", "NA", "" and "?" to missing values.
wheat <- read.csv(
  "AW08.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(wheat)
# restrict the rows to Kentucky
kywheat <- wheat[wheat$State == "Kentucky", ]
dim(kywheat) # n= 70
print(kywheat$County)
names(kywheat)
# retain just the columns of interest and give them wheat-specific names
# library(reshape)
kywheat <- kywheat[
  c("County", "Yield", "Harvested", "Planted.All.Purposes")
]
names(kywheat)
kywheat <- rename(
  kywheat,
  c(
    Yield = "wheatYield",
    Harvested = "wheatHarvested",
    Planted.All.Purposes = "wheatPlanted"
  )
)
names(kywheat)
# *-------------------------------------------------------------
# | get EWG subsidy data
# *-------------------------------------------------------------
# Load the EWG subsidy file, mapping ".", "NA", "" and "?" to missing values,
# then show its column names and contents.
kysubsidies <- read.csv(
  "EWGKYSubsidies.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(kysubsidies)
kysubsidies
# *-------------------------------------------------------------
# |
# |
# |
# | merge data sets
# |
# |
# |
# *-------------------------------------------------------------
# corn and soybeans left join
# note this will keep all corn growing counties (n=90) and add info about soybeans
# this will be the driver file for all subsequent merges; as a result, the starting base
# data is corn growing counties, i.e. the end data set will be based only on counties that
# grow corn in addition to other crops, so counties that do not grow corn will be excluded
# from the analysis
#
# The join key is given through merge()'s real `by` argument. Names such as
# `by.kycorn` are not arguments of merge(): they were silently ignored, and the
# joins only worked because merge() defaults to the columns both inputs share.
# all.x = TRUE alone requests a left join (all and all.y default to FALSE).
corn_and_soybeans <- merge(kycorn, kysoybeans, by = "County", all.x = TRUE)
dim(corn_and_soybeans)
names(corn_and_soybeans)
corn_and_soybeans
# all 3 crops - left join with wheat data
allKyCrops <- merge(corn_and_soybeans, kywheat, by = "County", all.x = TRUE)
dim(allKyCrops)
names(allKyCrops)
# left join with farm operations data on CoFips as key
# since County won't match due to case differences
farmAndCrops <- merge(allKyCrops, farms07, by = "CoFips", all.x = TRUE)
dim(farmAndCrops)
names(farmAndCrops)
farmAndCrops
# crops and subsidies - left join with subsidy data
# NOTE(review): assumes County is the only column kysubsidies shares with
# farmAndCrops; if it shares others they now get .x/.y suffixes - confirm.
KyCropsAndSubsidies <- merge(
  farmAndCrops,
  kysubsidies,
  by = "County",
  all.x = TRUE
)
dim(KyCropsAndSubsidies)
names(KyCropsAndSubsidies)
# quick report
KyCropsAndSubsidies[c(
  "County",
  "cornPlanted", "soybeanPlanted", "wheatPlanted",
  "cornHarvested", "soybeanHarvested", "wheatHarvested",
  "Operations", "Subsidy"
)]
# *-------------------------------------------------------------
# |
# |
# |
# | recode and aggregate variables
# |
# |
# |
# *-------------------------------------------------------------
# recode missing values - acres planted
# Missing soybean/wheat acreage is replaced with 0 so that the totals computed
# below are not turned into NA by those columns. Each ifelse() is a plain
# value expression: no assignments inside its arguments and no comparison of
# a logical against the string 'TRUE'.
KyCropsAndSubsidies$SoybeanAcresPlanted <- ifelse(
  is.na(KyCropsAndSubsidies$soybeanPlanted),
  0,
  KyCropsAndSubsidies$soybeanPlanted
)
KyCropsAndSubsidies[c("County", "SoybeanAcresPlanted")]
KyCropsAndSubsidies$WheatAcresPlanted <- ifelse(
  is.na(KyCropsAndSubsidies$wheatPlanted),
  0,
  KyCropsAndSubsidies$wheatPlanted
)
KyCropsAndSubsidies[c("County", "WheatAcresPlanted")]
# recode missing values - acres harvested
KyCropsAndSubsidies$SoybeanAcresHarvested <- ifelse(
  is.na(KyCropsAndSubsidies$soybeanHarvested),
  0,
  KyCropsAndSubsidies$soybeanHarvested
)
KyCropsAndSubsidies[c("County", "SoybeanAcresHarvested")]
KyCropsAndSubsidies$WheatAcresHarvested <- ifelse(
  is.na(KyCropsAndSubsidies$wheatHarvested),
  0,
  KyCropsAndSubsidies$wheatHarvested
)
KyCropsAndSubsidies[c("County", "WheatAcresHarvested")]
# aggregations
# NOTE(review): cornPlanted / cornHarvested are not recoded above, so a missing
# corn value still makes the corresponding total NA - confirm this is intended.
KyCropsAndSubsidies <- transform(
  KyCropsAndSubsidies,
  AcresPlanted = cornPlanted + SoybeanAcresPlanted + WheatAcresPlanted,
  AcresHarvested = cornHarvested + SoybeanAcresHarvested + WheatAcresHarvested
)
# natural log of total planted acres
KyCropsAndSubsidies <- transform(
  KyCropsAndSubsidies,
  LogAcres = log(AcresPlanted)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment