Created
November 6, 2016 17:22
-
-
Save BioSciEconomist/0d3edf04338b7e326b1c3cbf525416b1 to your computer and use it in GitHub Desktop.
Visualizing Agricultural Subsidies
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------------------------------------------------------------- | |
# | PROGRAM NAME: Bubble_Crops | |
# | DATE: 12-4-2010 | |
# | CREATED BY: Matt Bogard | |
# | PROJECT FILE: http://econometricsense.blogspot.com/2010/12/visualizing-agricultural-subsidies-by.html | |
# |------------------------------------------------------------- | |
# | PURPOSE: Initially to create bubble charts to demonstrate | |
# | allocation | |
# | of farm subsidies to producers by KY county, but expanded to | |
# | include | |
# | numerous other visualization tools available using R | |
# | | |
# |------------------------------------------------------------- | |
# | COMMENTS: | |
# | | |
# | 1: Final data set for analysis is read in in the section | |
# | labeled 'basic statistical analysis' | |
# | 2: In analysis section data for acres planted and subsidies | |
# | are rescaled for | |
# | maps generated later in the program | |
# | 3: Code for bubble charts adapted from: | |
# | http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/ | |
# | 4: Code for spatial analysis and mapping adapted from | |
# | Harvard Applied Spatial Statistics workshop at: | |
# | | |
# |http://www.people.fas.harvard.edu/~zhukov/spatial.html | |
# | 5: | |
# | | |
# | | |
# |------------------------------------------------------------- | |
# | DATA USED: | |
# | | |
# | 1) various data sets downloaded from USDA NASS | |
# |http://www.nass.usda.gov/# | | |
# | 2) data copied and pasted and formatted in Excel from EWG | |
# |subsidy data base: http://www.nass.usda.gov/ | |
# | 3) spatial data frame data used in the Harvard Applied | |
# |Spatial Statistics workshop at: | |
# | | |
# | http://www.people.fas.harvard.edu/~zhukov/spatial.html | |
# | 4) final data set located in project file: | |
# |crops_and_subsidies.csv | |
# | | |
# |------------------------------------------------------------- | |
# | CONTENTS: | |
# | | |
# | PART 1: get data | |
# | PART 2: merge data sets | |
# | PART 3: recode and aggregate data sets | |
# | PART 4: basic statistical analysis | |
# | PART 5: density plots | |
# | PART 6: bubble charts | |
# | PART 7: spatial analysis- Maps | |
# | | |
# | | |
# |------------------------------------------------------------- | |
# | UPDATES: | |
# | | |
# | | |
# | | |
# ------------------------------------------------------------- | |
# Session setup.
# NOTE(review): the working directory is hard-coded to the original author's
# machine. The read.csv() calls in this script use bare file names, so they
# resolve relative to this directory; adjust the path before running elsewhere.
setwd("/Users/wkuuser/Desktop/R Data Sets")
# Clear the global environment so objects left over from an earlier session
# cannot leak into this run.
rm(list = ls())
ls() # view open data sets (should now be empty)
# *------------------------------------------------------------- | |
# | | |
# | | |
# | | |
# | get data | |
# | | |
# | | |
# | | |
# *------------------------------------------------------------- | |
# *------------------------------------------------------------- | |
# | get farm # data | |
# *------------------------------------------------------------- | |
# Read the farm-operations file; ".", "NA", "" and "?" are all treated as
# missing values.
farms <- read.csv(
  "KyFarms.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(farms)

# Get only the most recent available year (2007).
# %in% never returns NA, so rows with a missing Year are dropped instead of
# coming back as all-NA rows (which is what `farms$Year == "2007"` produces).
farms07 <- farms[farms$Year %in% 2007, ]
dim(farms07) # n=120
print(farms07)
names(farms07)

# Keep only the variables needed, convert 'Value' to the numeric column
# 'Operations', and rename the county code to 'CoFips' so it can be used as a
# common merge key.
farms07 <- farms07[c("Value", "County.Code")]
farms07 <- transform(farms07, Operations = as.numeric(as.character(Value)))
# Renamed with base R: reshape's rename() is not attached until the soybean
# section further down, so calling it here fails in a fresh session.
names(farms07)[names(farms07) == "County.Code"] <- "CoFips"
dim(farms07)
names(farms07)
farms07
# *------------------------------------------------------------- | |
# | get corn data | |
# *------------------------------------------------------------- | |
# Read the corn file; ".", "NA", "" and "?" are all treated as missing values.
corn <- read.csv(
  "CR08.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(corn)

# Subset: keep only the Kentucky rows.
# %in% never returns NA, so rows with a missing State are dropped rather than
# being returned as all-NA rows.
kycorn <- corn[corn$State %in% "Kentucky", ]
dim(kycorn) # n=90
print(kycorn$County)
names(kycorn)

# Keep and rename only the relevant variables. CoFips is retained because the
# farm-operations data is later joined on it.
kycorn <- kycorn[c("County", "Yield", "Harvested", "CoFips", "Planted.All.Purposes")]
names(kycorn)
# Renamed with base R: the library(reshape) call in this section was commented
# out, so rename() is not available yet at this point in a fresh session.
names(kycorn)[match(
  c("Yield", "Harvested", "Planted.All.Purposes"),
  names(kycorn)
)] <- c("cornYield", "cornHarvested", "cornPlanted")
names(kycorn)
kycorn
# *------------------------------------------------------------- | |
# | get soybean data | |
# *------------------------------------------------------------- | |
# Read the soybean file; ".", "NA", "" and "?" are all treated as missing
# values.
soybeans <- read.csv(
  "SB08.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(soybeans)

# Subset: keep only the Kentucky rows.
# %in% never returns NA, so rows with a missing State are dropped rather than
# being returned as all-NA rows.
kysoybeans <- soybeans[soybeans$State %in% "Kentucky", ]
dim(kysoybeans) # n=79
print(kysoybeans$County)
names(kysoybeans)

# Keep and rename only the relevant variables.
# reshape is still attached here because other sections of this script call
# its rename(); this section itself renames with base R.
library(reshape)
kysoybeans <- kysoybeans[c("County", "Yield", "Harvested", "Planted.All.Purposes")]
names(kysoybeans)
names(kysoybeans)[match(
  c("Yield", "Harvested", "Planted.All.Purposes"),
  names(kysoybeans)
)] <- c("soybeanYield", "soybeanHarvested", "soybeanPlanted")
names(kysoybeans)
kysoybeans
# *------------------------------------------------------------- | |
# | get wheat data | |
# *------------------------------------------------------------- | |
# Read the wheat file; ".", "NA", "" and "?" are all treated as missing values.
wheat <- read.csv(
  "AW08.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(wheat)

# Subset: keep only the Kentucky rows.
# %in% never returns NA, so rows with a missing State are dropped rather than
# being returned as all-NA rows.
kywheat <- wheat[wheat$State %in% "Kentucky", ]
dim(kywheat) # n= 70
print(kywheat$County)
names(kywheat)

# Keep and rename only the relevant variables.
# Renamed with base R so this section does not depend on reshape having been
# attached by an earlier section.
kywheat <- kywheat[c("County", "Yield", "Harvested", "Planted.All.Purposes")]
names(kywheat)
names(kywheat)[match(
  c("Yield", "Harvested", "Planted.All.Purposes"),
  names(kywheat)
)] <- c("wheatYield", "wheatHarvested", "wheatPlanted")
names(kywheat)
# *------------------------------------------------------------- | |
# | get EWG subsidy data | |
# *------------------------------------------------------------- | |
# Read the EWG subsidy file; ".", "NA", "" and "?" are all treated as missing
# values.
kysubsidies <- read.csv(
  "EWGKYSubsidies.csv",
  na.strings = c(".", "NA", "", "?"),
  encoding = "UTF-8"
)
names(kysubsidies)
kysubsidies
# *------------------------------------------------------------- | |
# | | |
# | | |
# | | |
# | merge data sets | |
# | | |
# | | |
# | | |
# *------------------------------------------------------------- | |
# Corn and soybeans left join.
# This keeps all corn-growing counties (n=90) and adds information about
# soybeans. It is the driver file for all subsequent merges, so the starting
# base data is corn-growing counties: the end data set is based only on
# counties that grow corn in addition to other crops, and counties that do not
# grow corn are excluded from the analysis.
#
# merge() has no `by.<object name>` arguments. The original calls passed
# `by.kycorn=`, `by.kysoybeans=` and so on, which fell into `...` and were
# ignored, leaving the join key to be whatever column names the two tables
# happened to share. The key is now stated explicitly with `by`, and
# `all.x = TRUE` alone expresses the left join.
corn_and_soybeans <- merge(kycorn, kysoybeans, by = "County", all.x = TRUE)
dim(corn_and_soybeans)
names(corn_and_soybeans)
corn_and_soybeans

# All 3 crops: left join with the wheat data.
allKyCrops <- merge(corn_and_soybeans, kywheat, by = "County", all.x = TRUE)
dim(allKyCrops)
names(allKyCrops)

# Left join with the farm operations data on CoFips as key,
# since County won't match due to case differences.
farmAndCrops <- merge(allKyCrops, farms07, by = "CoFips", all.x = TRUE)
dim(farmAndCrops)
names(farmAndCrops)
farmAndCrops

# Crops and subsidies: left join with the subsidy data.
# NOTE(review): if kysubsidies shares columns other than County with
# farmAndCrops, they now come back with .x/.y suffixes instead of being used
# as extra join keys -- confirm against the subsidy file's columns.
KyCropsAndSubsidies <- merge(farmAndCrops, kysubsidies, by = "County", all.x = TRUE)
dim(KyCropsAndSubsidies)
names(KyCropsAndSubsidies)

# Quick report.
KyCropsAndSubsidies[c(
  "County",
  "cornPlanted", "soybeanPlanted", "wheatPlanted",
  "cornHarvested", "soybeanHarvested", "wheatHarvested",
  "Operations", "Subsidy"
)]
# *------------------------------------------------------------- | |
# | | |
# | | |
# | | |
# | recode and aggregate variables | |
# | | |
# | | |
# | | |
# *------------------------------------------------------------- | |
# Recode missing values - acres planted.
# Soybean and wheat columns were attached by left joins, so they can be NA;
# those NAs are recoded to 0 so the totals below are not NA. The original
# wrapped assignments inside the ifelse() branches and compared is.na() to the
# string 'TRUE'; the plain form below yields the same columns.
KyCropsAndSubsidies$SoybeanAcresPlanted <- ifelse(
  is.na(KyCropsAndSubsidies$soybeanPlanted),
  0,
  KyCropsAndSubsidies$soybeanPlanted
)
KyCropsAndSubsidies[c("County", "SoybeanAcresPlanted")]

KyCropsAndSubsidies$WheatAcresPlanted <- ifelse(
  is.na(KyCropsAndSubsidies$wheatPlanted),
  0,
  KyCropsAndSubsidies$wheatPlanted
)
KyCropsAndSubsidies[c("County", "WheatAcresPlanted")]

# Recode missing values - acres harvested.
KyCropsAndSubsidies$SoybeanAcresHarvested <- ifelse(
  is.na(KyCropsAndSubsidies$soybeanHarvested),
  0,
  KyCropsAndSubsidies$soybeanHarvested
)
KyCropsAndSubsidies[c("County", "SoybeanAcresHarvested")]

KyCropsAndSubsidies$WheatAcresHarvested <- ifelse(
  is.na(KyCropsAndSubsidies$wheatHarvested),
  0,
  KyCropsAndSubsidies$wheatHarvested
)
KyCropsAndSubsidies[c("County", "WheatAcresHarvested")]

# Aggregations: total acres planted and harvested across the three crops.
# The corn columns are used as-is (they are not NA-recoded above).
KyCropsAndSubsidies <- transform(
  KyCropsAndSubsidies,
  AcresPlanted = cornPlanted + SoybeanAcresPlanted + WheatAcresPlanted,
  AcresHarvested = cornHarvested + SoybeanAcresHarvested + WheatAcresHarvested
)
# Natural log of total planted acres.
KyCropsAndSubsidies <- transform(KyCropsAndSubsidies, LogAcres = log(AcresPlanted))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment