# BioSciEconomist · November 6, 2016 17:22
# diff --git a/bubble crops.r b/bubble crops.r
 #   -------------------------------------------------------------
 #  | PROGRAM NAME: Bubble_Crops
 #  | DATE: 12-4-2010
 #  | CREATED BY: Matt Bogard
 #  | PROJECT FILE: http://econometricsense.blogspot.com/2010/12/visualizing-agricultural-subsidies-by.html           
 #  |-------------------------------------------------------------
 #  | PURPOSE: Initially to create bubble charts to demonstrate
 #  |  allocation
 #  | of farm subsidies to producers by KY county, but expanded to
 #  | include
 #  | numerous other visualization tools available using R
 #  |
 #  |-------------------------------------------------------------
 #  | COMMENTS:              
 #  |
 #  |     1: Final data set for analysis is read in in the section
 #  |     labeled 'basic statistical analysis'
 #  | 2: In analysis section data for acres planted and subsidies
 #  | are rescaled for
 #  |     maps generated later in the program
 #  |     3: Code for bubble charts adapted from:
 #  | http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/
 #  |     4: Code for spatial analysis and mapping adapted from 
 #  |  Harvard Applied Spatial Statistics workshop at:
 #  |        
 #  |http://www.people.fas.harvard.edu/~zhukov/spatial.html
 #  |     5:
 #  |
 #  |
 #  |-------------------------------------------------------------
 #  | DATA USED:               
 #  |     
 #  |      1) various data sets downloaded from USDA NASS
 #  |http://www.nass.usda.gov/#  |
 #  |          2) data copied and pasted and formatted in excel from EWG
 #  |subsidy database: http://www.nass.usda.gov/
 #  |      3) spatial data frame data used in the Harvard Applied 
 #  |Spatial Statistics workshop at:
 #  |       
 #  | http://www.people.fas.harvard.edu/~zhukov/spatial.html
 #  |      4) final data set located in project file:
 #  |crops_and_subsidies.csv
 #  |
 #  |-------------------------------------------------------------
 #  | CONTENTS:              
 #  |
 #  |          PART 1: get data    
 #  |          PART 2: merge data sets
 #  |          PART 3: recode and aggregate data sets
 #  |          PART 4: basic statistical analysis
 #  |          PART 5: density plots
 #  |          PART 6: bubble charts
 #  |    PART 7: spatial analysis- Maps
 #  |    
 #  |
 #  |-------------------------------------------------------------
 #  | UPDATES:              
 #  |
 #  |
 #  |
 #   -------------------------------------------------------------


 # Point the session at the folder that holds the input CSV files; the
 # read.csv() calls in this script use file names relative to this directory.
 setwd("/Users/wkuuser/Desktop/R Data Sets")
 # Remove every object from the current workspace before starting.
 rm(list = ls())
 # List what remains in the workspace.
 ls()
    
 #  *-------------------------------------------------------------
 #  |
 #  |
 #  |
 #  | get data
 #  |
 #  |
 #  |
 #  *-------------------------------------------------------------    
 #  *-------------------------------------------------------------
 #  | get farm # data
 #  *-------------------------------------------------------------

 # Load the farm-count file; ".", "NA", "" and "?" are all read as NA.
 farms <- read.csv("KyFarms.csv", na.strings = c(".", "NA", "", "?"), encoding = "UTF-8")
 names(farms)

 # Keep only the most recently available year (2007).
 farms07 <- farms[farms$Year == "2007", ]
 dim(farms07) # n=120
 print(farms07)
 names(farms07)

 # rename() is provided by the reshape package. This is the first place the
 # script calls it, so attach the package here rather than relying on it
 # already being loaded in the session.
 library(reshape)

 # Keep only the needed columns, add a numeric 'Operations' column built from
 # 'Value', and rename the county code to 'CoFips' so it can serve as the
 # common key when merging with the crop data.
 farms07 <- farms07[c("Value", "County.Code")]
 # as.character() converts a factor or character 'Value' to text before the
 # numeric conversion.
 # NOTE(review): entries containing thousands separators would become NA
 # here - confirm against KyFarms.csv.
 farms07 <- transform(farms07, Operations = as.numeric(as.character(Value)))
 farms07 <- rename(farms07, c(County.Code = "CoFips"))
 dim(farms07)
 names(farms07)
 farms07



 #  *-------------------------------------------------------------
 #  | get corn data
 #  *-------------------------------------------------------------

 # Load the corn data file (CR08.csv); ".", "NA", "" and "?" are read as NA.
 corn <- read.csv("CR08.csv", na.strings = c(".", "NA", "", "?"), encoding = "UTF-8")
 names(corn)

 # Subset: keep only the rows whose State is Kentucky.
 kycorn <- corn[corn$State == "Kentucky", ]
 dim(kycorn) # n=90
 print(kycorn$County)
 names(kycorn)

 # rename() is provided by the reshape package, so attach it before the call
 # below instead of relying on it already being loaded in the session.
 library(reshape)

 # Keep only the relevant variables and give the crop-specific ones a "corn"
 # prefix. County and CoFips keep their names because later merges use them
 # as keys.
 kycorn <- kycorn[c("County", "Yield", "Harvested", "CoFips", "Planted.All.Purposes")]
 names(kycorn)
 kycorn <- rename(
   kycorn,
   c(Yield = "cornYield", Harvested = "cornHarvested", Planted.All.Purposes = "cornPlanted")
 )
 names(kycorn)
 kycorn

 #  *-------------------------------------------------------------
 #  | get soybean data
 #  *-------------------------------------------------------------

 # Read the soybean file (SB08.csv); ".", "NA", "" and "?" become NA.
 soybeans <- read.csv(
   "SB08.csv",
   na.strings = c(".", "NA", "", "?"),
   encoding = "UTF-8"
 )
 names(soybeans)

 # Restrict to the rows where State is Kentucky.
 kysoybeans <- soybeans[soybeans$State == "Kentucky", ]
 dim(kysoybeans) # n=79
 print(kysoybeans$County)
 names(kysoybeans)

 # reshape supplies the rename() function used below.
 library(reshape)

 # Retain County plus the three crop measures, then prefix the measures with
 # "soybean" so they stay distinct once merged with the other crops.
 kysoybeans <- kysoybeans[c("County", "Yield", "Harvested", "Planted.All.Purposes")]
 names(kysoybeans)
 kysoybeans <- rename(
   kysoybeans,
   c(
     Yield = "soybeanYield",
     Harvested = "soybeanHarvested",
     Planted.All.Purposes = "soybeanPlanted"
   )
 )
 names(kysoybeans)
 kysoybeans

 #  *-------------------------------------------------------------
 #  | get wheat data
 #  *-------------------------------------------------------------

 # Read the wheat file (AW08.csv); ".", "NA", "" and "?" become NA.
 wheat <- read.csv(
   "AW08.csv",
   na.strings = c(".", "NA", "", "?"),
   encoding = "UTF-8"
 )
 names(wheat)

 # Restrict to the rows where State is Kentucky.
 kywheat <- wheat[wheat$State == "Kentucky", ]
 dim(kywheat) # n= 70
 print(kywheat$County)
 names(kywheat)

 # Retain County plus the three crop measures and prefix the measures with
 # "wheat". rename() comes from reshape, which must already be attached at
 # this point.
 kywheat <- kywheat[c("County", "Yield", "Harvested", "Planted.All.Purposes")]
 names(kywheat)
 kywheat <- rename(
   kywheat,
   c(
     Yield = "wheatYield",
     Harvested = "wheatHarvested",
     Planted.All.Purposes = "wheatPlanted"
   )
 )
 names(kywheat)

 #  *-------------------------------------------------------------
 #  | get EWG subsidy data
 #  *-------------------------------------------------------------

 # Read the EWG subsidy table; ".", "NA", "" and "?" are treated as missing.
 kysubsidies <- read.csv(
   "EWGKYSubsidies.csv",
   na.strings = c(".", "NA", "", "?"),
   encoding = "UTF-8"
 )
 # Show the column names, then the full table.
 names(kysubsidies)
 kysubsidies


 #  *-------------------------------------------------------------
 #  |
 #  |
 #  |
 #  | merge data sets
 #  |
 #  |
 #  |
 #  *-------------------------------------------------------------

 # Corn and soybeans: left join on County.
 # This keeps all corn growing counties (n=90) and adds soybean information.
 # The corn table is the driver file for all subsequent merges, so the starting
 # base data is corn growing counties: the final data set contains only
 # counties that grow corn (possibly alongside other crops), and counties that
 # do not grow corn are excluded from the analysis.
 #
 # merge() takes its key through by / by.x / by.y. Arguments such as
 # by.kycorn = County match none of those and are ignored, which leaves the key
 # to whatever column names the two tables happen to share. The key is
 # therefore named explicitly; all.x = TRUE on its own requests the left join.
 corn_and_soybeans <- merge(kycorn, kysoybeans, by = "County", all.x = TRUE)
 dim(corn_and_soybeans)
 names(corn_and_soybeans)
 corn_and_soybeans

 # All 3 crops: left join with the wheat data on County.
 allKyCrops <- merge(corn_and_soybeans, kywheat, by = "County", all.x = TRUE)
 dim(allKyCrops)
 names(allKyCrops)

 # Left join with the farm operations data on CoFips as the key,
 # since County won't match due to case differences.
 farmAndCrops <- merge(allKyCrops, farms07, by = "CoFips", all.x = TRUE)
 dim(farmAndCrops)
 names(farmAndCrops)
 farmAndCrops

 # Crops and subsidies: left join with the subsidy data on County.
 # NOTE(review): if kysubsidies shares other column names with farmAndCrops,
 # those columns now come back with .x/.y suffixes instead of acting as extra
 # keys - confirm against EWGKYSubsidies.csv.
 KyCropsAndSubsidies <- merge(farmAndCrops, kysubsidies, by = "County", all.x = TRUE)
 dim(KyCropsAndSubsidies)
 names(KyCropsAndSubsidies)

 # Quick report of the merged acreage, operations and subsidy columns.
 KyCropsAndSubsidies[c(
   "County", "cornPlanted", "soybeanPlanted", "wheatPlanted",
   "cornHarvested", "soybeanHarvested", "wheatHarvested",
   "Operations", "Subsidy"
 )]



 #  *-------------------------------------------------------------
 #  |
 #  |
 #  |
 #  | recode and aggregate variables
 #  |
 #  |
 #  |
 #  *-------------------------------------------------------------

 # Recode missing values - acres planted.
 # Counties without a soybean or wheat record get NA from the left joins; treat
 # those as 0 acres. ifelse() is given the logical test directly and plain
 # yes/no values, with no assignments nested inside its branches.

 KyCropsAndSubsidies$SoybeanAcresPlanted <- ifelse(
   is.na(KyCropsAndSubsidies$soybeanPlanted),
   0,
   KyCropsAndSubsidies$soybeanPlanted
 )

 KyCropsAndSubsidies[c("County", "SoybeanAcresPlanted")]

 KyCropsAndSubsidies$WheatAcresPlanted <- ifelse(
   is.na(KyCropsAndSubsidies$wheatPlanted),
   0,
   KyCropsAndSubsidies$wheatPlanted
 )

 KyCropsAndSubsidies[c("County", "WheatAcresPlanted")]

 # Recode missing values - acres harvested.

 KyCropsAndSubsidies$SoybeanAcresHarvested <- ifelse(
   is.na(KyCropsAndSubsidies$soybeanHarvested),
   0,
   KyCropsAndSubsidies$soybeanHarvested
 )

 KyCropsAndSubsidies[c("County", "SoybeanAcresHarvested")]

 KyCropsAndSubsidies$WheatAcresHarvested <- ifelse(
   is.na(KyCropsAndSubsidies$wheatHarvested),
   0,
   KyCropsAndSubsidies$wheatHarvested
 )

 KyCropsAndSubsidies[c("County", "WheatAcresHarvested")]

 # Aggregations: total acres planted and harvested across the three crops.
 # cornPlanted and cornHarvested are not recoded above, so an NA in either
 # one yields an NA total for that county.

 KyCropsAndSubsidies <- transform(
   KyCropsAndSubsidies,
   AcresPlanted = cornPlanted + SoybeanAcresPlanted + WheatAcresPlanted,
   AcresHarvested = cornHarvested + SoybeanAcresHarvested + WheatAcresHarvested
 )

 # Natural log of total planted acres.
 KyCropsAndSubsidies <- transform(KyCropsAndSubsidies, LogAcres = log(AcresPlanted))
	# -------------------------------------------------------------
	# \| PROGRAM NAME: Bubble_Crops
	# \| DATE: 12-4-2010
	# \| CREATED BY: Matt Bogard
	# \| PROJECT FILE: http://econometricsense.blogspot.com/2010/12/visualizing-agricultural-subsidies-by.html
	# \|-------------------------------------------------------------
	# \| PURPOSE: Initially to create bubble charts to demonstrate
	# \| allocation
	# \| of farm subsidies to producers by KY county, but expanded to
	# \| include
	# \| numerous other visualization tools available using R
	# \|
	# \|-------------------------------------------------------------
	# \| COMMENTS:
	# \|
	# \| 1: Final data set for analysis is read in in the section
	# \| labeled 'basic statistical analysis'
	# \| 2: In analysis section data for acres planted and subsidies
	# \| are rescaled for
	# \| maps generated later in the program
	# \| 3: Code for bubble charts adapted from:
	# \| http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/
	# \| 4: Code for spatial analysis and mapping adapted from
	# \| Harvard Applied Spatial Statistics workshop at:
	# \|
	# \|http://www.people.fas.harvard.edu/~zhukov/spatial.html
	# \| 5:
	# \|
	# \|
	# \|-------------------------------------------------------------
	# \| DATA USED:
	# \|
	# \| 1) various data sets downloaded from USDA NASS
	# \|http://www.nass.usda.gov/# \|
	# \| 2) data copied and pasted and formatted in excel from EWG
	# \|subsidy database: http://www.nass.usda.gov/
	# \| 3) spatial data frame data used in the Harvard Applied
	# \|Spatial Statistics workshop at:
	# \|
	# \| http://www.people.fas.harvard.edu/~zhukov/spatial.html
	# \| 4) final data set located in project file:
	# \|crops_and_subsidies.csv
	# \|
	# \|-------------------------------------------------------------
	# \| CONTENTS:
	# \|
	# \| PART 1: get data
	# \| PART 2: merge data sets
	# \| PART 3: recode and aggregate data sets
	# \| PART 4: basic statistical analysis
	# \| PART 5: density plots
	# \| PART 6: bubble charts
	# \| PART 7: spatial analysis- Maps
	# \|
	# \|
	# \|-------------------------------------------------------------
	# \| UPDATES:
	# \|
	# \|
	# \|
	# -------------------------------------------------------------


	# Point the session at the folder that holds the input CSV files; the
	# read.csv() calls in this script use file names relative to this directory.
	setwd("/Users/wkuuser/Desktop/R Data Sets")
	# Remove every object from the current workspace before starting.
	rm(list = ls())
	# List what remains in the workspace.
	ls()

	# *-------------------------------------------------------------
	# \|
	# \|
	# \|
	# \| get data
	# \|
	# \|
	# \|
	# *-------------------------------------------------------------
	# *-------------------------------------------------------------
	# \| get farm # data
	# *-------------------------------------------------------------

	# Load the farm-count file; ".", "NA", "" and "?" are all read as NA.
	farms <- read.csv("KyFarms.csv", na.strings = c(".", "NA", "", "?"), encoding = "UTF-8")
	names(farms)

	# Keep only the most recently available year (2007).
	farms07 <- farms[farms$Year == "2007", ]
	dim(farms07) # n=120
	print(farms07)
	names(farms07)

	# rename() is provided by the reshape package. Attach it here so the call
	# below does not rely on the package already being loaded in the session.
	library(reshape)

	# Keep only the needed columns, add a numeric 'Operations' column built from
	# 'Value', and rename the county code to 'CoFips' so it can serve as the
	# common key when merging with the crop data.
	farms07 <- farms07[c("Value", "County.Code")]
	# as.character() converts a factor or character 'Value' to text before the
	# numeric conversion.
	# NOTE(review): entries containing thousands separators would become NA
	# here - confirm against KyFarms.csv.
	farms07 <- transform(farms07, Operations = as.numeric(as.character(Value)))
	farms07 <- rename(farms07, c(County.Code = "CoFips"))
	dim(farms07)
	names(farms07)
	farms07



	# *-------------------------------------------------------------
	# \| get corn data
	# *-------------------------------------------------------------

	# Load the corn data file (CR08.csv); ".", "NA", "" and "?" are read as NA.
	corn <- read.csv("CR08.csv", na.strings = c(".", "NA", "", "?"), encoding = "UTF-8")
	names(corn)

	# Subset: keep only the rows whose State is Kentucky.
	kycorn <- corn[corn$State == "Kentucky", ]
	dim(kycorn) # n=90
	print(kycorn$County)
	names(kycorn)

	# rename() is provided by the reshape package, so attach it before the call
	# below instead of relying on it already being loaded in the session.
	library(reshape)

	# Keep only the relevant variables and give the crop-specific ones a "corn"
	# prefix. County and CoFips keep their names because later merges use them
	# as keys.
	kycorn <- kycorn[c("County", "Yield", "Harvested", "CoFips", "Planted.All.Purposes")]
	names(kycorn)
	kycorn <- rename(
	  kycorn,
	  c(Yield = "cornYield", Harvested = "cornHarvested", Planted.All.Purposes = "cornPlanted")
	)
	names(kycorn)
	kycorn

	# *-------------------------------------------------------------
	# \| get soybean data
	# *-------------------------------------------------------------

	# Read the soybean file (SB08.csv); ".", "NA", "" and "?" become NA.
	soybeans <- read.csv(
	  "SB08.csv",
	  na.strings = c(".", "NA", "", "?"),
	  encoding = "UTF-8"
	)
	names(soybeans)

	# Restrict to the rows where State is Kentucky.
	kysoybeans <- soybeans[soybeans$State == "Kentucky", ]
	dim(kysoybeans) # n=79
	print(kysoybeans$County)
	names(kysoybeans)

	# reshape supplies the rename() function used below.
	library(reshape)

	# Retain County plus the three crop measures, then prefix the measures with
	# "soybean" so they stay distinct once merged with the other crops.
	kysoybeans <- kysoybeans[c("County", "Yield", "Harvested", "Planted.All.Purposes")]
	names(kysoybeans)
	kysoybeans <- rename(
	  kysoybeans,
	  c(
	    Yield = "soybeanYield",
	    Harvested = "soybeanHarvested",
	    Planted.All.Purposes = "soybeanPlanted"
	  )
	)
	names(kysoybeans)
	kysoybeans

	# *-------------------------------------------------------------
	# \| get wheat data
	# *-------------------------------------------------------------

	# Read the wheat file (AW08.csv); ".", "NA", "" and "?" become NA.
	wheat <- read.csv(
	  "AW08.csv",
	  na.strings = c(".", "NA", "", "?"),
	  encoding = "UTF-8"
	)
	names(wheat)

	# Restrict to the rows where State is Kentucky.
	kywheat <- wheat[wheat$State == "Kentucky", ]
	dim(kywheat) # n= 70
	print(kywheat$County)
	names(kywheat)

	# Retain County plus the three crop measures and prefix the measures with
	# "wheat". rename() comes from reshape, which must already be attached at
	# this point.
	kywheat <- kywheat[c("County", "Yield", "Harvested", "Planted.All.Purposes")]
	names(kywheat)
	kywheat <- rename(
	  kywheat,
	  c(
	    Yield = "wheatYield",
	    Harvested = "wheatHarvested",
	    Planted.All.Purposes = "wheatPlanted"
	  )
	)
	names(kywheat)

	# *-------------------------------------------------------------
	# \| get EWG subsidy data
	# *-------------------------------------------------------------

	# Read the EWG subsidy table; ".", "NA", "" and "?" are treated as missing.
	kysubsidies <- read.csv(
	  "EWGKYSubsidies.csv",
	  na.strings = c(".", "NA", "", "?"),
	  encoding = "UTF-8"
	)
	# Show the column names, then the full table.
	names(kysubsidies)
	kysubsidies


	# *-------------------------------------------------------------
	# \|
	# \|
	# \|
	# \| merge data sets
	# \|
	# \|
	# \|
	# *-------------------------------------------------------------

	# Corn and soybeans: left join on County.
	# This keeps all corn growing counties (n=90) and adds soybean information.
	# The corn table is the driver file for all subsequent merges, so the starting
	# base data is corn growing counties: the final data set contains only
	# counties that grow corn (possibly alongside other crops), and counties that
	# do not grow corn are excluded from the analysis.
	#
	# merge() takes its key through by / by.x / by.y. Arguments such as
	# by.kycorn = County match none of those and are ignored, which leaves the key
	# to whatever column names the two tables happen to share. The key is
	# therefore named explicitly; all.x = TRUE on its own requests the left join.
	corn_and_soybeans <- merge(kycorn, kysoybeans, by = "County", all.x = TRUE)
	dim(corn_and_soybeans)
	names(corn_and_soybeans)
	corn_and_soybeans

	# All 3 crops: left join with the wheat data on County.
	allKyCrops <- merge(corn_and_soybeans, kywheat, by = "County", all.x = TRUE)
	dim(allKyCrops)
	names(allKyCrops)

	# Left join with the farm operations data on CoFips as the key,
	# since County won't match due to case differences.
	farmAndCrops <- merge(allKyCrops, farms07, by = "CoFips", all.x = TRUE)
	dim(farmAndCrops)
	names(farmAndCrops)
	farmAndCrops

	# Crops and subsidies: left join with the subsidy data on County.
	# NOTE(review): if kysubsidies shares other column names with farmAndCrops,
	# those columns now come back with .x/.y suffixes instead of acting as extra
	# keys - confirm against EWGKYSubsidies.csv.
	KyCropsAndSubsidies <- merge(farmAndCrops, kysubsidies, by = "County", all.x = TRUE)
	dim(KyCropsAndSubsidies)
	names(KyCropsAndSubsidies)

	# Quick report of the merged acreage, operations and subsidy columns.
	KyCropsAndSubsidies[c(
	  "County", "cornPlanted", "soybeanPlanted", "wheatPlanted",
	  "cornHarvested", "soybeanHarvested", "wheatHarvested",
	  "Operations", "Subsidy"
	)]



	# *-------------------------------------------------------------
	# \|
	# \|
	# \|
	# \| recode and aggregate variables
	# \|
	# \|
	# \|
	# *-------------------------------------------------------------

	# Recode missing values - acres planted.
	# Counties without a soybean or wheat record get NA from the left joins; treat
	# those as 0 acres. ifelse() is given the logical test directly and plain
	# yes/no values, with no assignments nested inside its branches.

	KyCropsAndSubsidies$SoybeanAcresPlanted <- ifelse(
	  is.na(KyCropsAndSubsidies$soybeanPlanted),
	  0,
	  KyCropsAndSubsidies$soybeanPlanted
	)

	KyCropsAndSubsidies[c("County", "SoybeanAcresPlanted")]

	KyCropsAndSubsidies$WheatAcresPlanted <- ifelse(
	  is.na(KyCropsAndSubsidies$wheatPlanted),
	  0,
	  KyCropsAndSubsidies$wheatPlanted
	)

	KyCropsAndSubsidies[c("County", "WheatAcresPlanted")]

	# Recode missing values - acres harvested.

	KyCropsAndSubsidies$SoybeanAcresHarvested <- ifelse(
	  is.na(KyCropsAndSubsidies$soybeanHarvested),
	  0,
	  KyCropsAndSubsidies$soybeanHarvested
	)

	KyCropsAndSubsidies[c("County", "SoybeanAcresHarvested")]

	KyCropsAndSubsidies$WheatAcresHarvested <- ifelse(
	  is.na(KyCropsAndSubsidies$wheatHarvested),
	  0,
	  KyCropsAndSubsidies$wheatHarvested
	)

	KyCropsAndSubsidies[c("County", "WheatAcresHarvested")]

	# Aggregations: total acres planted and harvested across the three crops.
	# cornPlanted and cornHarvested are not recoded above, so an NA in either
	# one yields an NA total for that county.

	KyCropsAndSubsidies <- transform(
	  KyCropsAndSubsidies,
	  AcresPlanted = cornPlanted + SoybeanAcresPlanted + WheatAcresPlanted,
	  AcresHarvested = cornHarvested + SoybeanAcresHarvested + WheatAcresHarvested
	)

	# Natural log of total planted acres.
	KyCropsAndSubsidies <- transform(KyCropsAndSubsidies, LogAcres = log(AcresPlanted))