Last active
September 20, 2017 12:12
-
-
Save MartinMacharia/95998154852b327f8d0de2daf78e95e8 to your computer and use it in GitHub Desktop.
Rwanda Impact
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data=read.csv(file.choose(), header=T) # Import data called data | |
View(data)# View data | |
duplicated(data$Dup) #Check for duplicates on column Dup, not a very efficient way for large dataset | |
data$Dup[duplicated(data1$Dup)] #Show duplicate entries, somewhat difficult to read results | |
unique(data1[duplicated(data1$Dup),])#Better way of showing unique repeat entries | |
#PSM with different matching criteria, select best based on visual inspection of common support graphics | |
setwd("C:/users/machariam/Desktop/Rwanda PW Impact report") | |
getwd() | |
RwandaData=read.csv("CleanV6.csv") | |
na.omit(RwandaData) | |
RwandaData=as.data.frame(na.omit(RwandaData)) #Omit missing values | |
sapply(RwandaData, function(x) sum(is.na(x)))#Inspect missing values | |
RwandaData=read.csv("CleanV5.csv", header=T, na.strings=c(""," ","NA")) #Insert NAs on missing values | |
na.omit(RwandaData) | |
length(HHID) | |
detach(RwandaData) | |
attach(RwandaData) | |
RwandaData[1:10,] | |
RwandaData[75:79,] | |
install.packages("MatchIt") | |
library(MatchIt) | |
names(RwandaData) | |
str(RwandaData) | |
#Nearest Neighbor | |
m.out = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "nearest", | |
ratio = 1) | |
summary(m.out) | |
plot(m.out, type = "jitter") | |
plot(m.out, type = "hist") | |
#Nearest Neighbor with caliper | |
m.out1 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "nearest", caliper=0.16) | |
plot(m.out1, type = "hist") | |
summary(m.out1) | |
plot(m.out1, type = "jitter") | |
#Exact Matching | |
m.out2 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "exact") | |
summary(m.out2) | |
plot(m.out2, type = "hist") | |
#Subclassification | |
m.out3 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "subclass", subclass=4) | |
summary(m.out3) | |
plot(m.out3, type = "hist") | |
#Optimal Matching | |
install.packages("optmatch") | |
library(optmatch) | |
m.out4 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "optimal", ratio=1) | |
#Coarsened Exact Matching | |
install.packages("cem") | |
library(cem) | |
m.out5 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "cem") | |
summary(m.out5) | |
plot(m.out5, type = "hist") | |
m.out6 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "genetic") | |
plot(m.out6, type = "hist") | |
m.out7 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU, | |
data = RwandaData, method = "nearest", | |
distance ="logit") | |
plot(m.out7, type = "hist") | |
m.out8 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio, | |
data = RwandaData, method = "nearest", caliper=0.16) | |
plot(m.out8,type = "hist") | |
summary(m.out8) | |
summary(m.out8, standardize = TRUE) | |
install.packages("cobalt") | |
library(cobalt) | |
bal.tab(m.out8) | |
m.data9 <- match.data(m.out8) | |
write.csv(m.data9, file = ("C:/users/machariam/Desktop/Rwanda PW Impact report/RwandaMatched.csv")) | |
#Merge data if PSM was run on a subset | |
RwandaPSMData=read.csv("RwandaMatched.csv") | |
length(RwandaPSMData$Count) | |
RwandaMaster=read.csv("ToMerge.csv") | |
length(RwandaMaster$HHID) | |
RwandaMerged <- merge(RwandaPSMData, RwandaMaster, by=c("HHID")) | |
write.csv(RwandaMerged, file = ("C:/users/machariam/Desktop/Rwanda PW Impact report/MergedRwanda.csv")) | |
#Ttest tables, non parametric tests etc | |
#Load data | |
dat=read.csv(file.choose(), header=T) | |
names(dat) | |
summary(dat$TotalCostPerHa) | |
#Boxplot without outliers | |
res=boxplot(dat$SeedCostPerHA ~dat$Use, outline=FALSE) #Boxplot without outliers | |
res$out | |
#Check values above 90/75 percentile and below 25/10 percentile, systematic way to handle outliers | |
Percentiles <- quantile(dat$HHSize, probs=c(0.25, 0.75)) | |
Percentiles | |
#Subset based on percentiles | |
cleanTC <- subset(dat, (HHSize <= 58) & (HHSize >= 35), select=c(HHSize, Use)) | |
summary(cleanTC$HHSize) | |
cleanTC | |
#Run ttest | |
t.test(cleanTC$Age ~ cleanTC$Use) | |
#Run a non parametric for the descrete and categorical variable | |
Percentiles <- quantile(dat$EducCode, probs=c(0.25, 0.75)) | |
Percentiles | |
cleanTC <- subset(dat, (EducCode<= 1.1) & (EducCode >= 0.9), select=c(EducCode, Use)) | |
summary(cleanTC$EducCode) | |
wilcox.test(cleanTC$EducCode~cleanTC$Use) | |
#lOGIT | |
#Probit regression. Probit analysis will produce results similar logistic regression. | |
#The choice of probit versus logit depends largely on individual preferences. | |
#Avoid important collinearity between variables as this will cause over-adjustement. | |
reg = glm(Y~X1+X2+X3,family=binomial,data=db) | |
summary(reg) | |
setwd("c:/Users/machariam/Desktop/Logit") | |
list.files() | |
Dat=read.csv("Dat.csv") | |
names(Dat) | |
str(Dat) | |
hist(LandPercapita) | |
hist(MaizeLand) | |
hist(PPI) | |
hist(Labour) | |
hist(InputRecieved) | |
hist(Yield) | |
hist(InputCosts) | |
hist(TotalShocks) | |
hist(OwnExperience) | |
hist(RadioTv) | |
hist(Extension) | |
hist(Plantclinics) | |
summary(LandPercapita) | |
summary(MaizeLand) | |
summary(Yield) | |
summary(InputCosts) | |
PYield <- quantile(Yield, probs=c(0.10, 0.9)) | |
PYield | |
PInputCost<-quantile(InputCosts, prob=c(0.10,0.9)) | |
PInputCost | |
class(Dat) | |
Dat.sub=subset(Dat,(Yield<= 4000) & (Yield >= 330) & (InputCosts<=207) & (InputCosts>=7)) | |
View(Dat.sub) | |
length(Dat.sub$HHID) | |
hist(Dat.sub$Yield) | |
hist(Dat.sub$InputCosts) | |
hist(Dat.sub$Labour) | |
str(Dat.sub) | |
cor(Dat.sub[sapply(Dat.sub, is.numeric)]) | |
cor(subset(Dat.sub, select = c(LandPercapita,MaizeLand,Yield, InputCosts,Labour))) | |
fCulturalPA=as.factor(Dat.sub$CulturalPA) | |
fGender=as.factor(GenderCode) | |
#Cultural prevention/ avoidance | |
reg = glm(fCulturalPA~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg) | |
reg1 = glm(fCulturalPA~LandPercapita+Yield+InputCosts+Gender+EducCode+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg1) | |
anova(reg1, test="Chisq") | |
exp(coef(reg1)) | |
exp(confint(reg1)) | |
#Cultural planting / cropping patterns | |
names(Dat.sub) | |
#lOGIT | |
#Probit regression. Probit analysis will produce results similar logistic regression. | |
#The choice of probit versus logit depends largely on individual preferences. | |
#Avoid important collinearity between variables as this will cause over-adjustement. | |
reg = glm(Y~X1+X2+X3,family=binomial,data=db) | |
summary(reg) | |
setwd("c:/Users/machariam/Desktop/Logit") | |
list.files() | |
Dat=read.csv("Dat.csv") | |
names(Dat) | |
str(Dat) | |
hist(LandPercapita) | |
hist(MaizeLand) | |
hist(PPI) | |
hist(Labour) | |
hist(InputRecieved) | |
hist(Yield) | |
hist(InputCosts) | |
hist(TotalShocks) | |
hist(OwnExperience) | |
hist(RadioTv) | |
hist(Extension) | |
hist(Plantclinics) | |
summary(LandPercapita) | |
summary(MaizeLand) | |
summary(Yield) | |
summary(InputCosts) | |
PYield <- quantile(Yield, probs=c(0.10, 0.9)) | |
PYield | |
PInputCost<-quantile(InputCosts, prob=c(0.10,0.9)) | |
PInputCost | |
class(Dat) | |
Dat.sub=subset(Dat,(Yield<= 4000) & (Yield >= 330) & (InputCosts<=207) & (InputCosts>=7)) | |
View(Dat.sub) | |
length(Dat.sub$HHID) | |
hist(Dat.sub$Yield) | |
hist(Dat.sub$InputCosts) | |
hist(Dat.sub$Labour) | |
hist(Dat.sub$EducCode) | |
hist(Dat.sub$GenderCode) | |
str(Dat.sub) | |
cor(Dat.sub[sapply(Dat.sub, is.numeric)]) | |
cor(subset(Dat.sub, select = c(LandPercapita,MaizeLand,Yield, InputCosts,Labour))) | |
fCulturalPA=as.factor(Dat.sub$CulturalPA) | |
fGender=as.factor(GenderCode) | |
#Cultural prevention/ avoidance | |
reg0 = glm(fCulturalPA~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg0) | |
reg1 = glm(fCulturalPA~LandPercapita+Yield+InputCosts+Gender+EducCode+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg1) | |
anova(reg1, test="Chisq") | |
exp(coef(reg1)) | |
exp(confint(reg1)) | |
#Cultural planting / cropping patterns | |
names(Dat.sub) | |
fCulturalPCP=as.factor(Dat.sub$CulturalPCP) | |
class(fCulturalPCP) | |
reg2 = glm(fCulturalPCP~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg2) | |
reg3 = glm(fCulturalPCP~InputCosts+Gender+PPI+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg3) | |
#Cultural resistance / Tolerant varieties | |
names(Dat.sub) | |
fCulturalRTV=as.factor(Dat.sub$CulturalRTV) | |
reg4 = glm(fCulturalRTV~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg4) | |
reg5 = glm(fCulturalRTV~LandPercapita+Yield+InputCosts+EducCode+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg5) | |
#Mechanical late stage intervention | |
fMechanicalLS=as.factor(Dat.sub$MechanicalLS) | |
names(Dat.sub) | |
reg6 = glm(fMechanicalLS~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg6) | |
reg7 = glm(fMechanicalLS~LandPercapita+MaizeLand+InputCosts+Gender+EducCode+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg7) | |
#Mechanical early stage intervention | |
names(Dat.sub) | |
fMechanicalES=as.factor(Dat.sub$MechanicalES) | |
reg8 = glm(fMechanicalES~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg8) | |
reg9 = glm(fMechanicalES~MaizeLand+InputCosts+EducCode+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg9) | |
#Biological -NA | |
#Pesticides | |
names(Dat.sub) | |
fPesticides=as.factor(Dat.sub$Pesticides) | |
reg10 = glm(fPesticides~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg10) | |
reg11 = glm(fPesticides~LandPercapita+MaizeLand+InputCosts+Gender+PPI+InputRecieved+TotalShocks+OwnExperience+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg11) | |
#Monitoring | |
names(Dat.sub) | |
fMonitoring=as.factor(Dat.sub$Monitoring) | |
reg12 = glm(fMonitoring~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg12) | |
reg12 = glm(fMonitoring~MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg12) | |
####Option 2 | |
#Application of pesticides | |
names(Dat.sub) | |
fPesticides2=as.factor(Dat.sub$Pesticides2) | |
reg13 = glm(fPesticides2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg13) | |
reg14 = glm(fPesticides2~MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg14) | |
#Remove crop residues | |
names(Dat.sub) | |
fCropResidues2=as.factor(Dat.sub$CropResidues2) | |
reg15 = glm(fCropResidues2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg15) | |
reg16 = glm(fCropResidues2~MaizeLand+Gender+EducCode+InputRecieved+OwnExperience+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg16) | |
#Uproot and burn | |
names(Dat.sub) | |
fUproot2=as.factor(Dat.sub$Uproot2) | |
reg17 = glm(fCropResidues2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg17) | |
reg18 = glm(fCropResidues2~LandPercapita+MaizeLand+Gender+EducCode+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg18) | |
#Hand picking | |
names(Dat.sub) | |
fHandPick2=as.factor(Dat.sub$HandPick2) | |
reg19 = glm(fHandPick2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg19) | |
reg20 = glm(fHandPick2~Yield+InputCosts+PPI+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg20) | |
#Monitoring | |
names(Dat.sub) | |
fMonitor2=as.factor(Dat.sub$Monitor2) | |
reg21 = glm(fMonitor2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg21) | |
reg22 = glm(fMonitor2~MaizeLand+Yield+InputCosts+Gender+EducCode+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub) | |
summary(reg22) | |
#Test of equality in proportions | |
prop.test(x = c(45,64), n = c(346,499), correct = TRUE) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment