Skip to content

Instantly share code, notes, and snippets.

@MartinMacharia
Last active September 20, 2017 12:12
Show Gist options
  • Save MartinMacharia/95998154852b327f8d0de2daf78e95e8 to your computer and use it in GitHub Desktop.
Save MartinMacharia/95998154852b327f8d0de2daf78e95e8 to your computer and use it in GitHub Desktop.
Rwanda Impact
data=read.csv(file.choose(), header=T) # Import data called data
View(data)# View data
duplicated(data$Dup) #Check for duplicates on column Dup, not a very efficient way for large dataset
data$Dup[duplicated(data1$Dup)] #Show duplicate entries, somewhat difficult to read results
unique(data1[duplicated(data1$Dup),])#Better way of showing unique repeat entries
#PSM with different matching criteria, select best based on visual inspection of common support graphics
setwd("C:/users/machariam/Desktop/Rwanda PW Impact report")
getwd()
RwandaData=read.csv("CleanV6.csv")
na.omit(RwandaData)
RwandaData=as.data.frame(na.omit(RwandaData)) #Omit missing values
sapply(RwandaData, function(x) sum(is.na(x)))#Inspect missing values
RwandaData=read.csv("CleanV5.csv", header=T, na.strings=c(""," ","NA")) #Insert NAs on missing values
na.omit(RwandaData)
length(HHID)
detach(RwandaData)
attach(RwandaData)
RwandaData[1:10,]
RwandaData[75:79,]
install.packages("MatchIt")
library(MatchIt)
names(RwandaData)
str(RwandaData)
#Nearest Neighbor
m.out = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "nearest",
ratio = 1)
summary(m.out)
plot(m.out, type = "jitter")
plot(m.out, type = "hist")
#Nearest Neighbor with caliper
m.out1 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "nearest", caliper=0.16)
plot(m.out1, type = "hist")
summary(m.out1)
plot(m.out1, type = "jitter")
#Exact Matching
m.out2 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "exact")
summary(m.out2)
plot(m.out2, type = "hist")
#Subclassification
m.out3 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "subclass", subclass=4)
summary(m.out3)
plot(m.out3, type = "hist")
#Optimal Matching
install.packages("optmatch")
library(optmatch)
m.out4 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "optimal", ratio=1)
#Coarsened Exact Matching
install.packages("cem")
library(cem)
m.out5 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "cem")
summary(m.out5)
plot(m.out5, type = "hist")
m.out6 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "genetic")
plot(m.out6, type = "hist")
m.out7 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + NumberBtw14To65 + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio+TLU,
data = RwandaData, method = "nearest",
distance ="logit")
plot(m.out7, type = "hist")
m.out8 = matchit(Use ~ FarmingImportanceCode + TotalHHMembers + RespSexCode +RespAge+RespEducCode+TotalMaizeLand +YoungDepRatio,
data = RwandaData, method = "nearest", caliper=0.16)
plot(m.out8,type = "hist")
summary(m.out8)
summary(m.out8, standardize = TRUE)
install.packages("cobalt")
library(cobalt)
bal.tab(m.out8)
m.data9 <- match.data(m.out8)
write.csv(m.data9, file = ("C:/users/machariam/Desktop/Rwanda PW Impact report/RwandaMatched.csv"))
#Merge data if PSM was run on a subset
RwandaPSMData=read.csv("RwandaMatched.csv")
length(RwandaPSMData$Count)
RwandaMaster=read.csv("ToMerge.csv")
length(RwandaMaster$HHID)
RwandaMerged <- merge(RwandaPSMData, RwandaMaster, by=c("HHID"))
write.csv(RwandaMerged, file = ("C:/users/machariam/Desktop/Rwanda PW Impact report/MergedRwanda.csv"))
#Ttest tables, non parametric tests etc
#Load data
dat=read.csv(file.choose(), header=T)
names(dat)
summary(dat$TotalCostPerHa)
#Boxplot without outliers
res=boxplot(dat$SeedCostPerHA ~dat$Use, outline=FALSE) #Boxplot without outliers
res$out
#Check values above 90/75 percentile and below 25/10 percentile, systematic way to handle outliers
Percentiles <- quantile(dat$HHSize, probs=c(0.25, 0.75))
Percentiles
#Subset based on percentiles
cleanTC <- subset(dat, (HHSize <= 58) & (HHSize >= 35), select=c(HHSize, Use))
summary(cleanTC$HHSize)
cleanTC
#Run ttest
t.test(cleanTC$Age ~ cleanTC$Use)
#Run a non parametric for the descrete and categorical variable
Percentiles <- quantile(dat$EducCode, probs=c(0.25, 0.75))
Percentiles
cleanTC <- subset(dat, (EducCode<= 1.1) & (EducCode >= 0.9), select=c(EducCode, Use))
summary(cleanTC$EducCode)
wilcox.test(cleanTC$EducCode~cleanTC$Use)
#lOGIT
#Probit regression. Probit analysis will produce results similar logistic regression.
#The choice of probit versus logit depends largely on individual preferences.
#Avoid important collinearity between variables as this will cause over-adjustement.
reg = glm(Y~X1+X2+X3,family=binomial,data=db)
summary(reg)
setwd("c:/Users/machariam/Desktop/Logit")
list.files()
Dat=read.csv("Dat.csv")
names(Dat)
str(Dat)
hist(LandPercapita)
hist(MaizeLand)
hist(PPI)
hist(Labour)
hist(InputRecieved)
hist(Yield)
hist(InputCosts)
hist(TotalShocks)
hist(OwnExperience)
hist(RadioTv)
hist(Extension)
hist(Plantclinics)
summary(LandPercapita)
summary(MaizeLand)
summary(Yield)
summary(InputCosts)
PYield <- quantile(Yield, probs=c(0.10, 0.9))
PYield
PInputCost<-quantile(InputCosts, prob=c(0.10,0.9))
PInputCost
class(Dat)
Dat.sub=subset(Dat,(Yield<= 4000) & (Yield >= 330) & (InputCosts<=207) & (InputCosts>=7))
View(Dat.sub)
length(Dat.sub$HHID)
hist(Dat.sub$Yield)
hist(Dat.sub$InputCosts)
hist(Dat.sub$Labour)
str(Dat.sub)
cor(Dat.sub[sapply(Dat.sub, is.numeric)])
cor(subset(Dat.sub, select = c(LandPercapita,MaizeLand,Yield, InputCosts,Labour)))
fCulturalPA=as.factor(Dat.sub$CulturalPA)
fGender=as.factor(GenderCode)
#Cultural prevention/ avoidance
reg = glm(fCulturalPA~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg)
reg1 = glm(fCulturalPA~LandPercapita+Yield+InputCosts+Gender+EducCode+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg1)
anova(reg1, test="Chisq")
exp(coef(reg1))
exp(confint(reg1))
#Cultural planting / cropping patterns
names(Dat.sub)
#lOGIT
#Probit regression. Probit analysis will produce results similar logistic regression.
#The choice of probit versus logit depends largely on individual preferences.
#Avoid important collinearity between variables as this will cause over-adjustement.
reg = glm(Y~X1+X2+X3,family=binomial,data=db)
summary(reg)
setwd("c:/Users/machariam/Desktop/Logit")
list.files()
Dat=read.csv("Dat.csv")
names(Dat)
str(Dat)
hist(LandPercapita)
hist(MaizeLand)
hist(PPI)
hist(Labour)
hist(InputRecieved)
hist(Yield)
hist(InputCosts)
hist(TotalShocks)
hist(OwnExperience)
hist(RadioTv)
hist(Extension)
hist(Plantclinics)
summary(LandPercapita)
summary(MaizeLand)
summary(Yield)
summary(InputCosts)
PYield <- quantile(Yield, probs=c(0.10, 0.9))
PYield
PInputCost<-quantile(InputCosts, prob=c(0.10,0.9))
PInputCost
class(Dat)
Dat.sub=subset(Dat,(Yield<= 4000) & (Yield >= 330) & (InputCosts<=207) & (InputCosts>=7))
View(Dat.sub)
length(Dat.sub$HHID)
hist(Dat.sub$Yield)
hist(Dat.sub$InputCosts)
hist(Dat.sub$Labour)
hist(Dat.sub$EducCode)
hist(Dat.sub$GenderCode)
str(Dat.sub)
cor(Dat.sub[sapply(Dat.sub, is.numeric)])
cor(subset(Dat.sub, select = c(LandPercapita,MaizeLand,Yield, InputCosts,Labour)))
fCulturalPA=as.factor(Dat.sub$CulturalPA)
fGender=as.factor(GenderCode)
#Cultural prevention/ avoidance
reg0 = glm(fCulturalPA~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg0)
reg1 = glm(fCulturalPA~LandPercapita+Yield+InputCosts+Gender+EducCode+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg1)
anova(reg1, test="Chisq")
exp(coef(reg1))
exp(confint(reg1))
#Cultural planting / cropping patterns
names(Dat.sub)
fCulturalPCP=as.factor(Dat.sub$CulturalPCP)
class(fCulturalPCP)
reg2 = glm(fCulturalPCP~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg2)
reg3 = glm(fCulturalPCP~InputCosts+Gender+PPI+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg3)
#Cultural resistance / Tolerant varieties
names(Dat.sub)
fCulturalRTV=as.factor(Dat.sub$CulturalRTV)
reg4 = glm(fCulturalRTV~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg4)
reg5 = glm(fCulturalRTV~LandPercapita+Yield+InputCosts+EducCode+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg5)
#Mechanical late stage intervention
fMechanicalLS=as.factor(Dat.sub$MechanicalLS)
names(Dat.sub)
reg6 = glm(fMechanicalLS~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg6)
reg7 = glm(fMechanicalLS~LandPercapita+MaizeLand+InputCosts+Gender+EducCode+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg7)
#Mechanical early stage intervention
names(Dat.sub)
fMechanicalES=as.factor(Dat.sub$MechanicalES)
reg8 = glm(fMechanicalES~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg8)
reg9 = glm(fMechanicalES~MaizeLand+InputCosts+EducCode+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg9)
#Biological -NA
#Pesticides
names(Dat.sub)
fPesticides=as.factor(Dat.sub$Pesticides)
reg10 = glm(fPesticides~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg10)
reg11 = glm(fPesticides~LandPercapita+MaizeLand+InputCosts+Gender+PPI+InputRecieved+TotalShocks+OwnExperience+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg11)
#Monitoring
names(Dat.sub)
fMonitoring=as.factor(Dat.sub$Monitoring)
reg12 = glm(fMonitoring~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg12)
reg12 = glm(fMonitoring~MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg12)
####Option 2
#Application of pesticides
names(Dat.sub)
fPesticides2=as.factor(Dat.sub$Pesticides2)
reg13 = glm(fPesticides2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg13)
reg14 = glm(fPesticides2~MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg14)
#Remove crop residues
names(Dat.sub)
fCropResidues2=as.factor(Dat.sub$CropResidues2)
reg15 = glm(fCropResidues2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg15)
reg16 = glm(fCropResidues2~MaizeLand+Gender+EducCode+InputRecieved+OwnExperience+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg16)
#Uproot and burn
names(Dat.sub)
fUproot2=as.factor(Dat.sub$Uproot2)
reg17 = glm(fCropResidues2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg17)
reg18 = glm(fCropResidues2~LandPercapita+MaizeLand+Gender+EducCode+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg18)
#Hand picking
names(Dat.sub)
fHandPick2=as.factor(Dat.sub$HandPick2)
reg19 = glm(fHandPick2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg19)
reg20 = glm(fHandPick2~Yield+InputCosts+PPI+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg20)
#Monitoring
names(Dat.sub)
fMonitor2=as.factor(Dat.sub$Monitor2)
reg21 = glm(fMonitor2~LandPercapita+MaizeLand+Yield+InputCosts+Gender+EducCode+PPI+Labour+InputRecieved+TotalShocks+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg21)
reg22 = glm(fMonitor2~MaizeLand+Yield+InputCosts+Gender+EducCode+InputRecieved+OwnExperience+RadioTv+Extension+Plantclinics,family=binomial,data=Dat.sub)
summary(reg22)
#Test of equality in proportions
prop.test(x = c(45,64), n = c(346,499), correct = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment