Skip to content

Instantly share code, notes, and snippets.

@vrajesh26
Created October 20, 2017 13:53
Show Gist options
  • Save vrajesh26/a1f1af6d83a2e9eab1ec19c253a0b0ec to your computer and use it in GitHub Desktop.
Save vrajesh26/a1f1af6d83a2e9eab1ec19c253a0b0ec to your computer and use it in GitHub Desktop.
Identify the customer segments that are eligible for a loan.
# ---- Load data and first look ----
library(mlr)

# Folder holding the raw CSV files.
data_dir <- "D:/loan prediction"

# Empty and single-space cells are read as NA.
# stringsAsFactors = TRUE is explicit because R >= 4.0 reads text columns as
# character by default, while the rest of this script relies on factors
# (levels(), assigning existing levels, rpart classification, caret).
train <- read.csv(
  file.path(data_dir, "loan_train.csv"),
  na.strings = c("", " ", NA),
  stringsAsFactors = TRUE
)
test <- read.csv(
  file.path(data_dir, "loan_test.csv"),
  na.strings = c("", " ", NA),
  stringsAsFactors = TRUE
)

# The data viewer only makes sense in an interactive session.
if (interactive()) {
  View(train)
}

# Missing values per column, and overall shape / column summary.
colSums(is.na(train))
colSums(is.na(test))
dim(train)
summarizeColumns(train)
# ---- Categorical predictors vs. Loan_Status ----
# For each variable: stacked bar chart on train, plain bar chart on test,
# then a chi-square test of the variable against Loan_Status.
library("ggplot2")

# Target distribution
ggplot(train, aes(x = Loan_Status, fill = Loan_Status)) + geom_bar()
prop.table(table(train$Loan_Status))

# Gender
ggplot(train, aes(x = Gender, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Gender)) + geom_bar()
gl <- table(train$Gender, train$Loan_Status)
chisq.test(gl)

# Married
ggplot(train, aes(x = Married, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Married)) + geom_bar()
ml <- table(train$Married, train$Loan_Status)
chisq.test(ml)

# Dependents
ggplot(train, aes(x = Dependents, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Dependents)) + geom_bar()
levels(train$Dependents)
dl <- table(train$Dependents, train$Loan_Status)
chisq.test(dl)

# Education
ggplot(train, aes(x = Education, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Education)) + geom_bar()
el <- table(train$Education, train$Loan_Status)
chisq.test(el)

# Self_Employed
ggplot(train, aes(x = Self_Employed, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Self_Employed)) + geom_bar()
sl <- table(train$Self_Employed, train$Loan_Status)
chisq.test(sl)
# ---- Numeric predictors: histograms on train (by Loan_Status) and test ----

# ApplicantIncome
ggplot(train, aes(x = ApplicantIncome, fill = Loan_Status)) +
  geom_histogram()
ggplot(test, aes(x = ApplicantIncome)) +
  geom_histogram()

# CoapplicantIncome
ggplot(train, aes(x = CoapplicantIncome, fill = Loan_Status)) +
  geom_histogram()
ggplot(test, aes(x = CoapplicantIncome)) +
  geom_histogram()

# LoanAmount
ggplot(train, aes(x = LoanAmount, fill = Loan_Status)) +
  geom_histogram()
ggplot(test, aes(x = LoanAmount)) +
  geom_histogram()

# Loan_Amount_Term (train only)
summary(train$Loan_Amount_Term)
ggplot(train, aes(x = Loan_Amount_Term, fill = Loan_Status)) +
  geom_histogram()
# ---- Credit_History and Property_Area vs. Loan_Status ----

# Credit_History is converted to a factor in both data sets before plotting,
# so it is treated as a category from here on.
summary(train$Credit_History)
train$Credit_History <- as.factor(train$Credit_History)
test$Credit_History <- as.factor(test$Credit_History)
levels(train$Credit_History)

ggplot(train, aes(x = Credit_History, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Credit_History)) + geom_bar()
cl <- table(train$Credit_History, train$Loan_Status)
chisq.test(cl)

# Property_Area
ggplot(train, aes(x = Property_Area, fill = Loan_Status)) + geom_bar()
ggplot(test, aes(x = Property_Area)) + geom_bar()
pl <- table(train$Property_Area, train$Loan_Status)
chisq.test(pl)
# ---- Stack train and test so both are cleaned identically ----
summary(is.na(train))

# Columns 2 to 12 of each data set, train rows first and test rows after.
shared_cols <- 2:12
loan <- rbind(train[, shared_cols], test[, shared_cols])

# Missing values per column in the combined data.
colSums(is.na(loan))
# ---- Feature: total income, then rule-based imputation ----
loan$TotalIncome <- loan$ApplicantIncome + loan$CoapplicantIncome

# Married: missing values become "No" when there is no co-applicant income,
# and "Yes" for whatever is still missing afterwards.
married_missing <- is.na(loan$Married)
loan$Married[married_missing & loan$CoapplicantIncome == 0] <- "No"
loan$Married[is.na(loan$Married)] <- "Yes"

# Rows missing both Gender and Dependents: show them, then set Gender to "Male".
gender_dep_missing <- is.na(loan$Gender) & is.na(loan$Dependents)
loan[gender_dep_missing, ]
loan$Gender[gender_dep_missing] <- "Male"

# Unmarried applicants with missing Dependents get "0".
loan$Dependents[is.na(loan$Dependents) & loan$Married == "No"] <- "0"
# ---- Impute Dependents for married men with a classification tree ----
library("rpart")
library("rpart.plot") # provides rpart.plot(); it was called without being loaded

# Columns handed to the tree, selected by position; Dependents (the response
# in the formula below) is among them.
dep_cols <- c(3, 6:9, 11)

# %in% never returns NA. With `==`, rows whose Gender is still missing would
# come back as all-NA rows, land in mmtest, and make the predictions longer
# than (and misaligned with) the rows being filled in.
is_married_male <- loan$Gender %in% "Male" & loan$Married %in% "Yes"

mm <- loan[is_married_male, dep_cols]
mmtrain <- mm[!is.na(mm$Dependents), ]
mmtest <- mm[is.na(mm$Dependents), ]

depFit <- rpart(Dependents ~ ., data = mmtrain, xval = 3)
rpart.plot(depFit)

# mmtest contains exactly the rows selected by this mask, in the same order,
# so each prediction is written back to the row it was made for.
needs_dependents <- is_married_male & is.na(loan$Dependents)
if (any(needs_dependents)) {
  loan$Dependents[needs_dependents] <-
    predict(depFit, newdata = mmtest, type = "class")
}
# ---- Impute the remaining missing Gender values with a classification tree ----
# The tree is fit on the first seven columns of `loan` (Gender is the response).
gender_cols <- 1:7
gender_missing <- is.na(loan$Gender)

gtrain <- loan[!gender_missing, gender_cols]
gtest <- loan[gender_missing, gender_cols]

genFit <- rpart(Gender ~ ., data = gtrain, xval = 3)
# Namespace-qualified so the plot works even if rpart.plot is not attached.
rpart.plot::rpart.plot(genFit)

# Skip the write-back when nothing is missing.
if (any(gender_missing)) {
  loan$Gender[gender_missing] <-
    predict(genFit, newdata = gtest, type = "class")
}
# ---- Impute Self_Employed, Credit_History, LoanAmount, Loan_Amount_Term ----

# Self_Employed: show the class counts, then fill missing values with "No".
table(loan$Self_Employed)
self_employed_missing <- is.na(loan$Self_Employed)
loan$Self_Employed[self_employed_missing] <- "No"

# Credit_History: missing values are recoded to their own value, 2.
library(car)
loan$Credit_History <- recode(loan$Credit_History, "NA=2")

# LoanAmount: fit glm() on rows with a known amount below 500 and predict the
# missing amounts from it. Columns are selected by position.
amount_cols <- c(1:8, 10)
amount_missing <- is.na(loan$LoanAmount)
ltrain <- loan[!amount_missing & loan$LoanAmount < 500, amount_cols]
ltest <- loan[amount_missing, amount_cols]
loanFit <- glm(LoanAmount ~ ., data = ltrain, na.action = na.exclude)
loan$LoanAmount[amount_missing] <- predict(loanFit, newdata = ltest)

# Loan_Amount_Term: treat as a factor and fill missing values with "360".
loan$Loan_Amount_Term <- as.factor(loan$Loan_Amount_Term)
term_missing <- is.na(loan$Loan_Amount_Term)
loan$Loan_Amount_Term[term_missing] <- "360"
# ---- Derived features ----

# Dependents as a number; the open-ended "3+" level is counted as 3.
numDependents <- as.numeric(
  sub("+", "", as.character(loan$Dependents), fixed = TRUE)
)

# Two adults when there is co-applicant income or the applicant is married,
# otherwise one. Married is coded "Yes"/"No" in this script, so the earlier
# comparison with "Y" never matched and married applicants without
# co-applicant income were counted as a single adult.
has_second_adult <- loan$CoapplicantIncome > 0 | loan$Married == "Yes"
loan$FamilySize <- numDependents + ifelse(has_second_adult, 2, 1)

loan$IncomePC <- loan$TotalIncome / loan$FamilySize
loan$LoanAmountByTotInc <- loan$LoanAmount / loan$TotalIncome
loan$LoanAmountPC <- loan$LoanAmount / loan$IncomePC

# Loan_Amount_Term stays a factor in `loan`; a numeric copy is used for the
# per-month features instead of converting the column back and forth.
term_numeric <- as.numeric(as.character(loan$Loan_Amount_Term))
loan$LoanPerMonth <- loan$LoanAmount / term_numeric

# Spelled consistently with the cor() call below (was "LoanPerMOnthByTotInc",
# which made that call read a non-existent column).
loan$LoanPerMonthByTotInc <- loan$LoanPerMonth / loan$TotalIncome

# NOTE(review): this divides by LoanAmountPC, not IncomePC or FamilySize;
# kept as written -- confirm that this is the intended definition.
loan$LoanPerMonthPC <- loan$LoanPerMonth / loan$LoanAmountPC

# ---- Correlation check ----
cor(loan$LoanPerMonthByTotInc, loan$LoanPerMonth)

# Correlation matrix of the numeric columns on the training rows. Computed
# from `loan` because `newtrain` does not exist yet at this point.
is_numeric_col <- vapply(loan, is.numeric, logical(1))
cor(loan[seq_len(nrow(train)), is_numeric_col])

# Drop LoanPerMonthByTotInc (by name rather than by position 18).
loan$LoanPerMonthByTotInc <- NULL
# ---- Split the cleaned data back into train and test ----
n_train <- nrow(train)
n_test <- nrow(test)

# Sanity check: `loan` is the train rows followed by the test rows.
c(loan = nrow(loan), train = n_train, test = n_test)
stopifnot(nrow(loan) == n_train + n_test)

# Row ranges are derived from the data instead of hard-coding 614 and 981,
# so the split stays correct if the input files change size.
newtrain <- cbind(loan[seq_len(n_train), ], Loan_Status = train$Loan_Status)
if (interactive()) {
  View(newtrain)
}
newtest <- loan[n_train + seq_len(n_test), ]
# ---- Fit models and write predictions ----
library("caret")

# One resampling specification shared by both models (it was defined twice).
cntrl <- trainControl(method = "repeatedcv", number = 5)

# Columns of `newtrain` left out of the predictors, by position. Column 19 is
# also used as the outcome below -- presumably Loan_Status, the column that
# cbind() appended last; confirm the layout if columns are added or removed.
excluded_cols <- c(6, 17, 19)
predictors <- newtrain[, -excluded_cols]
outcome <- newtrain[, 19]

# caret::train is namespace-qualified because `train` is also the name of the
# training data frame in this script.
rp <- caret::train(
  x = predictors, y = outcome,
  method = "rpart", trControl = cntrl
)
rf1 <- caret::train(
  x = predictors, y = outcome,
  method = "rf", trControl = cntrl
)

# Predictions from the random forest, keyed by the first column of `test`.
pred_rf <- predict(rf1, newdata = newtest)
adg <- data.frame(Loan_ID = test[, 1], Loan_Status = pred_rf)

# row.names = FALSE keeps the file to the two columns above; the default
# would prepend an unnamed column of row numbers.
write.csv(adg, file = "loan_output.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment