#### Some regression/classification in R ####
# Packages assumed by the functions below:
library(MASS)          # lda(), qda()
library(klaR)          # rda()
library(e1071)         # naiveBayes()
library(glmnet)        # cv.glmnet()
library(kernlab)       # ksvm()
library(tree)          # tree(), cv.tree(), prune.tree()
library(randomForest)  # randomForest()
library(gbm)           # gbm(), gbm.perf()

#### LDA, QDA, RDA, naive Bayes functions ####
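
#### Helper assumed by every *.pred function below ####
# errorInfo() is called throughout but never defined in this gist. The
# definition below is a minimal sketch, not the original: it assumes the true
# labels sit in a global named list dataSets.Y whose names (e.g. "train",
# "test") match those of dataSets.X.
errorInfo = function(predresult, title = "") {
  for (nm in names(predresult)) {
    err = mean(predresult[[nm]] != dataSets.Y[[nm]])
    cat(sprintf("%s %s error rate: %.4f\n", title, nm, err))
  }
}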
lda.model = function(traindata) {
  lda.result = lda(Y ~ ., data = traindata)
  return(lda.result)
}

lda.pred = function(model, dataSets.X) {
  predresult = lapply(dataSets.X, function(X) predict(model, X)$class)
  errorInfo(predresult, "lda")
  return(predresult)
}
####################################
qda.model = function(traindata) {
  qda.result = qda(Y ~ ., data = traindata)
  return(qda.result)
}

qda.pred = function(model, dataSets.X) {
  predresult = lapply(dataSets.X, function(X) predict(model, X)$class)
  errorInfo(predresult, "qda")
  return(predresult)
}
####################################
rda.model = function(traindata) {
  rda.result = rda(Y ~ ., data = traindata)
  return(rda.result)
}

rda.pred = function(model, dataSets.X) {
  predresult = lapply(dataSets.X, function(X) predict(model, X)$class)
  errorInfo(predresult, "rda")
  return(predresult)
}
####################################
baye.model = function(traindata) {
  baye.result = naiveBayes(Y ~ ., data = traindata)
  return(baye.result)
  #### Manual implementation (unfinished stub; see the sketch after this
  #### function for one possible completion) ####
  # traindata.X = traindata[, -1]
  # Y = traindata$Y
  # ntrain = length(Y)
  # nfeatures = ncol(traindata.X)
  # p = sum(Y == 3) / ntrain
  # for (i in 1:nfeatures) {
  #   traindata.X[, i]
  # }
}
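
# One possible completion of the manual naive Bayes stub above: a sketch
# assuming Gaussian class-conditional densities, the two class labels 3 and 8,
# and the response Y in column 1. baye.manual is a hypothetical name, not part
# of the original gist.
baye.manual = function(traindata, newdata.X) {
  Y = traindata$Y
  X = as.matrix(traindata[, -1])
  newX = as.matrix(newdata.X)
  prior3 = mean(Y == 3)   # estimate of P(Y = 3)
  # Per-feature Gaussian log-likelihood of newX given class mean/sd vectors
  # (features with sd = 0 would need special handling in practice)
  loglik = function(mu, s) {
    rowSums(sapply(seq_along(mu),
                   function(j) dnorm(newX[, j], mu[j], s[j], log = TRUE)))
  }
  logp3 = log(prior3)     + loglik(colMeans(X[Y == 3, ]), apply(X[Y == 3, ], 2, sd))
  logp8 = log(1 - prior3) + loglik(colMeans(X[Y == 8, ]), apply(X[Y == 8, ], 2, sd))
  factor(ifelse(logp3 > logp8, 3, 8))   # pick the class with higher log-posterior
}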
baye.pred = function(model, dataSets.X) {
  predresult = lapply(dataSets.X, function(X) predict(model, X))
  errorInfo(predresult, "baye")
  return(predresult)
}
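
# Example usage (hypothetical objects: traindata is a data frame with the
# factor response Y in column 1; dataSets.X is a named list of feature-only
# data frames such as list(train = train.X, test = test.X)):
# lda.fit = lda.model(traindata)
# lda.yhat = lda.pred(lda.fit, dataSets.X)   # same pattern for qda/rda/baye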
#### Functions for the logistic regression model ####
logreg.select = function(traindata) {
  # Full model; 200 as the maximum number of IRLS iterations
  logreg.result = glm(Y ~ ., data = traindata, family = binomial, maxit = 200)
  glm0 = glm(Y ~ 1, data = traindata, family = binomial(logit))
  n = nrow(traindata)
  # Forward stepwise selection under AIC and BIC
  logreg.resultAIC = step(glm0, scope = list(upper = logreg.result), trace = F, direction = "forward")
  logreg.resultBIC = step(glm0, scope = list(upper = logreg.result), trace = F, k = log(n), direction = "forward")
  # Lasso-penalized logistic regression, lambda chosen by cross-validation
  X = as.matrix(traindata[, -1])
  Y = traindata[, 1]
  logreg.resultLasso = cv.glmnet(X, Y, family = "binomial", alpha = 1)
  return(list(fullmodel = logreg.result, AICmodel = logreg.resultAIC,
              BICmodel = logreg.resultBIC, lassomodel = logreg.resultLasso))
}
logreg.pred = function(log.model, dataSets.X, addtitle = "logit") {
  tmpfunc = function(data.X) {
    Yhat = predict(log.model, data.X, type = "response")
    # The two class labels are 3 and 8; P(Y = 8) > 0.5 predicts class 8
    Yhat = ifelse(Yhat > 0.5, 8, 3)
    Yhat = as.factor(Yhat)
    return(Yhat)
  }
  predresult = lapply(dataSets.X, tmpfunc)
  errorInfo(predresult, addtitle)
  return(predresult)
}

logreg.lasso.pred = function(model, dataSets.X) {
  tmpfunc = function(data.X) {
    # Predict at the CV-optimal lambda
    Yhat = predict(model, as.matrix(data.X), type = "response", s = "lambda.min")
    Yhat = ifelse(Yhat > 0.5, 8, 3)
    Yhat = as.factor(Yhat)
    return(Yhat)
  }
  predresult = lapply(dataSets.X, tmpfunc)
  errorInfo(predresult, "Lasso")
  return(predresult)
}
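
# Example usage: fit the full/AIC/BIC/lasso models once, then predict with any
# of them (traindata and dataSets.X as assumed above):
# models = logreg.select(traindata)
# logreg.pred(models$AICmodel, dataSets.X, "logit-AIC")
# logreg.lasso.pred(models$lassomodel, dataSets.X)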
#### This is the function for SVM ####
svm.model = function(traindata, test.X, kn) {
  # test.X is accepted but unused; kn = "quar" selects a quadratic polynomial kernel
  if (kn == "quar")
    svm.result = ksvm(Y ~ ., data = traindata, kernel = "polydot", kpar = list(degree = 2))
  else
    svm.result = ksvm(Y ~ ., data = traindata, kernel = kn)
  return(svm.result)
}
svm.pred = function(model, dataSets.X, addtitle = "") {
  predresult = lapply(dataSets.X, function(X) predict(model, X))
  errorInfo(predresult, addtitle)
  return(predresult)
}
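
# Example usage: kn is either a kernlab kernel name ("rbfdot", "vanilladot",
# "polydot", ...) or "quar" for the quadratic polynomial case handled above;
# the test.X argument is ignored:
# svm.fit = svm.model(traindata, NULL, "rbfdot")
# svm.pred(svm.fit, dataSets.X, "svm-rbf")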
#### This is a function for the tree model ####
tree.build = function(traindata) {
  ### Build a large tree, then cross-validate to guide pruning
  tree.result = tree(Y ~ ., data = traindata, mindev = 0.005, minsize = 2)
  png("large_tree.png", width = 800, height = 800, res = 100)
  plot(tree.result)
  text(tree.result)
  dev.off()
  cv = cv.tree(tree.result, K = 5)
  png("tree_cv.png", width = 800, height = 800, res = 100)
  plot(cv)
  dev.off()
  return(list(tree.result, cv))
}
tree.prune = function(tree.result, size) {
  tree.pruned = prune.tree(tree.result, method = "deviance", best = size)
  png("Pruned_tree.png", width = 800, height = 800, res = 100)
  plot(tree.pruned)
  text(tree.pruned)
  dev.off()
  return(tree.pruned)
}
tree.pred = function(tree.model, dataSets.X) {
  tmpfunc = function(X) {
    Y_hat = predict(tree.model, X)   ### Y_hat is a matrix of class probabilities
    colClass = as.numeric(colnames(Y_hat)[1:2])
    Y_hat = ifelse(Y_hat[, 1] > Y_hat[, 2], colClass[1], colClass[2])
    Y_hat = as.factor(Y_hat)
    return(Y_hat)
  }
  predresult = lapply(dataSets.X, tmpfunc)
  errorInfo(predresult)
  return(predresult)
}
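
# Example usage: grow the large tree, read a subtree size off the CV curve in
# tree_cv.png, then prune and predict (size = 6 is an arbitrary illustration):
# built = tree.build(traindata)
# pruned = tree.prune(built[[1]], size = 6)
# tree.pred(pruned, dataSets.X)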
#### This is the function for random forest ####
rf.model = function(traindata) {
  # mtry is left at its default (sqrt(p) for classification)
  rf.result = randomForest(Y ~ ., data = traindata, importance = T, ntree = 1000)
  # Column 3 of the importance matrix is the mean decrease in accuracy
  sortedImpt = sort(importance(rf.result, scale = F)[, 3], decreasing = T)
  png("rf_imp.png", width = 800, height = 800, res = 100)
  barplot(sortedImpt, horiz = TRUE, col = "blue", space = .5,
          names.arg = substr(names(sortedImpt), 2, 5), cex.names = 0.5)
  dev.off()
  return(list(rf.result, sortedImpt))
}
rf.pred = function(model, dataSets.X) {
  predresult = lapply(dataSets.X, function(X) predict(model, X))
  errorInfo(predresult)
  return(predresult)
}
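
# Example usage:
# rf.fit = rf.model(traindata)
# rf.pred(rf.fit[[1]], dataSets.X)   # rf.fit[[2]] holds the sorted importances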
#### This is the function for gbm ####
gbm.model = function(traindata) {
  # The adaboost loss in gbm expects 0/1 labels: recode class 3 -> 0, class 8 -> 1
  traindata$Y = ifelse(traindata$Y == 3, 0, 1)
  gbm.result = gbm(Y ~ ., data = traindata, n.trees = 5000, shrinkage = 0.005,
                   bag.fraction = 0.5, train.fraction = 1,
                   distribution = "adaboost", cv.folds = 3)
  png("GBMiter.png", width = 1000, height = 800, res = 100)
  # best.iter.oob <- gbm.perf(gbm.result, method = "OOB")
  # best.iter.test <- gbm.perf(gbm.result, method = "test")
  best.iter.cv <- gbm.perf(gbm.result, method = "cv")
  dev.off()
  return(list(gbm.result, best.iter.cv))
}
gbm.pred = function(model, dataSets.X, best.iter) {
  tmpfunc = function(X) {
    Yhat = predict(model, X, best.iter)
    Yhat = plogis(Yhat)   ### Convert from the logit scale to a probability
    Yhat = ifelse(Yhat > 0.5, 8, 3)
    Yhat = as.factor(Yhat)
    return(Yhat)
  }
  predresult = lapply(dataSets.X, tmpfunc)
  errorInfo(predresult)
  return(predresult)
}
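
# Example usage: fit, then predict at the CV-selected number of trees:
# gbm.fit = gbm.model(traindata)
# gbm.pred(gbm.fit[[1]], dataSets.X, best.iter = gbm.fit[[2]])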