Skip to content

Instantly share code, notes, and snippets.

@MJacobs1985
Created February 21, 2022 15:22
Show Gist options
  • Save MJacobs1985/9ff090cfca36b293954cb268114e8cad to your computer and use it in GitHub Desktop.
Save MJacobs1985/9ff090cfca36b293954cb268114e8cad to your computer and use it in GitHub Desktop.
# Reproducibility and parallel backend ----
# Fix the RNG seed before anything else so the random steps further down
# (e.g. the train/test partition) are repeatable.
set.seed(seed = 998)
# Start a 4-worker PSOCK cluster and register it with doParallel.
# NOTE(review): no stopCluster(cl) appears in the visible part of the script --
# confirm the workers are shut down once the parallel work is finished.
cl <- makePSOCKcluster(names = 4)
registerDoParallel(cl = cl)
## Create Model Data
# Keep only the columns used for modelling. Back-ticked names contain a hyphen
# and therefore have to stay quoted.
arcmodel <- dplyr::select(
  arc,
  slaughtermonth, slaughteryear, weightclasshalv, length, rochefant,
  `nir-ala`, `nir-dha`, `nir-dpa`, `nir-epa`, `nir-eta`,
  generation1, c.factor,
  `nir-fasummon`, `nir-fasumo3`, `nir-fasumo6`, `nir-fasumsatfa`,
  `nir-linoleicacid`,
  nirfat, region, description, customer
)
# Quick look at the column types of the selection.
glimpse(arcmodel)
# Continue with a plain data.frame and chart the share of missing values
# per column.
arcmodel <- as.data.frame(arcmodel)
DataExplorer::plot_missing(arcmodel)
## Resolve Skewness of weight data
# Estimate the Box-Cox pre-processing on arcmodel, then apply it to that same
# data set.
trans <- preProcess(x = arcmodel, method = "BoxCox")
arcmodel_trans <- predict(object = trans, newdata = arcmodel)
## Deal with outliers
# Trim the transformed data with the "iqr" rule of trim_df() and make sure the
# result is a plain data.frame.
# NOTE(review): trim_df() is not defined in the visible part of the script --
# confirm which package or helper provides it.
arcmodel_trans_trim <- as.data.frame(
  trim_df(arcmodel_trans, type = "iqr")
)
# Print the class as a sanity check on the conversion.
class(arcmodel_trans_trim)
## One Hot Encoding
# Build a dummy-variable encoder over every column of the trimmed data and use
# it to expand that data into an all-numeric data.frame.
# NOTE(review): arcmodel_trans_trim_dummy is not referenced again in the
# visible part of the script; the imputation step that follows works on
# arcmodel_trans_trim instead -- confirm this is intended.
dummy <- dummyVars(" ~ .", data = arcmodel_trans_trim)
arcmodel_trans_trim_dummy <- data.frame(
  predict(dummy, newdata = arcmodel_trans_trim)
)
## Impute missing data
# Fit caret's k-nearest-neighbour imputation on the trimmed (not one-hot
# encoded) data.
# NOTE(review): caret documents that "knnImpute" also centers and scales the
# numeric columns -- confirm that standardized values are wanted downstream.
arc.mis.knn.model <- preProcess(arcmodel_trans_trim, method = "knnImpute")
# Missingness before imputation.
DataExplorer::plot_missing(arcmodel_trans_trim)
# Apply the fitted imputation model to the same data.
arcmodel_trans_trim_knn <- predict(arc.mis.knn.model, arcmodel_trans_trim)
head(arcmodel_trans_trim_knn)
# Missingness after imputation, plus the dimensions of the result.
DataExplorer::plot_missing(arcmodel_trans_trim_knn)
dim(arcmodel_trans_trim_knn)
## Create Training and Testing datasets
# Draw row indices for a 75 % training partition based on the outcome
# rochefant; list = FALSE returns the indices in a form usable for row
# subsetting.
inTraining <- createDataPartition(
  y = arcmodel_trans_trim_knn[["rochefant"]],
  p = 0.75,
  list = FALSE
)
# Rows picked by the partition form the training set, the rest the test set.
training <- arcmodel_trans_trim_knn[inTraining, ]
testing <- arcmodel_trans_trim_knn[-inTraining, ]
# Print both sizes as a check on the split.
dim(training)
dim(testing)
# Control Resampling Methods
# Repeated cross-validation settings: number = 40, repeats = 20.
# NOTE(review): tr is not used in the visible part of the script -- presumably
# it is passed to a later caret::train() call; confirm.
tr <- trainControl(method = "repeatedcv", number = 40, repeats = 20)
# Simple linear regression to get a feeling
# Ordinary least-squares fit of rochefant on the slaughter date, customer,
# product and fish descriptors, including the slaughteryear:slaughtermonth and
# weightclasshalv:length interactions. Fitted on the training partition only.
model <- lm(
  rochefant ~ slaughteryear +
    slaughtermonth +
    slaughteryear:slaughtermonth +
    customer + description +
    length + nirfat + region + generation1 +
    weightclasshalv +
    weightclasshalv:length,
  data = training
) # what if the imputation messed up, regression to the mean?
# Draw the lm diagnostic plots on a single 2 x 2 page. par() returns the
# previous settings, so keep them and restore the layout afterwards; otherwise
# every later plot in the session would still be squeezed into the 2 x 2 grid.
old_par <- par(mfrow = c(2, 2))
plot(model)
par(old_par)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment