Last active
January 11, 2018 11:46
-
-
Save flxw/5a0d3dda72477ec628ee287f515a7c9c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bootstrap validation script: repeatedly resamples the labelled data,
# records the share of distinct rows in each resample and the accuracy of the
# model on the rows left out, then plots accuracy against the iteration index.
source("load_data.R")

# Labelled data used for training/validation. The delivery_date column is
# dropped entirely (presumably because it contains NAs -- TODO confirm).
d <- read_and_preprocess_data_file("data/BADS_WS1718_known.csv")
d <- subset(d, select = -c(delivery_date))

# Second data set (presumably the records to be classified -- TODO confirm);
# it gets the same column removal so both frames share one layout.
classdata <- read_and_preprocess_data_file("data/BADS_WS1718_class.csv")
classdata <- subset(classdata, select = -c(delivery_date))

# Train the final model with .632 bootstrapping.
n_iterations <- 400L
n_rows <- nrow(d)

# Preallocate the logs; both are filled by index inside the loop.
#   probs: fraction of distinct rows of `d` that ended up in the training set
#          (expected to hover around 1 - 1/e ~= 0.632 for large data)
#   accs:  accuracy measured on the left-out rows in each iteration
probs <- numeric(n_iterations)
accs <- numeric(n_iterations)

for (iter in seq_len(n_iterations)) {
  # Sample row indices with replacement, then collapse duplicates so every
  # selected row appears once in the training set.
  sampled_order_ids <- unique(sample.int(n_rows, replace = TRUE))

  training_set <- d[sampled_order_ids, ]
  # Rows that were never drawn form the out-of-bag test set.
  test_set <- d[-sampled_order_ids, ]

  # One training row per distinct sampled index.
  probs[iter] <- length(sampled_order_ids) / n_rows

  # Train the model here with the training set; be sure to always train the
  # same model, and not discard it and continuously start at 0.
  # Test the model accuracy with the test set and append it to the log.
  # TODO: the evaluation code inserted here must assign `accuracy`; it is not
  # defined anywhere in this script yet.
  accs[iter] <- accuracy
}

# Plot accuracies to see the change with a higher number of iterations.
plot(x = seq_along(accs), y = accs, type = "p")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment