Francisco Lima monogenea

Data scientist and blogger

monogenea / 6-poissonCNN.R

Created October 7, 2019 18:27

	# Compute probabilities and predictions on test set
	# NOTE(review): predict_classes()/predict_proba() were deprecated in later
	# keras releases -- confirm they exist in the installed version.
	predictions <- predict_classes(model, test_array)
	probabilities <- predict_proba(model, test_array)

	# Visual inspection of 32 cases
	# Fixed seed so the same 32 test cases are drawn on every run
	set.seed(100)
	# seq_len() instead of 1:nrow(): with zero rows, 1:0 would silently sample
	# from c(1, 0) instead of failing
	random <- sample(seq_len(nrow(testData)), 32)
	preds <- predictions[random, ]
	# Probabilities of the sampled cases, rounded to 2 decimals, as a plain vector
	probs <- as.vector(round(probabilities[random, ], 2))

monogenea / 5-poissonCNN.R

Created October 7, 2019 18:26

	# Fix structure for 2d CNN
	# Transpose so that each column holds one flattened image, impose
	# 50 x 50 x n x 1 dimensions, then reorder so the sample axis comes first
	# (n x 50 x 50 x 1).
	train_array <- aperm(
		`dim<-`(t(trainData$X), c(50, 50, nrow(trainData$X), 1)),
		perm = c(3, 1, 2, 4)
	)

	# Same reshaping for the test set, which is used directly as a matrix here
	test_array <- aperm(
		`dim<-`(t(testData), c(50, 50, nrow(testData), 1)),
		perm = c(3, 1, 2, 4)
	)

monogenea / 4-poissonCNN.R

Created October 7, 2019 18:24

	# Check processing on second cat
	# Zero margins on all four sides of the plot
	par(mar = rep(0, 4))
	# Rebuild row 2 of the feature matrix as a width x height matrix filled by
	# row, then transpose it. TRUE/FALSE are spelled out because T and F are
	# ordinary variables that can be reassigned.
	testCat <- t(matrix(as.numeric(trainData$X[2, ]),
		nrow = width, ncol = height, byrow = TRUE))
	# Reverse each column and transpose before drawing in 12 gray levels, no axes
	image(t(apply(testCat, 2, rev)), col = gray.colors(12),
		axes = FALSE)

	# Save / load
	save(trainData, testData, file = "catdogData.RData")
	# load("catdogData.RData")

monogenea / 3-poissonCNN.R

Created October 7, 2019 18:22

	# Extract features from the training images
	# Takes approx. 15min
	trainData <- extract_feature("train/", width, height)
	# Extract features from the test images, which carry no labels
	# (FALSE spelled out: F is an ordinary variable that can be reassigned)
	# Takes slightly less
	testData <- extract_feature("test1/", width, height, labelsExist = FALSE)

monogenea / 2-poissonCNN.R

Created October 7, 2019 18:20

	# Set image size: width and height share the same value (50)
	width <- height <- 50

	extract_feature <- function(dir_path, width, height, labelsExist = T) {
	img_size <- width * height

	## List images in path
	images_names <- list.files(dir_path)

monogenea / 1-poissonCNN.R

Created October 7, 2019 18:19

	##### Process image #####
	library(keras)
	library(EBImage)
	library(stringr)
	library(pbapply)

	# Read the image file cat.1.jpg from the training folder and display it
	secondCat <- EBImage::readImage("train/cat.1.jpg")
	EBImage::display(secondCat)

monogenea / 10-poissonTC.R

Created October 7, 2019 18:16

	# Validate on test set with ensemble
	# One column of predictions per model. vapply() (rather than sapply())
	# enforces that every model returns exactly one numeric prediction per test
	# row and fails loudly otherwise, instead of silently returning a list.
	allPreds <- vapply(modelList, predict, numeric(nrow(testSet)),
		newdata = testSet)
	# Ensemble prediction: unweighted mean across models
	ensemblePred <- rowMeans(allPreds)

	# Plot predicted vs. observed
	# NOTE(review): the original comment mentioned creating a PNG, but no
	# graphics device is opened in this excerpt.
	plot(ensemblePred, testSet$Y,
		xlim = c(0, 100), ylim = c(0, 100),
		xlab = "Predicted", ylab = "Observed",
		pch = 16, col = rgb(0, 0, 0, .25))
	# Identity line: points on it are predicted exactly
	abline(a = 0, b = 1)

monogenea / 9-poissonTC.R

Created October 7, 2019 18:16

	# Collect the resampling results of all models, then draw a box-and-whisker
	# plot of their RMSE
	modelResamples <- resamples(modelList)
	bwplot(modelResamples, metric = "RMSE")

monogenea / 8-poissonTC.R

Created October 7, 2019 18:15

	# Train
	# Register a doMC parallel backend with 10 cores
	doMC::registerDoMC(cores = 10)
	# kNN model on the recipe, tuning k over 5, 9, 13, 17, 21, 25
	knnMod <- train(
		x = myRec,
		data = trainSet,
		method = "knn",
		trControl = ctrl,
		tuneGrid = data.frame(k = seq(5, 25, by = 4))
	)

	enetMod <- train(myRec, data = trainSet,
	method = "glmnet",
	tuneGrid = expand.grid(alpha = seq(0, 1, length.out = 5),

monogenea / 7-poissonTC.R

Created October 7, 2019 18:14

	# simple PCA, plot
	# Extend the base recipe with a PCA step over all predictors
	pcaRec <- myRec %>%
		step_pca(all_predictors())

	# Estimate the recipe on the training set, keeping the processed training
	# data (retain = TRUE) so juice() can extract it. TRUE is spelled out
	# because T is an ordinary variable that can be reassigned.
	myPCA <- prep(pcaRec, training = trainSet, retain = TRUE) %>%
		juice()
	# Outcome divided by 100, used below as a colour-channel intensity
	colGrad <- trainSet$Y / 100 # add color

	plot(myPCA$PC1, myPCA$PC2,
	col = rgb(1 - colGrad, 0, colGrad,.5),