primaryobjects · September 2, 2025 15:11 · primaryobjects · Sep 26, 2017 · bcafferky · Apr 12, 2018
diff --git a/classifytext.R b/classifytext.R
 library(caret)
 library(tm)

 # Two example sentences; their class labels are attached further down.
 data <- c("Cats like to chase mice.", "Dogs like to eat big bones.")
 train_corpus <- VCorpus(VectorSource(data))

 # Preprocessing shared by the training and the test matrix. The options are
 # kept in their original order, since tm processes them in the order given.
 dtm_options <- list(
   removePunctuation = TRUE,
   stopwords = TRUE,
   stemming = TRUE,
   removeNumbers = TRUE
 )

 # Document-term matrix: one row per document, one column per term.
 train_dtm <- DocumentTermMatrix(train_corpus, control = dtm_options)

 # Append the class labels as column `y`, convert to a data.frame, and turn
 # `y` into a factor so that a classifier is trained.
 train <- as.data.frame(cbind(as.matrix(train_dtm), y = c(0, 1)))
 train$y <- as.factor(train$y)

 # Fit the model through caret using its "bayesglm" method.
 fit <- train(y ~ ., data = train, method = "bayesglm")

 # Predicted classes for the training documents.
 predict(fit, newdata = train)

 # Unseen document. Its matrix is restricted to the training vocabulary via
 # `dictionary`, so its columns line up with the predictors used for `fit`.
 data2 <- c("Bats eat bugs.")
 test_corpus <- VCorpus(VectorSource(data2))
 test_dtm <- DocumentTermMatrix(
   test_corpus,
   control = c(list(dictionary = Terms(train_dtm)), dtm_options)
 )
 test <- as.matrix(test_dtm)

 # Predicted class for the unseen document.
 predict(fit, newdata = test)
diff --git a/results.txt b/results.txt
 > data
 [1] "Cats like to chase mice."    "Dogs like to eat big bones."
 > train
  big bone cat chase dog eat like mice y
 1   0    0   1     1   0   0    1    1 0
 2   1    1   0     0   1   1    1    0 1
 > predict(fit, newdata = train)
 [1] 0 1
 > data2
 [1] "Bats eat bugs."
 > test
  big bone cat chase dog eat like mice
 1   0    0   0     0   0   1    0    0
 > predict(fit, newdata = test)
 [1] 1
 >
	library(caret)
	library(tm)

	# Two example sentences; their class labels are attached further down.
	data <- c("Cats like to chase mice.", "Dogs like to eat big bones.")
	train_corpus <- VCorpus(VectorSource(data))

	# Preprocessing shared by the training and the test matrix. The options are
	# kept in their original order, since tm processes them in the order given.
	dtm_options <- list(
	  removePunctuation = TRUE,
	  stopwords = TRUE,
	  stemming = TRUE,
	  removeNumbers = TRUE
	)

	# Document-term matrix: one row per document, one column per term.
	train_dtm <- DocumentTermMatrix(train_corpus, control = dtm_options)

	# Append the class labels as column `y`, convert to a data.frame, and turn
	# `y` into a factor so that a classifier is trained.
	train <- as.data.frame(cbind(as.matrix(train_dtm), y = c(0, 1)))
	train$y <- as.factor(train$y)

	# Fit the model through caret using its "bayesglm" method.
	fit <- train(y ~ ., data = train, method = "bayesglm")

	# Predicted classes for the training documents.
	predict(fit, newdata = train)

	# Unseen document. Its matrix is restricted to the training vocabulary via
	# `dictionary`, so its columns line up with the predictors used for `fit`.
	data2 <- c("Bats eat bugs.")
	test_corpus <- VCorpus(VectorSource(data2))
	test_dtm <- DocumentTermMatrix(
	  test_corpus,
	  control = c(list(dictionary = Terms(train_dtm)), dtm_options)
	)
	test <- as.matrix(test_dtm)

	# Predicted class for the unseen document.
	predict(fit, newdata = test)
	> data
	[1] "Cats like to chase mice." "Dogs like to eat big bones."
	> train
	big bone cat chase dog eat like mice y
	1 0 0 1 1 0 0 1 1 0
	2 1 1 0 0 1 1 1 0 1
	> predict(fit, newdata = train)
	[1] 0 1
	> data2
	[1] "Bats eat bugs."
	> test
	big bone cat chase dog eat like mice
	1 0 0 0 0 0 1 0 0
	> predict(fit, newdata = test)
	[1] 1
	>