diamonaj · January 30, 2023 20:13
diff --git a/step4.R b/step4.R
 ## Fit a linear regression whose predicted values are used to classify authorship.
 ## If a predicted value is positive, we treat it as a prediction of Hamilton authorship.
 ## If a predicted value is negative, we treat it as a prediction of Madison authorship.

 ## Response vector: one entry per row of dtm1, initialised to NA so that any
 ## row not indexed by `hamilton` or `madison` below stays missing.
 author <- rep(NA, nrow(dtm1)) # every entry starts out missing
 author[hamilton] <- 1 # 1 if Hamilton
 author[madison] <- -1 # -1 if Madison

 ## Data frame for the regression: the response for the `hamilton` rows followed
 ## by the `madison` rows, alongside the same rows of `tfm`.
 ## (presumably `tfm` holds one term-frequency column per candidate word;
 ## verify against the earlier steps)
 author.data <- data.frame(author = author[c(hamilton, madison)],
                           tfm[c(hamilton, madison), ])

 ## To predict the authorship, we regress `author` on the term frequency of the
 ## eight words named in the formula below: 'enough', 'upon', 'although',
 ## 'whilst', 'always', 'commonly', 'consequently', and 'considerable'.
 ## NOTE(review): this comment previously described a 4-word model selected in
 ## the preliminary analysis ('upon', 'there', 'consequently', and 'whilst');
 ## the formula below does not include 'there'. Confirm which specification is
 ## intended.
 ## The data frame created above is described as containing the term frequency
 ## of 10 words, and the coefficients as being estimated from the 66 essays with
 ## known authorship; neither count is visible in this step. TODO confirm.

 hm.fit <- lm(author ~ enough + upon + although + whilst + always + commonly
                       + consequently + considerable, data = author.data)
 ## Evaluating the fitted object at top level displays the call and the
 ## estimated coefficients.
 hm.fit
	## Fit a linear regression whose predicted values are used to classify authorship.
	## If a predicted value is positive, we treat it as a prediction of Hamilton authorship.
	## If a predicted value is negative, we treat it as a prediction of Madison authorship.

	## Response vector: one entry per row of dtm1, initialised to NA so that any
	## row not indexed by `hamilton` or `madison` below stays missing.
	author <- rep(NA, nrow(dtm1)) # every entry starts out missing
	author[hamilton] <- 1 # 1 if Hamilton
	author[madison] <- -1 # -1 if Madison

	## Data frame for the regression: the response for the `hamilton` rows followed
	## by the `madison` rows, alongside the same rows of `tfm`.
	## (presumably `tfm` holds one term-frequency column per candidate word;
	## verify against the earlier steps)
	author.data <- data.frame(author = author[c(hamilton, madison)],
	tfm[c(hamilton, madison), ])

	## To predict the authorship, we regress `author` on the term frequency of the
	## eight words named in the formula below: 'enough', 'upon', 'although',
	## 'whilst', 'always', 'commonly', 'consequently', and 'considerable'.
	## NOTE(review): this comment previously described a 4-word model selected in
	## the preliminary analysis ('upon', 'there', 'consequently', and 'whilst');
	## the formula below does not include 'there'. Confirm which specification is
	## intended.
	## The data frame created above is described as containing the term frequency
	## of 10 words, and the coefficients as being estimated from the 66 essays with
	## known authorship; neither count is visible in this step. TODO confirm.

	hm.fit <- lm(author ~ enough + upon + although + whilst + always + commonly
	+ consequently + considerable, data = author.data)
	## Evaluating the fitted object at top level displays the call and the
	## estimated coefficients.
	hm.fit