diamonaj’s gists

diamonaj / gist:cf0a4f5265d68d33bca3c63518052906

Created February 28, 2023 02:05



	## ---- Map of the USA and its state capitals ----
	library(maps)
	data(us.cities)   # bring the us.cities data set into the workspace
	head(us.cities)   # peek at the first few rows

	# draw the "usa" map database
	map("usa")

	# keep only the rows whose capital flag equals 2 (state capitals)
	capitals <- us.cities[which(us.cities$capital == 2), ]

diamonaj / Reg_assignment.R

Created February 26, 2023 00:22

	# Read the data set (path is relative to the current working directory)
	# and inspect its dimensions (rows, columns).
	dd <- read.csv("progresa.csv")
	dim(dd)

	# Drop every row that contains at least one NA, then inspect the dimensions
	# again so the number of rows removed is visible.
	dd <- na.omit(dd)
	dim(dd)

	### QUESTION 1
	# An example of what I'm looking for

	# Split the plotting device into 2 rows x 1 column, so subsequent plots
	# are stacked vertically.
	par(mfrow = c(2,1))

diamonaj / gist:4dd6ce955aeff711682e81238b76e98a

Created February 9, 2023 15:54

diamonaj / federalist.R

Created February 3, 2023 16:02


	### This exercise requires installing a bunch of packages---
	### Unfortunately, the precise sequence and rules for installing may vary
	### depending upon your computer and configuration.

	## ***Taken from Chapter 5 in Kosuke Imai's "Quantitative Social Science"
	## Transcribed by Alexis Diamond, all errors my own...

	##########################################################################

diamonaj / sentiment.R

Created January 30, 2023 23:35

	# assuming you have downloaded the data (Data1.csv) correctly,
	# as discussed here: https://piazza.com/class/l7oq25mqbrz1nd/post/110


	# you may need to change the file location in quotes below, to suit where your file is
	# (FALSE is spelled out: `F` is an ordinary variable that can be reassigned)
	apple <- read.csv(
	  "~/Documents/Data1.csv",
	  stringsAsFactors = FALSE,
	  encoding = "UTF-8"
	)

	# show the structure (column names and types) of the loaded data
	str(apple)

	# install the tm package only when it is missing, so re-running the script
	# does not trigger a fresh download and install every time
	if (!requireNamespace("tm", quietly = TRUE)) {
	  install.packages("tm")
	}

diamonaj / sentiment.R

Created January 30, 2023 23:34

	# assuming you have downloaded the data (Data1.csv) correctly,
	# as discussed here: https://piazza.com/class/l7oq25mqbrz1nd/post/110


	# you may need to change the file location in quotes below, to suit where your file is
	# (FALSE is spelled out: `F` is an ordinary variable that can be reassigned)
	apple <- read.csv(
	  "~/Documents/Data1.csv",
	  stringsAsFactors = FALSE,
	  encoding = "UTF-8"
	)

	# show the structure (column names and types) of the loaded data
	str(apple)

	# attach the tm package
	library(tm)

diamonaj / step4.R

Created January 30, 2023 20:13

	## We're going to be running regressions...
	## If a predicted value is positive, we're going to say it's a prediction for hamilton authorship.
	## If a predicted value is negative, we're going to say it's a prediction for madison authorship.
	## Hence the +1 / -1 coding of the outcome below.

	## NOTE(review): dtm1, hamilton and madison are defined outside this excerpt;
	## hamilton and madison are presumably index vectors into the rows of dtm1 -- confirm.
	author <- rep(NA, nrow(dtm1)) # one entry per row of dtm1, all NA to start
	author[hamilton] <- 1 # 1 if Hamilton
	author[madison] <- -1 # -1 if Madison; entries in neither set stay NA

	## data frame for regression
	author.data <- data.frame(author = author[c(hamilton, madison)],

diamonaj / step3.R

Created January 30, 2023 20:08

	## Authorship prediction
	## authorship of some Federalist Papers is unknown
	## We use the 66 essays attributed to either Hamilton or Madison to
	## predict the authorship of the 11 disputed papers.

	## Since each paper deals with a different topic, we focus on usage of articles,
	## prepositions, and conjunctions. We analyze the frequency of the following
	## 10 words: although, always, commonly, consequently, considerable, enough, there, upon, while,
	## and whilst.

diamonaj / step2.R

Created January 30, 2023 19:59

	# how often are words (word-stems) used across all the docs
	# NOTE(review): corpus.stemmed is created outside this excerpt, and
	# DocumentTermMatrix()/inspect() presumably come from the tm package -- confirm.
	dtm <- DocumentTermMatrix(corpus.stemmed)

	# in the first 5 text files, how frequent are the first 8 words (alphabetical order)
	# i.e. rows 1-5 and columns 1-8 of dtm
	inspect(dtm[1:5, 1:8])

	# let's make that dtm table a matrix...
	# (dtm itself is left unchanged; the converted copy is stored in dtm.mat)
	dtm.mat <- as.matrix(dtm)

	####### STEP 3 ----- visualizing the high-frequency words

diamonaj / step1.R

Last active January 30, 2023 18:43

	### In your R working directory, you should have a directory called "federalist" filled with .txt files

	# Build the raw corpus from the "federalist" directory, restricted to the
	# files selected by pattern = "fp".
	# NOTE(review): Corpus(), DirSource() and content() presumably come from the
	# tm package, which must be loaded before this point -- confirm.
	corpus.raw <- Corpus(DirSource(directory = "federalist", pattern = "fp"))

	# this corpus comes with many different text files built in
	# to see text, use "content()" and specify which doc (e.g., the 1st one)
	# ([[1]] extracts the first document from the corpus)
	content(corpus.raw[[1]])

	####### GET THE DATA IN SHAPE
	# make lower case