Skip to content

Instantly share code, notes, and snippets.

@diamonaj
diamonaj / CS112 11.1
Last active November 14, 2016 20:44
CS112 11.1
### RESULTS FOR CS112 LESSON PLAN 11.1 Version 2
# Start from a clean workspace and fix the RNG seed so results reproduce
rm(list = ls())
set.seed(13345)
### TO CREATE THE DATA (LATER SEPARATED INTO TEST AND TRAINING SETS)
# 100 predictor values drawn uniformly on [-4, 3], rounded to 2 decimals
x <- round(runif(100, -4, 3), 2)
# cubic signal for the outcome...
y <- x^3 + 2 * x^2 - 5 * x - 3
# ...plus random noise (i.e., the epsilon in the regression equation)
y <- jitter(y, 3000)
@diamonaj
diamonaj / CS112 11.1 Revised
Created November 15, 2016 13:33
CS112 11.1 Revised
### RESULTS FOR CS112 LESSON PLAN 11.1
# Clear the workspace and seed the RNG for reproducibility
rm(list = ls())
set.seed(355)
# sample size, kept in one place so it is easy to change
num_data <- 200
### TO CREATE THE DATA (LATER SEPARATED INTO TEST AND TRAINING SETS)
# num_data predictor values drawn uniformly on [-4, 3], rounded to 2 decimals
x <- round(runif(num_data, -4, 3), 2)
# deterministic cubic outcome (no noise added in this version)
y <- x^3 + 2 * x^2 - 5 * x - 3
dev.off() # NOTE(review): closes the current graphics device; errors if none is open -- the device was presumably opened earlier in the full script
### REGRESSION TO THE MEAN
# Fix the seed so the simulated "ability" draws below are reproducible
set.seed(12345)
dev.off() # NOTE(review): second device close -- likely a fragment boundary from the original script; confirm a device is open before running
### Generate "TRUE ABILITY"
# N people, all with "zero" ability
N <- 20
# Mean squared error observed at polynomial degrees 1 through 10
left.panel <- c(23, 18, 14, 10, 11, 10, 9, 10, 10, 9)
# Lay out two side-by-side panels and draw the same error curve in each
par(mfrow = c(1, 2))
for (panel in 1:2) {
  plot(x = 1:10, left.panel, type = "l", lty = 1, lwd = 3,
       ylim = c(5, 25),
       ylab = "mean squared error", xlab = "degree of polynomial")
}
# Overlay six jittered replicates of the curve on the current (second) panel
for (i in 1:6) {
  lines(x = 1:10, y = jitter(left.panel, 10), col = i)
}
# Download TITANIC data, loading stringsAsFactors = FALSE
# (keeps character columns as plain strings instead of factors)
mm <- read.csv("trainTitanic.csv", stringsAsFactors = FALSE)
# delete columns (Name, Cabin, Ticket, PassengerId, SibSp, Parch, Embarked)
# NOTE(review): the positional indices assume the standard Kaggle Titanic
# training file column order (PassengerId, Survived, Pclass, Name, Sex, Age,
# SibSp, Parch, Ticket, Fare, Cabin, Embarked) -- verify against the csv
mm <- mm[,-c(1, 4, 7, 8, 9, 11, 12)]
# dimensions are 891 x 5
# drop every row containing at least one NA
mm <- na.omit(mm)
# dimensions are 714 x 5
# Load the lalonde data set and the random forest library.
data(lalonde)
library(randomForest)
# create lalonde2 (just control units); delete orig data to avoid mistakes
# NOTE(review): in the scraped original, the comments and the code below were
# fused onto single lines, which left the assignments inside comments and
# therefore never executed; they are restored to separate lines here.
lalonde2 <- lalonde[which(lalonde$treat == 0), ]
rm(lalonde)
# eliminate the treatment indicator variable (they are all control units)
# remove additional columns--we are going to predict "u75" (unemployed in '75)
elimin.cols <- which(names(lalonde2) == "treat" |
                     names(lalonde2) == "re75" |
                     names(lalonde2) == "re78")
lalonde2 <- lalonde2[, -elimin.cols]
# make the dependent variable (what we are trying to predict)
# a factor, because random forest will perform classification
# for factors, not for 'numeric' variables.
# notice that I set the levels when I define this factor...
# otherwise, by default, R would set the first factor value
# of the first value appearing in the dataset as "0"
# even though it is actually a "1"--this could be very confusing.
lalonde2$u75 <- factor(lalonde2$u75, levels = c(1, 0))
# Load the lalonde data set and the random forest library
data(lalonde)
library(randomForest)
# create lalonde2 (just control units); delete orig data to avoid mistakes
lalonde2 <- lalonde[which(lalonde$treat == 0), ]
rm(lalonde)
# eliminate the treatment indicator variable (they are all control units)
# remove additional columns--we are going to predict "u75" (unemployed in '75)
elimin.cols <- which(names(lalonde2) %in% c("treat", "re75", "re78"))
# NOTE TO INSTRUCTOR: READ ENTIRE FILE BEFORE CLASS
# RUN CODE FROM LINE 49 TO END BEFORE CLASS
# CONSIDER THE FOLLOWING GAME...
# IMAGINE YOU WANT TO ESTIMATE THE UNDERLYING PARAMETER VALUE
# OF A BERNOULLI DISTRIBUTION (i.e., THE PROBABILITY OF A "1"
# INSTEAD OF A "0"...)
# YOU CAN PULL THE "CORR" LEVER AND OBTAIN 50 CORRELATED DATA
# POINTS FROM THE BERNOULLI DISTRIBUTION
# Fix the seed so the simulated lever pulls reproduce in class
set.seed(123)
# create storage vectors
# to store the means for each experiment
# (starting from c() -- i.e., NULL -- and growing by appending inside the
# experiment loop, which is not shown in this fragment)
storage.corr <- c()
storage.uncorr <- c()
# "PULL THE *CORR* LEVER"
# correlated data
rm(list = ls()) # clears working memory
# To get the data
# set the working directory
setwd("~/Downloads")