smc77’s gists

smc77 / cs229_univariate_regression

Created October 3, 2011 00:28

Univariate regression with housing data

	# First look at a linear model fit to the housing data

	# Dataset details: http://archive.ics.uci.edu/ml/datasets/Housing
	# Only columns 6 and 14 of the raw table are kept and given readable names.
	housing <- setNames(
	  read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")[, c(6, 14)],
	  c("num.rooms", "median.values")
	)

	# Scatter plot of the two columns, then the fitted regression line on top
	plot(housing)
	housing.lm <- lm(median.values ~ num.rooms, data = housing)
	abline(housing.lm)

	# Print the model summary
	summary(housing.lm)

smc77 / intuitive_regression

Created October 5, 2011 02:01

Fitting various random lines to the housing data to get an intuition about the loss function.

	# Overlay two arbitrarily chosen lines on the housing scatter plot;
	# `a` is the intercept and `b` the slope of each line.
	plot(housing)
	abline(a = 0, b = 5, col = "red")
	abline(a = -50, b = 10, col = "blue")

	# Squared-error loss of the line `intercept + slope * num.rooms`.
	#
	# Arguments:
	#   intercept, slope  coefficients of the candidate line
	#   data              data frame with "num.rooms" and "median.values" columns;
	#                     defaults to the global `housing`, so existing
	#                     loss(intercept, slope) calls keep working
	# Returns half the sum of squared differences between the line's predictions
	# and the observed median values.
	loss <- function(intercept, slope, data = housing) {
	  predicted <- intercept + slope * data[, "num.rooms"]
	  sum((predicted - data[, "median.values"])^2) / 2
	}

	# Compute the loss for the red example line (intercept 0, slope 5)
	loss(intercept = 0, slope = 5)

smc77 / gist:1291757

Created October 17, 2011 01:53

Linear regression gradient descent.

	# Load data and initialize values for gradient descent on the housing CSV
	data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")

	# NOTE(review): alpha is presumably the gradient-descent step size; its use
	# is not visible in this excerpt -- confirm against the full gist.
	alpha <- 0.01
	# Number of rows in the data set
	m <- nrow(data)
	# Two-column matrix: a column of ones followed by the area column
	x <- matrix(c(rep(1,m), data$area), ncol=2)
	# Prices as a one-column matrix, divided by 1000
	y <- matrix(data$price, ncol=1) / 1000

	# Z-Score the feature
	# x.scaled starts as a copy of x; the scaling step itself is not shown here
	x.scaled <- x

smc77 / linear algebra in R

Created October 20, 2011 01:32

Quick linear algebra demo

	# Matrix addition (element-wise sum of two 3 x 2 matrices)
	matrix(c(1, 2, 3, 0, 5, 1), nrow=3) + matrix(c(4, 2, 0, 0.5, 5, 1), nrow=3)

	# Scalar multiplication (every entry is multiplied by 3)
	matrix(c(1, 2, 3, 0, 5, 1), nrow=3) * 3

	# Matrix-vector multiplication (3 x 2 matrix times a length-2 vector)
	matrix(c(1, 4, 2, 3, 0, 1), nrow=3) %*% c(1, 5)

	# Matrix-Matrix Multiplication

smc77 / multivariate

Created October 22, 2011 23:44

Multivariate Regression

	# Dataset details: http://archive.ics.uci.edu/ml/datasets/Housing

	# Read the raw table and attach the 14 column names in one step
	housing <- setNames(
	  read.table("http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"),
	  c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV")
	)

	# Keep only the columns used by the model
	housing <- housing[c("CRIM", "RM", "PTRATIO", "LSTAT", "MEDV")]

	# Scatter-plot matrix of the retained columns
	plot(housing)

smc77 / multivariate_grad_descent.R

Created October 23, 2011 21:32

Multivariate Gradient Descent

	# Iteration count (the loop that uses it is not visible in this excerpt)
	num.iterations <- 1000

	# Load the housing CSV
	data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")

	# Features: the area and bedrooms columns
	x <- data[c("area", "bedrooms")]
	# Target: price as a one-column matrix, divided by a thousand so that
	# numbers are in $1000's
	y <- matrix(data$price, ncol = 1) / 1000

	# Function to standardize input values
	zscore <- function(x, mean.val=NA) {

smc77 / normal_equation.R

Created October 24, 2011 00:11

Normal Equation

	# Closed-form least-squares fit via the normal equation:
	#   theta = (X'X)^{-1} X'y
	data <- read.csv("http://www.statalgo.com/wp-content/uploads/2011/10/housing.csv")

	# Design matrix: an intercept column of ones plus the two features. The row
	# count is taken from `data` itself rather than from a leftover global `m`.
	x <- as.matrix(cbind(intercept = rep(1, nrow(data)), data[, c("area", "bedrooms")]))

	# Target in $1000's, defined the same way as in the gradient-descent gists so
	# this snippet does not depend on a `y` left over from an earlier session.
	y <- matrix(data$price, ncol = 1) / 1000

	# `%*%` is the matrix product; `%%` is the modulo operator and must not be
	# used here.
	theta <- solve(t(x) %*% x) %*% t(x) %*% y

smc77 / logistic_regression.R

Created October 26, 2011 01:45

Logistic Regression

	# Plot the sigmoid function 1 / (1 + exp(-z)) for z in -10..10
	library(ggplot2)
	qplot(-10:10, 1/(1 + exp(-(-10:10))), geom="line", xlab="z", ylab="sigmoid function")

	# Download South African heart disease data.
	# `header = TRUE` is spelled out in full: `head = T` relied on partial
	# argument matching and on `T`, which can be reassigned.
	# The first column of the file is used as row names.
	sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data", sep = ",", header = TRUE, row.names = 1)

	# Pretty plot: scatter-plot matrix of the first nine columns, with the point
	# fill colour chosen by the factor level of `chd`
	pairs(sa.heart[1:9], pch = 21, bg = c("red", "green")[factor(sa.heart$chd)])

smc77 / logistic_gradient_descent.R

Created October 28, 2011 03:14

Logistic Regression with Gradient Descent

	# Iteration count (the loop that uses it is not visible in this excerpt)
	num.iterations <- 1000

	# Download South African heart disease data.
	# `header = TRUE` is spelled out in full: `head = T` relied on partial
	# argument matching and on `T`, which can be reassigned.
	# The first column of the file is used as row names.
	sa.heart <- read.table("http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/SAheart.data", sep = ",", header = TRUE, row.names = 1)

	# Features are the age and ldl columns; the response is the chd column
	x <- sa.heart[, c("age", "ldl")]
	y <- sa.heart$chd

	# Scatter plot of the two features, point fill chosen by the level of y
	plot(x, pch = 21, bg = c("red", "green")[factor(y)])

	# Function to standardize input values

smc77 / logistic_regression_multi.R

Created October 28, 2011 03:28

Multiclass Logistic Regression

	# Plot the data: scatter-plot matrix of the four measurement columns, with
	# the point fill colour chosen by species
	pairs(iris[1:4], main = "Anderson's Iris Data -- 3 species", pch = 21, bg = c("red", "green3", "blue")[unclass(iris$Species)])

	# Use linear discriminant analysis.
	# lda() is provided by the MASS package, which has to be attached first.
	library(MASS)
	iris.lda <- lda(Species ~ ., data = iris)
	summary(iris.lda)

	# Use a multinomial logistic regression model.
	# VGAM's `multinomial` family is fitted with vglm(), not stats::glm(), and
	# the formula needs `.` on the right-hand side to use all remaining columns
	# as predictors (`Species ~ ,` does not parse).
	library(VGAM)
	iris.vglm <- vglm(Species ~ ., family = multinomial, data = iris)

Shane Conway smc77