arthurwuhoo’s gists

arthurwuhoo / CapeTownWeather.R

Created June 22, 2016 06:34

	################################################################
	# Reading in 2015-2016 Weather Data for Cape Town
	################################################################

	library(rvest)

	# Download and parse the Weather Underground custom-history page for
	# airport code FACT (date range given in the URL query: 2015/6/21 to 2016/6/21).
	tables <- read_html(
	  "https://www.wunderground.com/history/airport/FACT/2015/6/21/CustomHistory.html?dayend=21&monthend=6&yearend=2016&req_city=&req_state=&req_statename=&reqdb.zip=&reqdb.magic=&reqdb.wmo="
	)

	# Take the first node matching the "#obsTable" selector and convert it to a
	# data frame, treating the first row as the header and filling ragged rows.
	raw_weather <- html_table(
	  html_nodes(tables, css = "#obsTable")[[1]],
	  header = TRUE,
	  fill = TRUE
	)

arthurwuhoo / Day 14: Clustering Exercises.R

Created June 16, 2016 19:39

	# ------------------------------------------------------------------
	# DAY 14: CLUSTERING EXERCISES
	# ------------------------------------------------------------------
	# ------------------------------------------------------------------
	# EXERCISE 1
	# Use k-Means Clustering to group the observations in the mtcars data. Is it
	# important to standardise these data first? Vary the number of clusters and choose
	# an appropriate value for k. Interpret the clusters.
	# ------------------------------------------------------------------

arthurwuhoo / Day 13 - CV Exercises Solutions.R

Created June 16, 2016 18:49

	# ------------------------------------------------------------------
	# DAY 13 EXERCISES - CROSS VALIDATION
	# ------------------------------------------------------------------

	# ------------------------------------------------------------------
	# EXERCISE 1
	# Build a Logistic Regression model classifying the credit status of
	# customers (good or bad) in this data. Without using any packages, apply
	# 5-fold cross-validation on the model. Once you have five models (and five
	# sets of predicted values), average them in order to create a new

arthurwuhoo / T-SVM.R

Created June 16, 2016 10:56


	# Load the sample data and drop the first column (the id variable), which
	# should not be used as a feature.
	sample.data <- read.csv("svm_sample.csv")
	sample.data <- sample.data[, -1]


	library(caret)
	# Partition on the outcome `color`, asking for 80% of the rows for training.
	# `p` must be passed by name: the second positional argument of
	# createDataPartition() is `times`, so a bare 0.8 would leave `p` at its
	# default of 0.5 instead of requesting an 80/20 split.
	train_index <- createDataPartition(sample.data$color, p = 0.8)[[1]]

	# Rows selected by the partition form the training set; the rest are held
	# out for testing.
	sample.data.train <- sample.data[train_index, ]
	sample.data.test <- sample.data[-train_index, ]

arthurwuhoo / Day 12 - Decision Tree Post-Lecture Solutions.R

Created June 14, 2016 21:48

	# ------------------------------------------------------------------
	# DAY 12 EXERCISES - DECISION TREES
	# ------------------------------------------------------------------

	# ------------------------------------------------------------------
	# EXERCISE 1
	# Complete the iris modelling exercise. This is a multiclass problem. Some models
	# support multiclass problems, others don’t. Decision trees do. Divide the data
	# into a 60% training and 40% testing split. Create a model based on the training
	# data.

arthurwuhoo / Day 12: Logistic Regression Exercises.R

Created June 14, 2016 15:40

	# ------------------------------------------------------------------
	# DAY 12 EXERCISES - LOGISTIC REGRESSION
	# ------------------------------------------------------------------

	# ------------------------------------------------------------------
	# EXERCISE 1
	# Create a parsimonious model for the myopia data. Does its performance differ
	# substantially from the full model?
	# ------------------------------------------------------------------

arthurwuhoo / Exercise 3.R

Created June 14, 2016 15:21


	# ------------------------------------------------------------------
	# EXERCISE 3
	# Use the birthwt data in the MASS package to construct a model for low birth
	# weight. Are there any features which should be excluded from the model?
	# ------------------------------------------------------------------

	library(MASS)
	library(caret)

arthurwuhoo / Day 11: Linear Regression Solutions.R

Created June 13, 2016 18:05

	##############################################################

	# DAY 11: LINEAR REGRESSION EXERCISES

	##############################################################

	# 1) Height and Mass. Scrape the height and mass data from here.
	# ----------------------------------------------------------------------------

	library(rvest)

arthurwuhoo / Day 11: Linear Regression Solutions

Created June 13, 2016 18:05

	##############################################################

	# DAY 11: LINEAR REGRESSION EXERCISES

	##############################################################

	# 1) Height and Mass. Scrape the height and mass data from here.
	# ----------------------------------------------------------------------------

	library(rvest)

arthurwuhoo / Clean and Impute.R

Created June 9, 2016 15:27

	# =====================================================================================================================
	# OUTLIERS
	# =====================================================================================================================

	library(dplyr)
	library(corrgram)

	# Narrow the baseball data to the Name column plus the contiguous run of
	# columns from Atbatc through Walksc.
	#
	baseball <- baseball %>%
	  select(Name, Atbatc:Walksc)

Arthur Wu arthurwuhoo