Randy Zwitch randyzwitch

Founder Zwitch Guitars / Zwitch Woodworking. Open-source software enthusiast.

randyzwitch / frequent-terms

Created September 16, 2013 16:57

frequent terms

	> #Inspect most popular words, minimum frequency of 20
	> findFreqTerms(dtm, lowfreq=20)
	[1] "15" "2008" "2009" "2011" "a" "ad" "add" "adsens"
	[9] "air" "analyt" "and" "appl" "at" "back" "bezel" "black"
	[17] "book" "bookmark" "break" "broke" "broken" "bubbl" "by" "can"
	[25] "case" "chang" "child" "code" "comment" "comput" "cost" "cover"
	[33] "crack" "css" "custom" "data" "delet" "disabl" "display" "do"
	[41] "doe" "drop" "edit" "eleven" "em209" "entri" "fix" "footer"
	[49] "footerphp" "for" "free" "from" "get" "glue" "googl" "hadoop"
	[57] "header" "hing" "how" "i

randyzwitch / kmeans5.R

Created September 16, 2013 17:06

kmeans guessing at 5 main clusters

	# Guess at 5 main topics: Data Science, Web Analytics, R, Julia, Wordpress.
	# NOTE(review): kmeans() chooses random starting centers, so cluster numbering
	# can differ between runs; call set.seed() beforehand if that matters.
	kmeans5 <- kmeans(dtm, centers = 5)

	# Merge each keyword with its cluster assignment.
	# The data frame is built directly rather than through cbind(): cbind() of a
	# character vector and an integer vector produces a character matrix, which
	# silently turned the cluster ids into strings/factors.
	kw_with_cluster <- data.frame(
	  keyword = searchkeywords[["Natural Search Keyword"]],
	  kmeans5 = kmeans5$cluster,
	  stringsAsFactors = FALSE
	)

	# One data frame per cluster, to quickly "eyeball" the results
	cluster1 <- subset(kw_with_cluster, subset = kmeans5 == 1)
	cluster2 <- subset(kw_with_cluster, subset = kmeans5 == 2)

randyzwitch / formatting.sql

Last active December 23, 2015 06:29

Example of SQL formatting

	#Capitalize reserved keywords, indent between keywords and for multi-line statements
	#Tables on separate lines
	#Untested, hopefully this is valid SQL :)


	SELECT
	a.key,
	a.col2,
	a.col3,
	COALESCE(a.col4,0) AS col4,

randyzwitch / kmeans-loop.R

Created September 17, 2013 17:38

Repeated k-means to calculate elbow graph

	#accumulator for cost results
	cost_df <- data.frame()

	#run kmeans for all clusters up to 100
	for(i in 1:100){
	#Run kmeans for each level of i, allowing up to 100 iterations for convergence
	kmeans<- kmeans(x=dtm, centers=i, iter.max=100)

	#Combine cluster number and cost together, write to df
	cost_df<- rbind(cost_df, cbind(i, kmeans$tot.withinss))

randyzwitch / elbow-plot.R

Created September 17, 2013 17:46

elbow plot

	# Fit a separate linear trend to each stretch of the elbow curve (for emphasis)
	lm(cost_df$cost[1:10] ~ cost_df$cluster[1:10])
	lm(cost_df$cost[10:19] ~ cost_df$cluster[10:19])
	lm(cost_df$cost[20:100] ~ cost_df$cluster[20:100])

	# Piecewise fitted line: pick the segment by testing the thresholds from the
	# top down (20 and above, then 10 and above, then everything below 10).
	# NOTE(review): the intercepts/slopes look hand-copied from the lm() output
	# above -- confirm they still match after re-running k-means.
	cost_df$fitted <- local({
	  k <- cost_df$cluster
	  ifelse(
	    k >= 20,
	    (13246.1 - 35.9 * k),
	    ifelse(k >= 10, (15251.5 - 116.5 * k), (19019.9 - 550.9 * k))
	  )
	})

	#Cost plot

randyzwitch / adobe-dailyfeed.R

Created September 24, 2013 12:08

Read in Adobe Analytics raw data feed

	# Extract the zipped daily feed into a working folder on the Desktop
	unzip(
	  exdir = "~/Desktop/datafeed",
	  zipfile = "/Volumes/32SDCARD/Data Sciences - General/SampleDailyDataFeed.zip"
	)

	# Hit-level rows: read without a header line, keeping strings as character
	hit_data <- read.delim(
	  file = "~/Desktop/datafeed/hit_data.tsv",
	  stringsAsFactors = FALSE,
	  header = FALSE
	)

	# The column names ship in their own file; read it so they can label hit_data
	column_headers <- read.delim(file = "~/Desktop/datafeed/column_headers.tsv")

	#Set column headers for hit data to the column headers for 'column headers'

randyzwitch / redshift.R

Created November 20, 2013 16:21

Redshift

	# Load RPostgreSQL, which provides the "PostgreSQL" driver requested below
	library("RPostgreSQL")

	# Connect to Redshift through the PostgreSQL driver.
	# Every run of X's is a placeholder to fill in before running. Note that the
	# port placeholder is unquoted, so R treats it as a variable name until it is
	# replaced with a number.
	redshift.Connection <- dbConnect(dbDriver("PostgreSQL"), host="XXXXXXXXXXXXXXX.redshift.amazonaws.com", dbname="XXXXXXXX",
	user="XXXXXXX", password= "XXXXXXXXXX", port=XXXXXXXX)

	# Query text kept in a variable. The literal spans three lines, so the line
	# breaks and leading whitespace are part of the string.
	# NOTE(review): "table" and "something" look like placeholders -- confirm
	query_string <-
	"select *
	from table
	where something = 'True';"

randyzwitch / redshift_credentials.r

Created November 20, 2013 16:46 — forked from fredbenenson/redshift_credentials.r

	# Install the Redshift R library from a local copy of its source:
	# https://github.com/pingles/redshift-r
	# install.packages("~/Downloads/redshift-r-master", dependencies = TRUE, repos = NULL, type = "source")

	library(redshift)

	# Open the connection from a JDBC URL plus login and password.
	# NOTE(review): REDSHIFT_DB, DB_NAME, LOGIN and PASSWORD are presumably
	# placeholders to replace with real values -- confirm before running.
	redshift <- redshift.connect("jdbc:postgresql://REDSHIFT_DB:5439/DB_NAME", "LOGIN", "PASSWORD")

	# Example query: the result of dbGetQuery() is stored in `data`.
	# NOTE(review): `data` is also the name of a built-in R function; a more
	# specific variable name would avoid confusion.
	data <- dbGetQuery(redshift, "SELECT COUNT(*) FROM table")

randyzwitch / ipython-notebook-ec2.py

Created November 21, 2013 21:50

Setting up IPython Notebook as a remote server

	#### Start IPython, generate SHA1 password to use for IPython Notebook server

	$ ipython
	Python 2.7.5 \|Anaconda 1.8.0 (x86_64)\| (default, Oct 24 2013, 07:02:20)
	Type "copyright", "credits" or "license" for more information.

	IPython 1.1.0 -- An enhanced Interactive Python.
	? -> Introduction and overview of IPython's features.
	%quickref -> Quick reference.
	help -> Python's own help system.

randyzwitch / adobe-documentation.R

Last active February 3, 2016 22:12

Code to generate Adobe Analytics Implementation Documentation

	library("RSiteCatalyst")
	library("WriteXLS")

	#Validate that underlying Perl modules for WriteXLS are installed correctly
	#Will return "Perl found. All required Perl modules were found" if installed correctly
	testPerl()

	#### 1. Pull data for all report suites to create one comprehensive report ####

	#Authenticate with Adobe Analytics API