Skip to content

Instantly share code, notes, and snippets.

View randyzwitch's full-sized avatar

Randy Zwitch randyzwitch

View GitHub Profile
julia> using ODBC
julia> ODBC.connect("MySQL")
Connection 1 to MySQL successful.
#Save query results into a DataFrame called 'results'
julia> results = query("Select * from a1987;");
julia> typeof(results)
DataFrame (use methods(DataFrame) to see constructors)
@randyzwitch
randyzwitch / anomaly-detection.R
Created August 14, 2013 17:58
Adobe Analytics Anomaly Detection
# Install the development version from GitHub; only needed while the CRAN
# release of RSiteCatalyst is still 1.0 or older
library(devtools)
# Current devtools/remotes expects a single "user/repo" string; a second
# positional argument is no longer interpreted as the GitHub username
install_github("randyzwitch/RSiteCatalyst", ref = "master")
#Run if version >= 1.1 on CRAN
library("RSiteCatalyst")
#API Authentication
SCAuth(<username:company>, <shared_secret>)
@randyzwitch
randyzwitch / rsitecatalyst-anomaly-detection-plot.R
Created August 15, 2013 14:01
R plot for RSiteCatalyst Anomaly Detection
#Plot data using ggplot2
library(ggplot2)
# Build a single POSIXct `date` column from the separate year, month and day
# columns (ISOdate() fills in 12:00 GMT when no time of day is supplied)
pageviews_w_forecast[["date"]] <- ISOdate(
  year = pageviews_w_forecast[["year"]],
  month = pageviews_w_forecast[["month"]],
  day = pageviews_w_forecast[["day"]]
)

# Coerce the pageviews, pageviews_upper and pageviews_lower columns to numeric
# NOTE(review): assumes these are character columns; as.numeric() on a factor
# would return level codes instead - confirm against the upstream data
pageviews_w_forecast[["pageviews"]] <-
  as.numeric(pageviews_w_forecast[["pageviews"]])
pageviews_w_forecast[["pageviews_upper"]] <-
  as.numeric(pageviews_w_forecast[["pageviews_upper"]])
pageviews_w_forecast[["pageviews_lower"]] <-
  as.numeric(pageviews_w_forecast[["pageviews_lower"]])
@randyzwitch
randyzwitch / airline.hive
Created August 22, 2013 21:13
Hive table creation statement airline dataset
-- Create table from yearly airline csv files
CREATE EXTERNAL TABLE airline (
`Year` int,
`Month` int,
`DayofMonth` int,
`DayOfWeek` int,
`DepTime` int,
`CRSDepTime` int,
`ArrTime` int,
`CRSArrTime` int,
@randyzwitch
randyzwitch / airlineview.hive
Last active December 21, 2015 13:30
Build airline view to remove bad records
-- View over `airline` that hides the 22 bad records without deleting them.
-- A row is excluded when its uniquecarrier field holds the literal text
-- "UniqueCarrier" (presumably header lines from the source csv files - confirm).
CREATE VIEW vw_airline AS
SELECT *
FROM airline
WHERE uniquecarrier <> "UniqueCarrier";
@randyzwitch
randyzwitch / python-pypy-julia.py
Last active August 9, 2021 11:38
Python vs. PyPy vs. Julia comparison - Factorials & Looping
#Python/pypy
import math
def smallestdivisall(n):
    """Return the smallest positive integer divisible by every integer in 1..n.

    This is a deliberate brute-force search: candidates are tried in
    increasing order, from 1 up to n! (which is always divisible by 1..n,
    so the search is guaranteed to stop there at the latest).

    Args:
        n: upper end of the divisor range 1..n.

    Returns:
        The smallest qualifying integer, or None when n is 0 (there are no
        divisors to test, so no candidate is ever accepted).

    Raises:
        ValueError: if n is negative (raised by math.factorial).
    """
    limit = math.factorial(n)
    candidate = 1
    # An explicit counter is used instead of xrange/range so the function
    # runs unchanged on Python 2 and Python 3 and never materialises (or
    # overflows on) a sequence of n! elements.
    while candidate <= limit:
        for divisor in range(1, n + 1):
            if candidate % divisor != 0:
                # Not divisible by this divisor: move on to the next candidate.
                break
            elif divisor == n:
                # Reached the last divisor without a miss: candidate qualifies.
                return candidate
        candidate += 1
    return None
@randyzwitch
randyzwitch / julia-smallest-divisible-number.jl
Created August 30, 2013 16:06
Julia smallest divisible number
function smallestdivisall(n::Int64)
for i = 1:factorial(n)
for j = 1:n
if i % j !=0
break
elseif j == n
return i
end
end
end
@randyzwitch
randyzwitch / ggplot-code-JIT-comparison.R
Created September 2, 2013 15:41
Graph code for JIT comparison blog post
library(ggplot2)
library(reshape2)
# Load the comparison data (presumably benchmark timings; the path points at
# the original author's desktop, so adjust it for your machine)
jit <- read.csv("~/Desktop/jit.csv")

# Melt to long format: `numsearch` stays as the id column and every other
# column is stacked into variable/value pairs
jitm <- melt(jit, id.vars = "numsearch")

# Put the values on a natural-log scale (log() defaults to base e)
jitm$value <- log(jitm$value)
#Create Overall ggplot for ln performance
@randyzwitch
randyzwitch / ggplot-dashboard.R
Created September 13, 2013 14:34
Argument to plotlist instead of specifying each graph
library(ggplot2)
# This example uses the ChickWeight dataset, which ships with base R (the datasets package)
# Plot 1: one line per chick showing weight over time, coloured by diet
p1 <- ggplot(
  data = ChickWeight,
  mapping = aes(x = Time, y = weight, colour = Diet, group = Chick)
) +
  geom_line() +
  ggtitle("Growth curve for individual chicks")
# Second plot
@randyzwitch
randyzwitch / natural-search-dtm.R
Last active April 25, 2016 03:34
k-means clustering for organic search terms
#### 0. Setup
library("RSiteCatalyst")
library("RTextTools") #Loads many packages useful for text mining
#### 1. RSiteCatalyst code - Get Natural Search Keywords & Metrics
#Set credentials
SCAuth(<username:company>, <shared secret>)
#Get list of search engine terms