This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# preliminaries | |
from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
from nltk.stem.porter import PorterStemmer | |
from nltk import tokenize, pos_tag | |
import csv | |
import re | |
# read in data | |
comments = csv.reader(open("/Users/scott/Downloads/issue_comments.csv", "rb")) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
${TABLE} is a placeholder for the table variants | |
you will be benchmarking with this script. Leave | |
${TABLE} in, as it will be used to dynamically substitute | |
your table names at the command line. | |
*/ | |
\timing on | |
\o /dev/null |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
this first set of tables explicitly defines | |
foreign-to-primary-key relationships | |
in addition to shared distribution key | |
*/ | |
drop table if exists public.event_rel; | |
create table public.event_rel ( | |
created_at timestamp encode raw | |
, event_type varchar(200) encode text32k | |
, license_slug varchar(18) encode raw |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is a Spark Streaming job | |
that takes a raw stream of logs | |
from Flume, parses the log lines | |
capturing them in an RDD, then | |
adds a schema and ultimately writes | |
to HDFS. | |
Written by Scott Hoover, 2016. | |
Send questions to [email protected] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# preliminaries | |
library(ggplot2) | |
# read in file | |
df <- read.csv(file = "~/bench_summary.txt", header = FALSE) | |
# generate query numbers and merge with data frame | |
df <- cbind(df, rep(rep(c(1:5),each = 5), 5)) | |
# rename columns |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\timing on | |
\o /dev/null | |
select count(*) from pinger.event_tmp1; | |
select count(*) from pinger.event_tmp1; | |
select count(*) from pinger.event_tmp1; | |
select count(*) from pinger.event_tmp1; | |
select count(*) from pinger.event_tmp1; | |
select looker_instance_slug, count(*) from pinger.event_tmp1 group by 1; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop table if exists pinger.event_tmp1; | |
drop table if exists pinger.event_tmp2; | |
drop table if exists pinger.event_tmp3; | |
drop table if exists pinger.event_tmp4; | |
drop table if exists pinger.event_tmp5; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
insert into pinger.event_tmp1 ( | |
select * from pinger.event | |
); | |
insert into pinger.event_tmp2 ( | |
select * from pinger.event | |
); | |
insert into pinger.event_tmp3 ( | |
select * from pinger.event |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* simple sort */ | |
create table pinger.event_tmp1 ( | |
event_id bigint encode mostly16 | |
, agent character varying(65535) encode lzo | |
, referrer character varying(65535) encode lzo | |
, user_id integer encode mostly8 | |
, version character varying(100) encode bytedict | |
, created_at timestamp without time zone encode delta | |
, ip_address character varying(100) encode lzo | |
, event_type character varying(200) encode bytedict |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# query: https://metanew.looker.com/x/HBhS99t | |
# median pickup time | |
df <- read.csv(file = "~/Downloads/pickups.csv", header = TRUE) | |
names(df) <- c("id", "month", "pickup_time") | |
ddply(df, .(month), summarize, median_duration = median(pickup_time, na.rm = TRUE)) |