Skip to content

Instantly share code, notes, and snippets.

@pramsey
Created October 10, 2015 16:16
Show Gist options
  • Select an option

  • Save pramsey/cb9c455df1e735cf2861 to your computer and use it in GitHub Desktop.

Select an option

Save pramsey/cb9c455df1e735cf2861 to your computer and use it in GitHub Desktop.
-- place data in s3 for consumption by redshift
-- unzip -p taxi.zip | perl strip.pl | gzip -9 > taxi.gz
-- aws s3 cp taxi.gz s3://taxitest/taxi.gz
DROP TABLE taxis;
CREATE TABLE taxis (
pickup_datetime varchar,
passenger_count integer,
trip_time_in_secs real,
trip_distance real,
pickup_longitude real,
pickup_latitude real,
easting real,
northing real,
mercator_hash text not null sortkey,
nid integer,
id integer not null distkey
);
COPY taxis FROM 's3://taxitest/taxi.gz'
CREDENTIALS 'aws_access_key_id=xxx;aws_secret_access_key=xxx'
DELIMITER '\t' GZIP;
-- how big is test table?
-- 59454992
select count(*) from taxis;
-- summarize whole table into points at zoom level 8 (resolution 611 m/pixel)
-- 15336
-- 2.1s cold, 0.9s hot
select count(*) from
(
select
floor(easting/611) as x,
floor(northing/611) as y
from taxis
group by x, y
) as foo;
-- summarize whole table into points at zoom level 8 (resolution 611 m/pixel)
-- with histogram data for two variables
-- 5662896
-- 4.2s hot
select count(*) from
(
select
floor(easting/611) as x,
floor(northing/611) as y,
floor(10*trip_distance) as trip_distance,
floor(trip_time_in_secs/10) as trip_time_in_secs
from taxis
group by x, y, trip_distance, trip_time_in_secs
) as foo;
-- summarize partial table using tile hash
-- into points
-- with histogram data for two variables
-- 234321
-- 1.7s cold 0.6s hot
select count(*) from
(
select
floor(easting/100) as x,
floor(northing/100) as y,
floor(trip_time_in_secs) as trip_time_in_secs,
floor(10*trip_distance) as trip_distance
from taxis
where mercator_hash >= '90efbc0f' and mercator_hash < '90efbc10'
group by x, y, trip_time_in_secs, trip_distance
) as foo;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment