Created
October 10, 2015 16:16
-
-
Save pramsey/cb9c455df1e735cf2861 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- place data in s3 for consumption by redshift | |
| -- unzip -p taxi.zip | perl strip.pl | gzip -9 > taxi.gz | |
| -- aws s3 cp taxi.gz s3://taxitest/taxi.gz | |
| DROP TABLE taxis; | |
| CREATE TABLE taxis ( | |
| pickup_datetime varchar, | |
| passenger_count integer, | |
| trip_time_in_secs real, | |
| trip_distance real, | |
| pickup_longitude real, | |
| pickup_latitude real, | |
| easting real, | |
| northing real, | |
| mercator_hash text not null sortkey, | |
| nid integer, | |
| id integer not null distkey | |
| ); | |
| COPY taxis FROM 's3://taxitest/taxi.gz' | |
| CREDENTIALS 'aws_access_key_id=xxx;aws_secret_access_key=xxx' | |
| DELIMITER '\t' GZIP; | |
| -- how big is test table? | |
| -- 59454992 | |
| select count(*) from taxis; | |
| -- summarize whole table into points at zoom level 8 (resolution 611 m/pixel) | |
| -- 15336 | |
| -- 2.1s cold, 0.9s hot | |
| select count(*) from | |
| ( | |
| select | |
| floor(easting/611) as x, | |
| floor(northing/611) as y | |
| from taxis | |
| group by x, y | |
| ) as foo; | |
| -- summarize whole table into points at zoom level 8 (resolution 611 m/pixel) | |
| -- with histogram data for two variables | |
| -- 5662896 | |
| -- 4.2s hot | |
| select count(*) from | |
| ( | |
| select | |
| floor(easting/611) as x, | |
| floor(northing/611) as y, | |
| floor(10*trip_distance) as trip_distance, | |
| floor(trip_time_in_secs/10) as trip_time_in_secs | |
| from taxis | |
| group by x, y, trip_distance, trip_time_in_secs | |
| ) as foo; | |
| -- summarize partial table using tile hash | |
| -- into points | |
| -- with histogram data for two variables | |
| -- 234321 | |
| -- 1.7s cold 0.6s hot | |
| select count(*) from | |
| ( | |
| select | |
| floor(easting/100) as x, | |
| floor(northing/100) as y, | |
| floor(trip_time_in_secs) as trip_time_in_secs, | |
| floor(10*trip_distance) as trip_distance | |
| from taxis | |
| where mercator_hash >= '90efbc0f' and mercator_hash < '90efbc10' | |
| group by x, y, trip_time_in_secs, trip_distance | |
| ) as foo; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment