Last active
February 7, 2022 21:19
-
-
Save bitner/bc6bc22b0334796ff8b6d7ea9808bc24 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import psycopg | |
import os | |
import time | |
# Get a an iterator to use for sample of half million ids cached to a file | |
def sample_ids(): | |
file='sampleids.csv' | |
if not os.path.exists(file): | |
with open(file, 'wb') as f: | |
with psycopg.connect(os.environ.get('PGURL')) as conn: | |
with conn.cursor() as cur: | |
with cur.copy(""" | |
COPY ( | |
SELECT id | |
FROM images TABLESAMPLE SYSTEM (5) | |
LIMIT 500000 | |
) TO STDOUT; | |
""") as copy: | |
for data in copy: | |
f.write(data) | |
yield data | |
else: | |
for line in open(file, 'r'): | |
yield line | |
def count_from_ids(showexplain=False): | |
start = time.time() | |
print(f"Started {time.time() - start}") | |
with psycopg.connect(os.environ.get('PGURL')) as conn: | |
with conn.cursor() as cur: | |
print(f"Connection Started {time.time() - start}") | |
cur.execute(""" | |
SET WORK_MEM TO '100MB'; | |
SET TEMP_BUFFERS TO '100MB'; | |
CREATE TEMP TABLE lookup_ids (id BIGINT PRIMARY KEY) ON COMMIT DROP; | |
""") | |
print(f"Created Temp Table {time.time() - start}") | |
with cur.copy("""COPY lookup_ids FROM STDIN WITH CSV""") as copy: | |
for id in sample_ids(): | |
copy.write_row((int(id),)) | |
cur.execute(""" | |
ANALYZE lookup_ids; | |
""") | |
print(f"Copied data to temp table {time.time() - start}") | |
q=""" | |
SELECT | |
substring(quadkey FOR 4), | |
count(*), | |
min(image_dt), | |
max(image_dt) | |
FROM images JOIN lookup_ids USING (id) | |
GROUP BY 1 ORDER BY 2 DESC | |
; | |
""" | |
if showexplain: | |
print('-----------------------EXPLAIN------------------------------') | |
cur.execute(f"EXPLAIN (ANALYZE,BUFFERS) {q}") | |
for record in cur: | |
print(record[0]) | |
print(f"Ran Explain {time.time() - start}") | |
print('-------------------END EXPLAIN------------------------------') | |
cur.execute(q) | |
print(f"Ran Aggregate {time.time() - start}") | |
print("quadkey\tcount\tmindate\tmaxdate") | |
for record in cur: | |
print(f"{record[0]}\t{record[1]}\t{record[2].isoformat()}\t{record[3].isoformat()}") | |
conn.commit() | |
print(f"Done {time.time() - start}") | |
count_from_ids(True) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment