Created
September 27, 2020 03:14
-
-
Save kmuthukk/3d6b325b80c5f5474d7d391f365d51bb to your computer and use it in GitHub Desktop.
Test case for GROUP BY index_column LIMIT n using table scan instead of index scan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies: | |
# On CentOS you can install psycopg2 thus: | |
# | |
# sudo yum install postgresql-libs | |
# sudo yum install python-psycopg2 | |
import psycopg2; | |
import time; | |
from datetime import datetime | |
from multiprocessing.dummy import Pool as ThreadPool | |
def create_table(table_name, index_type): | |
conn = psycopg2.connect("host=localhost dbname=postgres user=postgres port=5433") | |
conn.set_session(autocommit=True) | |
cur = conn.cursor() | |
cur.execute("""DROP TABLE IF EXISTS """ + table_name); | |
cur.execute("""CREATE TABLE IF NOT EXISTS """ + table_name + """( | |
k text not null, | |
v text not null, | |
PRIMARY KEY((k) HASH)) | |
""") | |
print("Created table: " + table_name) | |
table_name_v_idx = table_name + "_v_idx" | |
cur.execute("""CREATE INDEX """ + table_name_v_idx + """ ON """ + table_name + """(v {})""".format(index_type)) | |
print("Created index: " + table_name_v_idx) | |
def load_data(table_name): | |
conn = psycopg2.connect("host=localhost dbname=postgres user=postgres port=5433") | |
conn.set_session(autocommit=True) | |
cur = conn.cursor() | |
num_rows = 200 | |
for idx in range(num_rows): | |
cur.execute("INSERT INTO " + table_name + "(k, v) VALUES (%s, %s)", | |
("k-"+str(idx), "v-"+str(idx))) | |
print("Inserted %d rows" % (num_rows)) | |
def print_select_results(records): | |
for record in records: | |
print(record) | |
print("===========") | |
def select_data(): | |
conn = psycopg2.connect("host=localhost dbname=postgres user=postgres port=5433") | |
conn.set_session(autocommit=True) | |
cur = conn.cursor() | |
for limit in [100, 200]: | |
print("Testing with limit: {}".format(limit)) | |
cur.execute("""EXPLAIN SELECT v, count(*) FROM tab GROUP BY v limit {}""".format(limit)) | |
records = cur.fetchall() | |
print_select_results(records) | |
# Main | |
print("Running tests with ASC index for column v") | |
create_table("tab", "ASC") | |
load_data("tab") | |
select_data() | |
print("===========================================") | |
print("Running tests with HASH index for column v") | |
create_table("tab", "HASH") | |
load_data("tab") | |
select_data() | |
print("===========================================") |
Author
kmuthukk
commented
Sep 27, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment