Created
September 16, 2016 08:28
-
-
Save jeromer/a7e6ab89be76209e0f9faf9520d5db4b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# Tested on Postgres 9.5 | |
import psycopg2 | |
import datetime | |
conn = psycopg2.connect("dbname=sandbox user=postgres") | |
cursor = conn.cursor() | |
TABLE = "test_bulk_update" | |
ROWS_TO_LOAD = int(1e6) | |
SEP = "-" * 50 | |
START = 0 | |
END = ROWS_TO_LOAD - 100 | |
def main(): | |
create_test_table() | |
fill_table() | |
update_method_a() | |
update_method_b() | |
update_method_c() | |
update_method_d() | |
drop_test_table() | |
def create_test_table(): | |
cursor.execute(""" | |
CREATE TABLE %s( | |
id SERIAL NOT NULL, | |
text text, | |
CONSTRAINT test_bulk_update_pkey PRIMARY KEY (id) | |
); | |
""" % TABLE) | |
conn.commit() | |
def fill_table(): | |
sql = """ | |
INSERT INTO %s (text) VALUES (generate_series(1, %d)); | |
""" % (TABLE, ROWS_TO_LOAD) | |
cursor.execute(sql) | |
conn.commit() | |
sql = "SELECT COUNT(*) as total FROM %s" % TABLE | |
cursor.execute(sql) | |
total = cursor.fetchall()[0][0] | |
print "Testing with %d rows" % (total) | |
def update_method_a(): | |
print SEP | |
print "Method A (one commit per update)... zzzZZZZzZZZZZzZZZ" | |
start = datetime.datetime.now() | |
for i in range(START, END): | |
sql = "UPDATE " + TABLE + " SET text = 'a' WHERE id = %s" | |
cursor.execute(sql, [i]) | |
conn.commit() | |
end = datetime.datetime.now() | |
print "Done : %s" % (end - start) | |
def update_method_b(): | |
print SEP | |
print "Method B (one global commit)" | |
start = datetime.datetime.now() | |
for i in range(START, END): | |
sql = "UPDATE " + TABLE + " SET text = 'b' WHERE id = %s" | |
cursor.execute(sql, [i]) | |
conn.commit() | |
end = datetime.datetime.now() | |
print "Done : %s" % (end - start) | |
def update_method_c(): | |
print SEP | |
print "Method C (Using IN)" | |
start = datetime.datetime.now() | |
sql = """ | |
UPDATE %s SET text = 'c' | |
WHERE id IN (SELECT id FROM %s) | |
""" % (TABLE, TABLE) | |
cursor.execute(sql) | |
conn.commit() | |
end = datetime.datetime.now() | |
print "Done : %s" % (end - start) | |
def update_method_d(): | |
print SEP | |
print "Method D (Using FROM source in UPDATE)" | |
start = datetime.datetime.now() | |
sql = "CREATE TEMP TABLE tmp_table(LIKE %s INCLUDING ALL);" % TABLE | |
cursor.execute(sql) | |
conn.commit() | |
sql = """ | |
INSERT INTO tmp_table SELECT * FROM %s WHERE id BETWEEN %s AND %s | |
""" % (TABLE, START, END) | |
cursor.execute(sql) | |
conn.commit() | |
sql = """ | |
UPDATE %s t1 | |
SET text = 'd' | |
FROM tmp_table t2 | |
WHERE t1.id = t2.id | |
""" % TABLE | |
cursor.execute(sql) | |
conn.commit() | |
sql = "DROP TABLE tmp_table" | |
cursor.execute(sql) | |
conn.commit() | |
end = datetime.datetime.now() | |
print "Done : %s" % (end - start) | |
def drop_test_table(): | |
sql = "DROP TABLE %s" % TABLE | |
cursor.execute(sql) | |
conn.commit() | |
if __name__ == '__main__': | |
main() | |
# Testing with 10000 rows | |
# -------------------------------------------------- | |
# Method A (one commit per update)... zzzZZZZzZZZZZzZZZ | |
# Done : 0:00:36.887775 | |
# -------------------------------------------------- | |
# Method B (one global commit) | |
# Done : 0:00:00.915838 | |
# -------------------------------------------------- | |
# Method C (Using IN) | |
# Done : 0:00:00.054597 | |
# -------------------------------------------------- | |
# Method D (Using FROM source in UPDATE) | |
# Done : 0:00:00.098071 | |
# Testing with 100000 rows | |
# -------------------------------------------------- | |
# Method A (one commit per update)... zzzZZZZzZZZZZzZZZ | |
# Done : 0:06:38.097410 | |
# -------------------------------------------------- | |
# Method B (one global commit) | |
# Done : 0:00:08.409724 | |
# -------------------------------------------------- | |
# Method C (Using IN) | |
# Done : 0:00:00.607172 | |
# -------------------------------------------------- | |
# Method D (Using FROM source in UPDATE) | |
# Done : 0:00:00.801136 | |
# Testing with 500000 rows (method A disabled because to slow) | |
# -------------------------------------------------- | |
# Method B (one global commit) | |
# Done : 0:00:41.990436 | |
# -------------------------------------------------- | |
# Method C (Using IN) | |
# Done : 0:00:04.296147 | |
# -------------------------------------------------- | |
# Method D (Using FROM source in UPDATE) | |
# Done : 0:00:05.939987 | |
# Testing with 1000000 rows (method A disabled because to slow) | |
# -------------------------------------------------- | |
# Method B (one global commit) | |
# Done : 0:01:25.472691 | |
# -------------------------------------------------- | |
# Method C (Using IN) | |
# Done : 0:00:08.467847 | |
# -------------------------------------------------- | |
# Method D (Using FROM source in UPDATE) | |
# Done : 0:00:12.588521 | |
# (This is actually divided in ~6 seconds to create the | |
# temporary table and ~ 6 seconds to apply the update) | |
# Maybe tweaking temp_buffer could help | |
# https://www.postgresql.org/docs/current/static/runtime-config-resource.html |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment