Created
December 3, 2024 14:48
-
-
Save pdet/7459d5b6ad7430dfc86eb5dfe0b643e0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import duckdb | |
import time | |
def gen_tpch(sf=20):
    """Generate TPC-H data and export the ``lineitem`` table in three formats.

    Runs DuckDB's ``dbgen`` in a throwaway in-memory database and writes the
    ``lineitem`` table to the current working directory as:

    * ``lineitem.csv``
    * ``lineitem.parquet`` (default COPY options)
    * ``lineitem_zstd.parquet`` (zstd codec, compression level 1)

    Args:
        sf: Scale factor passed to ``dbgen``. Defaults to 20, the value the
            script originally hard-coded.
    """
    # The context manager closes the connection (and releases the generated
    # in-memory tables) even if one of the COPY statements fails.
    with duckdb.connect() as con:
        con.execute(f"CALL dbgen(sf={sf});")
        con.execute("COPY lineitem to 'lineitem.csv'")
        con.execute("COPY lineitem to 'lineitem.parquet'")
        con.execute("COPY lineitem TO 'lineitem_zstd.parquet' (FORMAT 'parquet', CODEC 'zstd', COMPRESSION_LEVEL 1);")
# DDL for the target table that load_data() creates before each timed COPY.
# Numeric measures are declared DOUBLE PRECISION and the keys BIGINT.
schema = '''
CREATE TABLE lineitem
(
    l_orderkey BIGINT not null,
    l_partkey BIGINT not null,
    l_suppkey BIGINT not null,
    l_linenumber BIGINT not null,
    l_quantity DOUBLE PRECISION not null,
    l_extendedprice DOUBLE PRECISION not null,
    l_discount DOUBLE PRECISION not null,
    l_tax DOUBLE PRECISION not null,
    l_returnflag CHAR(1) not null,
    l_linestatus CHAR(1) not null,
    l_shipdate DATE not null,
    l_commitdate DATE not null,
    l_receiptdate DATE not null,
    l_shipinstruct CHAR(25) not null,
    l_shipmode CHAR(10) not null,
    l_comment VARCHAR(44) not null
);
'''
def load_data(file, runs=5):
    """Time how long DuckDB takes to COPY ``file`` into an empty ``lineitem`` table.

    Each run opens a fresh in-memory database, applies the two settings below,
    creates the table from the module-level ``schema`` DDL, and times only the
    ``COPY ... FROM`` statement. One line per run is printed in the form
    ``<file> Time: <seconds>``.

    Args:
        file: Path of the CSV or Parquet file to load. It is interpolated
            directly into the SQL text, so it must not contain a single quote.
        runs: Number of timed repetitions. Defaults to 5, the count the
            script originally hard-coded.
    """
    for _ in range(runs):
        # A new connection per run means every measurement starts from an
        # empty database; the context manager closes it once the run is done.
        with duckdb.connect() as con:
            # NOTE(review): presumably forbids spilling to a temp directory so
            # the load stays in memory - confirm against DuckDB's config docs.
            con.execute("SET max_temp_directory_size = '0GB'")
            con.execute("SET preserve_insertion_order = false;")
            con.execute(schema)
            # perf_counter() is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()
            con.execute(f"COPY lineitem FROM '{file}'")
            elapsed = time.perf_counter() - start_time
        print(f"{file} Time: {elapsed}")
# Produce the three input files once, then benchmark loading each of them.
gen_tpch()
load_data('lineitem.csv')
load_data('lineitem.parquet')
load_data('lineitem_zstd.parquet')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment