Last active
November 19, 2020 00:37
-
-
Save agrif/7a55045d0a1b23314b1a0ab9ebe3377b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import sqlite3 | |
import timeit | |
import numpy | |
import tempfile | |
import os.path | |
A_VALUES = ['foo', 'bar', 'baz'] | |
B_VALUES = ['magrat', 'nanny', 'granny'] | |
def make_db(path, make_index, rows=100000): | |
conn = sqlite3.connect(path) | |
conn.execute('''CREATE TABLE files (date integer, a text, b text)''') | |
if make_index: | |
conn.execute('''CREATE INDEX idx_date ON files (date DESC)''') | |
conn.execute('''CREATE INDEX idx_a ON files (a)''') | |
dates = list(range(rows)) | |
random.shuffle(dates) | |
for date in dates: | |
a = random.choice(A_VALUES) | |
b = random.choice(B_VALUES) | |
conn.execute('''INSERT INTO files VALUES (?, ?, ?)''', (date, a, b)) | |
conn.commit() | |
conn.close() | |
def explain(conn, stmt): | |
c = sqlite3.connect(conn) | |
for row in c.execute('EXPLAIN QUERY PLAN ' + stmt): | |
print('\t', row) | |
c.close() | |
def test_once(conn, stmt, repeat=10, number=10): | |
m = numpy.array(timeit.repeat('c = sqlite3.connect(conn); c.execute(stmt); c.close()', | |
globals=dict( | |
conn=conn, stmt=stmt, sqlite3=sqlite3), | |
repeat=repeat, number=number)) | |
m /= number | |
return (numpy.mean(m), numpy.std(m) / numpy.sqrt(len(m))) | |
def test(conn_n, conn_i, stmt, **kwargs): | |
print(stmt) | |
print('\twithout index: {} +/- {}'.format( | |
*test_once(conn_n, stmt, **kwargs))) | |
explain(conn_n, stmt) | |
print('\twith index : {} +/- {}'.format( | |
*test_once(conn_i, stmt, **kwargs))) | |
explain(conn_i, stmt) | |
print() | |
def main(): | |
with tempfile.TemporaryDirectory(prefix='sqlite-speed-test') as d: | |
conn_n = os.path.join(d, 'n.db') | |
make_db(conn_n, False) | |
conn_i = os.path.join(d, 'i.db') | |
make_db(conn_i, True) | |
test(conn_n, conn_i, '''SELECT * FROM files ORDER BY date DESC LIMIT 10''') | |
test(conn_n, conn_i, '''SELECT * FROM files WHERE date >= 99000 ORDER BY date DESC LIMIT 10''') | |
test(conn_n, conn_i, '''SELECT * FROM files WHERE a = 'foo' ORDER BY date DESC LIMIT 10''') | |
test(conn_n, conn_i, '''SELECT * FROM files WHERE date >= 99000 AND a = 'foo' ORDER BY date DESC LIMIT 10''') | |
if __name__ == '__main__': | |
main() |
On an rpi:
SELECT * FROM files ORDER BY date DESC LIMIT 10
without index: 0.06142930562025868 +/- 0.0001987548468534629
(4, 0, 0, 'SCAN TABLE files')
(17, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.000779168670414947 +/- 5.318106691219967e-06
(5, 0, 0, 'SCAN TABLE files USING INDEX idx_date')
SELECT * FROM files WHERE date >= 99000 ORDER BY date DESC LIMIT 10
without index: 0.04036442552052903 +/- 1.1437906990803711e-05
(4, 0, 0, 'SCAN TABLE files')
(19, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.0007978113304125146 +/- 4.249244538527003e-06
(5, 0, 0, 'SEARCH TABLE files USING INDEX idx_date (date>?)')
SELECT * FROM files WHERE a = 'foo' ORDER BY date DESC LIMIT 10
without index: 0.06691772890975699 +/- 8.197634649339435e-05
(5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
(21, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.06689942425000482 +/- 1.963261286442556e-05
(5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
(21, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
SELECT * FROM files WHERE date >= 99000 AND a = 'foo' ORDER BY date DESC LIMIT 10
without index: 0.06074390851019416 +/- 9.533401321089343e-05
(5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
(23, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.06087353886920027 +/- 8.640054799431789e-05
(5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
(23, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
SELECT * FROM files ORDER BY dateval DESC LIMIT 10
without index: 0.6497548129999997 +/- 0.0011802204336985222
(4, 0, 0, 'SCAN TABLE files')
(18, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.0005200229999968543 +/- 7.557763301111602e-06
(5, 0, 0, 'SCAN TABLE files USING INDEX IDX_date')
SELECT * FROM files WHERE dateval >= 99000 ORDER BY dateval DESC LIMIT 10
without index: 0.4978252509999971 +/- 0.0005956959430678823
(4, 0, 0, 'SCAN TABLE files')
(20, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.0004957730000046468 +/- 4.144979865982506e-06
(5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_date (dateval>?)')
SELECT * FROM files WHERE randa = 'banana' ORDER BY dateval DESC LIMIT 10
without index: 0.6311629120000021 +/- 0.0008121937958165911
(5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
(22, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.6361734520000029 +/- 0.0007221630336897132
(5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
(22, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
SELECT * FROM files WHERE dateval >= 99000 AND randa = 'banana' ORDER BY dateval DESC LIMIT 10
without index: 0.6118271329999925 +/- 0.0011363213043028707
(5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
(24, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
with index : 0.6189112849999969 +/- 0.0014941011420588394
(5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
(24, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
The DBs I used were created using the following (adding/removing a date index as needed):
CREATE TABLE files (
dateval INTEGER,
randa TEXT,
randb TEXT,
randc TEXT
);
CREATE INDEX IDX_date ON files(dateval);
CREATE INDEX IDX_randa ON files(randa);
CREATE INDEX IDX_randb ON files(randb);
CREATE INDEX IDX_randc ON files(randc);
;WITH Rand1 AS (
SELECT 'apple' AS 'randa'
UNION
SELECT 'orange'
UNION
SELECT 'banana'
UNION
SELECT 'cucumber'
UNION
SELECT 'watermelon'
)
, rand2 AS (
SELECT 'ford' AS 'randb'
UNION
SELECT 'chevy'
UNION
SELECT 'jeep'
UNION
SELECT 'subaru'
UNION
SELECT 'mercedes'
)
,rand3 AS (
SELECT 'dog' AS 'randc'
UNION
SELECT 'cat'
UNION
SELECT 'squirrel'
UNION
SELECT 'frog'
UNION
SELECT 'shark'
)
, rand4 AS (
SELECT 1 AS 'n'
UNION ALL
SELECT n + 1
FROM
rand4
WHERE n+1 <= 50000
)
INSERT INTO files(randa, randb, randc, dateval)
SELECT
*
FROM
rand1 AS a
CROSS JOIN rand2 AS b
CROSS JOIN rand3 AS c
CROSS JOIN rand4 AS d
ORDER By random()
;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.