Skip to content

Instantly share code, notes, and snippets.

@agrif
Last active November 19, 2020 00:37
Show Gist options
  • Save agrif/7a55045d0a1b23314b1a0ab9ebe3377b to your computer and use it in GitHub Desktop.
Save agrif/7a55045d0a1b23314b1a0ab9ebe3377b to your computer and use it in GitHub Desktop.
import random
import sqlite3
import timeit
import numpy
import tempfile
import os.path
A_VALUES = ['foo', 'bar', 'baz']
B_VALUES = ['magrat', 'nanny', 'granny']
def make_db(path, make_index, rows=100000):
conn = sqlite3.connect(path)
conn.execute('''CREATE TABLE files (date integer, a text, b text)''')
if make_index:
conn.execute('''CREATE INDEX idx_date ON files (date DESC)''')
conn.execute('''CREATE INDEX idx_a ON files (a)''')
dates = list(range(rows))
random.shuffle(dates)
for date in dates:
a = random.choice(A_VALUES)
b = random.choice(B_VALUES)
conn.execute('''INSERT INTO files VALUES (?, ?, ?)''', (date, a, b))
conn.commit()
conn.close()
def explain(conn, stmt):
c = sqlite3.connect(conn)
for row in c.execute('EXPLAIN QUERY PLAN ' + stmt):
print('\t', row)
c.close()
def test_once(conn, stmt, repeat=10, number=10):
m = numpy.array(timeit.repeat('c = sqlite3.connect(conn); c.execute(stmt); c.close()',
globals=dict(
conn=conn, stmt=stmt, sqlite3=sqlite3),
repeat=repeat, number=number))
m /= number
return (numpy.mean(m), numpy.std(m) / numpy.sqrt(len(m)))
def test(conn_n, conn_i, stmt, **kwargs):
print(stmt)
print('\twithout index: {} +/- {}'.format(
*test_once(conn_n, stmt, **kwargs)))
explain(conn_n, stmt)
print('\twith index : {} +/- {}'.format(
*test_once(conn_i, stmt, **kwargs)))
explain(conn_i, stmt)
print()
def main():
with tempfile.TemporaryDirectory(prefix='sqlite-speed-test') as d:
conn_n = os.path.join(d, 'n.db')
make_db(conn_n, False)
conn_i = os.path.join(d, 'i.db')
make_db(conn_i, True)
test(conn_n, conn_i, '''SELECT * FROM files ORDER BY date DESC LIMIT 10''')
test(conn_n, conn_i, '''SELECT * FROM files WHERE date >= 99000 ORDER BY date DESC LIMIT 10''')
test(conn_n, conn_i, '''SELECT * FROM files WHERE a = 'foo' ORDER BY date DESC LIMIT 10''')
test(conn_n, conn_i, '''SELECT * FROM files WHERE date >= 99000 AND a = 'foo' ORDER BY date DESC LIMIT 10''')
if __name__ == '__main__':
main()
@agrif
Copy link
Author

agrif commented Nov 18, 2020

SELECT * FROM files ORDER BY date DESC LIMIT 10
        without index: 0.032463798779999706 +/- 0.00018860684155957107
         (0, 0, 0, 'SCAN TABLE files')
         (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.00025278348000938426 +/- 9.451280462250982e-07
         (0, 0, 0, 'SCAN TABLE files USING INDEX idx_date')

SELECT * FROM files WHERE date >= 99000 ORDER BY date DESC LIMIT 10
        without index: 0.005971146600013527 +/- 2.697076579037403e-05
         (0, 0, 0, 'SCAN TABLE files')
         (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.00025689187997159026 +/- 9.053740671558195e-07
         (0, 0, 0, 'SEARCH TABLE files USING INDEX idx_date (date>?)')

SELECT * FROM files WHERE a = 'foo' ORDER BY date DESC LIMIT 10
        without index: 0.0222957262700038 +/- 0.0001270135111434987
         (0, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.02220842708999953 +/- 0.00010049390929546134
         (0, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')

SELECT * FROM files WHERE date >= 99000 AND a = 'foo' ORDER BY date DESC LIMIT 10
        without index: 0.009369827230011651 +/- 9.147887964175942e-05
         (0, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.009462642399985271 +/- 5.476026017599398e-05
         (0, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (0, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')

@agrif
Copy link
Author

agrif commented Nov 18, 2020

On an rpi:


SELECT * FROM files ORDER BY date DESC LIMIT 10
        without index: 0.06142930562025868 +/- 0.0001987548468534629
         (4, 0, 0, 'SCAN TABLE files')
         (17, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.000779168670414947 +/- 5.318106691219967e-06
         (5, 0, 0, 'SCAN TABLE files USING INDEX idx_date')

SELECT * FROM files WHERE date >= 99000 ORDER BY date DESC LIMIT 10
        without index: 0.04036442552052903 +/- 1.1437906990803711e-05
         (4, 0, 0, 'SCAN TABLE files')
         (19, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.0007978113304125146 +/- 4.249244538527003e-06
         (5, 0, 0, 'SEARCH TABLE files USING INDEX idx_date (date>?)')

SELECT * FROM files WHERE a = 'foo' ORDER BY date DESC LIMIT 10
        without index: 0.06691772890975699 +/- 8.197634649339435e-05
         (5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (21, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.06689942425000482 +/- 1.963261286442556e-05
         (5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (21, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')

SELECT * FROM files WHERE date >= 99000 AND a = 'foo' ORDER BY date DESC LIMIT 10
        without index: 0.06074390851019416 +/- 9.533401321089343e-05
         (5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (23, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.06087353886920027 +/- 8.640054799431789e-05
         (5, 0, 0, 'SEARCH TABLE files USING INDEX idx_a (a=?)')
         (23, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')

@Gizmokid2005
Copy link

Gizmokid2005 commented Nov 19, 2020

SELECT * FROM files ORDER BY dateval DESC LIMIT 10
        without index: 0.6497548129999997 +/- 0.0011802204336985222
         (4, 0, 0, 'SCAN TABLE files')
         (18, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.0005200229999968543 +/- 7.557763301111602e-06
         (5, 0, 0, 'SCAN TABLE files USING INDEX IDX_date')

SELECT * FROM files WHERE dateval >= 99000 ORDER BY dateval DESC LIMIT 10
        without index: 0.4978252509999971 +/- 0.0005956959430678823
         (4, 0, 0, 'SCAN TABLE files')
         (20, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.0004957730000046468 +/- 4.144979865982506e-06
         (5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_date (dateval>?)')

SELECT * FROM files WHERE randa = 'banana' ORDER BY dateval DESC LIMIT 10
        without index: 0.6311629120000021 +/- 0.0008121937958165911
         (5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
         (22, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.6361734520000029 +/- 0.0007221630336897132
         (5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
         (22, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')

SELECT * FROM files WHERE dateval >= 99000 AND randa = 'banana' ORDER BY dateval DESC LIMIT 10
        without index: 0.6118271329999925 +/- 0.0011363213043028707
         (5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
         (24, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')
        with index   : 0.6189112849999969 +/- 0.0014941011420588394
         (5, 0, 0, 'SEARCH TABLE files USING INDEX IDX_randa (randa=?)')
         (24, 0, 0, 'USE TEMP B-TREE FOR ORDER BY')

The DBs I used were created using the following (adding/removing a date index as needed):

CREATE TABLE files (
dateval INTEGER,
randa TEXT,
randb TEXT,
randc TEXT
);
CREATE INDEX IDX_date ON files(dateval);

CREATE INDEX IDX_randa ON files(randa);

CREATE INDEX IDX_randb ON files(randb);

CREATE INDEX IDX_randc ON files(randc);

;WITH Rand1 AS (
SELECT 'apple' AS 'randa'
UNION
SELECT 'orange'
UNION
SELECT 'banana'
UNION
SELECT 'cucumber'
UNION
SELECT 'watermelon'
)
, rand2 AS (
SELECT 'ford' AS 'randb'
UNION
SELECT 'chevy'
UNION
SELECT 'jeep'
UNION
SELECT 'subaru'
UNION
SELECT 'mercedes'
)
,rand3 AS (
SELECT 'dog' AS 'randc'
UNION
SELECT 'cat'
UNION
SELECT 'squirrel'
UNION
SELECT 'frog'
UNION
SELECT 'shark'
)
, rand4 AS (
SELECT 1 AS 'n'
UNION ALL
SELECT n + 1
FROM
rand4
WHERE n+1 <= 50000
)

INSERT INTO files(randa, randb, randc, dateval)
SELECT
	*
FROM
	rand1 AS a
	CROSS JOIN rand2 AS b
	CROSS JOIN rand3 AS c
	CROSS JOIN rand4 AS d
ORDER By random()
;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment