-
-
Save fopina/8c9644f9666d60cad37b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
''' | |
pip install pysqlite | |
pip install unqlite | |
''' | |
import sqlite3 | |
from unqlite import UnQLite | |
import timeit | |
import os | |
def create_sqlite_table(): | |
conn = sqlite3.connect('tmp.sqlite3') | |
c = conn.cursor() | |
c.execute( | |
''' | |
create table test_table | |
(a text, b text, c text) | |
''' | |
) | |
conn.close() | |
def insert_sqlite_rows(number): | |
conn = sqlite3.connect('tmp.sqlite3') | |
c = conn.cursor() | |
for x in xrange(number): | |
c.execute( | |
''' | |
insert into test_table values(?,'2','3') | |
''', | |
[x] | |
) | |
conn.commit() | |
conn.close() | |
def insert_unqlite_items(number): | |
db = UnQLite('tmp.unqlite') | |
items = db.collection('items') | |
items.create() | |
for x in xrange(number): | |
items.store([{ | |
'a': str(x), | |
'b': '2', | |
'c': '3', | |
}]) | |
if __name__ == '__main__': | |
os.unlink('tmp.unqlite') | |
os.unlink('tmp.sqlite3') | |
create_sqlite_table() | |
print "insert_sqlite_rows(1000): ", timeit.timeit("insert_sqlite_rows(1000)", setup="from __main__ import insert_sqlite_rows", number=1) | |
print "insert_unqlite_items(1000): ", timeit.timeit("insert_unqlite_items(1000)", setup="from __main__ import insert_unqlite_items", number=1) | |
print 'sqlite size: ', os.path.getsize('tmp.sqlite3') | |
print 'unqlite size: ', os.path.getsize('tmp.unqlite') |
fopina
commented
Jul 23, 2015
If you remove just one TAB character on line 36(what means you commit only once after 1000 lines are inserted), you get completely different picture:
Before:
insert_sqlite_rows(1000): 1.26048803329
insert_unqlite_items(1000): 0.530463933945
After:
insert_sqlite_rows(1000): 0.00636601448059
insert_unqlite_items(1000): 0.457049846649
If you use bulk-insert, you can get even better numbers:
cur.executemany(""" insert into test_table values(?,'2','3')""", [[x] for x in range(0, number)])
insert_sqlite_rows(1000): 0.00369000434875
insert_unqlite_items(1000): 0.49748301506
BTW the reason execute many is so much faster: transactions. The original code just did INSERT, no prepare the statement and no BEGIN; and COMMIT; around the INSERT. Prepare parses the SQL and builds the query plan - YMMV but that can be as expensive or more than executing it. But the killer is the lack of an explicit transaction. Lacking that, SQLite's in auto commit mode i.e.
bool autocommit = isautocommitmode;
if (autocommit) {
execute("BEGIN;")
execute(sql)
if (autocommit)
execute("COMMIT;")
You're not setting any PRAGMAs so you're getting the default synchronous=FULL (and other things like journal_mode=DELETE). That means COMMIT = flush (FlushFileBuffers on windows, fsync on unix). That's expensive. You're doing that for every individual commit.
I suspect execute many does all its work within 1 transaction. That's why you're seeing the orders of magnitude improvement, but you can do it yourself.
SQLite is pretty damn fast if you use it well and is tops for data integrity.
How's UNQlite's reliability? Any options to improve perf to rival SQLite?