#!/usr/bin/env python
'''
pip install pysqlite
pip install unqlite
'''
import sqlite3
from unqlite import UnQLite
import timeit
import os


def create_sqlite_table():
    conn = sqlite3.connect('tmp.sqlite3')
    c = conn.cursor()
    c.execute(
        '''
        create table test_table
        (a text, b text, c text)
        '''
    )
    conn.close()


def insert_sqlite_rows(number):
    # one execute() per row, single commit at the end
    conn = sqlite3.connect('tmp.sqlite3')
    c = conn.cursor()
    for x in xrange(number):
        c.execute(
            '''
            insert into test_table values(?,'2','3')
            ''',
            [x]
        )
    conn.commit()
    conn.close()


def insert_unqlite_items(number):
    # store each record in the 'items' document collection
    db = UnQLite('tmp.unqlite')
    items = db.collection('items')
    items.create()
    for x in xrange(number):
        items.store([{
            'a': str(x),
            'b': '2',
            'c': '3',
        }])


if __name__ == '__main__':
    # remove databases left over from previous runs, if any
    for path in ('tmp.unqlite', 'tmp.sqlite3'):
        if os.path.exists(path):
            os.unlink(path)
    create_sqlite_table()
    print "insert_sqlite_rows(1000): ", timeit.timeit("insert_sqlite_rows(1000)", setup="from __main__ import insert_sqlite_rows", number=1)
    print "insert_unqlite_items(1000): ", timeit.timeit("insert_unqlite_items(1000)", setup="from __main__ import insert_unqlite_items", number=1)
    print 'sqlite size: ', os.path.getsize('tmp.sqlite3')
    print 'unqlite size: ', os.path.getsize('tmp.unqlite')
BTW, the reason executemany is so much faster: transactions. The original code just does a bare INSERT per row: no prepared statement, and no BEGIN;/COMMIT; around the INSERTs. Preparing a statement parses the SQL and builds the query plan; YMMV, but that can be as expensive as executing it, or more. The real killer, though, is the lack of an explicit transaction. Without one, SQLite runs in autocommit mode, i.e.
bool autocommit = is_autocommit_mode;
if (autocommit)
    execute("BEGIN;");
execute(sql);
if (autocommit)
    execute("COMMIT;");
You're not setting any PRAGMAs, so you're getting the default synchronous=FULL (and other defaults like journal_mode=DELETE). That means every COMMIT is a flush (FlushFileBuffers on Windows, fsync on Unix), which is expensive, and you're paying that cost for every individual commit.
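For example, something like this (just a sketch; these particular PRAGMA values trade durability for speed and aren't from the gist):

conn = sqlite3.connect('tmp.sqlite3')
# skip the fsync on every commit and use a write-ahead log instead of
# the default rollback journal -- faster, but less durable on power loss
conn.execute('PRAGMA synchronous=NORMAL')
conn.execute('PRAGMA journal_mode=WAL')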
I suspect executemany does all of its work within one transaction; that's why you're seeing the orders-of-magnitude improvement. But you can do the same yourself.
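For instance, a minimal sketch against the benchmark above (insert_sqlite_rows_tx is my name, not part of the gist):

def insert_sqlite_rows_tx(number):
    # isolation_level=None puts the sqlite3 module in autocommit mode,
    # so the BEGIN/COMMIT below delimit one explicit transaction
    conn = sqlite3.connect('tmp.sqlite3', isolation_level=None)
    c = conn.cursor()
    c.execute('BEGIN')
    for x in xrange(number):
        c.execute("insert into test_table values(?,'2','3')", [x])
    c.execute('COMMIT')  # one flush for the whole batch instead of one per row
    conn.close()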
SQLite is pretty damn fast if you use it well and is tops for data integrity.
How's UnQLite's reliability? Any options to improve perf to rival SQLite?
If you use bulk-insert, you can get even better numbers:
cur.executemany(""" insert into test_table values(?,'2','3')""", [[x] for x in range(0, number)])