Skip to content

Instantly share code, notes, and snippets.

@husio
Created June 24, 2010 20:39
Show Gist options
  • Save husio/451936 to your computer and use it in GitHub Desktop.
Save husio/451936 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import random
import os
import psycopg2
import datetime
import multiprocessing
# ltree contrib module
# http://www.postgresql.org/docs/current/static/ltree.html
SCHEMA = '''
create table advert (
title varchar(250),
site integer,
category_path ltree,
key varchar(250) primary key,
created date default now(),
price varchar(8),
data text
)
'''
TEXT = '''
Lorem ipsum dolor sit amet enim. Etiam ullamcorper. Suspendisse a pellentesque
dui, non felis. Maecenas malesuada elit lectus felis, malesuada ultricies.
Curabitur et ligula. Ut molestie a, ultricies porta urna. Vestibulum commodo
volutpat a, convallis ac, laoreet enim. Phasellus fermentum in, dolor.
Pellentesque facilisis. Nulla imperdiet sit amet magna. Vestibulum dapibus,
mauris nec malesuada fames ac turpis velit, rhoncus eu, luctus et interdum
adipiscing wisi. Aliquam erat ac ipsum. Integer aliquam purus. Quisque lorem
tortor fringilla sed, vestibulum id, eleifend justo vel bibendum sapien massa
ac turpis faucibus orci luctus non, consectetuer lobortis quis, varius in,
purus. Integer ultrices posuere cubilia Curae, Nulla ipsum dolor lacus,
suscipit adipiscing. Cum sociis natoque penatibus et ultrices volutpat. Nullam
wisi ultricies a, gravida vitae, dapibus risus ante sodales lectus blandit eu,
tempor diam pede cursus vitae, ultricies eu, faucibus quis, porttitor eros
cursus lectus, pellentesque eget, bibendum a, gravida ullamcorper quam. Nullam
viverra consectetuer. Quisque cursus et, porttitor risus. Aliquam sem. In
hendrerit nulla quam nunc, accumsan congue. Lorem ipsum primis in nibh vel
risus. Sed vel lectus. Ut sagittis, ipsum dolor quam.
'''.strip()
TEXT_P = TEXT.split()
C_PATH = (
'aaaa',
'aaaa.bbbbbb'
'aaaa.bbbbbb.cccccccc',
'aaaa.bbbbbb.cccccccc.dddddddd',
'aaaa.bbbbbb.cccccccc.dddddddd.eeeee',
'aaaa.bbbbbb.cccccccc.dddddddd.eeeee.ffffff',
'xxxxx',
'xxxxx.yyyy',
'xxxxx.yyyy.zzzzz',
'xxxxx.yyyy.zzzzz.wwwwwww',
'xxxxx.yyyy.zzzzz.wwwwwww.qqqqqq',
)
NOW = datetime.datetime.now()
def measure_time(fun):
def decorator(*args, **kwds):
start_time = time.time()
try:
return fun(*args, **kwds)
finally:
work_time = time.time() - start_time
name = fun.__name__
print 'Work time (%s): %.2fsec' % (name, work_time)
return decorator
def document_generator(ndocs):
rand = os.getpid() * random.randint(1, 1000)
for i in xrange(ndocs):
key = '%s_%s' % (rand, i)
site = random.randint(0, 20)
r_number = random.randint(0, 100)
title = ' '.join(TEXT_P[r_number:r_number + 15])
data = "{junk: 'some json data here'}"
price = str(random.randint(0, 1000))
sys_path = random.choice(C_PATH)
dt = NOW - datetime.timedelta(days=r_number)
yield (key, site, dt, price, sys_path, title, data)
@measure_time
def insert_data(doc_number):
conn = psycopg2.connect("dbname=test user=piotrek")
curr = conn.cursor()
curr.executemany('''
INSERT INTO advert (key, site, created, price, category_path, title, data)
VALUES (%s, %s, %s, %s, %s, %s, %s)
''', document_generator(doc_number))
conn.commit()
curr.close()
conn.close()
def create_db():
conn = psycopg2.connect("dbname=test user=piotrek")
curr = conn.cursor()
try:
curr.execute(SCHEMA)
conn.commit()
return True
except:
pass
finally:
conn.close()
return False
@measure_time
def insert_db():
n_proc = 4
n_docs = 10000 * 56
inserters = [multiprocessing.Process(target=insert_data, args=(n_docs, )
) for x in range(n_proc)]
[i.start() for i in inserters]
[i.join() for i in inserters]
print 'done'
@measure_time
def count_data():
conn = psycopg2.connect("dbname=test user=piotrek")
curr = conn.cursor()
curr.execute('SELECT count(key) FROM advert')
print 'row number: %s' % curr.fetchone()
conn.rollback()
curr.close()
conn.close()
@measure_time
def create_indexes():
conn = psycopg2.connect("dbname=test user=piotrek")
curr = conn.cursor()
curr.execute("CREATE INDEX advert_category_path_gist_idx ON advert USING gist(category_path);")
curr.execute("CREATE INDEX advert_site_created_idx ON advert (site, created);")
conn.commit()
curr.close()
conn.close()
def get_size(name):
conn = psycopg2.connect("dbname=test user=piotrek")
curr = conn.cursor()
curr.execute("SELECT pg_size_pretty(pg_total_relation_size(%s));", (name,))
result = curr.fetchone()
conn.rollback()
curr.close()
conn.close()
return result
@measure_time
def dummy_query_hits(n_queries):
print 'this may take some time...'
conn = psycopg2.connect("dbname=test user=piotrek")
curr = conn.cursor()
for x in range(n_queries):
site = random.randint(1, 20)
offset = random.randint(1000, 6000)
c_path = random.choice(C_PATH)
curr.execute("""
select 1 from advert where
site = %s and
category_path <@ %s
order by created desc limit 1 offset %s
""", (site, c_path, offset))
curr.fetchall()
conn.commit()
conn.close()
@measure_time
def main():
#create_db()
#insert_data()
#count_data()
#create_indexes()
print "adverts size: %s" % get_size("advert")
print "gist index size: %s" % get_size("advert_category_path_gist_idx")
print "site/created index size: %s" % get_size("advert_site_created_idx")
dummy_query_hits(60)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment