Created
June 24, 2010 20:39
-
-
Save husio/451936 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import time | |
import random | |
import os | |
import psycopg2 | |
import datetime | |
import multiprocessing | |
# ltree contrib module | |
# http://www.postgresql.org/docs/current/static/ltree.html | |
SCHEMA = ''' | |
create table advert ( | |
title varchar(250), | |
site integer, | |
category_path ltree, | |
key varchar(250) primary key, | |
created date default now(), | |
price varchar(8), | |
data text | |
) | |
''' | |
TEXT = ''' | |
Lorem ipsum dolor sit amet enim. Etiam ullamcorper. Suspendisse a pellentesque | |
dui, non felis. Maecenas malesuada elit lectus felis, malesuada ultricies. | |
Curabitur et ligula. Ut molestie a, ultricies porta urna. Vestibulum commodo | |
volutpat a, convallis ac, laoreet enim. Phasellus fermentum in, dolor. | |
Pellentesque facilisis. Nulla imperdiet sit amet magna. Vestibulum dapibus, | |
mauris nec malesuada fames ac turpis velit, rhoncus eu, luctus et interdum | |
adipiscing wisi. Aliquam erat ac ipsum. Integer aliquam purus. Quisque lorem | |
tortor fringilla sed, vestibulum id, eleifend justo vel bibendum sapien massa | |
ac turpis faucibus orci luctus non, consectetuer lobortis quis, varius in, | |
purus. Integer ultrices posuere cubilia Curae, Nulla ipsum dolor lacus, | |
suscipit adipiscing. Cum sociis natoque penatibus et ultrices volutpat. Nullam | |
wisi ultricies a, gravida vitae, dapibus risus ante sodales lectus blandit eu, | |
tempor diam pede cursus vitae, ultricies eu, faucibus quis, porttitor eros | |
cursus lectus, pellentesque eget, bibendum a, gravida ullamcorper quam. Nullam | |
viverra consectetuer. Quisque cursus et, porttitor risus. Aliquam sem. In | |
hendrerit nulla quam nunc, accumsan congue. Lorem ipsum primis in nibh vel | |
risus. Sed vel lectus. Ut sagittis, ipsum dolor quam. | |
'''.strip() | |
TEXT_P = TEXT.split() | |
C_PATH = ( | |
'aaaa', | |
'aaaa.bbbbbb' | |
'aaaa.bbbbbb.cccccccc', | |
'aaaa.bbbbbb.cccccccc.dddddddd', | |
'aaaa.bbbbbb.cccccccc.dddddddd.eeeee', | |
'aaaa.bbbbbb.cccccccc.dddddddd.eeeee.ffffff', | |
'xxxxx', | |
'xxxxx.yyyy', | |
'xxxxx.yyyy.zzzzz', | |
'xxxxx.yyyy.zzzzz.wwwwwww', | |
'xxxxx.yyyy.zzzzz.wwwwwww.qqqqqq', | |
) | |
NOW = datetime.datetime.now() | |
def measure_time(fun): | |
def decorator(*args, **kwds): | |
start_time = time.time() | |
try: | |
return fun(*args, **kwds) | |
finally: | |
work_time = time.time() - start_time | |
name = fun.__name__ | |
print 'Work time (%s): %.2fsec' % (name, work_time) | |
return decorator | |
def document_generator(ndocs): | |
rand = os.getpid() * random.randint(1, 1000) | |
for i in xrange(ndocs): | |
key = '%s_%s' % (rand, i) | |
site = random.randint(0, 20) | |
r_number = random.randint(0, 100) | |
title = ' '.join(TEXT_P[r_number:r_number + 15]) | |
data = "{junk: 'some json data here'}" | |
price = str(random.randint(0, 1000)) | |
sys_path = random.choice(C_PATH) | |
dt = NOW - datetime.timedelta(days=r_number) | |
yield (key, site, dt, price, sys_path, title, data) | |
@measure_time | |
def insert_data(doc_number): | |
conn = psycopg2.connect("dbname=test user=piotrek") | |
curr = conn.cursor() | |
curr.executemany(''' | |
INSERT INTO advert (key, site, created, price, category_path, title, data) | |
VALUES (%s, %s, %s, %s, %s, %s, %s) | |
''', document_generator(doc_number)) | |
conn.commit() | |
curr.close() | |
conn.close() | |
def create_db(): | |
conn = psycopg2.connect("dbname=test user=piotrek") | |
curr = conn.cursor() | |
try: | |
curr.execute(SCHEMA) | |
conn.commit() | |
return True | |
except: | |
pass | |
finally: | |
conn.close() | |
return False | |
@measure_time | |
def insert_db(): | |
n_proc = 4 | |
n_docs = 10000 * 56 | |
inserters = [multiprocessing.Process(target=insert_data, args=(n_docs, ) | |
) for x in range(n_proc)] | |
[i.start() for i in inserters] | |
[i.join() for i in inserters] | |
print 'done' | |
@measure_time | |
def count_data(): | |
conn = psycopg2.connect("dbname=test user=piotrek") | |
curr = conn.cursor() | |
curr.execute('SELECT count(key) FROM advert') | |
print 'row number: %s' % curr.fetchone() | |
conn.rollback() | |
curr.close() | |
conn.close() | |
@measure_time | |
def create_indexes(): | |
conn = psycopg2.connect("dbname=test user=piotrek") | |
curr = conn.cursor() | |
curr.execute("CREATE INDEX advert_category_path_gist_idx ON advert USING gist(category_path);") | |
curr.execute("CREATE INDEX advert_site_created_idx ON advert (site, created);") | |
conn.commit() | |
curr.close() | |
conn.close() | |
def get_size(name): | |
conn = psycopg2.connect("dbname=test user=piotrek") | |
curr = conn.cursor() | |
curr.execute("SELECT pg_size_pretty(pg_total_relation_size(%s));", (name,)) | |
result = curr.fetchone() | |
conn.rollback() | |
curr.close() | |
conn.close() | |
return result | |
@measure_time | |
def dummy_query_hits(n_queries): | |
print 'this may take some time...' | |
conn = psycopg2.connect("dbname=test user=piotrek") | |
curr = conn.cursor() | |
for x in range(n_queries): | |
site = random.randint(1, 20) | |
offset = random.randint(1000, 6000) | |
c_path = random.choice(C_PATH) | |
curr.execute(""" | |
select 1 from advert where | |
site = %s and | |
category_path <@ %s | |
order by created desc limit 1 offset %s | |
""", (site, c_path, offset)) | |
curr.fetchall() | |
conn.commit() | |
conn.close() | |
@measure_time | |
def main(): | |
#create_db() | |
#insert_data() | |
#count_data() | |
#create_indexes() | |
print "adverts size: %s" % get_size("advert") | |
print "gist index size: %s" % get_size("advert_category_path_gist_idx") | |
print "site/created index size: %s" % get_size("advert_site_created_idx") | |
dummy_query_hits(60) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment