Skip to content

Instantly share code, notes, and snippets.

@yentsun
Created February 2, 2012 07:31
Show Gist options
  • Save yentsun/1722184 to your computer and use it in GitHub Desktop.
Save yentsun/1722184 to your computer and use it in GitHub Desktop.
An example of indexing records from a database with PyLucene 2.3 for later querying from Zend_Search_Lucene.
# -*- coding: utf-8 -*-
import ConfigParser
import os
import shutil
import subprocess
import sys
from optparse import OptionParser

import lucene
from sqlalchemy import MetaData, Table, create_engine, orm
INDEX_DIRECTORY = '%s/items' % os.path.dirname(os.path.realpath(__file__))
class Model(object):
    """Plain class that create_db_session() maps onto the 'tbl_models' table."""
class Article(object):
    """Plain class that create_db_session() maps onto the 'tbl_goods' table."""
class Pic(object):
    """Plain class that create_db_session() maps onto the 'tbl_pics' table."""
class Variant(object):
    """Plain class that create_db_session() maps onto the 'tbl_variants' table."""
def create_db_session():
    """Open a database session configured from the application's ini file.

    The ini section to use is taken from the ``-e``/``--env`` command-line
    option (default ``production``). Database credentials are read from
    ``<INDEX_DIRECTORY>/../../configs/application.ini``, the four table
    classes (Model, Article, Pic, Variant) are mapped onto their reflected
    tables, and a new session bound to the engine is returned.

    Returns:
        A session instance produced by ``orm.sessionmaker`` for the engine.
    """
    # Command line: which ini section holds the connection parameters.
    option_parser = OptionParser()
    option_parser.add_option(
        "-e", "--env", dest="environment",
        help="use environment ENV section", metavar="ENV", default='production')
    options, _unused_args = option_parser.parse_args()
    section = options.environment

    settings = ConfigParser.ConfigParser()
    settings.read('%s/../../configs/application.ini' % INDEX_DIRECTORY)

    # Looked up in this order: user, password, host, database name.
    param_keys = (
        'resources.db.params.username',
        'resources.db.params.password',
        'resources.db.params.host',
        'resources.db.params.dbname',
    )
    raw_params = [settings.get(section, key) for key in param_keys]
    # Double quotes are dropped from every value (presumably the ini file
    # stores them quoted -- confirm against the actual config).
    conn_string = 'mysql://%s:%s@%s/%s?charset=cp1251' % tuple(
        value.replace('"', '') for value in raw_params)

    engine = create_engine(conn_string)
    session_factory = orm.sessionmaker(engine)

    # Reflect all four tables first, then attach the classes to them.
    metadata = MetaData()
    table_names = ('tbl_models', 'tbl_goods', 'tbl_pics', 'tbl_variants')
    tables = [
        Table(table_name, metadata, autoload=True, autoload_with=engine)
        for table_name in table_names
    ]
    for mapped_class, table in zip((Model, Article, Pic, Variant), tables):
        orm.mapper(mapped_class, table)

    return session_factory()
def progressbar(it, prefix="", size=30):
    """Yield the items of ``it`` while drawing a text progress bar on stdout.

    The bar is redrawn in place (carriage return, no newline) before the
    first item and after each item has been consumed by the caller.

    Args:
        it: iterable that also exposes a zero-argument ``count()`` method
            returning how many items it will produce.
        prefix: text written in front of the bar.
        size: width of the bar in characters.

    Yields:
        Every item of ``it``, in order.
    """
    total = it.count()

    def _show(done):
        # An empty input has nothing left to process: draw it as complete
        # instead of dividing by zero.
        if total == 0:
            filled = size
        else:
            filled = int(size * done // total)
        sys.stdout.write("\r%s[%s%s] %i/%i" % (
            prefix, "#" * filled, "." * (size - filled), done, total))
        sys.stdout.flush()

    _show(0)
    for done, item in enumerate(it, 1):
        yield item
        _show(done)
    sys.stdout.write("\r \r\n")
def _build_document(session, model):
    """Build the Lucene document for one Model row and its Article rows."""
    model_id = model.model_id
    articles = session.\
        query(Article).\
        filter(Article.good_modelid == model_id).\
        group_by(Article.good_color).\
        all()
    # Ids and colors of the related articles become space-separated,
    # tokenized (searchable) but not stored fields.
    articles_joined = ' '.join([article.good_id for article in articles])
    colors_joined = ' '.join([article.good_color for article in articles])

    field = lucene.Field
    stored, not_stored = field.Store.YES, field.Store.NO
    tokenized, untokenized, not_indexed = (
        field.Index.TOKENIZED, field.Index.UN_TOKENIZED, field.Index.NO)

    document = lucene.Document()
    # NOTE(review): the id is passed unconverted while price/new go through
    # str() -- confirm model_id is already a string.
    document.add(field('id', model_id, stored, untokenized))
    document.add(field('articles', articles_joined, not_stored, tokenized))
    document.add(field('colors', colors_joined, not_stored, tokenized))
    document.add(field('title', model.model_name, stored, tokenized))
    document.add(field('image', model.model_pic, stored, not_indexed))
    document.add(field('description', model.model_description, not_stored, tokenized))
    document.add(field('price', str(model.model_min_price), stored, not_indexed))
    document.add(field('new', str(model.model_new), stored, not_indexed))
    return document


def run_index(session):
    """Index every Model row into the module-level ``index_writer``.

    One Lucene document is added per model while a progress bar is drawn on
    stdout. After the last model the index is optimized. The writer is
    closed whether or not indexing succeeded, so a failure part-way through
    does not leave it open.

    Args:
        session: database session whose ``query()`` is used to load the
            Model and Article rows.
    """
    try:
        # The progress prefix is Russian for "Indexed: ".
        for model in progressbar(session.query(Model), 'Проиндексировано: '):
            index_writer.addDocument(_build_document(session, model))
        index_writer.optimize()
    finally:
        index_writer.close()
def _clear_directory(path):
    """Delete every non-hidden entry of ``path``, like a shell ``rm -rf path/*``.

    Does nothing when ``path`` is not an existing directory. Removal errors
    propagate so that a stale index is never silently kept.
    """
    if not os.path.isdir(path):
        return
    for name in os.listdir(path):
        if name.startswith('.'):
            # A shell '*' glob does not match dot-files; leave them alone.
            continue
        entry = os.path.join(path, name)
        if os.path.isdir(entry) and not os.path.islink(entry):
            shutil.rmtree(entry)
        else:
            os.remove(entry)


session = create_db_session()
lucene.initVM()
analyzer = lucene.RussianAnalyzer()
# Start from an empty index directory. The previous
# subprocess.call(['rm', '-rf', '<dir>/*']) ran without a shell, so the '*'
# was never expanded and nothing was actually deleted.
_clear_directory(INDEX_DIRECTORY)
index_writer = lucene.IndexWriter(INDEX_DIRECTORY, analyzer)
run_index(session)
# NOTE(review): 0777 makes the index world-writable; presumably needed so
# another process/user can use it -- confirm a narrower mode is not enough.
subprocess.call(['chmod', '-R', '0777', INDEX_DIRECTORY])
@yentsun
Copy link
Author

yentsun commented Feb 2, 2012

Note: Only an index created with PyLucene 2.3 is compatible with Zend_Search_Lucene!

Useful links:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment