Created
February 2, 2012 07:31
-
-
Save yentsun/1722184 to your computer and use it in GitHub Desktop.
An example of indexing records from a database with PyLucene 2.3 for later querying from Zend_Search_Lucene.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os, sys, subprocess | |
import ConfigParser | |
import lucene | |
from optparse import OptionParser | |
from sqlalchemy import MetaData, Table, create_engine, orm | |
# Directory holding the Lucene index: an "items" folder next to this script.
_SCRIPT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
INDEX_DIRECTORY = _SCRIPT_DIRECTORY + '/items'
class Model(object):
    """Empty placeholder class that map_tables() binds to the tbl_models table."""
class Article(object):
    """Empty placeholder class that map_tables() binds to the tbl_goods table."""
class Pic(object):
    """Empty placeholder class that map_tables() binds to the tbl_pics table."""
class Variant(object):
    """Empty placeholder class that map_tables() binds to the tbl_variants table."""
def create_db_session():
    """Create a SQLAlchemy session for the database described in application.ini.

    The INI section is chosen with the ``-e/--env`` command-line option
    (default ``production``).  The ``resources.db.params.*`` values of that
    section are used to build a MySQL connection URL, the ``tbl_*`` tables are
    reflected and mapped onto the module's placeholder classes, and a new
    session bound to the engine is returned.

    Returns:
        A new session created by ``orm.sessionmaker(engine)``.

    Raises:
        IOError: the configuration file could not be read.
        ConfigParser.NoSectionError, ConfigParser.NoOptionError: the selected
            section or one of the expected keys is missing.
    """
    # Local import keeps the file's import header untouched; this is the
    # Python 2 location of quote_plus, matching the ConfigParser import.
    from urllib import quote_plus

    parser = OptionParser()
    parser.add_option("-e", "--env", dest="environment",
                      help="use environment ENV section", metavar="ENV",
                      default='production')
    (options, args) = parser.parse_args()
    section = options.environment

    config = ConfigParser.ConfigParser()
    config_file_path = '%s/../../configs/application.ini' % INDEX_DIRECTORY
    # ConfigParser.read() silently skips unreadable files and returns the
    # list of files it did parse; fail early with the offending path instead
    # of a later, less helpful NoSectionError.
    if not config.read(config_file_path):
        raise IOError('Cannot read configuration file: %s' % config_file_path)

    def _param(name):
        # Values in the INI file may be wrapped in double quotes; drop them.
        raw = config.get(section, 'resources.db.params.%s' % name)
        return raw.replace('"', '')

    db_user = _param('username')
    db_pass = _param('password')
    db_host = _param('host')
    db_name = _param('dbname')

    # The password is URL-encoded so that characters such as '@', '/', ':'
    # or '+' cannot be mistaken for URL syntax when SQLAlchemy parses it.
    conn_string = 'mysql://%s:%s@%s/%s?charset=cp1251' % (
        db_user, quote_plus(db_pass), db_host, db_name)
    engine = create_engine(conn_string)
    MySQLSession = orm.sessionmaker(engine)

    def map_tables(engine):
        """Reflect the four tables and map each onto its placeholder class."""
        metadata = MetaData()
        models = Table('tbl_models', metadata, autoload=True, autoload_with=engine)
        articles = Table('tbl_goods', metadata, autoload=True, autoload_with=engine)
        pics = Table('tbl_pics', metadata, autoload=True, autoload_with=engine)
        variant = Table('tbl_variants', metadata, autoload=True, autoload_with=engine)
        orm.mapper(Model, models)
        orm.mapper(Article, articles)
        orm.mapper(Pic, pics)
        orm.mapper(Variant, variant)

    map_tables(engine)
    return MySQLSession()
def progressbar(it, prefix = "", size = 30):
    """Yield the items of ``it`` while drawing a text progress bar on stdout.

    Args:
        it: an iterable that also provides a ``count()`` method returning the
            total number of items (for example a SQLAlchemy query).
        prefix: text printed in front of the bar.
        size: width of the bar in characters.

    Yields:
        Each item of ``it``, in order.  The bar is redrawn after the consumer
        has finished with each item.
    """
    count = it.count()

    def _show(done):
        # An empty source has nothing to fill; guard against dividing by zero.
        filled = int(size * done / count) if count else 0
        sys.stdout.write("\r%s[%s%s] %i/%i" % (
            prefix, "#" * filled, "." * (size - filled), done, count))
        sys.stdout.flush()

    _show(0)
    for i, item in enumerate(it):
        yield item
        _show(i + 1)
    sys.stdout.write("\r \r\n")
def run_index(session):
    """Add one Lucene document per Model row, then optimize and close the index.

    For every model, the related Article rows (matched on ``good_modelid`` and
    grouped by ``good_color``) contribute their ids and colors as two
    space-separated searchable fields.  Documents are written through the
    module-level ``index_writer``.

    Args:
        session: SQLAlchemy session used to query Model and Article.

    Note:
        ``index_writer`` is closed even when indexing fails, so the writer is
        not left open; ``optimize()`` only runs after a complete pass.
    """
    store = lucene.Field.Store
    index = lucene.Field.Index

    models = session.query(Model)
    try:
        for model in progressbar(models, 'Проиндексировано: '):
            model_id = model.model_id
            # Run the query once and reuse the rows for both lists.
            related = list(
                session.query(Article)
                .filter(Article.good_modelid == model_id)
                .group_by(Article.good_color)
            )
            articles_joined = ' '.join([row.good_id for row in related])
            colors_joined = ' '.join([row.good_color for row in related])

            # (name, value, stored?, indexing mode), in the order the
            # fields are added to the document.
            field_specs = (
                ('id', model_id, store.YES, index.UN_TOKENIZED),
                ('articles', articles_joined, store.NO, index.TOKENIZED),
                ('colors', colors_joined, store.NO, index.TOKENIZED),
                ('title', model.model_name, store.YES, index.TOKENIZED),
                ('image', model.model_pic, store.YES, index.NO),
                ('description', model.model_description, store.NO, index.TOKENIZED),
                ('price', str(model.model_min_price), store.YES, index.NO),
                ('new', str(model.model_new), store.YES, index.NO),
            )

            document = lucene.Document()
            for name, value, stored, indexed in field_specs:
                document.add(lucene.Field(name, value, stored, indexed))
            index_writer.addDocument(document)

        index_writer.optimize()
    finally:
        index_writer.close()
# Script body: rebuild the Lucene index from the database.
# glob/shutil are imported here because only this block needs them.
import glob
import shutil

session = create_db_session()
lucene.initVM()
analyzer = lucene.RussianAnalyzer()

# Empty the index directory before writing.  The previous
# subprocess.call(['rm', '-rf', '<dir>/*']) ran without a shell, so the '*'
# was passed to rm literally and nothing was removed.  glob skips dotfiles,
# just as a shell-expanded '*' would.
for stale_path in glob.glob(os.path.join(INDEX_DIRECTORY, '*')):
    if os.path.isdir(stale_path) and not os.path.islink(stale_path):
        shutil.rmtree(stale_path)
    else:
        os.remove(stale_path)

index_writer = lucene.IndexWriter(INDEX_DIRECTORY, analyzer)
run_index(session)

# NOTE(review): makes the index world-readable/writable, presumably so the
# PHP side (Zend_Search_Lucene) can open it - confirm this is still required.
subprocess.call(['chmod', '-R', '0777', INDEX_DIRECTORY])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Note: Only an index created with PyLucene 2.3 is compatible with Zend_Search_Lucene!
Useful links: