jorgehatccrma · April 12, 2019 09:33
diff --git a/demoLuceneJython.py b/demoLuceneJython.py
 """
 This simple Jython script shows how to use Apache Lucene
 directly in a Jython script
 """

 # your usual Python imports
 import sys
 from contextlib import contextmanager

 # Put the Lucene jars on sys.path so the org.apache.lucene imports
 # further down can be resolved.
 jars = [
    "lucene-7.1.0/core/lucene-core-7.1.0.jar",
    "lucene-7.1.0/queryparser/lucene-queryparser-7.1.0.jar",
 ]
 sys.path.extend(jars)

 # Now that jars are in the path, we can import java code as if it
 # was regular Python!
 from org.apache.lucene.analysis.standard import StandardAnalyzer
 from org.apache.lucene.document import Document
 from org.apache.lucene.document import Field
 from org.apache.lucene.document import StringField
 from org.apache.lucene.document import TextField
 from org.apache.lucene.index import DirectoryReader
 from org.apache.lucene.index import IndexWriter
 from org.apache.lucene.index import IndexWriterConfig
 from org.apache.lucene.queryparser.classic import QueryParser
 from org.apache.lucene.search import IndexSearcher
 from org.apache.lucene.store import RAMDirectory


 @contextmanager
 def closing(thing):
    """
    Context manager that hands `thing` to the `with` body unchanged and
    calls `thing.close()` once the body is left.

    This lets Lucene objects that expose a `close()` method be used
    in a `with` statement.
    """
    try:
        yield thing
    finally:
        # runs whether the `with` body completed or raised
        thing.close()


 def make_index(analyzer):
    """
    Build the inverted index that powers the search.

    `analyzer` is passed to the IndexWriterConfig used to write the
    documents. Returns the RAMDirectory that holds the finished index.
    """

    # (title, isbn) pairs, in the order they are written to the index
    books = [
        ("Lucene in Action", "193398817"),
        ("Lucene for Dummies", "55320055Z"),
        ("Managing Gigabytes", "55063554A"),
        ("The Art of Computer Science", "9900333X"),
    ]

    directory = RAMDirectory()
    writer_config = IndexWriterConfig(analyzer)

    # the writer is closed as soon as every document has been added
    with closing(IndexWriter(directory, writer_config)) as writer:
        for title, isbn in books:
            entry = Document()
            entry.add(TextField("title", title, Field.Store.YES))
            # isbn goes in a StringField because we don't want it tokenized
            entry.add(StringField("isbn", isbn, Field.Store.YES))
            writer.addDocument(entry)

    return directory


 def query(querystr, index, analyzer, hits_per_page=10):
    """
    Search the index for `querystr` and print the matching documents.

    querystr      -- the query text, parsed with Lucene's QueryParser;
                     terms without an explicit field are looked up in
                     the "title" field.
    index         -- the index directory to open and search (e.g. the
                     one returned by make_index).
    analyzer      -- analyzer used to parse the query; it should be the
                     same one that was used to build the index.
    hits_per_page -- maximum number of hits to fetch and print. Defaults
                     to 10, the value that used to be hard-coded.

    Prints the number of hits returned, then one numbered line per hit
    showing its isbn and title. Returns nothing.
    """

    # the "title" arg specifies the default field to use
    # when no field is explicitly specified in the query.
    q = QueryParser("title", analyzer).parse(querystr)

    # search
    with closing(DirectoryReader.open(index)) as reader:
        searcher = IndexSearcher(reader)
        hits = searcher.search(q, hits_per_page).scoreDocs
        # display results (needs reader to be open)
        print("Found {:d} hits.".format(len(hits)))
        for i, hit in enumerate(hits):
            d = searcher.doc(hit.doc)
            print("{:d}. {}\t{}".format(i + 1, d.get("isbn"), d.get("title")))


 if __name__ == "__main__":

    # One analyzer tokenizes the text; the same analyzer should be
    # used for indexing and for searching.
    analyzer = StandardAnalyzer()

    # build the index that the search runs against
    index = make_index(analyzer)

    # the query is the first command-line argument, or "lucene" when
    # no argument was given
    if len(sys.argv) > 1:
        querystr = sys.argv[1]
    else:
        querystr = "lucene"
    query(querystr, index, analyzer)
	"""
	This simple Jython script shows how to use Apache Lucene
	directly in a Jython script
	"""

	# your usual Python imports
	import sys
	from contextlib import contextmanager

	# Put the Lucene jars on sys.path so the org.apache.lucene imports
	# further down can be resolved.
	jars = [
	    "lucene-7.1.0/core/lucene-core-7.1.0.jar",
	    "lucene-7.1.0/queryparser/lucene-queryparser-7.1.0.jar",
	]
	for jar in jars:
	    sys.path.append(jar)

	# Now that jars are in the path, we can import java code as if it
	# was regular Python!
	from org.apache.lucene.analysis.standard import StandardAnalyzer
	from org.apache.lucene.document import Document
	from org.apache.lucene.document import Field
	from org.apache.lucene.document import StringField
	from org.apache.lucene.document import TextField
	from org.apache.lucene.index import DirectoryReader
	from org.apache.lucene.index import IndexWriter
	from org.apache.lucene.index import IndexWriterConfig
	from org.apache.lucene.queryparser.classic import QueryParser
	from org.apache.lucene.search import IndexSearcher
	from org.apache.lucene.store import RAMDirectory


	@contextmanager
	def closing(thing):
	    """
	    Context manager that hands `thing` to the `with` body unchanged and
	    calls `thing.close()` once the body is left.

	    This lets Lucene objects that expose a `close()` method be used
	    in a `with` statement.
	    """
	    try:
	        yield thing
	    finally:
	        # runs whether the `with` body completed or raised
	        thing.close()


	def make_index(analyzer):
	    """
	    Create an inverted index to power the search.

	    `analyzer` is passed to the IndexWriterConfig used to write the
	    documents. Returns the RAMDirectory that holds the finished index.
	    """

	    def add_doc(w, title, isbn):
	        """ Utility to add "documents" to the index. """
	        doc = Document()
	        doc.add(TextField("title", title, Field.Store.YES))
	        # use a string field for isbn because we don't
	        # want it tokenized
	        doc.add(StringField("isbn", isbn, Field.Store.YES))
	        w.addDocument(doc)

	    # create the index
	    index = RAMDirectory()

	    config = IndexWriterConfig(analyzer)
	    # the writer is closed as soon as every document has been added
	    with closing(IndexWriter(index, config)) as w:
	        add_doc(w, "Lucene in Action", "193398817")
	        add_doc(w, "Lucene for Dummies", "55320055Z")
	        add_doc(w, "Managing Gigabytes", "55063554A")
	        add_doc(w, "The Art of Computer Science", "9900333X")

	    return index


	def query(querystr, index, analyzer, hits_per_page=10):
	    """
	    Search the index for `querystr` and print the matching documents.

	    querystr      -- the query text, parsed with Lucene's QueryParser;
	                     terms without an explicit field are looked up in
	                     the "title" field.
	    index         -- the index directory to open and search (e.g. the
	                     one returned by make_index).
	    analyzer      -- analyzer used to parse the query; it should be the
	                     same one that was used to build the index.
	    hits_per_page -- maximum number of hits to fetch and print. Defaults
	                     to 10, the value that used to be hard-coded.

	    Prints the number of hits returned, then one numbered line per hit
	    showing its isbn and title. Returns nothing.
	    """

	    # the "title" arg specifies the default field to use
	    # when no field is explicitly specified in the query.
	    q = QueryParser("title", analyzer).parse(querystr)

	    # search
	    with closing(DirectoryReader.open(index)) as reader:
	        searcher = IndexSearcher(reader)
	        docs = searcher.search(q, hits_per_page)
	        hits = docs.scoreDocs
	        # display results (needs reader to be open)
	        print("Found {:d} hits.".format(len(hits)))
	        for i, hit in enumerate(hits):
	            docId = hit.doc
	            d = searcher.doc(docId)
	            print("{:d}. {}\t{}".format(i + 1, d.get("isbn"), d.get("title")))


	if __name__ == "__main__":

	    # Specify the analyzer for tokenizing text.
	    # The same analyzer should be used for indexing and searching
	    analyzer = StandardAnalyzer()

	    # create the index to search
	    index = make_index(analyzer)

	    # perform a search; the query is the first command-line argument,
	    # or "lucene" when no argument was given
	    querystr = sys.argv[1] if len(sys.argv) > 1 else "lucene"
	    query(querystr, index, analyzer)