Lucene Fuzzy Query Edit Distance
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class LuceneSearchTest {

    public static void main(String[] args) throws IOException, ParseException {
        // English analyzer (lowercases, removes stop words, applies Porter stemming)
        Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_48);

        // In-memory index
        final Directory directory = new RAMDirectory();
        final IndexWriterConfig config = new IndexWriterConfig(
                Version.LUCENE_48, analyzer);
        final IndexWriter writer = new IndexWriter(directory, config);

        // Document with an analyzed field (TextField)
        final Document document1 = new Document();
        document1.add(new TextField("representation", "SALICYLIC", Store.YES));

        // Add the document and close the writer
        writer.addDocument(document1);
        writer.commit();
        writer.close();

        // Open the index and parse a fuzzy query; AnalyzingQueryParser also
        // runs the fuzzy term through the analyzer before building the query
        final IndexReader indexReader = DirectoryReader.open(directory);
        final QueryParser qp = new AnalyzingQueryParser(Version.LUCENE_48,
                "representation", analyzer);
        final Query query = qp.parse("salisyic~");
        System.out.println("Query type: " + query.getClass().getSimpleName());
        System.out.println("Parsed query: " + query);

        final IndexSearcher searcher = new IndexSearcher(indexReader);
        final TopDocs docs = searcher.search(query, 100);
        System.out.println("Found " + docs.scoreDocs.length + " results");

        // Show what the analyzer produces for the indexed term and the misspelling
        System.out.println("---");
        tokenize(analyzer, "SALICYLIC");
        System.out.println("---");
        tokenize(analyzer, "SALISYIC");
    }

    private static void tokenize(Analyzer analyzer, String term) throws IOException,
            ParseException {
        System.out.println("Analyzer: " + analyzer.getClass());
        System.out.println("Term: " + term);
        TokenStream stream = analyzer.tokenStream("representation",
                new StringReader(term));
        CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(cattr.toString());
        }
        stream.end();
        stream.close();
    }
}
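For reference, a bare `~` in Lucene 4.x query syntax defaults to a maximum edit distance of 2. If you want a tighter or explicit bound, you can either pass the distance in the query string or build the FuzzyQuery yourself. A minimal sketch, reusing the qp and searcher from main() above and assuming the Porter-stemmed index term is "salicyl" (the only extra imports needed are org.apache.lucene.index.Term and org.apache.lucene.search.FuzzyQuery):

// Explicit edit distance via the query syntax: allow at most 1 edit.
// AnalyzingQueryParser still analyzes "salisyic" before building the FuzzyQuery.
Query oneEdit = qp.parse("salisyic~1");
System.out.println("Parsed query: " + oneEdit);
System.out.println("Hits: " + searcher.search(oneEdit, 100).totalHits);

// Building the FuzzyQuery directly bypasses analysis entirely, so the term
// must already match what is stored in the index (here: the assumed stem "salicyl").
Query direct = new FuzzyQuery(new Term("representation", "salicyl"), 2);
System.out.println("Hits: " + searcher.search(direct, 100).totalHits);

Whether the one-edit variant still matches depends on how far apart the two stemmed forms end up, which is exactly what the tokenize() output above lets you check.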