@dustinschultz
Last active August 29, 2015 14:20
Lucene Fuzzy Query Edit Distance
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class LuceneSearchTest {

    public static void main(String[] args) throws IOException, ParseException {
        // English analyzer: lowercases, removes English stop words, and stems terms
        final Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_48);

        // In-memory index
        final Directory directory = new RAMDirectory();
        final IndexWriterConfig config = new IndexWriterConfig(
                Version.LUCENE_48, analyzer);
        final IndexWriter writer = new IndexWriter(directory, config);

        // Document with an analyzed field (TextField)
        final Document document1 = new Document();
        document1.add(new TextField("representation", "SALICYLIC", Store.YES));

        // Add the document and close the writer
        writer.addDocument(document1);
        writer.commit();
        writer.close();

        // Parse a fuzzy query; AnalyzingQueryParser analyzes the query term
        // (unlike the classic parser, which leaves fuzzy terms unanalyzed),
        // and a bare "~" uses the default maximum edit distance of 2
        final IndexReader indexReader = DirectoryReader.open(directory);
        final QueryParser qp = new AnalyzingQueryParser(Version.LUCENE_48,
                "representation", analyzer);
        final Query query = qp.parse("salisyic~");
        System.out.println("Query type: " + query.getClass().getSimpleName());
        System.out.println("Parsed query: " + query);

        // Search and report the number of hits
        final IndexSearcher searcher = new IndexSearcher(indexReader);
        final TopDocs docs = searcher.search(query, 100);
        System.out.println("Found " + docs.scoreDocs.length + " results");
        indexReader.close();

        // Show what the analyzer produces for the indexed term and the misspelling
        System.out.println("---");
        tokenize(analyzer, "SALICYLIC");
        System.out.println("---");
        tokenize(analyzer, "SALISYIC");
    }

    private static void tokenize(Analyzer analyzer, String term) throws IOException {
        System.out.println("Analyzer: " + analyzer.getClass());
        System.out.println("Term: " + term);
        final TokenStream stream = analyzer.tokenStream("representation",
                new StringReader(term));
        final CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(cattr.toString());
        }
        stream.end();
        stream.close();
    }
}
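
For comparison, below is a minimal sketch (assuming the same Lucene 4.8 API) that builds a FuzzyQuery directly with an explicit maxEdits instead of relying on the parser's bare "~". The field name mirrors the gist, but the term text is only illustrative: FuzzyQuery does not run the analyzer, so in practice the term should be whatever analyzed form tokenize() prints above. Note that Lucene 4.x caps maxEdits at LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE, which is 2.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;

public class ExplicitFuzzyQueryExample {
    public static void main(String[] args) {
        // Hypothetical analyzed term text; FuzzyQuery bypasses analysis, so pass
        // the term in its indexed (lowercased/stemmed) form.
        final Term term = new Term("representation", "salicyl");

        // Explicit edit distances; values above 2 are rejected in Lucene 4.x.
        final Query oneEdit = new FuzzyQuery(term, 1);
        final Query twoEdits = new FuzzyQuery(term, 2);

        System.out.println("maxEdits=1: " + oneEdit);
        System.out.println("maxEdits=2: " + twoEdits);
    }
}

The classic query parser syntax also accepts an explicit distance after the tilde, e.g. qp.parse("salisyic~1").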