Skip to content

Instantly share code, notes, and snippets.

@gtke
Created June 14, 2013 15:43
Show Gist options
  • Select an option

  • Save gtke/5782859 to your computer and use it in GitHub Desktop.

Select an option

Save gtke/5782859 to your computer and use it in GitHub Desktop.
Indexing/Searching Demo in Lucene. [still incomplete + using some deprecated methods]
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.util.Scanner;
/**
 * Minimal Lucene 4.3 demo: indexes every readable {@code .txt} file found directly
 * inside {@link #dataDir} into an on-disk index at {@link #indexDir}, then reads one
 * query line from standard input and prints the full path of each matching file.
 *
 * <p>All state is held in static fields, so this class is intended for single-threaded,
 * one-shot use from {@link #main(String[])}.
 */
public class Test {
    /** Lucene compatibility version shared by the analyzer, writer config and query parser. */
    private static final Version LUCENE_VERSION = Version.LUCENE_43;
    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    private static StandardAnalyzer analyzer;
    private static IndexWriter writer;
    private static IndexSearcher searcher;
    private static IndexReader reader;
    private static String indexDir = "path";
    private static String dataDir = "path";
    private static QueryParser parser;
    private static Query query;

    /**
     * Runs the demo: index the data directory, then run a single interactive search.
     *
     * @param args ignored
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void main(String[] args) throws IOException, ParseException, Exception {
        analyzer = new StandardAnalyzer(LUCENE_VERSION);
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
            writer = new IndexWriter(dir, config);

            // 1. Indexing
            int numIndexed = 0;
            try {
                numIndexed = index(dataDir, new TextFilesFilter());
            } finally {
                System.out.println("# documents indexed: " + numIndexed);
                writerClose();
            }

            // 2. Searching
            reader = DirectoryReader.open(dir);
            searcher = new IndexSearcher(reader);
            // The Scanner is deliberately not closed: closing it would close System.in.
            Scanner input = new Scanner(System.in);
            System.out.print("Search: ");
            String s = input.nextLine();
            search(indexDir, s);
        } finally {
            // search() normally closes the reader already; closing again is harmless and
            // covers the paths where search() was never reached.
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                dir.close();
            }
        }
    }

    // Index/Searching methods

    /**
     * Commits pending changes and closes the shared {@link IndexWriter}.
     * The writer is closed even when the commit fails, so the index write lock is released.
     *
     * @throws IOException if committing or closing fails
     */
    public static void writerClose() throws IOException {
        if (writer == null) {
            return;
        }
        try {
            writer.commit();
        } finally {
            writer.close();
        }
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dataDir}
     * that is accepted by {@code filter}. Sub-directories are not descended into.
     *
     * @param dataDir directory whose files are indexed
     * @param filter  file filter to apply, or {@code null} to accept every file
     * @return the value of {@code writer.numDocs()} after indexing, i.e. the number of
     *         documents the writer reports for the whole index
     * @throws IOException if {@code dataDir} cannot be listed (missing, not a directory,
     *                     or an I/O error)
     * @throws Exception   if reading or indexing a file fails
     */
    public static int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing; report it with the offending path.
            throw new IOException("Cannot list data directory: " + dataDir);
        }
        for (File f : files) {
            boolean indexable = !f.isDirectory()
                    && !f.isHidden()
                    && f.exists()
                    && f.canRead()
                    && (filter == null || filter.accept(f));
            if (indexable) {
                indexFile(f);
            }
        }
        Directory temp = writer.getDirectory();
        System.out.println(temp);
        return writer.numDocs();
    }

    /** Accepts files whose name ends in {@code .txt}, ignoring case. */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the Lucene document for one file.
     *
     * <ul>
     *   <li>{@code contents} - tokenized file text, read lazily through a {@link FileReader}
     *       (platform default charset), not stored</li>
     *   <li>{@code filename} - file name, indexed as a single token and stored</li>
     *   <li>{@code fullpath} - canonical path, indexed as a single token and stored</li>
     * </ul>
     *
     * @param f file to wrap
     * @return the document; its {@code contents} reader is still open and is consumed
     *         when the document is added to the index
     * @throws Exception if the file cannot be opened or its canonical path resolved
     */
    protected static Document getDocument(File f) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", new FileReader(f)));
        doc.add(new StringField("filename", f.getName(), Field.Store.YES));
        doc.add(new StringField("fullpath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Adds one file to the index through the shared writer and makes sure the file's
     * reader is closed afterwards, including when indexing fails part-way.
     */
    private static void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        try {
            writer.addDocument(doc);
        } finally {
            java.io.Reader contents = doc.getField("contents").readerValue();
            if (contents != null) {
                contents.close();
            }
        }
    }

    /**
     * Parses {@code s} against the {@code contents} field, prints the total hit count and
     * the stored {@code fullpath} of up to {@value #MAX_HITS} top hits, then closes the
     * shared reader (so this method can be called once per opened reader).
     *
     * @param indexDir unused; kept for signature compatibility. The search always runs
     *                 against the searcher prepared by {@link #main(String[])}
     * @param s        query string in classic Lucene query syntax
     * @throws IOException           if reading the index fails
     * @throws ParseException        if {@code s} is not a valid query
     * @throws IllegalStateException if the analyzer, reader or searcher has not been set up
     */
    public static void search(String indexDir, String s) throws IOException, ParseException {
        if (analyzer == null || reader == null || searcher == null) {
            throw new IllegalStateException("search() called before the index was opened");
        }
        try {
            parser = new QueryParser(LUCENE_VERSION, "contents", analyzer);
            query = parser.parse(s);
            TopDocs hits = searcher.search(query, MAX_HITS);
            System.out.println("Found: " + hits.totalHits);
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println(doc.get("fullpath"));
            }
        } finally {
            reader.close();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment