Created
June 14, 2013 15:43
-
-
Save gtke/5782859 to your computer and use it in GitHub Desktop.
Indexing/Searching Demo in Lucene. [still incomplete + using some deprecated methods]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
| import org.apache.lucene.document.Document; | |
| import org.apache.lucene.document.Field; | |
| import org.apache.lucene.document.FieldType; | |
| import org.apache.lucene.document.StringField; | |
| import org.apache.lucene.document.TextField; | |
| import org.apache.lucene.index.DirectoryReader; | |
| import org.apache.lucene.index.IndexReader; | |
| import org.apache.lucene.index.IndexWriter; | |
| import org.apache.lucene.index.IndexWriterConfig; | |
| import org.apache.lucene.queryparser.classic.ParseException; | |
| import org.apache.lucene.queryparser.classic.QueryParser; | |
| import org.apache.lucene.search.IndexSearcher; | |
| import org.apache.lucene.search.Query; | |
| import org.apache.lucene.search.ScoreDoc; | |
| import org.apache.lucene.search.TopDocs; | |
| import org.apache.lucene.search.TopScoreDocCollector; | |
| import org.apache.lucene.store.Directory; | |
| import org.apache.lucene.store.FSDirectory; | |
| import org.apache.lucene.store.RAMDirectory; | |
| import org.apache.lucene.util.Version; | |
| import java.io.File; | |
| import java.io.FileFilter; | |
| import java.io.FileReader; | |
| import java.io.IOException; | |
| import java.util.Scanner; | |
| public class Test { | |
| private static StandardAnalyzer analyzer; | |
| private static IndexWriter writer; | |
| private static IndexSearcher searcher; | |
| private static IndexReader reader; | |
| private static String indexDir = "path"; | |
| private static String dataDir = "path"; | |
| private static QueryParser parser; | |
| private static Query query; | |
| public static void main(String[] args) throws IOException, ParseException, Exception { | |
| analyzer = new StandardAnalyzer(Version.LUCENE_43); | |
| Directory dir = FSDirectory.open(new File(indexDir)); | |
| IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); | |
| writer = new IndexWriter(dir, config); | |
| // 1. Indexing | |
| int numIndexed = 0; | |
| try{ | |
| numIndexed = index(dataDir, new TextFilesFilter()); | |
| }finally{ | |
| System.out.println("# documents indexed: " + numIndexed); | |
| writerClose(); | |
| } | |
| //2. Searching | |
| reader = DirectoryReader.open(dir); | |
| searcher = new IndexSearcher(reader); | |
| Scanner input = new Scanner(System.in); | |
| System.out.print("Search: "); | |
| String s = input.nextLine(); | |
| search(dataDir, s); | |
| } | |
| // Index/Searching methods | |
| public static void writerClose() throws IOException{ | |
| writer.commit(); | |
| writer.close(); | |
| } | |
| public static int index(String dataDir, FileFilter filter)throws Exception{ | |
| File [] files = new File(dataDir).listFiles(); | |
| for(File f : files){ | |
| if(!f.isDirectory()&& | |
| !f.isHidden()&& | |
| f.exists()&& | |
| f.canRead()&& | |
| (filter == null || filter.accept(f))){ | |
| indexFile(f); | |
| } | |
| } | |
| Directory temp = writer.getDirectory(); | |
| System.out.println(temp); | |
| return writer.numDocs(); // return number of documents indexed | |
| } | |
| private static class TextFilesFilter implements FileFilter{ | |
| public boolean accept(File path){ | |
| return path.getName().toLowerCase().endsWith(".txt"); | |
| } | |
| } | |
| @SuppressWarnings("deprecation") | |
| protected static Document getDocument(File f) throws Exception{ | |
| Document doc = new Document(); | |
| doc.add(new Field("contents", new FileReader(f))); | |
| doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
| doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
| return doc; | |
| } | |
| private static void indexFile(File f) throws Exception{ | |
| System.out.println("Indexing " + f.getCanonicalPath()); | |
| Document doc = getDocument(f); | |
| writer.addDocument(doc); | |
| } | |
| public static void search(String indexDir, String s) throws IOException, ParseException{ | |
| Directory dir = FSDirectory.open(new File(indexDir)); | |
| parser = new QueryParser(Version.LUCENE_43,"contents",analyzer); | |
| query = parser.parse(s); | |
| TopDocs hits = searcher.search(query, 10); | |
| System.out.println("Found: " + hits.totalHits); | |
| for(ScoreDoc scoreDoc : hits.scoreDocs){ | |
| Document doc = searcher.doc(scoreDoc.doc); | |
| System.out.println(doc.get("fullpath")); | |
| } | |
| reader.close(); | |
| } | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment