Created
April 10, 2011 21:21
-
-
Save mjbommar/912734 to your computer and use it in GitHub Desktop.
Search a Lucene index of the U.S. Code built from XHTML.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @author Michael J Bommarito II
 * @date Apr 9, 2011
 * @license MIT, (C) Michael J Bommarito II 2011
 */
package org.mjb; | |
// Java standard library imports | |
import java.io.*; | |
// Lucene imports | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.index.*; | |
import org.apache.lucene.search.*; | |
import org.apache.lucene.store.*; | |
public class searchCodeIndex { | |
// Lucene index searcher | |
private static Searcher indexSearcher; | |
public static void main(String[] args) { | |
// Check if the proper command line was passed. | |
if (args.length != 2) { | |
System.err.println("Usage: searchCodeIndex <field> <term>"); | |
System.exit(-1); | |
} | |
// Load the index | |
loadIndex("index/"); | |
// Setup the query | |
Term term = new Term(args[0], args[1]); | |
Query termQuery = new TermQuery(term); | |
try { | |
// Execute the search and iterate over the top documents. | |
TopDocs topDocs = indexSearcher.search(termQuery, 5); | |
ScoreDoc[] scoreDosArray = topDocs.scoreDocs; | |
for (ScoreDoc scoredoc : scoreDosArray) { | |
// Retrieve the matched document and show relevant details | |
Document doc = indexSearcher.doc(scoredoc.doc); | |
// Output basic document information | |
String documentID = doc.getField("documentid").stringValue(); | |
System.out.println("documentid:" | |
+ documentID.replace("_", " U.S.C. ")); | |
System.out.println("currentthrough:" | |
+ doc.getField("currentthrough").stringValue()); | |
System.out.println("score:" + scoredoc.score); | |
// Output the path information for the document | |
String[] paths = doc.getField("itempath").stringValue() | |
.split("/"); | |
String tabBuffer = ""; | |
System.out.println("itempath:"); | |
System.out.println("Title " | |
+ Integer.valueOf(documentID.split("_")[0])); | |
for (int i = 2; i < paths.length - 1; i++) { | |
System.out.println(tabBuffer + paths[i]); | |
tabBuffer += ">"; | |
} | |
System.out.println(tabBuffer | |
+ doc.getField("head").stringValue()); | |
System.out.println(); | |
} | |
} catch (Exception E) { | |
E.printStackTrace(); | |
} | |
} | |
private static void loadIndex(String indexPath) { | |
// Load the index searcher | |
try { | |
Directory indexDir = FSDirectory.open(new File(indexPath)); | |
indexSearcher = new IndexSearcher(indexDir); | |
} catch (Exception E) { | |
E.printStackTrace(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment