Created
July 4, 2011 09:08
-
-
Save mattwarren/1063112 to your computer and use it in GitHub Desktop.
Wildcards with keyword analyser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Demonstrates prefix/wildcard queries against an in-memory index whose
/// "Name" field is analyzed with <c>KeywordAnalyzer</c>.
/// Builds the index, lists every indexed term with its document frequency,
/// then runs a fixed set of queries and prints the hits and timing for each.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var dir = new RAMDirectory();
    var analyzer = new KeywordAnalyzer();

    // "MRS. SHABA" appears twice on purpose so that one term is reported
    // with a document frequency greater than one.
    var names = new[]
    {
        "MRS. SHABA", "MRS. SHABA", "MRS. SMITH",
        "HUGO SHABA", "MENTOS SHABA", "MENTOS SHABAX"
    };

    var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        foreach (var name in names)
        {
            var doc = new Document();
            doc.Add(new Field("Name", name, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
            writer.AddDocument(doc);
        }
    }
    finally
    {
        // Close even if indexing fails so the writer does not stay open.
        writer.Close(true);
    }

    var searcher = new IndexSearcher(dir, true);
    try
    {
        // Dump every term in the index together with its document frequency.
        Console.WriteLine("Indexed Terms:");
        var termEnum = searcher.GetIndexReader().Terms();
        try
        {
            while (termEnum.Next())
            {
                var term = termEnum.Term();
                var freq = termEnum.DocFreq();
                Console.WriteLine("\t {0}:{1,-20} (Count = {2})", term.Field(), term.Text(), freq);
            }
        }
        finally
        {
            termEnum.Close();
        }
        Console.WriteLine();

        var queryParser = new QueryParser(Version.LUCENE_29, "", analyzer);
        // The indexed values are upper case, so the parser must not lowercase
        // the expanded (prefix/wildcard) terms or nothing would match.
        queryParser.SetLowercaseExpandedTerms(false);

        //have to escape ' ' <SPACE> characters
        var queries = new[] {"Name:MRS.*", "Name:MRS.\\ S*", "Name:MENTOS*"};
        foreach (var queryText in queries)
        {
            var query = queryParser.Parse(queryText);
            Console.WriteLine("Query Used \'{0}\' ({1})", query, query.GetType());

            // Single type test: 'as' yields null for anything that is not a PrefixQuery.
            var prefixQuery = query as PrefixQuery;
            if (prefixQuery != null)
            {
                var prefix = prefixQuery.GetPrefix();
                Console.WriteLine("\tPrefix Query - {0} \"{1}\"", prefix.Field(), prefix.Text());
            }

            var timer = Stopwatch.StartNew();
            var result = searcher.Search(query, 10);
            timer.Stop();

            // Elapsed.TotalMilliseconds is fractional, so the "0.00" format shows
            // real sub-millisecond timings; the integer ElapsedMilliseconds always
            // printed ".00".
            Console.WriteLine("Query took {0:0.00} ms, there were {1} total hits",
                              timer.Elapsed.TotalMilliseconds, result.totalHits);
            Console.WriteLine(String.Join(", ", result.scoreDocs.Select(x => new {DocId = x.doc, Score = x.score})));
            Console.WriteLine();
        }
    }
    finally
    {
        searcher.Close();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment