Created
June 30, 2011 13:27
-
-
Save mattwarren/1056231 to your computer and use it in GitHub Desktop.
Lucene string compare bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Globalization; | |
using System.Threading; | |
using Lucene.Net.Analysis; | |
using Lucene.Net.Documents; | |
using Lucene.Net.Index; | |
using Lucene.Net.QueryParsers; | |
using Lucene.Net.Search; | |
using Lucene.Net.Store; | |
using Version = Lucene.Net.Util.Version; | |
using System.Linq; | |
namespace SimpleLuceneTest
{
    /// <summary>
    /// Console repro: indexes a handful of four-letter keyword terms into an
    /// in-memory Lucene index, runs the prefix query <c>Name:da*</c>, then
    /// switches the current thread's culture to "da" and runs the very same
    /// query again so the two outputs can be compared side by side.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the index, lists the indexed terms, and runs the query set
        /// once under the startup culture and once under the "da" culture.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var dir = new RAMDirectory();
            var analyzer = new KeywordAnalyzer();

            var writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            var fields = new[]
            {
                "daab", "bcda", "dacb", "dacb",
                "aacb", "aaac", "bcbb", "acba",
                "aaaa", "dada"
            };
            foreach (var field in fields)
            {
                var doc = new Document();
                doc.Add(new Field("Name", field, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
                writer.AddDocument(doc);
            }
            writer.Close(true);

            IndexSearcher searcher = new IndexSearcher(dir, true);
            try
            {
                // Dump every term in the index together with its document frequency.
                var termEnum = searcher.GetIndexReader().Terms();
                try
                {
                    Console.WriteLine("Indexed Terms:");
                    while (termEnum.Next())
                    {
                        var term = termEnum.Term();
                        var freq = termEnum.DocFreq();
                        Console.WriteLine("\t {0}:{1,-20} (Count = {2})", term.Field(), term.Text(), freq);
                    }
                }
                finally
                {
                    termEnum.Close();
                }
                Console.WriteLine();

                QueryParser queryParser = new QueryParser(Version.LUCENE_29, "", analyzer);
                var queries = new List<string> { "Name:da*" };

                // First pass: whatever culture the process started with.
                RunQueries(queries, queryParser, searcher);

                Console.WriteLine("-------------------------");
                Console.WriteLine("Changing locale to \"da\"");
                Console.WriteLine("-------------------------\n");

                // Second pass: identical queries, parser and searcher; only the
                // thread culture differs.
                // NOTE(review): presumably the differing output comes from
                // culture-sensitive string comparison inside Lucene.Net - confirm.
                var culture = CultureInfo.CreateSpecificCulture("da");
                Thread.CurrentThread.CurrentCulture = culture;
                Thread.CurrentThread.CurrentUICulture = culture;
                RunQueries(queries, queryParser, searcher);
            }
            finally
            {
                // Release the searcher (and its reader) and the directory even
                // if a query throws.
                searcher.Close();
                dir.Close();
            }
        }

        /// <summary>
        /// Parses and executes each query string, printing the parsed query,
        /// the terms a prefix query expands to, the elapsed search time, the
        /// total hit count and the ids of the top 10 hits.
        /// </summary>
        /// <param name="queries">Query strings in QueryParser syntax.</param>
        /// <param name="queryParser">Parser used to turn each string into a query.</param>
        /// <param name="searcher">Searcher the queries are executed against.</param>
        private static void RunQueries(IEnumerable<string> queries, QueryParser queryParser, IndexSearcher searcher)
        {
            foreach (var queryText in queries)
            {
                var query = queryParser.Parse(queryText);
                Console.WriteLine("Query Used \'{0}\'", query);

                // Single cast instead of an 'is' test followed by 'as'.
                var prefixQuery = query as PrefixQuery;
                if (prefixQuery != null)
                {
                    PrintMatchingTerms(prefixQuery, searcher);
                }

                var timer = Stopwatch.StartNew();
                var result = searcher.Search(query, 10);
                timer.Stop();

                // Elapsed.TotalMilliseconds is fractional; ElapsedMilliseconds is a
                // whole number, which made the "0.00" format always print ".00".
                Console.WriteLine("Query took {0:0.00} ms, there were {1} total hits",
                    timer.Elapsed.TotalMilliseconds, result.totalHits);
                Console.WriteLine(String.Join(", ", result.scoreDocs.Select(x => new { DocId = x.doc })));
                Console.WriteLine();
            }
        }

        /// <summary>
        /// Prints every index term the prefix query expands to, with its
        /// document frequency, or a notice when it expands to nothing.
        /// </summary>
        /// <param name="prefixQuery">Prefix query whose term expansion is listed.</param>
        /// <param name="searcher">Searcher whose index reader supplies the terms.</param>
        private static void PrintMatchingTerms(PrefixQuery prefixQuery, IndexSearcher searcher)
        {
            var termEnum = prefixQuery.GetEnum(searcher.GetIndexReader());
            try
            {
                var matched = false;
                do
                {
                    // The filtered enum is already positioned on its first match
                    // (hence do/while), but Term() is documented to return null
                    // when nothing matches, so guard before printing.
                    var term = termEnum.Term();
                    if (term != null)
                    {
                        matched = true;
                        Console.WriteLine("\t <" + term + "> freq=" + termEnum.DocFreq());
                    }
                } while (termEnum.Next());

                if (!matched)
                {
                    Console.WriteLine("\t (no matching terms)");
                }
            }
            finally
            {
                termEnum.Close();
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment