Last active
November 10, 2017 18:59
-
-
Save rflechner/ff8f9d63fdb789038a891cb21c2eb592 to your computer and use it in GitHub Desktop.
Exploring Lucene in Fsharp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#I @"../../packages/Newtonsoft.Json/lib/net45/" | |
#I @"../../packages/Lucene.Net/lib/NET40/" | |
#r "Lucene.Net.dll" | |
open System | |
open System.IO | |
open System.Globalization | |
open Lucene | |
open Lucene.Net | |
open Lucene.Net.Analysis.Standard | |
open Lucene.Net.Documents | |
open Lucene.Net.Index | |
open Lucene.Net.QueryParsers | |
open Lucene.Net.Search | |
open Lucene.Net.Store | |
open Lucene.Net.Util | |
/// Path-joining operator: `a /> b` is shorthand for `Path.Combine(a, b)`.
let (/>) (left: string) (right: string) : string =
    Path.Combine(left, right)
// Folder, located next to this script, that holds the Lucene index files.
let indexPath = __SOURCE_DIRECTORY__ /> "lucene_explore"

// CreateDirectory is a no-op when the folder already exists, so a separate
// Exists check is unnecessary (and would only add a check-then-act race).
// The type is fully qualified because Lucene.Net.Store, opened above, also
// declares a type named Directory.
System.IO.Directory.CreateDirectory indexPath |> ignore
/// Writes 101 sample documents (ids 0 through 100) into the Lucene index
/// located at `indexPath`.
///
/// Every document has two stored, analyzed fields:
///   - "Id":    the loop counter rendered as text
///   - "Value": the text "super text <n>"
///
/// The index is optimized and committed before the writer is released.
let generateFiles () =
    let version = Lucene.Net.Util.Version.LUCENE_30
    // `use` bindings are disposed in reverse order (writer, analyzer, directory)
    // when the function exits, including when indexing throws, so none of the
    // three is left open after a failure.
    use dir = new SimpleFSDirectory(new DirectoryInfo(indexPath))
    use analyzer = new StandardAnalyzer(version)
    use writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)
    for i in 0 .. 100 do
        let doc = new Document()
        // Invariant culture keeps the stored id independent of the machine locale.
        let idText = i.ToString(CultureInfo.InvariantCulture)
        doc.Add(new Field("Id", idText, Field.Store.YES, Field.Index.ANALYZED))
        doc.Add(new Field("Value", sprintf "super text %d" i, Field.Store.YES, Field.Index.ANALYZED))
        writer.AddDocument(doc)
    writer.Optimize()
    writer.Commit()
// Upper bound on the number of hits requested from the searcher.
let maxCount = 10000
let dir = new SimpleFSDirectory(new DirectoryInfo(indexPath))
let version = Lucene.Net.Util.Version.LUCENE_30

// Searches the "Value" field for the term "super" and returns the matching
// documents as a list of (Id, Value) pairs. The sequence is forced with
// Seq.toList so the reader, searcher and analyzer are disposed as soon as the
// results have been collected.
seq {
    use reader = IndexReader.Open(dir, true)
    // The searcher is disposable as well; bind it with `use` so it is released
    // together with the reader.
    use searcher = new IndexSearcher(reader)
    use analyzer = new StandardAnalyzer(version)
    // Only needed by the commented-out parser-based queries below.
    let queryParser = new QueryParser(version, "Value", analyzer)
    queryParser.AllowLeadingWildcard <- true
    //let query = queryParser.Parse "<TERM>\"super text\"<TERM>"
    let tq = new TermQuery(new Term("Value","super"))
    // Desired query (author's note: "I want that !!!"):
    //let tq = new TermQuery(new Term("Value","super text 5"))
    // NOTE(review): "Value" is indexed with Field.Index.ANALYZED in generateFiles,
    // so it is presumably split into separate terms and a single TermQuery for the
    // whole phrase would not match — confirm, then consider a PhraseQuery or a
    // non-analyzed field.
    //let q1 = queryParser.Parse "<TERM>\"super\"<TERM>"
    let query = new BooleanQuery()
    //query.Add(new BooleanClause(q1, Occur.MUST))
    query.Add(new BooleanClause(tq, Occur.MUST))
    let hits = searcher.Search(query, maxCount)
    for hit in hits.ScoreDocs do
        let doc = searcher.Doc(hit.Doc)
        let idField = doc.GetField "Id"
        let vField = doc.GetField "Value"
        // Parse with the invariant culture so the result does not depend on
        // the machine locale.
        yield (Int32.Parse(idField.StringValue, CultureInfo.InvariantCulture), vField.StringValue)
} |> Seq.toList
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment