Created
November 26, 2023 21:53
-
-
Save GuillaumePressiat/b33d6a3aba0aabd6fd8f9fbfc3efad1f to your computer and use it in GitHub Desktop.
Tantivy test en Rust, slop, phraseQuery, fuzzyTerm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[macro_use] | |
extern crate tantivy; | |
use tantivy::query::QueryParser; | |
use tantivy::schema::*; | |
use tantivy::ReloadPolicy; | |
use tempfile::TempDir; | |
use tantivy::query::FuzzyTermQuery; | |
use tantivy::query::PhraseQuery; | |
use tantivy::query::TermQuery; | |
use tantivy::collector::{Count, TopDocs}; | |
use tantivy::{doc, Index, Term}; | |
fn main() -> tantivy::Result<()> { | |
let index_path = TempDir::new()?; | |
let mut schema_builder = Schema::builder(); | |
schema_builder.add_text_field("title", TEXT | STORED); | |
schema_builder.add_text_field("body", TEXT); | |
let schema = schema_builder.build(); | |
let index = Index::create_in_dir(&index_path, schema.clone())?; | |
let mut index_writer = index.writer(50_000_000)?; | |
let title = schema.get_field("title").unwrap(); | |
let body = schema.get_field("body").unwrap(); | |
// let mut old_man_doc = Document::default(); | |
// old_man_doc.add_text(title, "The Old Man and the Sea"); | |
// old_man_doc.add_text( | |
// body, | |
// "He was an old man who fished alone in a skiff in the Gulf Stream and \ | |
// he had gone eighty-four days now without taking a fish.", | |
// ); | |
// index_writer.add_document(old_man_doc); | |
index_writer.add_document(doc!( | |
title => "Of Mice and Men", | |
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \ | |
bank and runs deep and green. The water is warm too, for it has slipped twinkling \ | |
over the yellow sands in the sunlight before reaching the narrow pool. On one \ | |
side of the river the golden foothill slopes curve up to the strong and rocky \ | |
Gabilan Mountains, but on the valley side the water is lined with trees—willows \ | |
fresh and green with every spring, carrying in their lower leaf junctures the \ | |
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \ | |
limbs and branches that arch over the pool" | |
)); | |
index_writer.add_document(doc!( | |
title => "Of Mice and Men", | |
body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \ | |
bank and runs deep and green. The water is warm too, for it has slipped twinkling \ | |
over the yellow sands in the sunlight before reaching the narrow pool. On one \ | |
side of the river the golden foothill slopes curve up to the strong and rocky \ | |
Gabilan Mountains, but on the valley side the water is lined with trees—willows \ | |
fresh and green with every spring, carrying in their lower leaf junctures the \ | |
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \ | |
limbs and branches that arch over the pool" | |
)); | |
// Double titre | |
index_writer.add_document(doc!( | |
title => "Frankenstein", | |
title => "The Modern Prometheus", | |
body => "You will rejoice to hear that no disaster has accompanied the commencement of an \ | |
enterprise which you have regarded with such evil forebodings. I arrived here \ | |
yesterday, and my first task is to assure my dear sister of my welfare and \ | |
increasing confidence in the success of my undertaking." | |
)); | |
index_writer.commit()?; | |
let reader = index | |
.reader_builder() | |
.reload_policy(ReloadPolicy::OnCommit) | |
.try_into()?; | |
// https://stackoverflow.com/questions/38816955/elasticsearch-fuzzy-phrases | |
let query_parser = QueryParser::for_index(&index, vec![title, body]); | |
let reader = index.reader()?; | |
let searcher = reader.searcher(); | |
{ | |
let term = Term::from_field_text(body, "'disaster"); | |
// let query = FuzzyTermQuery::new(term, 2, true); | |
let query = PhraseQuery::new(vec![ | |
Term::from_field_text(body, "no"), | |
Term::from_field_text(body, "disaster")]); | |
let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?; | |
for (_score, doc_address) in top_docs { | |
let retrieved_doc = searcher.doc(doc_address)? ; | |
println!("{}", schema.to_json(&retrieved_doc)); | |
// println!("{}", _score); | |
} | |
} | |
// // let query = query_parser.parse_query("sea whale")?; | |
// let terms = Term::from_field_text(body, "that disaster"); | |
// let query = FuzzyTermQuery::new(terms, 0, false); | |
// // let query = query_parser.parse_query("'that disaster'~2")?; | |
// let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?; | |
// for (_score, doc_address) in top_docs { | |
// let retrieved_doc = searcher.doc(doc_address)?; | |
// println!("{}", schema.to_json(&retrieved_doc)); | |
// } | |
Ok(()) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment