Created
May 16, 2019 00:28
-
-
Save Kronuz/91be4c94b538eabb7f09ee4c0defb363 to your computer and use it in GitHub Desktop.
Xapian core Document::Internal buffered terms map
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/xapian/backends/documentinternal.cc b/src/xapian/backends/documentinternal.cc | |
index 7c87292fc..a20fda9a4 100644 | |
--- a/src/xapian/backends/documentinternal.cc | |
+++ b/src/xapian/backends/documentinternal.cc | |
@@ -38,13 +38,16 @@ namespace Xapian { | |
void | |
Document::Internal::ensure_terms_fetched() const | |
{ | |
- if (terms) | |
+ if (terms || terms_buffer) | |
return; | |
+ if (!database.get()) { | |
+ terms_buffer.reset(new std::deque<terms_buffer_value_type>); | |
+ return; | |
+ } | |
+ | |
terms.reset(new terms_type()); | |
termlist_size = 0; | |
- if (!database.get()) | |
- return; | |
unique_ptr<TermList> t(database->open_term_list(did)); | |
while (t->next(), !t->at_end()) { | |
@@ -72,6 +75,38 @@ Document::Internal::ensure_values_fetched() const | |
} | |
} | |
+// Replay the buffered term additions into a freshly built terms map. | |
+// Does nothing if nothing is buffered; otherwise the buffer is consumed and | |
+// terms / termlist_size are rebuilt from its entries. | |
+void | |
+Document::Internal::apply_terms_buffer() const | |
+{ | |
+    if (!terms_buffer) | |
+        return; | |
+ | |
+    terms.reset(new terms_type()); | |
+    termlist_size = 0; | |
+ | |
+    // Sort the deque in place rather than deep-copying every entry (and its | |
+    // string) into a scratch vector.  stable_sort keeps entries for the same | |
+    // term in the order they were buffered, so they are replayed in call | |
+    // order instead of an unspecified one. | |
+    std::stable_sort(terms_buffer->begin(), terms_buffer->end(), | |
+                     [](const auto& a, const auto& b) { | |
+                         return a.term < b.term; | |
+                     }); | |
+ | |
+    // Detach the buffer before merging so terms_buffer is already empty | |
+    // while the map is being filled (as before, it is cleared ahead of the | |
+    // merge loop). | |
+    decltype(terms_buffer) pending; | |
+    pending.swap(terms_buffer); | |
+ | |
+    // Keys now arrive in ascending order with equal terms adjacent, so the | |
+    // only possible existing match is the element inserted last: no map | |
+    // lookup is needed. | |
+    auto last = terms->end(); | |
+    for (const auto& t : *pending) { | |
+        if (last == terms->end() || last->first != t.term) { | |
+            ++termlist_size; | |
+            last = terms->emplace_hint(terms->end(), | |
+                                       t.term, | |
+                                       t.has_term_pos ? TermInfo(t.wdf_inc, t.term_pos) : TermInfo(t.wdf_inc)); | |
+        } else if (t.has_term_pos) { | |
+            if (last->second.add_position(t.wdf_inc, t.term_pos)) | |
+                ++termlist_size; | |
+        } else { | |
+            if (last->second.increase_wdf(t.wdf_inc)) | |
+                ++termlist_size; | |
+        } | |
+    } | |
+} | |
+ | |
string | |
Document::Internal::fetch_data() const | |
{ | |
@@ -105,6 +140,8 @@ Document::Internal::set_database(const Database& db) const | |
TermList* | |
Document::Internal::open_term_list() const | |
{ | |
+ apply_terms_buffer(); | |
+ | |
if (terms) | |
return new DocumentTermList(this); | |
@@ -139,6 +176,8 @@ Document::Internal::get_description() const | |
description_append(desc, *data); | |
} | |
+ apply_terms_buffer(); | |
+ | |
if (terms) { | |
desc += ", terms["; | |
desc += str(terms->size()); | |
diff --git a/src/xapian/backends/documentinternal.h b/src/xapian/backends/documentinternal.h | |
index a1aaab178..3ba3fa636 100644 | |
--- a/src/xapian/backends/documentinternal.h | |
+++ b/src/xapian/backends/documentinternal.h | |
@@ -32,6 +32,7 @@ | |
#include "xapian/backends/databaseinternal.h" | |
#include "xapian/common/overflow.h" | |
+#include <deque> | |
#include <map> | |
#include <memory> | |
#include <string> | |
@@ -76,6 +77,14 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
using terms_type = std::map<std::string, TermInfo, std::less<std::string>, allocators::memory_pool_allocator<std::pair<const std::string, TermInfo>>>; | |
mutable std::unique_ptr<terms_type> terms; | |
+ /// One buffered term addition, recorded instead of updating @a terms directly. | |
+ struct terms_buffer_value_type { | |
+ /// The term the caller asked to add. | |
+ std::string term; | |
+ /// Amount to add to the term's wdf. | |
+ Xapian::termcount wdf_inc; | |
+ /// True if the entry also carries a position (term_pos is meaningful). | |
+ bool has_term_pos; | |
+ /// Position to add; pushed as 0 by add_term(), which sets has_term_pos to false. | |
+ Xapian::termpos term_pos; | |
+ }; | |
+ /// Pending term additions; allocated by ensure_terms_fetched() when the | |
+ /// document has no database, and released once folded into @a terms. | |
+ mutable std::unique_ptr<std::deque<terms_buffer_value_type>> terms_buffer; | |
+ | |
/** The number of distinct terms in @a terms. | |
* | |
* Only valid when terms is non-NULL. | |
@@ -110,6 +119,8 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
*/ | |
void ensure_values_fetched() const; | |
+ /** Build @a terms from @a terms_buffer, if there is one. | |
+ * | |
+ * Does nothing when terms_buffer is NULL.  Otherwise terms and | |
+ * termlist_size are rebuilt from the buffered entries and the buffer | |
+ * is released. | |
+ */ | |
+ void apply_terms_buffer() const; | |
+ | |
protected: | |
/** Document value slots and their contents. | |
* | |
@@ -198,7 +209,7 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
* compared to the version read, otherwise it means modifications | |
* compared to an empty database. | |
*/ | |
- bool terms_modified() const { return terms != NULL; } | |
+ bool terms_modified() const { return terms || terms_buffer; } | |
/** Return true if the document's values might have been modified. | |
* | |
@@ -251,6 +262,11 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
void add_term(const std::string& term, Xapian::termcount wdf_inc) { | |
ensure_terms_fetched(); | |
+ if (terms_buffer) { | |
+ terms_buffer->push_back({term, wdf_inc, false, 0}); | |
+ return; | |
+ } | |
+ | |
auto i = terms->find(term); | |
if (i == terms->end()) { | |
++termlist_size; | |
@@ -265,6 +281,8 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
bool remove_term(const std::string& term) { | |
ensure_terms_fetched(); | |
+ apply_terms_buffer(); | |
+ | |
auto i = terms->find(term); | |
if (i == terms->end()) { | |
return false; | |
@@ -286,6 +304,11 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
ensure_terms_fetched(); | |
positions_modified_ = true; | |
+ if (terms_buffer) { | |
+ terms_buffer->push_back({term, wdf_inc, true, term_pos}); | |
+ return; | |
+ } | |
+ | |
auto i = terms->find(term); | |
if (i == terms->end()) { | |
++termlist_size; | |
@@ -305,6 +328,8 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
Xapian::termcount wdf_dec) { | |
ensure_terms_fetched(); | |
+ apply_terms_buffer(); | |
+ | |
auto i = terms->find(term); | |
if (i == terms->end() || i->second.is_deleted()) { | |
return remove_posting_result::NO_TERM; | |
@@ -330,6 +355,8 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
Xapian::termpos& n_removed) { | |
ensure_terms_fetched(); | |
+ apply_terms_buffer(); | |
+ | |
auto i = terms->find(term); | |
if (i == terms->end() || i->second.is_deleted()) { | |
return remove_posting_result::NO_TERM; | |
@@ -352,6 +379,9 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
/// Clear all terms from the document. | |
void clear_terms() { | |
+ if (terms_buffer) { | |
+ terms_buffer.reset(); | |
+ } | |
if (!terms) { | |
if (database.get()) { | |
terms.reset(new terms_type()); | |
@@ -370,6 +400,8 @@ class Document::Internal : public Xapian::Internal::intrusive_base { | |
/// Return the number of distinct terms in this document. | |
Xapian::termcount termlist_count() const { | |
+ apply_terms_buffer(); | |
+ | |
if (terms) | |
return termlist_size; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment