Created
December 10, 2009 06:25
-
-
Save kanru/253164 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- flint_database.cc 2009-12-08 13:34:24.790284881 +0800 | |
+++ flint_database.cc 2009-12-10 14:22:14.493653956 +0800 | |
@@ -1188,7 +1188,7 @@ | |
termlist.next(); | |
while (!termlist.at_end()) { | |
- string tname = termlist.get_termname(); | |
+ string tname = termlist.get_termname(); | |
position_table.delete_positionlist(did, tname); | |
termcount wdf = termlist.get_wdf(); | |
@@ -1278,20 +1278,50 @@ | |
} | |
if (!modifying || document.internal->terms_modified()) { | |
- // FIXME - in the case where there is overlap between the new | |
- // termlist and the old termlist, it would be better to compare the | |
- // two lists, and make the minimum set of modifications required. | |
- // This would lead to smaller changesets for replication, and | |
- // probably be faster overall. | |
- | |
- // First, add entries to remove the postings in the underlying record. | |
Xapian::Internal::RefCntPtr<const FlintWritableDatabase> ptrtothis(this); | |
FlintTermList termlist(ptrtothis, did); | |
+ Xapian::TermIterator term = document.termlist_begin(); | |
+ Xapian::TermIterator term_end = document.termlist_end(); | |
+ flint_doclen_t new_doclen = termlist.get_doclength(); | |
+ string old_tname, new_tname; | |
+ | |
+ total_length -= new_doclen; | |
+ | |
+ termlist.next(); | |
+ while (true) { | |
+ bool identical = false; | |
+ int cmp; | |
+ if (termlist.at_end() && term == term_end) | |
+ break; | |
+ if (!termlist.at_end() && term != term_end) { | |
+ old_tname = termlist.get_termname(); | |
+ new_tname = *term; | |
+ cmp = old_tname.compare(new_tname); | |
+ | |
+ // Check postlist to see whether they are identical | |
+ if (cmp == 0) { | |
+ int new_count = term.positionlist_count(); | |
+ int old_count = termlist.positionlist_count(); | |
+ if (old_count == new_count) { | |
+ PositionIterator it = term.positionlist_begin(); | |
+ PositionIterator it_end = term.positionlist_end(); | |
+ PositionIterator old = termlist.positionlist_begin(); | |
+ if (equal(it, it_end, old)) | |
+ identical = true; | |
+ } | |
+ } | |
+ } else if (termlist.at_end()) { | |
+ cmp = 2; | |
+ new_tname = *term; | |
+ } else { | |
+ cmp = -2; | |
+ old_tname = termlist.get_termname(); | |
+ } | |
- termlist.next(); | |
- while (!termlist.at_end()) { | |
- string tname = termlist.get_termname(); | |
+ if (cmp < 0) { | |
+ const string& tname = old_tname; | |
termcount wdf = termlist.get_wdf(); | |
+ new_doclen -= wdf; | |
map<string, pair<termcount_diff, termcount_diff> >::iterator i; | |
i = freq_deltas.find(tname); | |
@@ -1318,58 +1348,62 @@ | |
// Modifying a document we added/modified since the last flush. | |
k->second = make_pair('D', 0u); | |
} | |
- | |
- termlist.next(); | |
- } | |
- | |
- total_length -= termlist.get_doclength(); | |
- | |
- flint_doclen_t new_doclen = 0; | |
- Xapian::TermIterator term = document.termlist_begin(); | |
- Xapian::TermIterator term_end = document.termlist_end(); | |
- for ( ; term != term_end; ++term) { | |
- // Calculate the new document length | |
+ } else if (!identical) { | |
+ const string& tname = new_tname; | |
termcount wdf = term.get_wdf(); | |
- new_doclen += wdf; | |
- | |
- string tname = *term; | |
- if (tname.size() > MAX_SAFE_TERM_LENGTH) | |
- throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname); | |
- map<string, pair<termcount_diff, termcount_diff> >::iterator i; | |
- i = freq_deltas.find(tname); | |
- if (i == freq_deltas.end()) { | |
- freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf)))); | |
- } else { | |
- ++i->second.first; | |
- i->second.second += wdf; | |
- } | |
+ new_doclen += wdf; | |
+ | |
+ if (cmp > 0) { | |
+ if (tname.size() > MAX_SAFE_TERM_LENGTH) | |
+ throw Xapian::InvalidArgumentError("Term too long (> "STRINGIZE(MAX_SAFE_TERM_LENGTH)"): " + tname); | |
+ map<string, pair<termcount_diff, termcount_diff> >::iterator i; | |
+ i = freq_deltas.find(tname); | |
+ if (i == freq_deltas.end()) { | |
+ freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf)))); | |
+ } else { | |
+ ++i->second.first; | |
+ i->second.second += wdf; | |
+ } | |
+ | |
+ // Add did to tname's postlist | |
+ map<string, map<docid, pair<char, termcount> > >::iterator j; | |
+ j = mod_plists.find(tname); | |
+ if (j == mod_plists.end()) { | |
+ map<docid, pair<char, termcount> > m; | |
+ j = mod_plists.insert(make_pair(tname, m)).first; | |
+ } | |
+ map<docid, pair<char, termcount> >::iterator k; | |
+ k = j->second.find(did); | |
+ if (k != j->second.end()) { | |
+ Assert(k->second.first == 'D'); | |
+ k->second.first = 'M'; | |
+ k->second.second = wdf; | |
+ } else { | |
+ j->second.insert(make_pair(did, make_pair('A', wdf))); | |
+ } | |
+ } | |
+ | |
+ PositionIterator it = term.positionlist_begin(); | |
+ PositionIterator it_end = term.positionlist_end(); | |
+ if (it != it_end) { | |
+ position_table.set_positionlist( | |
+ did, tname, it, it_end); | |
+ } else { | |
+ position_table.delete_positionlist(did, tname); | |
+ } | |
+ } | |
+ if (termlist.at_end()) | |
+ ++term; | |
+ else if (term == term_end) | |
+ termlist.next(); | |
+ else { | |
+ if (cmp >= 0) | |
+ ++term; | |
+ if (cmp <= 0) | |
+ termlist.next(); | |
+ } | |
+ } | |
- // Add did to tname's postlist | |
- map<string, map<docid, pair<char, termcount> > >::iterator j; | |
- j = mod_plists.find(tname); | |
- if (j == mod_plists.end()) { | |
- map<docid, pair<char, termcount> > m; | |
- j = mod_plists.insert(make_pair(tname, m)).first; | |
- } | |
- map<docid, pair<char, termcount> >::iterator k; | |
- k = j->second.find(did); | |
- if (k != j->second.end()) { | |
- Assert(k->second.first == 'D'); | |
- k->second.first = 'M'; | |
- k->second.second = wdf; | |
- } else { | |
- j->second.insert(make_pair(did, make_pair('A', wdf))); | |
- } | |
- | |
- PositionIterator it = term.positionlist_begin(); | |
- PositionIterator it_end = term.positionlist_end(); | |
- if (it != it_end) { | |
- position_table.set_positionlist( | |
- did, tname, it, it_end); | |
- } else { | |
- position_table.delete_positionlist(did, tname); | |
- } | |
- } | |
LOGLINE(DB, "Calculated doclen for replacement document " << did << " as " << new_doclen); | |
// Set the termlist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*Before* | |
13:27:55 kanru@anar:~/abs/xapian-core/src/xapian-build | |
% time notmuch tag +test id:[email protected] | |
MOD_PLISTS: 368 | |
notmuch tag +test id:[email protected] 0.05s user 0.03s system 11% cpu 0.673 total | |
13:28:11 kanru@anar:~/abs/xapian-core/src/xapian-build | |
% time notmuch tag -test id:[email protected] | |
MOD_PLISTS: 368 | |
notmuch tag -test id:[email protected] 0.06s user 0.01s system 10% cpu 0.681 total | |
*After* | |
13:30:40 kanru@anar:~/abs/xapian-core/src/xapian-build | |
% time notmuch tag +test id:[email protected] | |
MOD_PLIST: 1 | |
notmuch tag +test id:[email protected] 0.01s user 0.02s system 6% cpu 0.436 total | |
13:31:23 kanru@anar:~/abs/xapian-core/src/xapian-build | |
% time notmuch tag -test id:[email protected] | |
MOD_PLIST: 1 | |
notmuch tag -test id:[email protected] 0.01s user 0.01s system 5% cpu 0.383 total |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14:28:28 kanru@anar:~ | |
% time notmuch tag +test tag:notmuch | |
notmuch tag +test tag:notmuch 1.71s user 0.03s system 65% cpu 2.632 total | |
14:30:34 kanru@anar:~ | |
% time notmuch tag -test tag:notmuch | |
notmuch tag -test tag:notmuch 1.61s user 0.02s system 73% cpu 2.204 total | |
14:30:47 kanru@anar:~ | |
% notmuch count tag:notmuch | |
682 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment