inexorabletash · August 3, 2025 17:52
diff --git a/@ IndexedDB Full Text Search (Proof of Concept).md b/@ IndexedDB Full Text Search (Proof of Concept).md
diff --git a/example.html b/example.html
 <!DOCTYPE html>
 <script src="porter-stemmer.js"></script>
 <script src="segment.js"></script>
 <script src="fulltext.js"></script>
 <script>
 // Copyright 2019 Google LLC.
 // SPDX-License-Identifier: Apache-2.0

 // Three sample documents that get indexed and then searched below.
 const doc1 = `You already know all the details, but here’s the official word from Yahoo on its $1.1 billion Tumblr deal`;
 const doc2 = `Yahoo! Inc. (NASDAQ: YHOO) and Tumblr announced today that they have reached a definitive agreement for Yahoo! to acquire Tumblr.`;
 const doc3 = `Of all the things 26-year-old David Karp has done in life, creating Tumblr stands as his most profitable venture, thus far.`;

 // Request deletion of any existing database, then open it again.
 indexedDB.deleteDatabase('db-fulltext');
 const request = indexedDB.open('db-fulltext');

 // Schema setup: one object store keyed by `docid`, plus a multiEntry index
 // over each record's `terms` array so every term gets its own index entry.
 request.onupgradeneeded = () => {
  const documents =
    request.result.createObjectStore('documents', {keyPath: 'docid'});
  documents.createIndex('fulltext', 'terms', {multiEntry: true});

  // Seed the store; docids are assigned 1..3 in document order.
  [doc1, doc2, doc3].forEach((text, position) => {
    documents.put({
      docid: position + 1,
      text: text,
      terms: FullText.tokenize(text, 'en')
    });
  });
 };

 // Once the database is open, run each sample query against the index and
 // log the matching docids.
 request.onsuccess = () => {
  const index = request.result
    .transaction('documents')
    .objectStore('documents')
    .index('fulltext');

  const queries = ['yahoo', 'tumblr', 'Karp', 'yahoo tumblr'];
  for (const query of queries) {
    FullText.search(index, query, 'en', ids => {
      console.log('query:', JSON.stringify(query), 'results:', ids);
    });
  }
 };
 </script>
diff --git a/fulltext.js b/fulltext.js
 // Copyright 2019 Google LLC.
 // SPDX-License-Identifier: Apache-2.0

 /*global stemmer*/

 self.FullText = (() => {

  /**
   * Splits text into a de-duplicated list of normalized search terms.
   *
   * Every word-like segment that Intl.Segmenter reports for the locale is
   * lower-cased and then passed through the global `stemmer` function, which
   * must be provided outside this function.
   *
   * @param {string} text - Text to tokenize.
   * @param {string} locale - Locale handed to Intl.Segmenter.
   * @returns {string[]} Unique stemmed terms, in order of first appearance.
   */
  function tokenize(text, locale) {
    // Intl.Segmenter is a constructor: the native implementation throws a
    // TypeError when it is invoked without `new`.
    const segmenter = new Intl.Segmenter(locale, {granularity: 'word'});
    const words = new Set();
    for (const {segment, isWordLike} of segmenter.segment(text)) {
      // Skip whitespace and punctuation segments.
      if (!isWordLike)
        continue;
      words.add(stemmer(segment.toLowerCase()));
    }
    return Array.from(words);
  }

  /**
   * Finds the primary keys of records whose index entries contain every
   * term of the query.
   *
   * One key cursor is opened per query term. Whenever all outstanding cursor
   * requests have reported back, the cursors are compared by primary key:
   * if they all agree the key is recorded and every cursor advances,
   * otherwise only the cursor with the lowest primary key advances. The
   * search ends as soon as any cursor is exhausted.
   *
   * @param {IDBIndex} index - Index whose keys are terms.
   * @param {string} query - Query text; tokenized with `tokenize`.
   * @param {string} locale - Locale passed through to `tokenize`.
   * @param {function(Array): void} callback - Called once with the matching
   *     primary keys, in the order they were found.
   * @throws {Error} If the query yields no terms.
   */
  function search(index, query, locale, callback) {
    const matches = [];

    const terms = tokenize(query, locale);
    if (terms.length === 0)
      throw new Error('no words in query');

    // One key cursor request per term.
    const requests = [];
    for (const term of terms)
      requests.push(index.openKeyCursor(term));

    // Number of cursor requests that still have to fire before the next
    // comparison step can run.
    let pending = requests.length;
    const onCursorReady = () => {
      pending -= 1;
      if (pending === 0)
        step();
    };
    for (const request of requests)
      request.onsuccess = onCursorReady;

    function step() {
      const cursors = requests.map(({result}) => result);

      // A null result means that cursor ran past its last entry, so no
      // further record can contain every term.
      if (cursors.some(cursor => cursor === null)) {
        callback(matches);
        return;
      }

      // Sort ascending by primary key; the ends are then the lowest and the
      // highest cursor.
      cursors.sort((a, b) => indexedDB.cmp(a.primaryKey, b.primaryKey));
      const lowest = cursors[0];
      const highest = cursors[cursors.length - 1];

      if (indexedDB.cmp(lowest.primaryKey, highest.primaryKey) !== 0) {
        // Keys differ: only the lowest cursor needs to move.
        pending = 1;
        lowest.continue();
        return;
      }

      // Lowest equals highest, so every cursor is on the same record.
      matches.push(lowest.primaryKey);
      pending = cursors.length;
      for (const cursor of cursors)
        cursor.continue();
    }
  }

  // Object returned from the enclosing function, exposing the two functions
  // defined above under their own names.
  return {
    tokenize: tokenize,
    search: search
  };

 })();
	<!DOCTYPE html>
	<script src="porter-stemmer.js"></script>
	<script src="segment.js"></script>
	<script src="fulltext.js"></script>
	<script>
	// Copyright 2019 Google LLC.
	// SPDX-License-Identifier: Apache-2.0

	// Three sample documents that get indexed and then searched below.
	const doc1 = `You already know all the details, but here’s the official word from Yahoo on its $1.1 billion Tumblr deal`;
	const doc2 = `Yahoo! Inc. (NASDAQ: YHOO) and Tumblr announced today that they have reached a definitive agreement for Yahoo! to acquire Tumblr.`;
	const doc3 = `Of all the things 26-year-old David Karp has done in life, creating Tumblr stands as his most profitable venture, thus far.`;

	// Request deletion of any existing database, then open it again.
	indexedDB.deleteDatabase('db-fulltext');
	const request = indexedDB.open('db-fulltext');

	// Schema setup: one object store keyed by `docid`, plus a multiEntry index
	// over each record's `terms` array so every term gets its own index entry.
	request.onupgradeneeded = () => {
	  const documents =
	    request.result.createObjectStore('documents', {keyPath: 'docid'});
	  documents.createIndex('fulltext', 'terms', {multiEntry: true});

	  // Seed the store; docids are assigned 1..3 in document order.
	  [doc1, doc2, doc3].forEach((text, position) => {
	    documents.put({
	      docid: position + 1,
	      text: text,
	      terms: FullText.tokenize(text, 'en')
	    });
	  });
	};

	// Once the database is open, run each sample query against the index and
	// log the matching docids.
	request.onsuccess = () => {
	  const index = request.result
	    .transaction('documents')
	    .objectStore('documents')
	    .index('fulltext');

	  const queries = ['yahoo', 'tumblr', 'Karp', 'yahoo tumblr'];
	  for (const query of queries) {
	    FullText.search(index, query, 'en', ids => {
	      console.log('query:', JSON.stringify(query), 'results:', ids);
	    });
	  }
	};
	</script>
	// Copyright 2019 Google LLC.
	// SPDX-License-Identifier: Apache-2.0

	/*global stemmer*/

	self.FullText = (() => {

	/**
	 * Splits text into a de-duplicated list of normalized search terms.
	 *
	 * Every word-like segment that Intl.Segmenter reports for the locale is
	 * lower-cased and then passed through the global `stemmer` function, which
	 * must be provided outside this function.
	 *
	 * @param {string} text - Text to tokenize.
	 * @param {string} locale - Locale handed to Intl.Segmenter.
	 * @returns {string[]} Unique stemmed terms, in order of first appearance.
	 */
	function tokenize(text, locale) {
	  // Intl.Segmenter is a constructor: the native implementation throws a
	  // TypeError when it is invoked without `new`.
	  const segmenter = new Intl.Segmenter(locale, {granularity: 'word'});
	  const words = new Set();
	  for (const {segment, isWordLike} of segmenter.segment(text)) {
	    // Skip whitespace and punctuation segments.
	    if (!isWordLike)
	      continue;
	    words.add(stemmer(segment.toLowerCase()));
	  }
	  return Array.from(words);
	}

	/**
	 * Finds the primary keys of records whose index entries contain every
	 * term of the query.
	 *
	 * One key cursor is opened per query term. Whenever all outstanding cursor
	 * requests have reported back, the cursors are compared by primary key:
	 * if they all agree the key is recorded and every cursor advances,
	 * otherwise only the cursor with the lowest primary key advances. The
	 * search ends as soon as any cursor is exhausted.
	 *
	 * @param {IDBIndex} index - Index whose keys are terms.
	 * @param {string} query - Query text; tokenized with `tokenize`.
	 * @param {string} locale - Locale passed through to `tokenize`.
	 * @param {function(Array): void} callback - Called once with the matching
	 *     primary keys, in the order they were found.
	 * @throws {Error} If the query yields no terms.
	 */
	function search(index, query, locale, callback) {
	  const matches = [];

	  const terms = tokenize(query, locale);
	  if (terms.length === 0)
	    throw new Error('no words in query');

	  // One key cursor request per term.
	  const requests = [];
	  for (const term of terms)
	    requests.push(index.openKeyCursor(term));

	  // Number of cursor requests that still have to fire before the next
	  // comparison step can run.
	  let pending = requests.length;
	  const onCursorReady = () => {
	    pending -= 1;
	    if (pending === 0)
	      step();
	  };
	  for (const request of requests)
	    request.onsuccess = onCursorReady;

	  function step() {
	    const cursors = requests.map(({result}) => result);

	    // A null result means that cursor ran past its last entry, so no
	    // further record can contain every term.
	    if (cursors.some(cursor => cursor === null)) {
	      callback(matches);
	      return;
	    }

	    // Sort ascending by primary key; the ends are then the lowest and the
	    // highest cursor.
	    cursors.sort((a, b) => indexedDB.cmp(a.primaryKey, b.primaryKey));
	    const lowest = cursors[0];
	    const highest = cursors[cursors.length - 1];

	    if (indexedDB.cmp(lowest.primaryKey, highest.primaryKey) !== 0) {
	      // Keys differ: only the lowest cursor needs to move.
	      pending = 1;
	      lowest.continue();
	      return;
	    }

	    // Lowest equals highest, so every cursor is on the same record.
	    matches.push(lowest.primaryKey);
	    pending = cursors.length;
	    for (const cursor of cursors)
	      cursor.continue();
	  }
	}

	// Object returned from the enclosing function, exposing the two functions
	// defined above under their own names.
	return {
	tokenize: tokenize,
	search: search
	};

	})();