Skip to content

Instantly share code, notes, and snippets.

@vhbui02
Last active May 18, 2023 03:59
Show Gist options
  • Save vhbui02/e24351ee041609f57d035d6fb84552b5 to your computer and use it in GitHub Desktop.
Save vhbui02/e24351ee041609f57d035d6fb84552b5 to your computer and use it in GitHub Desktop.
[MongoDB Text Search on non-Atlas Self-Managed Deployments] #mongodb

Create a text index. It can include any field whose value is a string or an array of strings.

A collection can have only one text index, but that single index can cover multiple fields.

Cons

  • does not support fuzzy search
  • does not support autocomplete
  • does not work well with languages that use diacritic marks

Using find()

// Seed the `stores` collection with the sample documents used by the
// text-search examples that follow.
db.stores.insertMany([
   {
      _id: 1,
      name: 'Java Hut',
      description: 'Coffee and cakes',
   },
   {
      _id: 2,
      name: 'Burger Buns',
      description: 'Gourmet hamburgers',
   },
   {
      _id: 3,
      name: 'Coffee Shop',
      description: 'Just coffee',
   },
   {
      _id: 4,
      name: 'Clothes Clothes Clothes',
      description: 'Discount clothing',
   },
   {
      _id: 5,
      name: 'Java Shopping',
      description: 'Indonesian goods',
   },
])

// Single text index covering both the `name` and `description` fields.
db.stores.createIndex({
   name: 'text',
   description: 'text',
})

// NOTE: $text does not evaluate regular expressions. The search string is
// split into terms on whitespace and most punctuation, so a leading `^`
// would be treated as a delimiter rather than as an anchor.
db.stores.find({
   $text: {
      // search for an EXACT phrase by wrapping it in double quotes:
      // $search: '"coffee shop"',

      // exclude a term by prefixing it with `-`:
      // matches documents containing "java" OR "shop", but NOT "coffee"
      $search: 'java shop -coffee',
   },
})

// $text results come back unsorted by default.
// To order them by relevance, expose the 'how well does this document match
// the query' score via $meta and sort on it.
db.stores
   .find(
      // 1st parameter is the filter: $text tokenizes the search string using
      // whitespace and most punctuation as delimiters, then performs a
      // logical OR across the resulting terms.
      { $text: { $search: 'java shop coffee' } },
      // 2nd parameter is the projection: adds the relevance score as `score`.
      {
         score: {
            $meta: 'textScore',
         },
      }
   )
   .sort({
      score: {
         $meta: 'textScore',
      },
   })

Using aggregate()

// aggregation pipeline
// $match stage + $text operator (must be the 1st stage, only 1 $text operator
// can exist, and it cannot appear inside a $or or $not logical expression)
// $sort stage + $meta aggregation operator

// since there is only 1 text index, you do not need to specify which field to
// search, the text index already knows. This also avoids choosing an unindexed field.

// The text index must exist on the SAME collection that the $text query runs
// against, i.e. `articles` here.
// NOTE(review): assumes the searchable text lives in a `subject` field —
// adjust the field name to match the real schema of `articles`.
db.articles.createIndex({ subject: 'text' })

// Sum the `views` of every article whose indexed text matches "cake".
db.articles.aggregate([
   {
      $match: {
         $text: {
            $search: 'cake',
         },
      },
   },
   {
      $group: {
         _id: null, // no group key => group all results
         totalViews: {
            $sum: '$views',
         },
      },
   },
])

// Return the titles of matching articles, ordered by text score.
db.articles.aggregate([
   // Stage 1: text search for the terms "cake" and "tea".
   { $match: { $text: { $search: 'cake tea' } } },
   // Stage 2: order by relevance. `textScore` is built-in metadata produced
   // by the $text match, it is not a custom defined field.
   { $sort: { score: { $meta: 'textScore' } } },
   // Stage 3: keep only `title` and drop `_id`.
   { $project: { title: 1, _id: 0 } },
])

// match on text score
// textScore metadata is available in projections, sorts and conditions of the
// stages that FOLLOW a $match stage containing the $text operator

db.articles.aggregate([
   {
      $match: {
         // using text index
         $text: {
            $search: 'cake tea',
            // specify a custom language; $language is an option of $text
            // and must live inside it, not at the top level of $match
            $language: 'en',
         },
         // additional ordinary field conditions can sit alongside $text
         'some field here': 'abcefef',
      },
   },
   {
      // expose the relevance score as a regular field named `score`
      $project: {
         title: 1,
         _id: 0,
         score: {
            $meta: 'textScore',
         },
      },
   },
   {
      // keep only the documents whose score is greater than 1.0
      $match: {
         score: {
            $gt: 1.0,
         },
      },
   },
])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment