erikhatcher · March 23, 2026 11:35
diff --git a/common.js b/common.js
 const NUM_DOCS = 20000000;
 const NUM_REPS = 50;

 use('demo'); // TODO
 collection = db.demo; // TODO

 function get_pipeline_stats(pipeline) {
  start = performance.now();
  cursor = db.demo.aggregate(pipeline);
  end = performance.now();
  agg_time = end - start;

  start = performance.now();
  docs = cursor.toArray();
  end = performance.now();
  array_time = end - start;

  // Explain output is generated outside the scope of the timings
  explain = db.demo.explain("executionStats").aggregate(pipeline);

  return { 
    pipeline: pipeline, 
    docs: docs, 
    agg_time: agg_time, 
    array_time: array_time, 
    total_time: agg_time + array_time,
    explain: explain
  };
 }

 function collect_stats(pipeline) {
  stats = []
  for (let j = 0; j < NUM_REPS; j++) {
    stats.push(
      get_pipeline_stats(pipeline)
    );
  }
  return stats;
 }

 function calculate_stats(arr) {
  if (arr.length === 0) {
    return { minmax: undefined, average: undefined, min: undefined };
  }
  const min = Math.min(...arr);
  const max = Math.max(...arr);
  const sum = arr.reduce((total, currentVal) => total + currentVal, 0);
  const average = sum / arr.length;

  return { max, average, min };
 }


 // print("...from common.js")
diff --git a/run.js b/run.js
 output('Full run: setup...');
 load('common.js');

 let START = performance.now();
 function output(text) {
  // output text with timer (since first output)  
  print("\n[T+" + ((performance.now() - START)/1000) + "s] " + text); 
 }

 function report(description, pipeline) {
  stats = collect_stats(pipeline);
  call_times = stats.map(item => (item.total_time));
  output(
    description + ':\n' +
    JSON.stringify(pipeline) + '\n------\n' +
    JSON.stringify(calculate_stats(call_times))
  );
 }

 output("Dropping search index...")
 try {
  collection.dropSearchIndex('search_index');
  output("Dropped search_index, sleeping 5 seconds to ensure it gets time to drop in the background");
  sleep(5000); // give the index time to drop before we try to create it again
 } catch (error) {
  // no worries
 }

 output('Dropping B-tree indexes...');
 try {
  collection.dropIndex({ a: 1 });
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ b: 1 });
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ a_str: 1 });    
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ b_str: 1 });  
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ a: 1, b: 1, _id: 1 });
 } catch (error) {
  // no worries
 }
 output("Dropped b-tree indexes");

 output('Counting total collection documents...');
 total_docs = collection.countDocuments();
 output('done counting total documents');

 // With no indexes this will be a sluggish start, but we're just counting to report...
 baseline_match_pipeline = [{ $match: { a: 42, b: 42 } }];

 // execute an initial query, as a warmup
 output('Executing warmup baseline query...');
 get_pipeline_stats(baseline_match_pipeline);
 output('...completed warmup baseline query');

 // Now collect the stats
 output('Executing baseline query...');
 baseline_results = get_pipeline_stats(baseline_match_pipeline);
 output('...completed baseline query');

 output("Baseline stats, no indexes:\n" +
    JSON.stringify(baseline_match_pipeline) + `\n` + 
    baseline_results.total_time + "ms total time\n" +
    baseline_results.docs.length + " intersected documents out of " + total_docs 
 );

 // Now let's create an index on `a` and see how that changes things.
 // Note that we don't have an index on `b`, so the query can't fully take advantage of
 // an index intersection, but it should be faster than the full
 // collection scan since it can at least use the index on `a` to narrow down the
 // candidates before filtering on `b`.
 output("Creating index { a: 1 }...")
 collection.createIndex({ a: 1 });    
 output("{ a: 1 } index created");   

 report("match with an index on `a`", baseline_match_pipeline);

 output("Creating index { b: 1 }...")
 collection.createIndex({ b: 1 }); 
 output("{ b: 1 } index created");   

 report("match with an index on `a` and `b` independently", baseline_match_pipeline);

 report('match + count, independent indexes', 
 [
  { $match: { a: 42, b: 42 }},
  { $count: 'num_found' }
 ]);


 // Clearly our query is perfectly suited for a B-tree compound index. However, our original use case is to support a multitude of fields and combinations of them in queries
 // making it infeasible to create indexes for every combination due to write performance. However, we'll revisit a compound index on this scenario later to show how it really is
 // the best solution when the index intersection combinations are within write-performance acceptability. 

 // Let's leverage the inherent index intersection "skip list" efficient multi-field index navigation provided by a search index. 

 // Setting up the search index now so it'll be built by the time we run the search tests
 collection.createSearchIndex(
   "search_index",
   { 
     mappings: {
       dynamic: {
        typeSet: "default_mappings"
       }
     },
     typeSets: [
        {
            name: "default_mappings",
            types: [
                { type: "number" },
                { type: "token" }
            ]
        }
     ],
     storedSource: { include: ["_id", "a", "b", "ab_str"] }
   }
 );
 output("Search index creation initiated. Polling for index readiness...");

 // poll for index query-ability
 index_ready = false
 while (!index_ready) {
    sleep(1000); // wait for 1 second before retrying
    try {
      test_docs = collection.aggregate([
        {
        $search: {
            index: 'search_index',
            compound: {
                filter: [ 
                { equals: { path: 'a', value: 42 } },
                { equals: { path: 'b', value: 42 } }
                ]
            }
        }
        }
      ]).toArray();
      output("Test query found " + test_docs.length + " docs");
      index_ready = true; // if the query succeeds, we can exit the loop
    } catch (error) {
      // try again
    }
 }

 output("Search index created and ready");

 // Here's the best search index query for this intersection case, no sort, returning stored source to bypass id lookups, except no limit.
 baseline_search_pipeline = 
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true, // bypass idLookup's
      concurrent: true,         // improve query performance on a typical churning segmented search index
      compound: {
        filter: [               // filters are non-scoring, slightly more performant than scoring clauses
          { equals: { path: 'a', value: 42 } },
          { equals: { path: 'b', value: 42 } }
        ]
      }
    }
  }
 ]

 report("search index query filtering on a and b independently", baseline_search_pipeline);

 // Let's add sorting to both baselines and see the results:
 sorted_baseline_match_pipeline = [
  { $match: { a: 42, b: 42 }},
  { $sort: { _id: 1 }}
 ]
 report("Baseline match with _id sort with indexes already built only on a and b independently", sorted_baseline_match_pipeline);

 sorted_baseline_search_pipeline = 
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true, // bypass idLookup's
      concurrent: true,         // improve query performance on a typical churning segmented search index
      compound: {
        filter: [               // filters are non-scoring, slightly more performant than scoring clauses
          { equals: { path: 'a', value: 42 } },
          { equals: { path: 'b', value: 42 } }
        ]
      },
      sort: {
        _id: 1
      }
    }
  }
 ]
 report("Baseline search with _id sort", sorted_baseline_search_pipeline);

 // Now with limits on each... 
 sorted_match_pipeline_with_limit = [
  { $match: { a: 42, b: 42 }},
  { $sort: { _id: 1 }},
  { $limit: 200 }
 ]
 report("Match with _id sort and limit. Indexes already built on a and b, independently, at this point", sorted_match_pipeline_with_limit);

 sorted_search_pipeline_with_limit = 
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true, // bypass idLookup's
      concurrent: true,         // improve query performance on a typical churning segmented search index
      compound: {
        filter: [               // filters are non-scoring, slightly more performant than scoring clauses
          { equals: { path: 'a', value: 42 } },
          { equals: { path: 'b', value: 42 } }
        ]
      },
      sort: {
        _id: 1
      }
    }
  },
  { $limit: 200 }
 ]
 report("Search with _id sort and limit", sorted_search_pipeline_with_limit);

 // Now let's build a compound B-tree index and see how it performs
 output("Creating compound { a: 1, b: 1, _id: 1 } index...")
 collection.createIndex({ a: 1, b: 1, _id: 1 });
 output("{ a: 1, b: 1, _id: 1 } compound index created")

 report("match with _id sort and limit with compound index", sorted_match_pipeline_with_limit);

 report('search on compound field including sort and limit',
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true,
      concurrent: true,
      equals: {
        path: 'ab_str',
        value: '42_42'
      },
      sort: {
        _id: 1
      }
    }
  },
  { $limit: 200 }
 ]);

 report('search on compound field with sort and no limit, using stored source for returned results',
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true,
      concurrent: true,
      equals: {
        path: 'ab_str',
        value: '42_42'
      },
      sort: {
        _id: 1
      }
    }
  }
 ]);

 report('search on compound field sort and no limit, using idLookups for returned results',
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: false,
      concurrent: true,
      equals: {
        path: 'ab_str',
        value: '42_42'
      },
      sort: {
        _id: 1
      }
    }
  }
 ]);

 report('match + count, numeric compound index', 
 [
  { $match: { a: 42, b: 42 }},
  { $count: 'num_found' }
 ]);

 report('searchMeta total count by filter equals numeric',
 [
  { 
    $searchMeta: {
      index: 'search_index',
      concurrent: true,
      compound: {
        filter: [
          { equals: { path: 'a', value: 42 } },
          { equals: { path: 'b', value: 42 } }
        ]
      },
      count: {
        type: "total"
      }
    }
  }
 ]);

 report('searchMeta total count on compound field',
 [
  {    
    $searchMeta: {
      index: 'search_index',
      concurrent: true,
      equals: { path: 'ab_str', value: '42_42' },
      count: {
        type: "total"
      }
    }
  }
 ]);

 report('search + count using filter equals int',
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true,
      concurrent: true,
      compound: {
        filter: [
          { equals: { path: 'a', value: 42 } },
          { equals: { path: 'b', value: 42 } }
        ]
      }
    }
  },
  { $count: 'num_found' }
 ]);

 report('search + count using compound field',
 [
  { 
    $search: {
      index: 'search_index',
      returnStoredSource: true,
      concurrent: true,
      equals: { path: 'ab_str', value: '42_42' }
    }
  },
  { $count: 'num_found' }
 ]);
diff --git a/setup.js b/setup.js
 load('common.js');

 print("Rebuilding collection of " + NUM_DOCS);

 try {
  collection.dropSearchIndex('search_index');
  print(" * dropped search_index");
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ a: 1 });    
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ b: 1 }); 
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ a_str: 1 });    
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ b_str: 1 }); 
 } catch (error) {
  // no worries
 }
 try {
  collection.dropIndex({ a: 1, b: 1 });
 } catch (error) {
  // no worries
 }
 print(" * dropped b-tree indexes");

 collection.deleteMany({});
 print(" * deleted all docs");

 print(" * generating " + NUM_DOCS + " docs into an in-memory array...");
 let batch = [];    
 for (let i = 0; i < NUM_DOCS; i++) {    
  let a = Math.floor(Math.random()*100);
  let b = Math.floor(Math.random()*100);
  batch.push({
   _id: i + 1,
    a: a,     
    b: b,
    ab_str: a.toString() + "_" + b.toString()
  });    
  if (batch.length === 100000) {
    print(" * inserting batch of " + batch.length + " documents...");
    collection.insertMany(batch);
    print(" * inserted " + batch.length + " documents");
    batch = []; // free the memory, does this help mongosh have more memory?
  }
 }
 if (batch.length > 0) {
  collection.insertMany(batch);
 }
 print(" * inserted " + NUM_DOCS + " documents");
 batch = []; // free the memory, does this help mongosh have more memory?

 // Search index setup moved to run.js
 // Setting up the search index now so it'll be built by the time we run the search tests
 // collection.createSearchIndex(
 //    "search_index",
 //    { 
 //      mappings: {
 //        dynamic: {
 //         typeSet: "default_mappings"
 //        }
 //      },
 //      typeSets: [
 //         {
 //             name: "default_mappings",
 //             types: [
 //                 { type: "number" },
 //                 { type: "token" }
 //             ]
 //         }
 //      ],
 //      storedSource: { include: ["_id", "a", "b", "ab_str"] }
 //    }
 // )
 // print(" * created search index");
	const NUM_DOCS = 20000000;
	const NUM_REPS = 50;

	use('demo'); // TODO
	collection = db.demo; // TODO

	function get_pipeline_stats(pipeline) {
	start = performance.now();
	cursor = db.demo.aggregate(pipeline);
	end = performance.now();
	agg_time = end - start;

	start = performance.now();
	docs = cursor.toArray();
	end = performance.now();
	array_time = end - start;

	// Explain output is generated outside the scope of the timings
	explain = db.demo.explain("executionStats").aggregate(pipeline);

	return {
	pipeline: pipeline,
	docs: docs,
	agg_time: agg_time,
	array_time: array_time,
	total_time: agg_time + array_time,
	explain: explain
	};
	}

	function collect_stats(pipeline) {
	stats = []
	for (let j = 0; j < NUM_REPS; j++) {
	stats.push(
	get_pipeline_stats(pipeline)
	);
	}
	return stats;
	}

	function calculate_stats(arr) {
	if (arr.length === 0) {
	return { minmax: undefined, average: undefined, min: undefined };
	}
	const min = Math.min(...arr);
	const max = Math.max(...arr);
	const sum = arr.reduce((total, currentVal) => total + currentVal, 0);
	const average = sum / arr.length;

	return { max, average, min };
	}


	// print("...from common.js")
	output('Full run: setup...');
	load('common.js');

	let START = performance.now();
	function output(text) {
	// output text with timer (since first output)
	print("\n[T+" + ((performance.now() - START)/1000) + "s] " + text);
	}

	function report(description, pipeline) {
	stats = collect_stats(pipeline);
	call_times = stats.map(item => (item.total_time));
	output(
	description + ':\n' +
	JSON.stringify(pipeline) + '\n------\n' +
	JSON.stringify(calculate_stats(call_times))
	);
	}

	output("Dropping search index...")
	try {
	collection.dropSearchIndex('search_index');
	output("Dropped search_index, sleeping 5 seconds to ensure it gets time to drop in the background");
	sleep(5000); // give the index time to drop before we try to create it again
	} catch (error) {
	// no worries
	}

	output('Dropping B-tree indexes...');
	try {
	collection.dropIndex({ a: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ b: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ a_str: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ b_str: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ a: 1, b: 1, _id: 1 });
	} catch (error) {
	// no worries
	}
	output("Dropped b-tree indexes");

	output('Counting total collection documents...');
	total_docs = collection.countDocuments();
	output('done counting total documents');

	// With no indexes this will be a sluggish start, but we're just counting to report...
	baseline_match_pipeline = [{ $match: { a: 42, b: 42 } }];

	// execute an initial query, as a warmup
	output('Executing warmup baseline query...');
	get_pipeline_stats(baseline_match_pipeline);
	output('...completed warmup baseline query');

	// Now collect the stats
	output('Executing baseline query...');
	baseline_results = get_pipeline_stats(baseline_match_pipeline);
	output('...completed baseline query');

	output("Baseline stats, no indexes:\n" +
	JSON.stringify(baseline_match_pipeline) + `\n` +
	baseline_results.total_time + "ms total time\n" +
	baseline_results.docs.length + " intersected documents out of " + total_docs
	);

	// Now let's create an index on `a` and see how that changes things.
	// Note that we don't have an index on `b`, so the query can't fully take advantage of
	// an index intersection, but it should be faster than the full
	// collection scan since it can at least use the index on `a` to narrow down the
	// candidates before filtering on `b`.
	output("Creating index { a: 1 }...")
	collection.createIndex({ a: 1 });
	output("{ a: 1 } index created");

	report("match with an index on `a`", baseline_match_pipeline);

	output("Creating index { b: 1 }...")
	collection.createIndex({ b: 1 });
	output("{ b: 1 } index created");

	report("match with an index on `a` and `b` independently", baseline_match_pipeline);

	report('match + count, independent indexes',
	[
	{ $match: { a: 42, b: 42 }},
	{ $count: 'num_found' }
	]);


	// Clearly our query is perfectly suited for a B-tree compound index. However, our original use case is to support a multitude of fields and combinations of them in queries
	// making it infeasible to create indexes for every combination due to write performance. However, we'll revisit a compound index on this scenario later to show how it really is
	// the best solution when the index intersection combinations are within write-performance acceptability.

	// Let's leverage the inherent index intersection "skip list" efficient multi-field index navigation provided by a search index.

	// Setting up the search index now so it'll be built by the time we run the search tests
	collection.createSearchIndex(
	"search_index",
	{
	mappings: {
	dynamic: {
	typeSet: "default_mappings"
	}
	},
	typeSets: [
	{
	name: "default_mappings",
	types: [
	{ type: "number" },
	{ type: "token" }
	]
	}
	],
	storedSource: { include: ["_id", "a", "b", "ab_str"] }
	}
	);
	output("Search index creation initiated. Polling for index readiness...");

	// poll for index query-ability
	index_ready = false
	while (!index_ready) {
	sleep(1000); // wait for 1 second before retrying
	try {
	test_docs = collection.aggregate([
	{
	$search: {
	index: 'search_index',
	compound: {
	filter: [
	{ equals: { path: 'a', value: 42 } },
	{ equals: { path: 'b', value: 42 } }
	]
	}
	}
	}
	]).toArray();
	output("Test query found " + test_docs.length + " docs");
	index_ready = true; // if the query succeeds, we can exit the loop
	} catch (error) {
	// try again
	}
	}

	output("Search index created and ready");

	// Here's the best search index query for this intersection case, no sort, returning stored source to bypass id lookups, except no limit.
	baseline_search_pipeline =
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true, // bypass idLookup's
	concurrent: true, // improve query performance on a typical churning segmented search index
	compound: {
	filter: [ // filters are non-scoring, slightly more performant than scoring clauses
	{ equals: { path: 'a', value: 42 } },
	{ equals: { path: 'b', value: 42 } }
	]
	}
	}
	}
	]

	report("search index query filtering on a and b independently", baseline_search_pipeline);

	// Let's add sorting to both baselines and see the results:
	sorted_baseline_match_pipeline = [
	{ $match: { a: 42, b: 42 }},
	{ $sort: { _id: 1 }}
	]
	report("Baseline match with _id sort with indexes already built only on a and b independently", sorted_baseline_match_pipeline);

	sorted_baseline_search_pipeline =
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true, // bypass idLookup's
	concurrent: true, // improve query performance on a typical churning segmented search index
	compound: {
	filter: [ // filters are non-scoring, slightly more performant than scoring clauses
	{ equals: { path: 'a', value: 42 } },
	{ equals: { path: 'b', value: 42 } }
	]
	},
	sort: {
	_id: 1
	}
	}
	}
	]
	report("Baseline search with _id sort", sorted_baseline_search_pipeline);

	// Now with limits on each...
	sorted_match_pipeline_with_limit = [
	{ $match: { a: 42, b: 42 }},
	{ $sort: { _id: 1 }},
	{ $limit: 200 }
	]
	report("Match with _id sort and limit. Indexes already built on a and b, independently, at this point", sorted_match_pipeline_with_limit);

	sorted_search_pipeline_with_limit =
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true, // bypass idLookup's
	concurrent: true, // improve query performance on a typical churning segmented search index
	compound: {
	filter: [ // filters are non-scoring, slightly more performant than scoring clauses
	{ equals: { path: 'a', value: 42 } },
	{ equals: { path: 'b', value: 42 } }
	]
	},
	sort: {
	_id: 1
	}
	}
	},
	{ $limit: 200 }
	]
	report("Search with _id sort and limit", sorted_search_pipeline_with_limit);

	// Now let's build a compound B-tree index and see how it performs
	output("Creating compound { a: 1, b: 1, _id: 1 } index...")
	collection.createIndex({ a: 1, b: 1, _id: 1 });
	output("{ a: 1, b: 1, _id: 1 } compound index created")

	report("match with _id sort and limit with compound index", sorted_match_pipeline_with_limit);

	report('search on compound field including sort and limit',
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true,
	concurrent: true,
	equals: {
	path: 'ab_str',
	value: '42_42'
	},
	sort: {
	_id: 1
	}
	}
	},
	{ $limit: 200 }
	]);

	report('search on compound field with sort and no limit, using stored source for returned results',
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true,
	concurrent: true,
	equals: {
	path: 'ab_str',
	value: '42_42'
	},
	sort: {
	_id: 1
	}
	}
	}
	]);

	report('search on compound field sort and no limit, using idLookups for returned results',
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: false,
	concurrent: true,
	equals: {
	path: 'ab_str',
	value: '42_42'
	},
	sort: {
	_id: 1
	}
	}
	}
	]);

	report('match + count, numeric compound index',
	[
	{ $match: { a: 42, b: 42 }},
	{ $count: 'num_found' }
	]);

	report('searchMeta total count by filter equals numeric',
	[
	{
	$searchMeta: {
	index: 'search_index',
	concurrent: true,
	compound: {
	filter: [
	{ equals: { path: 'a', value: 42 } },
	{ equals: { path: 'b', value: 42 } }
	]
	},
	count: {
	type: "total"
	}
	}
	}
	]);

	report('searchMeta total count on compound field',
	[
	{
	$searchMeta: {
	index: 'search_index',
	concurrent: true,
	equals: { path: 'ab_str', value: '42_42' },
	count: {
	type: "total"
	}
	}
	}
	]);

	report('search + count using filter equals int',
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true,
	concurrent: true,
	compound: {
	filter: [
	{ equals: { path: 'a', value: 42 } },
	{ equals: { path: 'b', value: 42 } }
	]
	}
	}
	},
	{ $count: 'num_found' }
	]);

	report('search + count using compound field',
	[
	{
	$search: {
	index: 'search_index',
	returnStoredSource: true,
	concurrent: true,
	equals: { path: 'ab_str', value: '42_42' }
	}
	},
	{ $count: 'num_found' }
	]);
	load('common.js');

	print("Rebuilding collection of " + NUM_DOCS);

	try {
	collection.dropSearchIndex('search_index');
	print(" * dropped search_index");
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ a: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ b: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ a_str: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ b_str: 1 });
	} catch (error) {
	// no worries
	}
	try {
	collection.dropIndex({ a: 1, b: 1 });
	} catch (error) {
	// no worries
	}
	print(" * dropped b-tree indexes");

	collection.deleteMany({});
	print(" * deleted all docs");

	print(" * generating " + NUM_DOCS + " docs into an in-memory array...");
	let batch = [];
	for (let i = 0; i < NUM_DOCS; i++) {
	let a = Math.floor(Math.random()*100);
	let b = Math.floor(Math.random()*100);
	batch.push({
	_id: i + 1,
	a: a,
	b: b,
	ab_str: a.toString() + "_" + b.toString()
	});
	if (batch.length === 100000) {
	print(" * inserting batch of " + batch.length + " documents...");
	collection.insertMany(batch);
	print(" * inserted " + batch.length + " documents");
	batch = []; // free the memory, does this help mongosh have more memory?
	}
	}
	if (batch.length > 0) {
	collection.insertMany(batch);
	}
	print(" * inserted " + NUM_DOCS + " documents");
	batch = []; // free the memory, does this help mongosh have more memory?

	// Search index setup moved to run.js
	// Setting up the search index now so it'll be built by the time we run the search tests
	// collection.createSearchIndex(
	// "search_index",
	// {
	// mappings: {
	// dynamic: {
	// typeSet: "default_mappings"
	// }
	// },
	// typeSets: [
	// {
	// name: "default_mappings",
	// types: [
	// { type: "number" },
	// { type: "token" }
	// ]
	// }
	// ],
	// storedSource: { include: ["_id", "a", "b", "ab_str"] }
	// }
	// )
	// print(" * created search index");