Skip to content

Instantly share code, notes, and snippets.

@jmikola
Created August 10, 2012 17:35
Show Gist options
  • Save jmikola/3315992 to your computer and use it in GitHub Desktop.
Save jmikola/3315992 to your computer and use it in GitHub Desktop.
Test batch insertion of documents and referenced GridFS files
#!/usr/bin/env php
<?php
require_once 'config.php'; // mongodb-odm/tools/sandbox/config.php
use Doctrine\ODM\MongoDB\Mapping\Annotations as ODM;
/**
* Test batch insertion of documents and referenced GridFS files.
*
* For each test iteration, a number (i.e. batch size) of documents and files
* will be persisted and flushed. Time and peak memory usage will be recorded
* between tests, with the collections dropped and DocumentManager cleared
* between tests.
*
* CLI arguments determine the size of the initial and final batches, and the
* step to increase the batch size between iterations.
*/
if (4 !== $argc) {
printf("Usage: %s [start] [end] [step]\n", basename(__FILE__));
exit;
}
$test = function($iterations) use ($dm) {
for ($i = 0; $i < $iterations; ++$i) {
$image = new Image();
$image->name = 'foo';
$image->file = __FILE__;
$profile = new Profile();
$profile->name = 'bar';
$profile->image = $image;
$dm->persist($profile);
}
$dm->flush();
};
$units = array('b', 'k', 'm', 'g', 't', 'p');
for ($i = $argv[1]; $i <= $argv[2]; $i += $argv[3]) {
$dm->getDocumentCollection('Profile')->drop();
$dm->getDocumentCollection('Image')->drop();
$start = microtime(true);
$test($i);
$elapsedTime = microtime(true) - $start;
$dm->clear();
$size = memory_get_peak_usage(true);
$peakMem = round($size / pow(1024, ($_ = floor(log($size, 1024)))), 2).$units[$_];
printf("size: %d, time: %f, peak: %s\n", $i, $elapsedTime, $peakMem);
}
/** @ODM\Document(collection="profiles") */
class Profile
{
/** @ODM\Id */
public $id;
/** @ODM\String */
public $name;
/** @ODM\ReferenceOne(targetDocument="Image", cascade={"all"}) */
public $image;
}
/** @ODM\Document(collection="images") */
class Image
{
/** @ODM\Id */
public $id;
/** @ODM\String */
public $name;
/** @ODM\File */
public $file;
}
@jmikola
Copy link
Author

jmikola commented Aug 10, 2012

Results for batches in the early thousand range:

$ ./gridfs_batch.php 1000 5000 250
size: 1000, time: 1.272885, peak: 17m
size: 1250, time: 1.723116, peak: 21.5m
size: 1500, time: 1.789574, peak: 27m
size: 1750, time: 2.159052, peak: 31.75m
size: 2000, time: 2.668756, peak: 37m
size: 2250, time: 3.434415, peak: 42.5m
size: 2500, time: 3.754999, peak: 50m
size: 2750, time: 4.480971, peak: 56m
size: 3000, time: 4.785448, peak: 62m
size: 3250, time: 5.453854, peak: 68.5m
size: 3500, time: 6.070818, peak: 75.25m
size: 3750, time: 5.266147, peak: 82.25m
size: 4000, time: 6.976347, peak: 93.5m
size: 4250, time: 7.477715, peak: 101.5m
size: 4500, time: 8.117807, peak: 109.25m
size: 4750, time: 6.296180, peak: 117.25m
size: 5000, time: 8.855719, peak: 125.75m

And for a large 50k batch size, with mongostat output (starting with actual Mongo insertions three minutes after the test began):

$ date; ./gridfs_batch.php 50000 50000 10000
Fri Aug 10 13:46:35 EDT 2012
size: 50000, time: 282.066245, peak: 667m
$ mongostat
connected to: 127.0.0.1
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
     0      0      0      0       0       1       0  1.22g  8.74g  4.41g      0        0          0       0|0     0|0    62b     1k     3   13:49:41 
   383      0      0      0       0     385       0  1.22g  8.74g  4.41g      0      1.9          0       0|0     0|0   362k    36k     3   13:49:42 
  2085      0      0      0       0    2087       0  1.22g  8.74g  4.41g      0     14.3          0       0|0     0|0     1m   193k     3   13:49:43 
  2934      0      0      0       0    2935       0  1.22g  8.74g  4.41g      0     13.5          0       0|0     0|0     2m   271k     3   13:49:44 
  2709      0      0      0       0    2710       0  1.22g  8.74g  4.41g      0     13.8          0       0|0     0|1     2m   250k     3   13:49:45 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
  3849      0      0      0       0    3850       0  1.22g  8.74g  4.42g      0     13.5          0       0|0     0|1     3m   355k     3   13:49:46 
  3081      0      0      0       0    3082       0  1.22g  8.74g  4.42g      0     17.5          0       0|1     0|1     2m   284k     3   13:49:47 
  2748      0      0      0       0    2749       0  1.22g  8.74g  4.42g      0     13.5          0       0|0     0|0     2m   254k     3   13:49:48 
  2981      0      0      0       0    2981       0  1.22g  8.74g  4.42g      0     13.2          0       0|0     0|0     2m   275k     3   13:49:49 
  2684      0      0      0       0    2686       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     1|0     2m   248k     3   13:49:50 
  2614      0      0      0       0    2614       0  1.22g  8.74g  4.42g      0     17.8          0       0|0     0|0     2m   241k     3   13:49:51 
  2609      0      0      0       0    2611       0  1.22g  8.74g  4.41g      0     13.5          0       0|0     1|0     2m   241k     3   13:49:52 
  2652      0      0      0       0    2653       0  1.22g  8.74g  4.42g      0     13.2          0       0|0     0|0     2m   245k     3   13:49:53 
  2735      0      0      0       0    2736       0  1.22g  8.74g  4.41g      0     13.4          0       0|0     0|0     2m   253k     3   13:49:54 
  2712      0      0      0       0    2712       0  1.22g  8.74g  4.42g      0     12.6          0       0|0     0|0     2m   250k     3   13:49:55 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
  2693      0      0      0       0    2694       0  1.22g  8.74g  4.41g      0     13.4          0       0|0     0|0     2m   249k     3   13:49:56 
  2683      0      0      0       0    2685       0  1.22g  8.74g  4.42g      0     13.3          0       0|0     0|0     2m   248k     3   13:49:57 
  2685      0      0      0       0    2686       0  1.22g  8.74g  4.42g      0     13.2          0       0|0     0|1     2m   248k     3   13:49:58 
  2560      0      0      0       0    2561       0  1.22g  8.74g  4.42g      0     17.5          0       0|0     1|0     2m   237k     3   13:49:59 
  2816      0      0      0       0    2816       0  1.22g  8.74g  4.42g      0     13.6          0       0|0     0|0     2m   260k     3   13:50:00 
  2660      0      0      0       0    2661       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     0|0     2m   246k     3   13:50:01 
  2632      0      0      0       0    2634       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     0|1     2m   243k     3   13:50:02 
  2568      0      0      0       0    2569       0  1.22g  8.74g  4.42g      0     17.8          0       0|0     0|0     2m   237k     3   13:50:03 
  2682      0      0      0       0    2683       0  1.22g  8.74g  4.41g      0     13.3          0       0|0     0|1     2m   248k     3   13:50:04 
  2712      0      0      0       0    2712       0  1.22g  8.74g  4.42g      0     13.3          0       0|0     0|0     2m   250k     3   13:50:05 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
  2651      0      0      0       0    2652       0  1.22g  8.74g  4.41g      0     13.4          0       0|0     0|0     2m   245k     3   13:50:06 
  5575      0      0      0       0    5577       0  1.22g  8.74g  4.42g      0     18.4          0       0|0     0|1     5m   514k     3   13:50:07 
  2946      0      0      0       0    2946       0  1.22g  8.74g  4.41g      0     13.5          0       0|0     0|0     2m   272k     3   13:50:08 
  2600      0      0      0       0    2601       0  1.22g  8.74g  4.42g      0     13.3          0       0|0     0|1     2m   240k     3   13:50:09 
  2725      0      0      0       0    2726       0  1.22g  8.74g  4.42g      0     13.5          0       0|0     0|0     2m   252k     3   13:50:10 
  2508      0      0      0       0    2510       0  1.22g  8.74g  4.42g      0     12.8          0       0|0     0|0     2m   232k     3   13:50:11 
  2704      0      0      0       0    2705       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     0|0     2m   250k     3   13:50:12 
  2711      0      0      0       0    2712       0  1.22g  8.74g  4.42g      0     13.3          0       0|0     0|0     2m   250k     3   13:50:13 
  2751      0      0      0       0    2752       0  1.22g  8.74g  4.42g      0     13.3          0       0|0     0|0     2m   254k     3   13:50:14 
  2506      0      0      0       0    2507       0  1.22g  8.74g  4.42g      0     17.8          0       0|0     0|0     2m   232k     3   13:50:15 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
  2720      0      0      0       0    2720       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     0|0     2m   251k     3   13:50:16 
  2705      0      0      0       0    2706       0  1.22g  8.74g  4.42g      0     13.3          0       0|0     0|0     2m   250k     3   13:50:17 
  2677      0      0      0       0    2679       0  1.22g  8.74g  4.41g      0     13.5          0       0|0     0|0     2m   247k     3   13:50:18 
  2863      0      0      0       0    2864       1  1.22g  8.74g  4.42g      0     12.9          0       0|0     1|0     2m   264k     3   13:50:19 
  2681      0      0      0       0    2682       0  1.22g  8.74g  4.41g      0     13.4          0       0|0     1|0     2m   248k     3   13:50:20 
  2758      0      0      0       0    2759       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     1|0     2m   255k     3   13:50:21 
  2787      0      0      0       0    2787       0  1.22g  8.74g  4.42g      0     13.6          0       0|0     0|0     2m   257k     3   13:50:22 
  3341      0      0      0       0    3343       0  1.22g  8.74g  4.42g      0     17.9          0       0|0     0|0     3m   308k     3   13:50:23 
  5424      0      0      0       0    5424       0  1.22g  8.74g  4.42g      0     13.4          0       0|0     0|0     5m   500k     3   13:50:24 
  5644      0      0      0       0    5646       0  1.22g  8.74g  4.42g      0     13.6          0       0|0     1|0     5m   520k     3   13:50:25 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
  3504      0      0      0       0    3505       0  1.22g  8.74g  4.42g      0     13.6          0       0|0     1|0     3m   323k     3   13:50:26 
  3216      0      0      0       0    3217       0  1.22g  8.74g  4.42g      0       13          0       0|0     1|0     3m   297k     3   13:50:27 
  5125      0      0      0       0    5125       0  1.22g  8.74g  4.42g      0     13.5          0       0|0     0|1     4m   472k     3   13:50:28 
  2673      0      0      0       0    2674       0  1.22g  8.74g  4.42g      0     13.7          0       0|0     0|0     2m   247k     3   13:50:29 
  4795      0      0      0       0    4797       0  1.22g  8.74g  4.42g      0     13.7          0       0|0     0|0     4m   442k     3   13:50:30 
  2425      0      0      0       0    2426       0  1.22g  8.74g  4.41g      0     18.1          0       0|0     0|0     2m   224k     3   13:50:31 
  1518      0      0      0       0    1519       0  1.22g  8.74g  4.42g      0      6.9          0       0|0     0|0     1m   141k     3   13:50:32 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:33 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:34 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:35 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:36 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:37 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:38 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:39 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:40 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:41 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:42 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:43 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:44 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:45 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:46 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:47 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:48 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:49 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:50 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:51 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:52 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:53 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:54 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:55 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:56 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:57 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:58 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:50:59 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:00 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:01 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:02 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:03 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:04 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:05 
insert  query update delete getmore command flushes mapped  vsize    res faults locked % idx miss %     qr|qw   ar|aw  netIn netOut  conn       time 
 50000      0      0      0       0       2       0  1.22g  8.74g  4.42g      1     26.6          0       0|0     0|0     5m     1k     3   13:51:06 
     0      0      0      0       0       1       0  1.22g  8.74g  4.42g      0        0          0       0|0     0|0    62b     1k     3   13:51:07

@jmikola
Copy link
Author

jmikola commented Aug 10, 2012

To add some context to this gist, it's related to: https://jira.mongodb.org/browse/PHP-453

Thanks to a one-line optimization in UnitOfWork::computeAssociationChanges() (doctrine/mongodb-odm@6653a60), execution time is more than halved:

$ ./gridfs_batch.php 50000 50000 10000
size: 50000, time: 128.727638, peak: 667m

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment