Last active
December 16, 2021 22:03
-
-
Save BenMorel/78f742356391d41c91d1d733f47dcb13 to your computer and use it in GitHub Desktop.
(Medium) BulkInserter benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Benchmarks BulkInserter at various batch sizes. | |
* | |
* Before running this script, make sure to: | |
* | |
* - set your PDO parameters in this file | |
* - run composer install | |
* - import setup.sql into your database | |
* - uncompress z-employees.csv.xz and rename z-employees.csv to employees.csv | |
* (the file has been named this way to appear last in the gist) | |
* | |
* This script will output results in real time, and append results to a results.tsv file. | |
* Results can be aggregated using the results.php script. | |
* | |
* Test data is a fabricated 1.2M employees list extracted from this repo: | |
 * https://github.com/datacharmer/test_db
* | |
* The original employees table contains 300k entries, it has been duplicated 4 times. | |
* | |
* The test data is fully loaded into memory to ensure that loading it is not a bottleneck. | |
* Therefore this script requires a lot of memory to run, be sure that you have at least 1 GB free RAM. | |
*/ | |
use Brick\Db\Bulk\BulkInserter; | |
require 'vendor/autoload.php'; | |
// The whole CSV is held in memory, so allow a generous limit,
// and let the benchmark run for as long as it needs.
ini_set('memory_limit', '1G');
set_time_limit(0);

// Batch sizes to benchmark: 1-9, 10-90, 100-900, 1000-9000, then 10000.
$batchSizes = [
    1, 2, 3, 4, 5, 6, 7, 8, 9,
    10, 20, 30, 40, 50, 60, 70, 80, 90,
    100, 200, 300, 400, 500, 600, 700, 800, 900,
    1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000,
    10000,
];

// How many times the full batch-size sweep is repeated.
$iterations = 10;

// PDO connection — adjust host, database and credentials as needed.
// Real prepared statements (no emulation) and exceptions on error.
$pdo = new PDO('mysql:host=localhost;dbname=bench', 'root', '', [
    PDO::ATTR_ERRMODE          => PDO::ERRMODE_EXCEPTION,
    PDO::ATTR_EMULATE_PREPARES => false,
]);
// Load the employees data fully into memory so that reading the CSV is
// never a bottleneck during the timed insert runs.
$employees = [];

$fp = fopen('employees.csv', 'rb');

if ($fp === false) {
    die("Could not open employees.csv\n");
}

echo 'Loading test data ';

$n = 0;

// Note: fgetcsv() returns false at EOF (and on error), so loop on its
// return value directly; the previous feof()-guarded loop could pass
// false to count(), which is a TypeError on PHP 8.
while (($employee = fgetcsv($fp)) !== false) {
    // Blank lines come back as a single-element array — skip them.
    if (count($employee) === 1) {
        continue;
    }

    // emp_no is numeric; cast it so it is bound as an int on insert.
    $employee[0] = (int) $employee[0];
    $employees[] = $employee;

    // Progress marker every 100k rows.
    if (++$n % 100000 === 0) {
        echo '.';
    }
}

echo "\n";

fclose($fp);

$employeeCount = count($employees);

// Column order must match the CSV column order.
$fields = ['emp_no', 'birth_date', 'first_name', 'last_name', 'gender', 'hire_date'];
// Run the benchmark: each iteration sweeps every configured batch size.
for ($run = 1; $run <= $iterations; $run++) {
    foreach ($batchSizes as $batchSize) {
        echo "[Iteration $run] Batch size = $batchSize ... ";

        // Start from an empty table so every run inserts the same data set.
        $pdo->exec('TRUNCATE TABLE employees');

        $inserter = new BulkInserter($pdo, 'employees', $fields, $batchSize);

        // Time the whole insert, wrapped in a single transaction.
        $start = microtime(true);

        $pdo->beginTransaction();

        foreach ($employees as $employee) {
            $inserter->queue(...$employee);
        }

        $inserter->flush();
        $pdo->commit();

        $elapsed = microtime(true) - $start;

        // Sanity check: every source row must have reached the table.
        $inserted = $pdo->query('SELECT COUNT(*) FROM employees')->fetchColumn();

        if ($inserted != $employeeCount) {
            die("Expected $employeeCount records in employees table, found $inserted\n");
        }

        // Rows per second, reported live and appended for results.php.
        $rps = (int) ($employeeCount / $elapsed);

        echo "RPS = $rps\n";

        file_put_contents('results.tsv', "$batchSize\t$rps\n", FILE_APPEND);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
    "require": {
        "brick/db": "dev-master"
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

/**
 * Aggregates results.tsv: prints one tab-separated row per batch size with
 * the average and maximum RPS observed across all recorded runs.
 */

$values = [];

foreach (explode("\n", file_get_contents('results.tsv')) as $row) {
    if ($row === '') {
        continue;
    }

    $columns = explode("\t", $row);

    // Each valid row is exactly: <batch size> TAB <rps>, both unsigned ints.
    if (count($columns) !== 2 || ! ctype_digit($columns[0]) || ! ctype_digit($columns[1])) {
        echo "Bad line: $row\n";
        continue;
    }

    [$batchSize, $rps] = $columns;

    $values[(int) $batchSize][] = (int) $rps;
}

// Report in ascending batch-size order.
ksort($values);

foreach ($values as $batchSize => $samples) {
    echo $batchSize . "\t";
    echo round(array_sum($samples) / count($samples)) . "\t";
    echo max($samples) . "\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Benchmark target table. Column layout matches the employees.csv test data:
-- emp_no, birth_date, first_name, last_name, gender, hire_date.
CREATE TABLE employees (
    emp_no INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    birth_date DATE NOT NULL,
    first_name VARCHAR(14) NOT NULL,
    last_name VARCHAR(16) NOT NULL,
    gender ENUM('M','F') NOT NULL,
    hire_date DATE NOT NULL,
    PRIMARY KEY (emp_no)
) ENGINE=InnoDB CHARSET=latin1;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment