Created
January 25, 2024 05:20
-
-
Save divinity76/5729472dd5d77e94cd0acb245aac2226 to your computer and use it in GitHub Desktop.
blake3 portable vs optimized
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
declare(strict_types=1); | |
/**
 * Better version of shell_exec(): runs a command through proc_open() and
 * exposes stdin, stdout, stderr and the OS-level exit code.
 *
 * stdout and stderr of the child are redirected to temporary files rather
 * than pipes, so the child can never block on a full pipe while this
 * function is still busy feeding stdin.
 *
 * @param string $cmd
 *            command to execute
 * @param string $stdin
 *            (optional) data to send to stdin, binary data is supported.
 * @param string|null $stdout
 *            (optional, output) receives the stdout data generated by cmd
 * @param string|null $stderr
 *            (optional, output) receives the stderr data generated by cmd
 * @param bool $print_std
 *            (optional, default false) if you want stdout+stderr to be printed while it's running,
 *            set this to true. (useful for long-running commands)
 * @return int exit code of the command
 * @throws RuntimeException if the temporary files cannot be created or the process cannot be started
 */
function hhb_exec(string $cmd, string $stdin = "", ?string &$stdout = null, ?string &$stderr = null, bool $print_std = false): int
{
    $stdouth = tmpfile();
    $stderrh = tmpfile();
    if ($stdouth === false || $stderrh === false) {
        // Release whichever handle was created before bailing out.
        if ($stdouth !== false) {
            fclose($stdouth);
        }
        if ($stderrh !== false) {
            fclose($stderrh);
        }
        throw new RuntimeException("hhb_exec: could not create temporary files for stdout/stderr");
    }
    $descriptorspec = array(
        0 => array(
            "pipe",
            "rb"
        ), // stdin
        1 => array(
            "file",
            stream_get_meta_data($stdouth)['uri'],
            'ab'
        ),
        2 => array(
            "file",
            stream_get_meta_data($stderrh)['uri'],
            'ab'
        )
    );
    $pipes = array();
    $proc = proc_open($cmd, $descriptorspec, $pipes);
    if ($proc === false) {
        fclose($stdouth);
        fclose($stderrh);
        throw new RuntimeException("hhb_exec: could not start command: $cmd");
    }
    // Feed stdin in as many chunks as the pipe accepts.
    $total = strlen($stdin);
    $offset = 0;
    while ($offset < $total) {
        $written_now = fwrite($pipes[0], substr($stdin, $offset));
        if ($written_now === false || $written_now < 1) {
            // Best effort: the child may legitimately have closed its stdin early.
            break;
        }
        $offset += $written_now;
    }
    fclose($pipes[0]);
    unset($stdin, $pipes[0]);
    if (! $print_std) {
        $proc_ret = proc_close($proc); // this line will stall until the process has exited.
        $captured = stream_get_contents($stdouth);
        $stdout = ($captured === false) ? "" : $captured;
        $captured = stream_get_contents($stderrh);
        $stderr = ($captured === false) ? "" : $captured;
    } else {
        $stdout = "";
        $stderr = "";
        stream_set_blocking($stdouth, false);
        stream_set_blocking($stderrh, false);
        // Drains whatever the child has appended to the temp files so far,
        // echoes it, and reports whether anything new was seen.
        $fetchstd = function () use (&$stdout, &$stderr, $stdouth, $stderrh): bool {
            $ret = false;
            $tmp = stream_get_contents($stdouth);
            if (is_string($tmp) && strlen($tmp) > 0) {
                $ret = true;
                $stdout .= $tmp;
                fwrite(STDOUT, $tmp);
            }
            $tmp = stream_get_contents($stderrh);
            if (is_string($tmp) && strlen($tmp) > 0) {
                $ret = true;
                $stderr .= $tmp;
                fwrite(STDERR, $tmp);
            }
            return $ret;
        };
        while (($status = proc_get_status($proc))["running"]) {
            if (! $fetchstd()) {
                // nothing new: back off for 100 ms before polling again
                usleep(100 * 1000);
            }
        }
        // Take the exit code from the status that first reported "not running";
        // proc_close() can no longer report it once the status has been collected.
        $proc_ret = $status["exitcode"];
        proc_close($proc);
        // Pick up anything written between the last poll and process exit.
        $fetchstd();
    }
    fclose($stdouth);
    fclose($stderrh);
    return $proc_ret;
}
/**
 * Echoes a command, runs it through hhb_exec() with live output enabled,
 * and throws if it exits with a non-zero status.
 *
 * @param string $cmd command line to run
 * @param string|null $stdout (output) receives the command's stdout
 * @param string|null $stderr (output) receives the command's stderr
 * @throws Exception when the command's exit code is not 0
 */
function exec2(string $cmd, ?string &$stdout = null, ?string &$stderr = null): void
{
    echo $cmd, "\n";
    $ret = hhb_exec($cmd, "", $stdout, $stderr, true);
    if ($ret === 0) {
        return;
    }
    throw new Exception("Error: $ret: $cmd");
}
/**
 * Runs a (possibly multi-line) bash script by handing it to
 * `/bin/bash -c` as a single shell-escaped argument, via exec2().
 *
 * @param string $cmds bash script text
 */
function exec3(string $cmds): void
{
    $quoted = escapeshellarg($cmds);
    exec2("/bin/bash -c " . $quoted);
}
/**
 * Converts the time taken to process a payload into a throughput figure.
 *
 * "MB" here is binary (1 MB = 1024 * 1024 bytes), matching the original
 * 16 KB / 1024 conversion.
 *
 * @param int $timeInNanoseconds time taken to process the payload, in nanoseconds; must be > 0
 * @param int $sizeInBytes payload size in bytes (defaults to 16 KB, the size hashed by the benchmark)
 * @return float throughput in MB per second
 * @throws InvalidArgumentException if the time is not positive or the size is negative
 */
function calculateThroughputMBPS(int $timeInNanoseconds, int $sizeInBytes = 16 * 1024): float
{
    if ($timeInNanoseconds <= 0) {
        // A zero or negative duration would divide by zero or yield a negative throughput.
        throw new InvalidArgumentException("timeInNanoseconds must be positive, got $timeInNanoseconds");
    }
    if ($sizeInBytes < 0) {
        throw new InvalidArgumentException("sizeInBytes must not be negative, got $sizeInBytes");
    }
    $sizeInMB = $sizeInBytes / (1024 * 1024); // bytes -> MB (16384 bytes = 0.015625 MB)
    $timeInSeconds = $timeInNanoseconds / 1000000000; // nanoseconds -> seconds
    return (float) ($sizeInMB / $timeInSeconds);
}
// Report and display everything, and promote every PHP warning/notice that
// the current error_reporting() mask covers into an ErrorException.
ini_set("display_errors", "1");
error_reporting(E_ALL);
set_error_handler(function ($severity, $message, $file, $line) {
    $isReported = (error_reporting() & $severity) !== 0;
    if (!$isReported) {
        // Masked out (e.g. by the @ operator): stay silent, as before.
        return;
    }
    throw new ErrorException($message, 0, $severity, $file, $line);
});
// Fetch the upstream BLAKE3 C sources once; later runs reuse the directory.
if (!is_dir("upstream_blake3")) {
    exec3(
        <<<'CMD'
# Abort on the first failing command. Without this, a failed clone or cd would
# let the "rm -rf" lines below run in the caller's working directory instead.
set -eu
# fancy way of just fetching the "c" folder (the only thing we want)
git clone --branch '1.5.0' -n --depth=1 --filter=tree:0 'https://github.com/BLAKE3-team/BLAKE3.git' 'upstream_blake3'
cd upstream_blake3
git sparse-checkout set --no-cone c
git checkout
rm -rf .git
cd c
# some stuff we don't need
rm -rf blake3_c_rust_bindings test.py example.c main.c Makefile.testing CMakeLists.txt blake3-config.cmake.in README.md .gitignore
CMD
    );
}
// Number of timed hash runs per build; ./test prints the fastest single run.
$iterations = 999;
// EXT_HASH_BLAKE3_SOURCES="hash_blake3.c blake3/upstream_blake3/c/blake3.c blake3/upstream_blake3/c/blake3_dispatch.c blake3/upstream_blake3/c/blake3_portable.c"
// Build variants: label => gcc command producing ./test for that instruction set.
$tests = array(
    "O2-portable" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-portable-march" => "gcc -O2 -march=native -mtune=native -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-sse2" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-sse41" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2",
    "O2-avx2" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S -DBLAKE3_NO_AVX512",
    "O2-avx512" => "gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S",
);
$results = array();
foreach ($tests as $test_name => $cmd) {
    $stdout = null;
    $stderr = null;
    exec2($cmd);
    exec2("./test $iterations", $stdout, $stderr);
    $stdout = trim((string) $stdout);
    $stdout_parsed = filter_var($stdout, FILTER_VALIDATE_INT);
    if ($stdout_parsed === false) {
        throw new Exception("Error: could not parse $stdout as int");
    }
    // ./test prints its best time in NANOseconds, which is also the unit
    // calculateThroughputMBPS() expects.
    $nanoseconds_for_16_kib = $stdout_parsed;
    $mb_per_second = calculateThroughputMBPS($nanoseconds_for_16_kib);
    $results[$test_name] = [
        "nanoseconds_for_16_kib" => $nanoseconds_for_16_kib,
        "mb_per_second" => $mb_per_second,
    ];
}
// Slowest first, fastest last; keys (build labels) are preserved.
uasort($results, function (array $a, array $b): int {
    return $a["mb_per_second"] <=> $b["mb_per_second"];
});
var_dump($results);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ time php b3instructions.php | |
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 | |
./test 999 | |
13876 | |
gcc -O2 -march=native -mtune=native -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 | |
./test 999 | |
29295 | |
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 | |
./test 999 | |
4969 | |
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S -DBLAKE3_NO_AVX512 -DBLAKE3_NO_AVX2 | |
./test 999 | |
4688 | |
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S -DBLAKE3_NO_AVX512 | |
./test 999 | |
2384 | |
gcc -O2 -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S | |
./test 999 | |
1753 | |
array(6) { | |
["O2-portable-march"]=> | |
array(2) { | |
["microseconds_for_16_kib"]=> | |
int(29295) | |
["mb_per_second"]=> | |
float(533.3674688513398) | |
} | |
["O2-portable"]=> | |
array(2) { | |
["microseconds_for_16_kib"]=> | |
int(13876) | |
["mb_per_second"]=> | |
float(1126.0449697319111) | |
} | |
["O2-sse2"]=> | |
array(2) { | |
["microseconds_for_16_kib"]=> | |
int(4969) | |
["mb_per_second"]=> | |
float(3144.4958744214127) | |
} | |
["O2-sse41"]=> | |
array(2) { | |
["microseconds_for_16_kib"]=> | |
int(4688) | |
["mb_per_second"]=> | |
float(3332.977815699659) | |
} | |
["O2-avx2"]=> | |
array(2) { | |
["microseconds_for_16_kib"]=> | |
int(2384) | |
["mb_per_second"]=> | |
float(6554.1107382550335) | |
} | |
["O2-avx512"]=> | |
array(2) { | |
["microseconds_for_16_kib"]=> | |
int(1753) | |
["mb_per_second"]=> | |
float(8913.291500285226) | |
} | |
} | |
real 0m3.017s | |
user 0m1.913s | |
sys 0m0.105s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// compile: gcc -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c -DBLAKE3_NO_SSE2 -DBLAKE3_NO_AVX2 -DBLAKE3_NO_AVX512 -DBLAKE3_NO_NEON -DBLAKE3_NO_SSE41 | |
// gcc -o test test.c upstream_blake3/c/blake3.c upstream_blake3/c/blake3_dispatch.c upstream_blake3/c/blake3_portable.c upstream_blake3/c/blake3_sse2_x86-64_unix.S upstream_blake3/c/blake3_sse41_x86-64_unix.S upstream_blake3/c/blake3_avx2_x86-64_unix.S upstream_blake3/c/blake3_avx512_x86-64_unix.S | |
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include "upstream_blake3/c/blake3.h" | |
#include <sys/time.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <time.h> | |
int64_t nanoseconds() | |
{ | |
struct timespec ts; | |
clock_gettime(CLOCK_REALTIME, &ts); | |
return (int64_t)ts.tv_sec * 1000000000 + (int64_t)ts.tv_nsec; | |
} | |
int main(int argc, char *argv[]) | |
{ | |
if (argc != 2) | |
{ | |
printf("Usage: %s <iterations>\n", argv[0]); | |
return 1; | |
} | |
int iterations = atoi(argv[1]); | |
char teststr[16 * 1024] = "Hello World!"; // 16kb is an important size: it's the size of the TLS record buffer. | |
int64_t best = INT64_MAX; | |
for (int i = 0; i < iterations; i++) | |
{ | |
int64_t start = nanoseconds(); | |
blake3_hasher hasher; | |
blake3_hasher_init(&hasher); | |
blake3_hasher_update(&hasher, teststr, sizeof(teststr)); | |
uint8_t output[BLAKE3_OUT_LEN]; | |
blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN); | |
int64_t end = nanoseconds(); | |
int64_t elapsed = end - start; | |
if (elapsed < best) | |
{ | |
best = elapsed; | |
} | |
} | |
//printf("Best time: %ld nanoseconds\n", best); | |
printf("%ld\n", best); | |
printf("\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment