So, I have come across a lot of poor PHP base conversion implementations all over the internet that are unsuitable for, or incapable of, large base conversion on binary data. This really isn't a problem for most people in PHP until you start dealing with very large numbers that are sensitive to precision loss, as in cryptography. The following provides two implementations of base conversion that should be safe for large bases and binary data, for example, converting a base256 (binary string) to a base85 representation and back again.
Using GMP
You can use GMP to accomplish this, at the cost of two otherwise unnecessary bin<->hex conversions and of being limited to base62.
<?php
// 256 random bytes (not bits).
$data = openssl_random_pseudo_bytes(256);
// GMP only accepts textual input, so go binary -> hex -> GMP number -> base62 text.
$base62 = gmp_strval(gmp_init(bin2hex($data), 16), 62);
// gmp_strval() emits no leading zero digits, so the hex string can come back
// shorter than the input (or with an odd length, which hex2bin() rejects).
// Left-pad it to the original width before converting back to binary.
$decoded = hex2bin(str_pad(
    gmp_strval(gmp_init($base62, 62), 16),
    strlen($data) * 2,
    "0",
    STR_PAD_LEFT
));
var_dump( strcmp($decoded, $data) === 0 ); // true
Pure PHP
If you would like to move beyond base62 to base85, or want a slight performance improvement, you will need something like the following.
<?php
/**
* Divide, in place, a large number stored as a digit string and return the
* remainder.
*
* Each byte of $binary is treated as one digit in radix $base, most
* significant digit first. After the call, the bytes from $start onward hold
* the quotient in the same layout (possibly with leading zero digits).
*
* @param string &$binary Digit string; overwritten with the quotient from $start on.
* @param int $base Radix of the digits held in $binary (digits are assumed to be < $base).
* @param int $divisor Value to divide the number by.
* @param int $start Offset of the first digit to process; earlier bytes are left untouched.
*
* @return int The remainder of the division (0 when no digits are processed).
*/
function divmod(&$binary, $base, $divisor, $start = 0)
{
/** @var int $size */
$size = strlen($binary);
// Schoolbook long division from the most to the least significant digit.
$remainder = 0;
for ($i = $start; $i < $size; $i++) {
// Bring down the next digit: shift the running remainder up by one digit
// position in the source radix and add the digit's value (0-255).
$temp = ($remainder * $base) + ord($binary[$i]);
// Store the quotient digit. intdiv() keeps the value an integer; passing
// the float result of "/" to chr() is deprecated since PHP 8.1 and is a
// TypeError under strict_types.
$binary[$i] = chr(intdiv($temp, $divisor));
// Carry what is left over into the next digit.
$remainder = $temp % $divisor;
}
return $remainder;
}
/**
* Encode a big-endian binary string as base62 text.
*
* Leading NUL bytes are emitted as the same number of leading "0" characters;
* the remaining bytes are converted as one large number.
*
* @param string $binary
* @return string
*/
function encodeBase62($binary)
{
$alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
$radix = strlen($alphabet);
$length = strlen($binary);
// Number of leading NUL bytes; they carry no numeric value.
$leadingZeros = $length - strlen(ltrim($binary, "\0"));
// Digits are collected least significant first and reversed at the end.
$reversedDigits = "";
$offset = $leadingZeros;
while ($offset < $length) {
// Each division by the radix yields the next least significant digit
// and leaves the quotient in $binary.
$remainder = divmod($binary, 256, $radix, $offset);
$reversedDigits .= $alphabet[$remainder];
// Once the current top byte of the quotient is zero, move past it.
if ($binary[$offset] === "\0") {
$offset++;
}
}
return str_repeat("0", $leadingZeros) . strrev($reversedDigits);
}
/**
* Produce a large binary number from a base62 encoded string.
*
* Leading "0" characters are restored as the same number of leading NUL
* bytes; the remaining characters are converted as one large number.
*
* @param string $ascii
* @return string
*
* @throws OutOfBoundsException If $ascii contains a character outside the base62 alphabet.
*/
function decodeBase62($ascii)
{
$charMap = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
$base = strlen($charMap);
$size = strlen($ascii);
// Number of leading "0" characters; they map back to leading NUL bytes.
$start = $size - strlen(ltrim($ascii, "0"));
// Convert the ascii representation to a string of digit values (0-61).
$binary = "";
for ($i = $start; $i < $size; $i++) {
$byte = strpos($charMap, $ascii[$i]);
if ($byte === false) {
// Report both where the bad character is and what it is.
throw new OutOfBoundsException("Invalid encoding at offset {$i}: '{$ascii[$i]}'");
}
$binary .= chr($byte);
}
$size = strlen($binary);
$decode = "";
for ($i = 0; $i < $size; ) {
// Do long division from most to least significant digit; each remainder
// is the next least significant output byte.
$idx = divmod($binary, $base, 256, $i);
$decode = chr($idx) . $decode;
if (ord($binary[$i]) == 0) {
$i++; // Skip leading zeros produced by the long division.
}
}
// Dividing by 256 can clear more than one base62 digit per step, so the
// loop may emit surplus zero bytes at the front; drop them.
$decode = ltrim($decode, "\0");
// Restore the leading NUL bytes counted from the leading "0" characters.
$decode = str_repeat("\0", $start) . $decode;
return $decode;
}
// Round-trip demonstration on 256 random bytes (bytes, not bits).
$data = openssl_random_pseudo_bytes(256);
// Encode to base62 text, then decode back to the raw byte string.
$base62 = encodeBase62($data);
$decoded = decodeBase62($base62);
// The decoded bytes should match the input exactly.
var_dump(0 === strcmp($decoded, $data)); // true