Created
October 22, 2011 15:35
-
-
Save rodneyrehm/1306118 to your computer and use it in GitHub Desktop.
PHP: mb_range() - Unicode compatible range('A', 'Z')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Make UTF-8 the default encoding for mb_* calls that do not pass one explicitly.
mb_internal_encoding('UTF-8');
/**
 * Multibyte string compatible range('A', 'Z').
 *
 * Walks the Unicode code points from $start to $end one step at a time
 * (ascending or descending, depending on which endpoint is larger) and
 * returns every character on the way as a UTF-8 string.
 *
 * @param string $start Single UTF-8 character to start from (included)
 * @param string $end Single UTF-8 character to end with (included)
 * @return array list of UTF-8 characters in code point order from $start to $end
 * @throws InvalidArgumentException if the endpoints differ and either of them
 *         is not exactly one valid UTF-8 character
 * @author Rodney Rehm
 */
function mb_range($start, $end) {
    // if start and end are the same, well, there's nothing to do.
    // Strict comparison: '1' and '1.0' are different strings, not the same endpoint.
    if ($start === $end) {
        return array($start);
    }

    // The code points are read from the concatenated endpoints below, so each
    // endpoint must be exactly one character; otherwise the wrong code points
    // would be picked (e.g. 'AB' would supply both the start and the end).
    foreach (array('start' => $start, 'end' => $end) as $name => $char) {
        if (!mb_check_encoding($char, 'UTF-8') || mb_strlen($char, 'UTF-8') !== 1) {
            throw new InvalidArgumentException(
                sprintf('mb_range(): $%s must be exactly one UTF-8 character', $name)
            );
        }
    }

    // UTF-32BE stores each code point as one big-endian 32-bit integer, which
    // unpack("N*") returns as a 1-based array: [1 => start, 2 => end].
    list(, $first, $last) = unpack('N*', mb_convert_encoding($start . $end, 'UTF-32BE', 'UTF-8'));

    // determine movement direction
    $step = $first < $last ? 1 : -1;

    $result = array();
    for ($code = $first; $code !== $last; $code += $step) {
        $result[] = mb_convert_encoding(pack('N', $code), 'UTF-8', 'UTF-32BE');
    }
    // the end character is appended as given instead of being re-encoded
    $result[] = $end;

    return $result;
}
// Demo: dump a Devanagari range and two Latin ranges.
var_dump(mb_range('क', 'म'));
var_dump(mb_range('A', 'C'));
var_dump(mb_range('A', 'Z'));
// beware, ä…ö are not a sequence you'd expect: the range follows code points
// (U+00E4 to U+00F6), so it also contains å, æ, ç, è, … ô, õ.
var_dump(mb_range('ä', 'ö'));
@tegardazzly
How to use?
You can use it to encode decimal numbers in a base N larger than 64.
For example, the code below shortens an MD5 hash to 10 Chinese characters.
<?php
include 'mb_range.php';
mb_internal_encoding('UTF-8');

// Sample usage: print the input, its MD5 hex digest, and the digest re-encoded
// with CJK digits (once directly, once through the mb_md5() shortcut).
$s = 'This is a sample string to be hashed.';
echo $s, PHP_EOL;                              // 'This is a sample string to be hashed.'
echo md5($s), PHP_EOL;                         // 'baad33e1e97f316b9750c27c86bf64d6'
echo mb_base_encode(hexdec(md5($s))), PHP_EOL; // '䙔倁屩劷䋾彨䏔䂌䤘剒'
echo mb_md5($s), PHP_EOL;                      // '䙔倁屩劷䋾彨䏔䂌䤘剒'
/**
 * Returns base Nth encoded string from decimal number input.
 *
 * The digit alphabet is the concatenation of three code point ranges
 * (U+4000 - U+4DB5, U+4E00 - U+4FFF, U+5000 - U+5FFF); the base N is the
 * number of characters in that alphabet. Digits are emitted most significant
 * first.
 *
 * NOTE(review): the arithmetic uses fmod() and floor(), i.e. float math, so
 * inputs too large to be represented exactly as a float presumably lose their
 * low-order digits - confirm whether that matters for your use.
 *
 * @param integer $number Decimal number to encode (int or float)
 * @return string|integer Encoded string, or the integer 0 when $number is not
 *                        greater than 0 (no digit is produced in that case)
 */
function mb_base_encode($number)
{
    // The alphabet never changes, so build it on the first call only instead
    // of regenerating thousands of characters on every call.
    static $char = null;
    if ($char === null) {
        $char = array_merge(
            mb_range('䀀', '䶵'), // U+4000 - U+4DB5
            mb_range('一', '俿'), // U+4E00 - U+4FFF
            mb_range('倀', '忿')  // U+5000 - U+5FFF
            // REF: http://www.utf8-chartable.com/unicode-utf8-table.pl?start=16384&number=1024
        );
    }

    $base = count($char);
    $result = "";

    while ($number > 0) {
        // fmod() returns a float; cast it explicitly rather than relying on
        // the implicit float-to-int conversion of array keys.
        $result = $char[(int) fmod($number, $base)] . $result;
        $number = floor($number / $base);
    }

    // Kept for backward compatibility: an empty result is reported as integer 0.
    return ($result === "") ? 0 : $result;
}
/**
 * Hashes $string with MD5 and returns the digest re-encoded by mb_base_encode().
 *
 * NOTE(review): hexdec() yields a float for values beyond the platform integer
 * range, so the 128-bit digest presumably loses low-order precision before it
 * is encoded - confirm that the resulting collision risk is acceptable.
 *
 * @param string $string Text to hash
 * @return string|integer Whatever mb_base_encode() returns for the digest
 */
function mb_md5($string)
{
    $hexDigest = md5($string);
    $digestValue = hexdec($hexDigest);

    return mb_base_encode($digestValue);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to use?