Last active
December 15, 2015 11:49
-
-
Save thekid/5256060 to your computer and use it in GitHub Desktop.
The iconv and mbstring implementations of the `String` class used for comparison in xp-framework/xp-framework#63
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /* This class is part of the XP framework | |
| * | |
| * $Id$ | |
| */ | |
| uses('lang.types.Character', 'lang.types.Bytes'); | |
// Pick the String implementation to load: the iconv-based variant is the
// fallback when the mbstring extension is not available. The name of the
// chosen file is written to STDERR so a run shows which variant it used.
if (!extension_loaded('mbstring')) {
  fputs(STDERR, "String.iconv.partial.php\n");
  require(__DIR__.DIRECTORY_SEPARATOR.'String.iconv.partial.php');
} else {
  fputs(STDERR, "String.mbstring.partial.php\n");
  require(__DIR__.DIRECTORY_SEPARATOR.'String.mbstring.partial.php');
}
| ?> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
define('STR_ENC', 'utf-8');

/**
 * Represents a string. The text is kept in a byte buffer encoded in
 * STR_ENC together with its length in characters. In this variant all
 * character-based operations are carried out by the iconv extension.
 *
 * @ext      iconv
 * @test     xp://net.xp_framework.unittest.core.types.StringTest
 * @purpose  Wrapper type
 */
class String extends Object implements ArrayAccess {
  protected
    $buffer= '',
    $length= 0;

  public static $EMPTY = NULL;

  /**
   * Initializes the shared empty instance in self::$EMPTY
   *
   */
  static function __static() {
    self::$EMPTY= new self('', STR_ENC);
  }

  /**
   * Convert an argument to a string in internal encoding (STR_ENC).
   * Strings and Characters are taken over as they already are in internal
   * encoding, PHP strings and Bytes are converted from the given charset,
   * anything else is cast to string.
   *
   * @param   var arg
   * @param   string charset default NULL if omitted, iconv's input_encoding is used
   * @return  string
   * @throws  lang.FormatException in case a conversion error occurs
   */
  protected function asIntern($arg, $charset= NULL) {
    if ($arg instanceof self) {
      return $arg->buffer;
    } else if ($arg instanceof Character) {
      return $arg->getBytes(STR_ENC)->buffer;
    } else if (is_string($arg) || $arg instanceof Bytes) {
      $charset= strtoupper($charset ? $charset : iconv_get_encoding('input_encoding'));

      // Convert the input to internal encoding. The two __LINE__ offsets
      // below must keep pointing at this iconv() call.
      $buffer= iconv($charset, STR_ENC, $arg);
      if (xp::errorAt(__FILE__, __LINE__ - 1)) {
        $message= key(xp::$registry['errors'][__FILE__][__LINE__ - 2]);
        xp::gc(__FILE__);

        // $charset was upper-cased above while STR_ENC is lower case, so
        // the two have to be compared case-insensitively.
        throw new FormatException($message.(0 === strcasecmp($charset, STR_ENC)
          ? ' with charset '.$charset
          : ' while converting input from '.$charset.' to '.STR_ENC
        ));
      }
      return $buffer;
    } else {
      return (string)$arg;
    }
  }

  /**
   * Constructor
   *
   * @param   var initial default ''
   * @param   string charset default NULL
   * @throws  lang.FormatException in case a conversion error occurs
   */
  public function __construct($initial= '', $charset= NULL) {
    $this->buffer= $this->asIntern($initial, $charset);
    $this->length= iconv_strlen($this->buffer, STR_ENC);
  }

  /**
   * = list[] overloading
   *
   * @param   int offset
   * @return  lang.types.Character
   * @throws  lang.IndexOutOfBoundsException if key does not exist
   */
  public function offsetGet($offset) {
    return $this->charAt($offset);
  }

  /**
   * list[]= overloading. Replaces the character at the given offset by
   * exactly one other character, which leaves the length unchanged.
   *
   * @param   int offset
   * @param   var value
   * @throws  lang.IllegalArgumentException if offset is not an integer or value is not a single character
   * @throws  lang.IndexOutOfBoundsException if offset is out of bounds
   */
  public function offsetSet($offset, $value) {
    if (!is_int($offset)) {
      throw new IllegalArgumentException('Incorrect type '.gettype($offset).' for index');
    }
    if ($offset >= $this->length || $offset < 0) {
      raise('lang.IndexOutOfBoundsException', 'Offset '.$offset.' out of bounds');
    }

    $char= $this->asIntern($value);
    if (1 !== iconv_strlen($char, STR_ENC)) {
      throw new IllegalArgumentException('Set only allows to set one character!');
    }

    $this->buffer= (
      iconv_substr($this->buffer, 0, $offset, STR_ENC).
      $char.
      iconv_substr($this->buffer, $offset+ 1, $this->length, STR_ENC)
    );
  }

  /**
   * isset() overloading
   *
   * @param   int offset
   * @return  bool
   */
  public function offsetExists($offset) {
    return ($offset >= 0 && $offset < $this->length);
  }

  /**
   * unset() overloading. Removes the character at the given offset.
   *
   * @param   int offset
   * @throws  lang.IndexOutOfBoundsException if offset is out of bounds
   */
  public function offsetUnset($offset) {
    if ($offset >= $this->length || $offset < 0) {
      raise('lang.IndexOutOfBoundsException', 'Offset '.$offset.' out of bounds');
    }
    $this->buffer= (
      iconv_substr($this->buffer, 0, $offset, STR_ENC).
      iconv_substr($this->buffer, $offset+ 1, $this->length, STR_ENC)
    );
    $this->length= iconv_strlen($this->buffer, STR_ENC);
  }

  /**
   * Returns the string's length (the number of characters in this
   * string, not the number of bytes)
   *
   * @return  int
   */
  public function length() {
    return $this->length;
  }

  /**
   * Returns the character at the given position
   *
   * @param   int offset
   * @return  lang.types.Character
   * @throws  lang.IndexOutOfBoundsException if key does not exist
   */
  public function charAt($offset) {
    if ($offset >= $this->length || $offset < 0) {
      raise('lang.IndexOutOfBoundsException', 'Offset '.$offset.' out of bounds');
    }
    return new Character(iconv_substr($this->buffer, $offset, 1, STR_ENC), STR_ENC);
  }

  /**
   * Returns the index within this string of the first occurrence of
   * the specified substring.
   *
   * @param   var arg either a string or a String
   * @param   int start default 0
   * @return  int the character offset, or -1 if not found or if arg is empty
   */
  public function indexOf($arg, $start= 0) {

    // An empty needle never matches
    if ('' === ($needle= $this->asIntern($arg))) return -1;

    $r= iconv_strpos($this->buffer, $needle, $start, STR_ENC);
    return FALSE === $r ? -1 : $r;
  }

  /**
   * Returns the index within this string of the last occurrence of
   * the specified substring.
   *
   * @param   var arg either a string or a String
   * @return  int the character offset, or -1 if not found or if arg is empty
   */
  public function lastIndexOf($arg) {

    // An empty needle never matches
    if ('' === ($needle= $this->asIntern($arg))) return -1;

    $r= iconv_strrpos($this->buffer, $needle, STR_ENC);
    return FALSE === $r ? -1 : $r;
  }

  /**
   * Returns a new string that is a substring of this string.
   *
   * @param   int start
   * @param   int length default 0 a length of 0 means "up to the end"
   * @return  lang.types.String
   */
  public function substring($start, $length= 0) {
    if (0 === $length) $length= $this->length;
    return new self(iconv_substr($this->buffer, $start, $length, STR_ENC), STR_ENC);
  }

  /**
   * Returns whether a given substring is contained in this string
   *
   * @param   var arg
   * @return  bool
   */
  public function contains($arg) {
    return -1 !== $this->indexOf($arg);
  }

  /**
   * Replaces all occurrences of a given substring inside this string
   * by another one. Modifies this string in place.
   *
   * @param   var old
   * @param   var new default ''
   * @return  lang.types.String this string
   */
  public function replace($old, $new= '') {
    $this->buffer= str_replace($this->asIntern($old), $this->asIntern($new), $this->buffer);
    $this->length= iconv_strlen($this->buffer, STR_ENC);
    return $this;
  }

  /**
   * Concatenates the given argument to the end of this string and returns
   * this String so it can be used in chained calls:
   *
   * <code>
   *   $s= new String('Hello');
   *   $s->concat(' ')->concat('World');
   * </code>
   *
   * @param   var arg
   * @return  lang.types.String this string
   */
  public function concat($arg) {
    $this->buffer.= $this->asIntern($arg);
    $this->length= iconv_strlen($this->buffer, STR_ENC);
    return $this;
  }

  /**
   * Returns whether this string starts with a given argument.
   *
   * @param   var arg either a string or a String
   * @return  bool
   */
  public function startsWith($arg) {
    return 0 === $this->indexOf($arg);
  }

  /**
   * Returns whether this string ends with a given argument.
   *
   * @param   var arg either a string or a String
   * @return  bool
   */
  public function endsWith($arg) {

    // An empty needle never matches, in line with indexOf() and startsWith()
    if ('' === ($needle= $this->asIntern($arg))) return FALSE;

    // The last occurrence must begin exactly where the suffix would start.
    // iconv_strrpos() yields FALSE if there is no occurrence at all, which
    // is never identical to the integer on the left-hand side.
    return (
      $this->length - iconv_strlen($needle, STR_ENC) ===
      iconv_strrpos($this->buffer, $needle, STR_ENC)
    );
  }

  /**
   * Returns whether a given object is equal to this object
   *
   * @param   lang.Generic cmp
   * @return  bool
   */
  public function equals($cmp) {
    return $cmp instanceof self && $this->buffer === $cmp->buffer;
  }

  /**
   * Returns a hashcode for this string object
   *
   * @return  string
   */
  public function hashCode() {
    return md5($this->buffer);
  }

  /**
   * Returns a string representation of this string. Uses the current
   * output encoding and transliteration.
   *
   * @return  string
   */
  public function toString() {
    return iconv(STR_ENC, iconv_get_encoding('output_encoding').'//TRANSLIT', $this->buffer);
  }

  /**
   * Returns a string representation of this string. Uses the current
   * output encoding and transliteration.
   *
   * @see     toString()
   * @return  string
   */
  public function __toString() {
    return $this->toString();
  }

  /**
   * Returns the bytes representing this string
   *
   * @param   string charset default NULL if omitted, iconv's input_encoding is used
   * @return  lang.types.Bytes
   * @throws  lang.FormatException in case a conversion error occurs
   */
  public function getBytes($charset= NULL) {
    $charset= strtoupper($charset ? $charset : iconv_get_encoding('input_encoding'));

    // No conversion necessary if the internal encoding is asked for.
    // $charset is upper case at this point while STR_ENC is lower case,
    // thus compare case-insensitively.
    if (0 === strcasecmp(STR_ENC, $charset)) {
      return new Bytes($this->buffer);
    }

    // The two __LINE__ offsets below must keep pointing at this iconv() call
    $bytes= iconv(STR_ENC, $charset, $this->buffer);
    if (xp::errorAt(__FILE__, __LINE__ - 1)) {
      $message= key(xp::$registry['errors'][__FILE__][__LINE__ - 2]);
      xp::gc(__FILE__);
      throw new FormatException($message.' while converting input from '.STR_ENC.' to '.$charset);
    }
    return new Bytes($bytes);
  }
}
| ?> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| /* This class is part of the XP framework | |
| * | |
| * $Id$ | |
| */ | |
define('STR_ENC', 'utf-8');

uses('lang.types.Character', 'lang.types.Bytes');

/**
 * Represents a string. The text is kept in a byte buffer encoded in
 * STR_ENC together with its length in characters. In this variant the
 * character-based operations are carried out by the mbstring extension,
 * while charset conversion still goes through iconv.
 *
 * @ext      iconv
 * @ext      mbstring
 * @test     xp://net.xp_framework.unittest.core.types.StringTest
 * @purpose  Wrapper type
 */
class String extends Object implements ArrayAccess {
  protected
    $buffer= '',
    $length= 0;

  public static $EMPTY = NULL;

  /**
   * Initializes the shared empty instance in self::$EMPTY
   *
   */
  static function __static() {
    self::$EMPTY= new self('', STR_ENC);
  }

  /**
   * Convert an argument to a string in internal encoding (STR_ENC).
   * Strings and Characters are taken over as they already are in internal
   * encoding, PHP strings and Bytes are converted from the given charset,
   * anything else is cast to string.
   *
   * @param   var arg
   * @param   string charset default NULL if omitted, iconv's input_encoding is used
   * @return  string
   * @throws  lang.FormatException in case a conversion error occurs
   */
  protected function asIntern($arg, $charset= NULL) {
    if ($arg instanceof self) {
      return $arg->buffer;
    } else if ($arg instanceof Character) {
      return $arg->getBytes(STR_ENC)->buffer;
    } else if (is_string($arg) || $arg instanceof Bytes) {
      $charset= strtoupper($charset ? $charset : iconv_get_encoding('input_encoding'));

      // Convert the input to internal encoding. The two __LINE__ offsets
      // below must keep pointing at this iconv() call.
      $buffer= iconv($charset, STR_ENC, $arg);
      if (xp::errorAt(__FILE__, __LINE__ - 1)) {
        $message= key(xp::$registry['errors'][__FILE__][__LINE__ - 2]);
        xp::gc(__FILE__);

        // $charset was upper-cased above while STR_ENC is lower case, so
        // the two have to be compared case-insensitively.
        throw new FormatException($message.(0 === strcasecmp($charset, STR_ENC)
          ? ' with charset '.$charset
          : ' while converting input from '.$charset.' to '.STR_ENC
        ));
      }
      return $buffer;
    } else {
      return (string)$arg;
    }
  }

  /**
   * Constructor
   *
   * @param   var initial default '' passing NULL skips all conversion and
   *          leaves buffer and length at their defaults; substring() uses
   *          this and fills in both members itself
   * @param   string charset default NULL
   * @throws  lang.FormatException in case a conversion error occurs
   */
  public function __construct($initial= '', $charset= NULL) {
    if (NULL === $initial) return;
    $this->buffer= $this->asIntern($initial, $charset);
    $this->length= mb_strlen($this->buffer, STR_ENC);
  }

  /**
   * = list[] overloading
   *
   * @param   int offset
   * @return  lang.types.Character
   * @throws  lang.IndexOutOfBoundsException if key does not exist
   */
  public function offsetGet($offset) {
    return $this->charAt($offset);
  }

  /**
   * list[]= overloading. Replaces the character at the given offset by
   * exactly one other character, which leaves the length unchanged.
   *
   * @param   int offset
   * @param   var value
   * @throws  lang.IllegalArgumentException if offset is not an integer or value is not a single character
   * @throws  lang.IndexOutOfBoundsException if offset is out of bounds
   */
  public function offsetSet($offset, $value) {
    if (!is_int($offset)) {
      throw new IllegalArgumentException('Incorrect type '.gettype($offset).' for index');
    }
    if ($offset >= $this->length || $offset < 0) {
      raise('lang.IndexOutOfBoundsException', 'Offset '.$offset.' out of bounds');
    }

    $char= $this->asIntern($value);
    if (1 !== mb_strlen($char, STR_ENC)) {
      throw new IllegalArgumentException('Set only allows to set one character!');
    }

    $this->buffer= (
      mb_substr($this->buffer, 0, $offset, STR_ENC).
      $char.
      mb_substr($this->buffer, $offset+ 1, $this->length, STR_ENC)
    );
  }

  /**
   * isset() overloading
   *
   * @param   int offset
   * @return  bool
   */
  public function offsetExists($offset) {
    return ($offset >= 0 && $offset < $this->length);
  }

  /**
   * unset() overloading. Removes the character at the given offset.
   *
   * @param   int offset
   * @throws  lang.IndexOutOfBoundsException if offset is out of bounds
   */
  public function offsetUnset($offset) {
    if ($offset >= $this->length || $offset < 0) {
      raise('lang.IndexOutOfBoundsException', 'Offset '.$offset.' out of bounds');
    }
    $this->buffer= (
      mb_substr($this->buffer, 0, $offset, STR_ENC).
      mb_substr($this->buffer, $offset+ 1, $this->length, STR_ENC)
    );
    $this->length= mb_strlen($this->buffer, STR_ENC);
  }

  /**
   * Returns the string's length (the number of characters in this
   * string, not the number of bytes)
   *
   * @return  int
   */
  public function length() {
    return $this->length;
  }

  /**
   * Returns the character at the given position
   *
   * @param   int offset
   * @return  lang.types.Character
   * @throws  lang.IndexOutOfBoundsException if key does not exist
   */
  public function charAt($offset) {
    if ($offset >= $this->length || $offset < 0) {
      raise('lang.IndexOutOfBoundsException', 'Offset '.$offset.' out of bounds');
    }
    return new Character(mb_substr($this->buffer, $offset, 1, STR_ENC), STR_ENC);
  }

  /**
   * Returns the index within this string of the first occurrence of
   * the specified substring.
   *
   * @param   var arg either a string or a String
   * @param   int start default 0
   * @return  int the character offset, or -1 if not found or if arg is empty
   */
  public function indexOf($arg, $start= 0) {
    if ('' === ($needle= $this->asIntern($arg))) return -1;
    $r= mb_strpos($this->buffer, $needle, $start, STR_ENC);
    return FALSE === $r ? -1 : $r;
  }

  /**
   * Returns the index within this string of the last occurrence of
   * the specified substring.
   *
   * @param   var arg either a string or a String
   * @return  int the character offset, or -1 if not found or if arg is empty
   */
  public function lastIndexOf($arg) {
    if ('' === ($needle= $this->asIntern($arg))) return -1;

    // The third argument of mb_strrpos() is the offset; the encoding has
    // to go into the fourth one.
    $r= mb_strrpos($this->buffer, $needle, 0, STR_ENC);
    return FALSE === $r ? -1 : $r;
  }

  /**
   * Returns a new string that is a substring of this string.
   *
   * @param   int start
   * @param   int length default 0 a length of 0 means "up to the end"
   * @return  lang.types.String
   */
  public function substring($start, $length= 0) {
    if (0 === $length) $length= $this->length;

    // The slice already is in internal encoding, so bypass the conversion
    // in the constructor and set the members directly. The length must be
    // counted in STR_ENC rather than in mbstring's internal encoding.
    $self= new self(NULL);
    $self->buffer= mb_substr($this->buffer, $start, $length, STR_ENC);
    $self->length= mb_strlen($self->buffer, STR_ENC);
    return $self;
  }

  /**
   * Returns whether a given substring is contained in this string
   *
   * @param   var arg
   * @return  bool
   */
  public function contains($arg) {
    return -1 !== $this->indexOf($arg);
  }

  /**
   * Replaces all occurrences of a given substring inside this string
   * by another one. Modifies this string in place.
   *
   * @param   var old
   * @param   var new default ''
   * @return  lang.types.String this string
   */
  public function replace($old, $new= '') {
    $this->buffer= str_replace($this->asIntern($old), $this->asIntern($new), $this->buffer);
    $this->length= mb_strlen($this->buffer, STR_ENC);
    return $this;
  }

  /**
   * Concatenates the given argument to the end of this string and returns
   * this String so it can be used in chained calls:
   *
   * <code>
   *   $s= new String('Hello');
   *   $s->concat(' ')->concat('World');
   * </code>
   *
   * @param   var arg
   * @return  lang.types.String this string
   */
  public function concat($arg) {
    $this->buffer.= $this->asIntern($arg);
    $this->length= mb_strlen($this->buffer, STR_ENC);
    return $this;
  }

  /**
   * Returns whether this string starts with a given argument.
   *
   * @param   var arg either a string or a String
   * @return  bool
   */
  public function startsWith($arg) {
    return 0 === $this->indexOf($arg);
  }

  /**
   * Returns whether this string ends with a given argument.
   *
   * @param   var arg either a string or a String
   * @return  bool
   */
  public function endsWith($arg) {

    // An empty needle never matches, in line with indexOf() and startsWith()
    if ('' === ($needle= $this->asIntern($arg))) return FALSE;

    // The last occurrence must begin exactly where the suffix would start.
    // mb_strrpos() yields FALSE if there is no occurrence at all, which is
    // never identical to the integer on the left-hand side.
    return (
      $this->length - mb_strlen($needle, STR_ENC) ===
      mb_strrpos($this->buffer, $needle, 0, STR_ENC)
    );
  }

  /**
   * Returns whether a given object is equal to this object
   *
   * @param   lang.Generic cmp
   * @return  bool
   */
  public function equals($cmp) {
    return $cmp instanceof self && $this->buffer === $cmp->buffer;
  }

  /**
   * Returns a hashcode for this string object
   *
   * @return  string
   */
  public function hashCode() {
    return md5($this->buffer);
  }

  /**
   * Returns a string representation of this string. Uses the current
   * output encoding and transliteration.
   *
   * @return  string
   */
  public function toString() {
    return iconv(STR_ENC, iconv_get_encoding('output_encoding').'//TRANSLIT', $this->buffer);
  }

  /**
   * Returns a string representation of this string. Uses the current
   * output encoding and transliteration.
   *
   * @see     toString()
   * @return  string
   */
  public function __toString() {
    return $this->toString();
  }

  /**
   * Returns the bytes representing this string
   *
   * @param   string charset default NULL if omitted, iconv's input_encoding is used
   * @return  lang.types.Bytes
   * @throws  lang.FormatException in case a conversion error occurs
   */
  public function getBytes($charset= NULL) {
    $charset= strtoupper($charset ? $charset : iconv_get_encoding('input_encoding'));

    // No conversion necessary if the internal encoding is asked for.
    // $charset is upper case at this point while STR_ENC is lower case,
    // thus compare case-insensitively.
    if (0 === strcasecmp(STR_ENC, $charset)) {
      return new Bytes($this->buffer);
    }

    // The two __LINE__ offsets below must keep pointing at this iconv() call
    $bytes= iconv(STR_ENC, $charset, $this->buffer);
    if (xp::errorAt(__FILE__, __LINE__ - 1)) {
      $message= key(xp::$registry['errors'][__FILE__][__LINE__ - 2]);
      xp::gc(__FILE__);
      throw new FormatException($message.' while converting input from '.STR_ENC.' to '.$charset);
    }
    return new Bytes($bytes);
  }
}
| ?> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| use \util\profiling\Timer; | |
/**
 * Micro benchmark for the String class: times a fixed mix of read-only
 * operations against inputs of different sizes.
 *
 */
class Test extends Object {

  /**
   * Runs 10000 rounds of substring / contains / endsWith / startsWith /
   * indexOf / lastIndexOf per fixture and prints the fixture's size in
   * bytes along with the elapsed time.
   *
   * @param   string[] args command line arguments (not used)
   */
  public static function main($args) {
    $fixtures= [
      '',
      'Hello World',
      file_get_contents(__FILE__),
      file_get_contents('README.md')
    ];

    foreach ($fixtures as $input) {
      $timer= Timer::measure(function() use($input) {
        $string= new String($input);
        $round= 0;
        while ($round < 10000) {
          $string->substring(1, 5);
          $string->contains('hello');
          $string->endsWith('end');
          $string->startsWith('begin');
          $string->indexOf('a');
          $string->lastIndexOf('b');
          $round++;
        }
      });
      Console::writeLinef('%d bytes: %.3f seconds', strlen($input), $timer->elapsedTime());
    }
  }
}
| ?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment