Created
June 23, 2022 00:32
-
-
Save collei/6186278d37d944de3fd4038339fff275 to your computer and use it in GitHub Desktop.
A class used to convert string representations of php arrays to a live array without using eval()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * @author user1441149 from https://stackoverflow.com/
 * @since 2015-06-14
 * @link https://stackoverflow.com/a/30833466
 * @link https://stackoverflow.com/questions/12212796/parse-string-as-array-in-php/30833466#30833466
 *
 * A class used to convert string representations of php arrays
 * to a live array without using eval()
 *
 * as its author said:
 * "Here is something I have been working on. There are no unit tests yet,
 * but it seems to work pretty well.
 * I do not support the use of functions, instantiation of objects,
 * conditionals, etc. from within the array structure.
 * I don't want to support those for my use case. But feel free to add
 * whatever functionality you need."
 *
 * Supported input: `[...]` and `array(...)` literals (nested, single- or
 * multi-line, including var_export() output) containing quoted strings,
 * decimal integers, floats, true/false/null and `key => value` pairs.
 * Quoted strings are returned without their quotes and with their escape
 * sequences decoded. Bare identifiers (e.g. constants such as E_ERROR) are
 * returned as their name; hex/octal/binary literals are returned as text.
 * Every other token (variables, operators, parentheses, ...) is silently
 * dropped by accept().
 */
class ArrayTokenScanner
{
    // Pseudo token types for the single-character syntax tokens that
    // token_get_all() returns as plain strings. Kept as class constants so the
    // parser does not depend on the value of global constants.
    const TOKEN_MINUS = '-';
    const TOKEN_BRACKET_OPEN = '[';
    const TOKEN_BRACKET_CLOSE = ']';
    const TOKEN_COMMA = ',';
    const TOKEN_ARRAY_CLOSE = ')';

    /**
     * Raw source text of every token that directly precedes a `=>`.
     *
     * @deprecated Filled by initialize() for subclasses only; parse() does not read it.
     *
     * @var array
     */
    protected $arrayKeys = [];

    /**
     * Converts the source text of a PHP array literal into an array.
     *
     * @param string $string e.g. array('foo' => 123, 'bar' => [0 => 123, 1 => 12345])
     *
     * @return array
     *
     * @throws InvalidArgumentException When the text does not look like an array literal.
     */
    public function scan($string)
    {
        // Remove whitespace and semi colons
        $sanitized = trim($string, " \t\n\r\0\x0B;");

        // "s": the literal may span several lines; "i" and "\s*": accept
        // "Array (" as written by var_export() or by hand.
        if (preg_match('/^(\[|array\s*\().*(\]|\))$/is', $sanitized)) {
            $tokens = $this->tokenize("<?php {$sanitized}");
            if ($tokens) {
                $this->initialize($tokens);

                return $this->parse($tokens);
            }
        }

        // Given array format is invalid
        throw new InvalidArgumentException("Invalid array format.");
    }

    /**
     * Records the raw text of every token that is directly followed by `=>`.
     *
     * @deprecated Retained as an extension hook; parse() no longer depends on it.
     *
     * @param array $tokens Normalized tokens as returned by tokenize()
     */
    protected function initialize(array $tokens)
    {
        $this->arrayKeys = [];
        while ($current = current($tokens)) {
            $next = next($tokens);
            if (($next[0] ?? '') === T_DOUBLE_ARROW) {
                $this->arrayKeys[] = $current[1];
            }
        }
    }

    /**
     * Parses one array literal, starting at the token the internal pointer of
     * $tokens currently points to (which must be `[` or `array`), and leaves
     * the pointer on the matching closing token.
     *
     * Keys follow PHP's own rules: values without a key are appended with
     * `$array[] = ...`, so the runtime decides the next free index, and
     * explicit keys are cast the way PHP casts array offsets.
     *
     * @param array $tokens Normalized tokens; the internal pointer is the cursor
     *
     * @return array
     *
     * @throws InvalidArgumentException When the current token does not open an array.
     */
    protected function parse(array &$tokens)
    {
        $opener = current($tokens);
        if (!is_array($opener) || !$this->isArrayOpen($opener[0])) {
            throw new InvalidArgumentException("Invalid array format.");
        }

        $closer = ($opener[0] === T_ARRAY) ? self::TOKEN_ARRAY_CLOSE : self::TOKEN_BRACKET_CLOSE;

        $array = [];
        $key = null;
        $hasKey = false;   // true while a "key =>" is waiting for its value
        $negative = false; // true while a "-" is waiting for its number

        while ($token = $this->until($tokens, $closer)) {
            $type = $token[0];

            // Skip arrow ( => ); the key was already captured by look-ahead.
            if ($type === T_DOUBLE_ARROW) {
                continue;
            }

            // A comma ends the current element, drop any dangling state.
            if ($type === self::TOKEN_COMMA) {
                $hasKey = false;
                $negative = false;
                continue;
            }

            // Remember the sign; it applies to the next number (key or value).
            if ($type === self::TOKEN_MINUS) {
                $negative = !$negative;
                continue;
            }

            if ($this->isArrayOpen($type)) {
                // Parse array contents recursively
                $value = $this->parse($tokens);
            } elseif ($type === T_LNUMBER || $type === T_DNUMBER) {
                $value = $this->parseAtomic($negative ? "-{$token[1]}" : $token[1]);
            } elseif (in_array($type, [T_STRING, T_NUM_STRING, T_CONSTANT_ENCAPSED_STRING], true)) {
                $value = $this->parseAtomic($token[1]);
            } else {
                // Stray syntax token (e.g. a closer of the other kind): ignore.
                continue;
            }
            $negative = false;

            // A scalar directly followed by "=>" is the key of the next value.
            $next = $this->peek($tokens);
            if ($next !== false && $next[0] === T_DOUBLE_ARROW && !is_array($value)) {
                $key = $this->normalizeKey($value);
                $hasKey = true;
                continue;
            }

            if ($hasKey) {
                $array[$key] = $value;
            } else {
                $array[] = $value;
            }
            $hasKey = false;
        }

        return $array;
    }

    /**
     * Advances the cursor and returns the token found there.
     *
     * @param array $tokens
     * @param int|string $discriminator Token type that ends the iteration
     *
     * @return array|false The next token, or false at the discriminator / end of input
     */
    protected function until(array &$tokens, $discriminator)
    {
        $next = next($tokens);
        if ($next === false || $next[0] === $discriminator) {
            return false;
        }

        return $next;
    }

    /**
     * Returns $index, advancing it past entries found in $this->arrayKeys.
     *
     * @deprecated No longer used by parse(). $arrayKeys holds token text
     *             (strings) and the comparison is strict, so for integer
     *             indexes this has always returned $index unchanged.
     *
     * @param int $index
     *
     * @return int|null
     */
    protected function createKey(&$index)
    {
        do {
            if (!in_array($index, $this->arrayKeys, true)) {
                return $index;
            }
        } while (++$index);
    }

    /**
     * Splits PHP source into the filtered, normalized token list parse() expects.
     *
     * @param string $string PHP source including the opening tag
     *
     * @return array|false
     */
    protected function tokenize($string)
    {
        $tokens = token_get_all($string);
        if (!is_array($tokens)) {
            return false;
        }

        // Filter tokens
        $tokens = array_values(array_filter($tokens, [$this, 'accept']));

        // Normalize token format, make syntax characters look like tokens for consistent parsing
        return $this->normalize($tokens);
    }

    /**
     * Method used to accept or deny tokens so that we only have to deal with the allowed tokens
     *
     * @param array|string $value A token or syntax character
     *
     * @return bool
     */
    protected function accept($value)
    {
        if (is_string($value)) {
            // Allowed syntax characters: commas, closers, opening bracket and minus.
            return in_array($value, [',', '[', ']', ')', '-'], true);
        }

        return in_array(
            $value[0],
            [T_ARRAY, T_CONSTANT_ENCAPSED_STRING, T_DOUBLE_ARROW, T_STRING, T_NUM_STRING, T_LNUMBER, T_DNUMBER],
            true
        );
    }

    /**
     * Normalize tokens so that each allowed syntax character looks like a token for consistent parsing.
     *
     * Also defines the global T_MINUS, T_BRACKET_OPEN, T_BRACKET_CLOSE,
     * T_COMMA_SEPARATOR and T_ARRAY_CLOSE constants (unless they already
     * exist) for code that still refers to them; this class itself uses its
     * TOKEN_* class constants instead.
     *
     * @param array $tokens
     *
     * @return array List of [0 => type, 1 => source text]
     */
    protected function normalize(array $tokens)
    {
        $legacyConstants = [
            'T_MINUS' => self::TOKEN_MINUS,
            'T_BRACKET_OPEN' => self::TOKEN_BRACKET_OPEN,
            'T_BRACKET_CLOSE' => self::TOKEN_BRACKET_CLOSE,
            'T_COMMA_SEPARATOR' => self::TOKEN_COMMA,
            'T_ARRAY_CLOSE' => self::TOKEN_ARRAY_CLOSE,
        ];
        foreach ($legacyConstants as $name => $character) {
            if (!defined($name)) {
                define($name, $character);
            }
        }

        // Normalize the token array
        return array_map(static function ($token) {
            // A syntax character is a plain string, so $token[0] is the
            // character itself: use it as both type and value.
            return [
                0 => $token[0],
                1 => is_string($token[0]) ? $token[0] : $token[1],
            ];
        }, $tokens);
    }

    /**
     * Converts the source text of a single scalar token into its PHP value.
     *
     * - quoted string  => string without quotes, escape sequences decoded
     * - decimal integer => int (float when it does not fit into an int)
     * - other numeric text => float
     * - true / false / null in any letter case => bool / null
     * - anything else (constant names, hex literals, ...) => returned as given
     *
     * @param mixed $value Token source text
     *
     * @return mixed
     */
    protected function parseAtomic($value)
    {
        $text = (string) $value;

        // Quoted string literal, optionally with the binary prefix b'...'.
        if (preg_match('/^[bB]?(["\'])(.*)\1$/sD', $text, $match)) {
            if ($match[1] === '"') {
                return $this->decodeDoubleQuoted($match[2]);
            }

            // Single quotes only know the escapes \\ and \'.
            return strtr($match[2], ['\\\\' => '\\', '\\\'' => '\'']);
        }

        // Parse integer
        if (preg_match('/^-?(0|[1-9][0-9]*)$/D', $text)) {
            $int = (int) $text;
            if ((string) $int === $text || $int === 0) {
                return $int;
            }

            // The cast saturated: the literal is too large for an int.
            return (float) $text;
        }

        // Parse other sorts of numeric values (floats, scientific notation etc)
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Parse bool and null; PHP treats these keywords case-insensitively.
        $lower = strtolower($text);
        if ($lower === 'true') {
            return true;
        }
        if ($lower === 'false') {
            return false;
        }
        if ($lower === 'null') {
            return null;
        }

        // Use string for any remaining values.
        // For example, bitsets are not supported. 0x2,1x2 etc
        return $value;
    }

    /**
     * Tells whether a token type starts an array literal (`[` or `array`).
     *
     * @param int|string $type
     *
     * @return bool
     */
    private function isArrayOpen($type)
    {
        return $type === T_ARRAY || $type === self::TOKEN_BRACKET_OPEN;
    }

    /**
     * Returns the token after the cursor without moving the cursor.
     *
     * @param array $tokens
     *
     * @return array|false
     */
    private function peek(array &$tokens)
    {
        $next = next($tokens);
        if ($next === false) {
            // next() ran off the end and prev() cannot come back from there;
            // the cursor was on the last token, so restore it with end().
            end($tokens);

            return false;
        }
        prev($tokens);

        return $next;
    }

    /**
     * Casts a parsed scalar to a valid array key the way PHP does for
     * literals: bool and float become int, null becomes ''. Integer-like
     * strings are cast by PHP itself when the key is used.
     *
     * @param mixed $key
     *
     * @return int|string
     */
    private function normalizeKey($key)
    {
        if (is_bool($key) || is_float($key)) {
            return (int) $key;
        }
        if ($key === null) {
            return '';
        }

        return $key;
    }

    /**
     * Decodes the escape sequences of a double-quoted string body:
     * \n \r \t \v \e \f \\ \$ \" plus octal, \xHH and \u{...} notation.
     * Unknown sequences are kept verbatim, as PHP does.
     *
     * @param string $body Text between the quotes
     *
     * @return string
     */
    private function decodeDoubleQuoted($body)
    {
        $decoded = preg_replace_callback(
            '/\\\\(u\{[0-9A-Fa-f]+\}|x[0-9A-Fa-f]{1,2}|[0-7]{1,3}|[nrtvef\\\\$"])/',
            function (array $match) {
                $escape = $match[1];
                switch ($escape[0]) {
                    case 'n':
                        return "\n";
                    case 'r':
                        return "\r";
                    case 't':
                        return "\t";
                    case 'v':
                        return "\v";
                    case 'e':
                        return "\e";
                    case 'f':
                        return "\f";
                    case '\\':
                    case '$':
                    case '"':
                        return $escape;
                    case 'x':
                        return chr(hexdec(substr($escape, 1)));
                    case 'u':
                        $hex = ltrim(substr($escape, 2, -1), '0');
                        if (strlen($hex) > 6 || hexdec($hex) > 0x10FFFF) {
                            // Not a valid code point: keep the text as written.
                            return $match[0];
                        }

                        return $this->encodeCodePoint((int) hexdec($hex));
                    default:
                        // Octal notation; values above \377 wrap around like in PHP.
                        return chr(octdec($escape) & 0xFF);
                }
            },
            $body
        );

        return ($decoded === null) ? $body : $decoded;
    }

    /**
     * Encodes a Unicode code point (0 .. 0x10FFFF) as UTF-8.
     *
     * @param int $codePoint
     *
     * @return string
     */
    private function encodeCodePoint($codePoint)
    {
        if ($codePoint < 0x80) {
            return chr($codePoint);
        }
        if ($codePoint < 0x800) {
            return chr(0xC0 | ($codePoint >> 6))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        if ($codePoint < 0x10000) {
            return chr(0xE0 | ($codePoint >> 12))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }

        return chr(0xF0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3F))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Demo page: feeds three array-literal strings to ArrayTokenScanner and shows
// each source string next to the array that was built from it.

// Resolve the class file relative to this script, not the working directory.
require_once __DIR__ . '/ArrayTokenScanner.php';

/**
 * Escapes text for output inside HTML, so characters such as < > & " from the
 * samples are displayed instead of being interpreted as markup.
 *
 * @param string $text
 *
 * @return string
 */
function demoEscape($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

$tokenScanner = new ArrayTokenScanner();

$stringWithPhpArrayCode = '[array("foo" => -123, "foobie" => "5x2", "bar" => \'456\', 111 => 12, "bar", ["null" => null], "bool" => false), 123 => E_ERROR];';
$array1 = $tokenScanner->scan($stringWithPhpArrayCode);
?>
<fieldset>
    <legend>Array #1</legend>
    <fieldset>
        <legend>Original String</legend>
        <?= demoEscape($stringWithPhpArrayCode) ?>
    </fieldset>
    <fieldset>
        <legend>Resulting Array</legend>
        <pre><?= demoEscape(print_r($array1, true)) ?></pre>
    </fieldset>
</fieldset>
<?php
$stringWithAnotherphpArrayCode = '[array("foo" => 123, "foobie" => "5x2", "bar" => \'456\', 111 => 12, "bar", ["null" => null], "bool" => false), 123 => E_ERROR];';
$array2 = $tokenScanner->scan($stringWithAnotherphpArrayCode);
?>
<fieldset>
    <legend>Array #2</legend>
    <fieldset>
        <legend>Original String</legend>
        <?= demoEscape($stringWithAnotherphpArrayCode) ?>
    </fieldset>
    <fieldset>
        <legend>Resulting Array</legend>
        <pre><?= demoEscape(print_r($array2, true)) ?></pre>
    </fieldset>
</fieldset>
<?php
$aComplexConfigString = "[
'plugin' => 'Bacon/BaconQrCode
with delays from another irls',
'description' => 'QR Code Generator for PHP',
'version' => '2022-04-22T12:39:45Z',
'dependencies' => [
],
'classes_folder' => 'src',
]";
// Line breaks are removed before scanning (like those we find in php config
// files, for example) so the sample is accepted whether or not scan() copes
// with multi-line input. All three line-ending styles are stripped, because
// this file may be saved with LF, CRLF or CR endings. Note that this also
// joins the two lines of the multi-line 'plugin' value.
$array3 = $tokenScanner->scan(str_replace(["\r\n", "\r", "\n"], '', $aComplexConfigString));
?>
<fieldset>
    <legend>Array #3</legend>
    <fieldset>
        <legend>Original String</legend>
        <?= demoEscape($aComplexConfigString) ?>
    </fieldset>
    <fieldset>
        <legend>Resulting Array</legend>
        <pre><?= demoEscape(print_r($array3, true)) ?></pre>
    </fieldset>
</fieldset>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment