Last active
September 26, 2015 00:37
-
-
Save MidnightLightning/1011017 to your computer and use it in GitHub Desktop.
Nested Substrings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Return a substring, honoring nested strings
 *
 * Find from "start" to "end" in a given "haystack" string,
 * but if a second "start" is encountered before the first "end",
 * skip over the nested repetition and return all the way up to the proper "end" for the outer "start".
 *
 * Example: find_nested('a(b(c)d)e', '/\(/', '/\)/') returns '(b(c)d)'.
 *
 * Failures are reported by returning FALSE. Except for empty arguments (which return FALSE silently),
 * an E_USER_WARNING is raised as well: 'No starting string found', 'No ending string found'
 * or 'Too many levels deep!'.
 *
 * @author Brooks Boyd <[email protected]>
 * @param string $str The "haystack" string to search through for a substring
 * @param string $start A regular expression to be passed to preg_match() to find the beginning of the string to extract
 * @param string $end A regular expression to be passed to preg_match() to find the ending of the string to extract
 * @param int $offset Optional parameter to indicate how far (in bytes) into $str to start searching
 * @param int $max_depth Optional limit on how many levels of nesting may be open at once before giving up (default 100)
 * @return bool|string Returns FALSE if error was encountered, or returns the substring (including $start and $end pieces)
 */
function find_nested($str, $start, $end, $offset = 0, $max_depth = 100) {
    // empty() treats the string "0" as empty, but "0" is a perfectly valid haystack,
    // so it is explicitly let through. A pattern of "0" is never a valid regex, so
    // plain empty() is still correct for $start and $end.
    if ((empty($str) && $str !== '0') || empty($start) || empty($end)) return false;

    if (!preg_match($start, $str, $matches, PREG_OFFSET_CAPTURE, $offset)) {
        trigger_error('No starting string found', E_USER_WARNING);
        return false;
    }

    // With PREG_OFFSET_CAPTURE, $matches[0] is array(matched text, byte position).
    $beginning = $matches[0][1]; // Where the target starts
    $offset = $beginning + strlen($matches[0][0]); // Resume scanning just past the opening delimiter
    $level = 1; // One level deep at this point

    while (true) {
        // Every open level needs a closing delimiter somewhere ahead of the cursor.
        if (!preg_match($end, $str, $matches, PREG_OFFSET_CAPTURE, $offset)) {
            trigger_error('No ending string found', E_USER_WARNING);
            return false;
        }
        $next_end = $matches[0][1];
        $end_length = strlen($matches[0][0]);

        // Whichever delimiter comes first decides whether we go deeper or come back up.
        // A tie (both at the same position) counts as an end, as does "no further start".
        $has_start = preg_match($start, $str, $matches, PREG_OFFSET_CAPTURE, $offset);
        if ($has_start && $matches[0][1] < $next_end) {
            // There's a nested item here
            $level++;
            $offset = $matches[0][1] + strlen($matches[0][0]);
        } else {
            $level--;
            $offset = $next_end + $end_length;
        }

        if ($level == 0) {
            // The outermost start has been balanced; $offset sits just past its end delimiter.
            return substr($str, $beginning, $offset - $beginning);
        }
        if ($level > $max_depth) {
            // Safety valve against runaway nesting (e.g. a start pattern that keeps matching).
            trigger_error('Too many levels deep!', E_USER_WARNING);
            return false;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment