Last active
July 14, 2023 12:51
-
-
Save rdlowrey/5f56cc540099de9d5006 to your computer and use it in GitHub Desktop.
Remove dot segments from a URI path according to RFC3986 Section 5.2.4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Remove dot segments from a URI path according to RFC3986 Section 5.2.4
 *
 * The path is consumed from the front of an input buffer while an output
 * stack of segments is built up. Each stacked segment carries its own leading
 * "/" (if it had one), so popping the stack removes "the last segment and its
 * preceding '/'" in a single operation, as the RFC requires.
 *
 * @param string $path The URI path component to normalize
 * @return string The path with all "." and ".." segments resolved
 * @link http://www.ietf.org/rfc/rfc3986.txt
 */
function removeDotPathSegments($path) {
    // A path without any "." cannot contain a dot segment: nothing to do.
    if (strpos($path, '.') === false) {
        return $path;
    }

    $inputBuffer = $path;
    $outputStack = [];

    /**
     * 2. While the input buffer is not empty, loop as follows:
     *
     * Before PHP 8.0 substr() returns false (not "") once the whole buffer
     * has been consumed, so both values mean "empty" here.
     */
    while ($inputBuffer !== '' && $inputBuffer !== false) {
        /**
         * A. If the input buffer begins with a prefix of "../" or "./",
         *    then remove that prefix from the input buffer; otherwise,
         */
        if (strncmp($inputBuffer, './', 2) === 0) {
            $inputBuffer = substr($inputBuffer, 2);
            continue;
        }
        if (strncmp($inputBuffer, '../', 3) === 0) {
            $inputBuffer = substr($inputBuffer, 3);
            continue;
        }

        /**
         * B. if the input buffer begins with a prefix of "/./" or "/.",
         *    where "." is a complete path segment, then replace that
         *    prefix with "/" in the input buffer; otherwise,
         */
        if ($inputBuffer === '/.') {
            // The buffer would become "/" and step E would move it to the
            // output on the next pass; do both at once.
            $outputStack[] = '/';
            break;
        }
        if (strncmp($inputBuffer, '/./', 3) === 0) {
            // Drop "/." and keep the following "/" as the new buffer start.
            $inputBuffer = substr($inputBuffer, 2);
            continue;
        }

        /**
         * C. if the input buffer begins with a prefix of "/../" or "/..",
         *    where ".." is a complete path segment, then replace that
         *    prefix with "/" in the input buffer and remove the last
         *    segment and its preceding "/" (if any) from the output
         *    buffer; otherwise,
         */
        if ($inputBuffer === '/..') {
            array_pop($outputStack);
            $outputStack[] = '/';
            break;
        }
        if (strncmp($inputBuffer, '/../', 4) === 0) {
            array_pop($outputStack);
            // Drop "/.." and keep the following "/" as the new buffer start.
            $inputBuffer = substr($inputBuffer, 3);
            continue;
        }

        /**
         * D. if the input buffer consists only of "." or "..", then remove
         *    that from the input buffer; otherwise,
         */
        if ($inputBuffer === '.' || $inputBuffer === '..') {
            break;
        }

        /**
         * E. move the first path segment in the input buffer to the end of
         *    the output buffer, including the initial "/" character (if
         *    any) and any subsequent characters up to, but not including,
         *    the next "/" character or the end of the input buffer.
         *
         * The search starts at offset 1 so that a leading "/" stays attached
         * to the segment being moved.
         */
        $slashPos = strpos($inputBuffer, '/', 1);
        if ($slashPos === false) {
            $outputStack[] = $inputBuffer;
            break;
        }

        $outputStack[] = substr($inputBuffer, 0, $slashPos);
        $inputBuffer = substr($inputBuffer, $slashPos);
    }

    return implode('', $outputStack);
}
// --- Test ---
// Each entry is [expected output, input path]. One line is printed per case:
// "Success" on a match, otherwise "Failure" with both values.
$expectations = [
    // A. leading "../" and "./" prefixes are dropped
    ['bar', '../bar'],
    ['bar', './bar'],
    ['bar', '.././bar'],
    ['bar', './../bar'],
    // B. "/./" and a trailing "/." collapse to "/"
    ['/foo/bar', '/foo/./bar'],
    ['/bar/', '/bar/./'],
    ['/', '/.'],
    ['/bar/', '/bar/.'],
    // C. "/../" and a trailing "/.." remove the previous segment
    ['/bar', '/foo/../bar'],
    ['/', '/bar/../'],
    ['/', '/..'],
    ['/', '/bar/..'],
    ['/foo/', '/foo/bar/..'],
    // D. a lone "." or ".." yields an empty path
    ['', '.'],
    ['', '..'],
    // Worked examples from RFC 3986 Section 5.2.4
    ['/a/g', '/a/b/c/./../../g'],
    ['mid/6', 'mid/content=5/../6'],
];
foreach ($expectations as $struct) {
    list($expected, $input) = $struct;
    $result = removeDotPathSegments($input);
    echo ($expected === $result) ? "Success\n" : "Failure: {$expected} !== {$result}\n";
}
PHP 8.2 version:
/**
 * https://gist.github.com/rdlowrey/5f56cc540099de9d5006.
 *
 * Remove dot segments from a URI path according to RFC3986 Section 5.2.4
 *
 * The path is consumed from the front of an input buffer while an output
 * stack of segments is built up. Each stacked segment carries its own leading
 * "/" (if it had one), so popping the stack removes "the last segment and its
 * preceding '/'" in a single operation, as the RFC requires.
 *
 * @param string $path the URI path component to normalize
 *
 * @return string the path with all "." and ".." segments resolved
 *
 * @see http://www.ietf.org/rfc/rfc3986.txt
 */
function normalizePath(string $path): string
{
    // A path without any "." cannot contain a dot segment: nothing to do.
    if (!str_contains($path, '.')) {
        return $path;
    }

    $inputBuffer = $path;
    $outputStack = [];

    // 2. While the input buffer is not empty, loop as follows:
    while ('' !== $inputBuffer) {
        /*
         * A. If the input buffer begins with a prefix of "../" or "./",
         *    then remove that prefix from the input buffer; otherwise,
         */
        if (str_starts_with($inputBuffer, './')) {
            $inputBuffer = substr($inputBuffer, 2);

            continue;
        }
        if (str_starts_with($inputBuffer, '../')) {
            $inputBuffer = substr($inputBuffer, 3);

            continue;
        }

        /*
         * B. if the input buffer begins with a prefix of "/./" or "/.",
         *    where "." is a complete path segment, then replace that
         *    prefix with "/" in the input buffer; otherwise,
         */
        if ('/.' === $inputBuffer) {
            // The buffer would become "/" and step E would move it to the
            // output on the next pass; do both at once.
            $outputStack[] = '/';

            break;
        }
        if (str_starts_with($inputBuffer, '/./')) {
            // Drop "/." and keep the following "/" as the new buffer start.
            $inputBuffer = substr($inputBuffer, 2);

            continue;
        }

        /*
         * C. if the input buffer begins with a prefix of "/../" or "/..",
         *    where ".." is a complete path segment, then replace that
         *    prefix with "/" in the input buffer and remove the last
         *    segment and its preceding "/" (if any) from the output
         *    buffer; otherwise,
         */
        if ('/..' === $inputBuffer) {
            array_pop($outputStack);
            $outputStack[] = '/';

            break;
        }
        if (str_starts_with($inputBuffer, '/../')) {
            array_pop($outputStack);
            // Drop "/.." and keep the following "/" as the new buffer start.
            $inputBuffer = substr($inputBuffer, 3);

            continue;
        }

        /*
         * D. if the input buffer consists only of "." or "..", then remove
         *    that from the input buffer; otherwise,
         */
        if ('.' === $inputBuffer || '..' === $inputBuffer) {
            break;
        }

        /*
         * E. move the first path segment in the input buffer to the end of
         *    the output buffer, including the initial "/" character (if
         *    any) and any subsequent characters up to, but not including,
         *    the next "/" character or the end of the input buffer.
         *
         * The search starts at offset 1 so that a leading "/" stays attached
         * to the segment being moved.
         */
        $slashPos = strpos($inputBuffer, '/', 1);
        if (false === $slashPos) {
            $outputStack[] = $inputBuffer;

            break;
        }

        $outputStack[] = substr($inputBuffer, 0, $slashPos);
        $inputBuffer = substr($inputBuffer, $slashPos);
    }

    return implode('', $outputStack);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Very useful, thanks!