Created
February 23, 2025 19:29
-
-
Save oli-laban/f98375b73bed82d8eb808e8c071f3da6 to your computer and use it in GitHub Desktop.
Opinionated class for converting Unicode Technical Standard #35 date format strings (e.g. those used by date-fns) to PHP-compatible format strings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Support\I18n; | |
/**
 * Converts Unicode Technical Standard #35 date format patterns (as used by ICU, date-fns, etc.)
 * into the closest equivalent PHP DateTime format string.
 *
 * The conversion is lossy by design: UTS #35 tokens without a PHP counterpart are dropped and
 * several tokens are mapped to a "closest" PHP token rather than an exact one.
 */
class DateFormatConverter
{
    /**
     * Every character that PHP's date formatter interprets, plus the backslash escape character.
     *
     * Literal text containing any of these must be prefixed with a backslash so that PHP prints
     * it verbatim instead of expanding it.
     */
    private const PHP_ESCAPE_CHARS = 'dDjlNSwzWFmMntLoXxYyaABgGhHisuveIOPpTZcrU\\';

    /**
     * A map of UTS #35 date format tokens to PHP DateTime format tokens.
     *
     * A UTS token key with a trailing + will match to any token as long or longer than the number
     * of chars in the key (minus the +). An empty value means the token has no PHP counterpart and
     * is omitted from the output.
     *
     * @var array<string, string>
     */
    private array $formatMap = [
        // Era
        'G' => '',
        'GG' => '',
        'GGG' => '',
        'GGGG' => '',
        'GGGGG' => '',
        // Year
        'y' => 'Y',
        'yy' => 'y',
        'yyy+' => 'Y',
        'Y+' => 'o',
        'u+' => '',
        'U' => '',
        'UU' => '',
        'UUU' => '',
        'UUUU' => '',
        'UUUUU' => '',
        'r+' => '',
        // Quarter
        'Q' => '',
        'QQ' => '',
        'QQQ' => '',
        'QQQQ' => '',
        'QQQQQ' => '',
        'q' => '',
        'qq' => '',
        'qqq' => '',
        'qqqq' => '',
        'qqqqq' => '',
        // Month
        'M' => 'n',
        'MM' => 'm',
        'MMM' => 'M',
        'MMMM' => 'F',
        'MMMMM' => 'M',
        'L' => 'n',
        'LL' => 'm',
        'LLL' => 'M',
        'LLLL' => 'F',
        'LLLLL' => 'M',
        // Week
        'w' => 'W',
        'ww' => 'W',
        'W' => '',
        // Day
        'd' => 'j',
        'dd' => 'd',
        'D' => 'z',
        'DD' => 'z',
        'DDD' => 'z',
        'F' => '',
        'g+' => '',
        // Weekday
        'E' => 'D',
        'EE' => 'D',
        'EEE' => 'D',
        'EEEE' => 'l',
        'EEEEE' => 'D',
        'EEEEEE' => 'D',
        'e' => 'N',
        'ee' => 'N',
        'eee' => 'D',
        'eeee' => 'l',
        'eeeee' => 'D',
        'eeeeee' => 'D',
        'c' => 'N',
        'cc' => 'N',
        'ccc' => 'D',
        'cccc' => 'l',
        'ccccc' => 'D',
        'cccccc' => 'D',
        'ccccccc' => 'D',
        // Period
        'a' => 'a',
        'aa' => 'a',
        'aaa' => 'a',
        'aaaa' => 'a',
        'aaaaa' => 'a',
        'b' => 'a',
        'bb' => 'a',
        'bbb' => 'a',
        'bbbb' => 'a',
        'bbbbb' => 'a',
        'B' => '',
        'BB' => '',
        'BBB' => '',
        'BBBB' => '',
        'BBBBB' => '',
        // Hour
        'h' => 'g',
        'hh' => 'h',
        'H' => 'G',
        'HH' => 'H',
        'K' => 'g',
        'KK' => 'h',
        'k' => 'G',
        'kk' => 'H',
        'j' => '',
        'jj' => '',
        'jjj' => '',
        'jjjj' => '',
        'jjjjj' => '',
        'jjjjjj' => '',
        'J' => '',
        'JJ' => '',
        'C' => '',
        'CC' => '',
        'CCC' => '',
        'CCCC' => '',
        'CCCCC' => '',
        'CCCCCC' => '',
        // Minute
        'm' => 'i',
        'mm' => 'i',
        // Second
        's' => 's',
        'ss' => 's',
        'S+' => 'u',
        'A+' => '',
        // Timezone
        'z' => 'T',
        'zz' => 'T',
        'zzz' => 'T',
        'zzzz' => 'e',
        'Z' => 'O',
        'ZZ' => 'O',
        'ZZZ' => 'O',
        'ZZZZ' => '\G\M\TP',
        'ZZZZZ' => 'P',
        'V' => '',
        'VV' => 'e',
        'VVV' => 'e',
        'VVVV' => 'e',
        'O' => '\G\M\TP',
        'OOOO' => '\G\M\TP',
        'X' => 'p',
        'XX' => 'p',
        'XXX' => 'p',
        'XXXX' => 'p',
        'XXXXX' => 'p',
        'x' => 'O',
        'xx' => 'O',
        'xxx' => 'P',
        'xxxx' => 'O',
        'xxxxx' => 'P',
    ];

    /**
     * Attempt to convert a UTS #35 date format string to a PHP DateTime format string.
     *
     * As there are major differences between the formats, if there is no applicable conversion for
     * a token, it is simply omitted. Otherwise a closest match (or exact match) is used.
     *
     * Quoting follows the ICU / date-fns rules: two adjacent single quotes always produce one
     * literal quote (inside or outside quoted text), and a lone single quote opens a literal
     * section that runs to the next lone single quote or to the end of the string.
     *
     * Important: In anything but simple cases this will not be a 1:1 match, so should not be used
     * wherever an exact format is required.
     *
     * Format references:
     *  - https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns
     *  - https://www.php.net/manual/en/datetime.format.php
     *
     * @param  string  $utsString  The UTS #35 pattern, e.g. "yyyy-MM-dd'T'HH:mm".
     * @return string The PHP format string, with leading/trailing whitespace trimmed.
     */
    public function convertUtsToPhp(string $utsString): string
    {
        // Strict comparison: empty() would also swallow the valid literal pattern "0".
        if ($utsString === '') {
            return '';
        }

        $phpString = '';
        $length = strlen($utsString);
        $i = 0;

        // $i is advanced explicitly in every branch so nothing is skipped by accident.
        while ($i < $length) {
            $char = $utsString[$i];

            if ($char === "'") {
                // Two consecutive single quotes always resolve to a single literal quote.
                if ($i + 1 < $length && $utsString[$i + 1] === "'") {
                    $phpString .= "'";
                    $i += 2;

                    continue;
                }

                // A lone quote opens literal text; a lone quote at the very end yields nothing.
                [$literal, $i] = $this->readQuotedLiteral($utsString, $i + 1);
                $phpString .= $this->handleEscapedSegment($literal);

                continue;
            }

            if (! ctype_alpha($char)) {
                // Punctuation, digits and whitespace pass through untouched, except the backslash,
                // which would otherwise escape whatever PHP token follows it.
                $phpString .= $char === '\\' ? '\\\\' : $char;
                $i++;

                continue;
            }

            $token = $this->getTokenAtIndex($utsString, $i);
            $phpString .= $this->convertUtsTokenToPhp($token);
            $i += strlen($token);
        }

        // Omitted tokens can leave whitespace dangling at either end.
        return trim($phpString);
    }

    /**
     * Read the literal text of a quoted section.
     *
     * Scanning starts just after the opening quote and stops at the first lone single quote (the
     * closing quote) or at the end of the string when the section is unterminated. Doubled single
     * quotes inside the section are collapsed to one literal quote.
     *
     * @param  string  $string  The full UTS #35 pattern.
     * @param  int  $start  Index of the first character after the opening quote.
     * @return array{0: string, 1: int} The unquoted literal text and the index to resume parsing at.
     */
    private function readQuotedLiteral(string $string, int $start): array
    {
        $length = strlen($string);
        $literal = '';
        $i = $start;

        while ($i < $length) {
            if ($string[$i] !== "'") {
                $literal .= $string[$i];
                $i++;

                continue;
            }

            if ($i + 1 < $length && $string[$i + 1] === "'") {
                $literal .= "'";
                $i += 2;

                continue;
            }

            // Lone quote: this closes the section, resume right after it.
            return [$literal, $i + 1];
        }

        return [$literal, $length];
    }

    /**
     * Find the full UTS #35 token starting at the provided index.
     *
     * All tokens are simply repetitions of the same char, so the token is the run of that char
     * beginning at the index.
     *
     * @param  string  $string  The full UTS #35 pattern.
     * @param  int  $index  Index of the first character of the token.
     * @return string The repeated-character token, e.g. "yyyy".
     */
    private function getTokenAtIndex(string $string, int $index): string
    {
        return substr($string, $index, strspn($string, $string[$index], $index));
    }

    /**
     * Convert the provided UTS #35 token to its PHP counterpart using the $formatMap.
     *
     * An exact key wins; otherwise the first "open-ended" key (trailing +) for the same character
     * whose minimum length is satisfied is used. Unknown tokens convert to an empty string.
     *
     * @param  string  $utsToken  A repeated-character token such as "MM" or "yyyy".
     * @return string The PHP format fragment, or an empty string when there is no equivalent.
     */
    private function convertUtsTokenToPhp(string $utsToken): string
    {
        if (isset($this->formatMap[$utsToken])) {
            return $this->formatMap[$utsToken];
        }

        $tokenChar = $utsToken[0];
        $tokenLength = strlen($utsToken);

        foreach ($this->formatMap as $key => $phpToken) {
            $key = (string) $key;
            $keyLength = strlen($key);

            if ($key[0] !== $tokenChar || $key[$keyLength - 1] !== '+') {
                continue;
            }

            // The key minus its trailing + is the shortest token it applies to.
            if ($tokenLength >= $keyLength - 1) {
                return $phpToken;
            }
        }

        return '';
    }

    /**
     * Convert literal (already unquoted) UTS #35 text to its PHP counterpart.
     *
     * The text is appended as-is, except that every character PHP would interpret as a format
     * token, and the backslash itself, is prefixed with a backslash.
     *
     * @param  string  $segment  Literal text with surrounding quotes removed and '' collapsed.
     * @return string The text, safe to embed in a PHP date format string.
     */
    private function handleEscapedSegment(string $segment): string
    {
        $result = '';

        for ($i = 0, $length = strlen($segment); $i < $length; $i++) {
            $char = $segment[$i];
            $result .= str_contains(self::PHP_ESCAPE_CHARS, $char) ? '\\'.$char : $char;
        }

        return $result;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
use App\Support\I18n\DateFormatConverter; | |
// A fresh converter per test keeps the cases independent of each other.
beforeEach(function (): void {
    $this->converter = new DateFormatConverter();
});

// Each dataset row is [UTS #35 pattern, expected PHP format string].
it('correctly converts uts formats to php formats', function (string $uts, string $expected): void {
    expect($this->converter->convertUtsToPhp($uts))->toBe($expected);
})->with([
    // Common date / time patterns.
    ['yyyy-MM-dd', 'Y-m-d'],
    ['dd/MM/yyyy', 'd/m/Y'],
    ['MM/dd/yyyy', 'm/d/Y'],
    ['HH:mm:ss', 'H:i:s'],
    ['hh:mm a', 'h:i a'],
    ['HH:mm', 'H:i'],
    ['yyyy-MM-dd HH:mm:ss', 'Y-m-d H:i:s'],
    ['dd/MM/yyyy hh:mm a', 'd/m/Y h:i a'],
    ['MMMM d, yyyy', 'F j, Y'],
    ['MMM d, yyyy', 'M j, Y'],
    ['M/d/yyyy', 'n/j/Y'],
    ['EEEE, MMMM d, yyyy', 'l, F j, Y'],
    ['EEE, MMM d, yyyy', 'D, M j, Y'],
    // Individual tokens.
    ['w', 'W'],
    ['ww', 'W'],
    ['yy', 'y'],
    ['yyyy', 'Y'],
    ['YYYY', 'o'],
    ['h', 'g'],
    ['hh', 'h'],
    ['H', 'G'],
    ['HH', 'H'],
    ['h a', 'g a'],
    ['ZZZZ', '\G\M\TP'],
    ['ZZZZZ', 'P'],
    ['zzzz', 'e'],
    ['X', 'p'],
    ['XXXX', 'p'],
    // Open-ended ("+") map keys.
    ['yyy', 'Y'],
    ['yyyy', 'Y'],
    ['yyyyy', 'Y'],
    // Quoted literal text mixed with tokens.
    ["'Year: 'yyyy", '\\Y\\e\\a\\r: Y'],
    ["MMMM 'in the year' yyyy", 'F \\i\\n \\t\\h\\e \\y\\e\\a\\r Y'],
    ["'T'HH:mm:ss", '\\TH:i:s'],
    ["'Date:'yyyy-MM-dd", '\\D\\a\\t\\e:Y-m-d'],
    ["'o''clock' HH:mm", "\\o'\\c\\l\\o\\ck H:i"],
    ["yyyy'W'ww", 'Y\\WW'],
    ["yyyy-MM-dd'T'HH:mm", 'Y-m-d\\TH:i'],
    // Empty input and quote-only edge cases.
    ['', ''],
    ["'", ''],
    ["''", "'"],
    ["'''", "'"],
    // NOTE(review): ICU and date-fns treat every adjacent pair of quotes as one literal quote,
    // which would give three quotes for this input — confirm the intended pairing.
    ["''''''", "''"],
    ["'test", '\\t\\e\\s\\t'],
    // Tokens with no PHP equivalent are omitted.
    ['GG yyyy', 'Y'],
    ['yyyy QQQ', 'Y'],
    ['yyyy-MM-dd BBBBB', 'Y-m-d'],
    ['YYYY-MM-dd UU', 'o-m-d'],
    ['yyyy-MM-dd CC', 'Y-m-d'],
    // ISO 8601 style and other composite patterns.
    ['yyyy-MM-dd\'T\'HH:mm:ssXXX', 'Y-m-d\\TH:i:sp'],
    ['yyyy-MM-dd\'T\'HH:mm:ss.SSSXXX', 'Y-m-d\\TH:i:s.up'],
    ['dd-MMM-yyyy', 'd-M-Y'],
    ['MM/dd/yyyy hh:mm:ss a', 'm/d/Y h:i:s a'],
    // Literal text made of characters PHP would otherwise treat as format tokens.
    ["'Calendar'", 'C\\a\\l\\e\\n\\d\\a\\r'],
    ["'Monday'", '\\M\\o\\n\\d\\a\\y'],
    ["'Time'", '\\T\\i\\m\\e'],
    ["'Zone'", '\\Z\\o\\n\\e'],
    ["'PHP'", '\\P\\H\\P'],
    ["'Format'", '\\F\\o\\r\\m\\a\\t'],
    ["'Test-_/'", '\\T\\e\\s\\t-_/'],
    [
        "'dDjlNSwzWFmMntLoXxYyaABgGhHisuveIOPpTZcru'",
        '\\d\\D\\j\\l\\N\\S\\w\\z\\W\\F\\m\\M\\n\\t\\L\\o\\X\\x\\Y\\y\\a\\A\\B\\g\\G\\h\\H\\i\\s\\u\\v\\e\\I\\O\\P\\p\\T\\Z\\c\\r\\u',
    ],
]);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment