Skip to content

Instantly share code, notes, and snippets.

@oli-laban
Created February 23, 2025 19:29
Show Gist options
  • Save oli-laban/f98375b73bed82d8eb808e8c071f3da6 to your computer and use it in GitHub Desktop.
Save oli-laban/f98375b73bed82d8eb808e8c071f3da6 to your computer and use it in GitHub Desktop.
Opinionated class for converting Unicode Technical Standard #35 date format strings (e.g. those used by date-fns) to PHP-compatible format strings
<?php
namespace App\Support\I18n;
/**
 * Converts Unicode Technical Standard #35 date format patterns (as used by ICU,
 * date-fns, etc.) into the closest PHP DateTime format string.
 *
 * The conversion is deliberately lossy: UTS #35 tokens without a PHP counterpart
 * are dropped, and some tokens map to the nearest (not identical) PHP token.
 */
class DateFormatConverter
{
    /**
     * A map of UTS #35 date format tokens to PHP DateTime format tokens.
     *
     * A UTS token key with a trailing + will match to any token as long or longer than the number
     * of chars in the key (minus the +). An empty value means "no PHP equivalent; omit the token".
     *
     * @var array<string, string>
     */
    private const FORMAT_MAP = [
        // Era
        'G' => '',
        'GG' => '',
        'GGG' => '',
        'GGGG' => '',
        'GGGGG' => '',
        // Year
        'y' => 'Y',
        'yy' => 'y',
        'yyy+' => 'Y',
        'Y+' => 'o',
        'u+' => '',
        'U' => '',
        'UU' => '',
        'UUU' => '',
        'UUUU' => '',
        'UUUUU' => '',
        'r+' => '',
        // Quarter
        'Q' => '',
        'QQ' => '',
        'QQQ' => '',
        'QQQQ' => '',
        'QQQQQ' => '',
        'q' => '',
        'qq' => '',
        'qqq' => '',
        'qqqq' => '',
        'qqqqq' => '',
        // Month
        'M' => 'n',
        'MM' => 'm',
        'MMM' => 'M',
        'MMMM' => 'F',
        'MMMMM' => 'M',
        'L' => 'n',
        'LL' => 'm',
        'LLL' => 'M',
        'LLLL' => 'F',
        'LLLLL' => 'M',
        // Week
        'w' => 'W',
        'ww' => 'W',
        'W' => '',
        // Day
        'd' => 'j',
        'dd' => 'd',
        'D' => 'z',
        'DD' => 'z',
        'DDD' => 'z',
        'F' => '',
        'g+' => '',
        // Weekday
        'E' => 'D',
        'EE' => 'D',
        'EEE' => 'D',
        'EEEE' => 'l',
        'EEEEE' => 'D',
        'EEEEEE' => 'D',
        'e' => 'N',
        'ee' => 'N',
        'eee' => 'D',
        'eeee' => 'l',
        'eeeee' => 'D',
        'eeeeee' => 'D',
        'c' => 'N',
        'cc' => 'N',
        'ccc' => 'D',
        'cccc' => 'l',
        'ccccc' => 'D',
        'cccccc' => 'D',
        'ccccccc' => 'D',
        // Period
        'a' => 'a',
        'aa' => 'a',
        'aaa' => 'a',
        'aaaa' => 'a',
        'aaaaa' => 'a',
        'b' => 'a',
        'bb' => 'a',
        'bbb' => 'a',
        'bbbb' => 'a',
        'bbbbb' => 'a',
        'B' => '',
        'BB' => '',
        'BBB' => '',
        'BBBB' => '',
        'BBBBB' => '',
        // Hour
        'h' => 'g',
        'hh' => 'h',
        'H' => 'G',
        'HH' => 'H',
        'K' => 'g',
        'KK' => 'h',
        'k' => 'G',
        'kk' => 'H',
        'j' => '',
        'jj' => '',
        'jjj' => '',
        'jjjj' => '',
        'jjjjj' => '',
        'jjjjjj' => '',
        'J' => '',
        'JJ' => '',
        'C' => '',
        'CC' => '',
        'CCC' => '',
        'CCCC' => '',
        'CCCCC' => '',
        'CCCCCC' => '',
        // Minute
        'm' => 'i',
        'mm' => 'i',
        // Second
        's' => 's',
        'ss' => 's',
        'S+' => 'u',
        'A+' => '',
        // Timezone
        'z' => 'T',
        'zz' => 'T',
        'zzz' => 'T',
        'zzzz' => 'e',
        'Z' => 'O',
        'ZZ' => 'O',
        'ZZZ' => 'O',
        'ZZZZ' => '\G\M\TP',
        'ZZZZZ' => 'P',
        'V' => '',
        'VV' => 'e',
        'VVV' => 'e',
        'VVVV' => 'e',
        'O' => '\G\M\TP',
        'OOOO' => '\G\M\TP',
        'X' => 'p',
        'XX' => 'p',
        'XXX' => 'p',
        'XXXX' => 'p',
        'XXXXX' => 'p',
        'x' => 'O',
        'xx' => 'O',
        'xxx' => 'P',
        'xxxx' => 'O',
        'xxxxx' => 'P',
    ];

    /**
     * Characters that must be backslash-escaped to appear literally in a PHP date format:
     * every format character recognised by DateTimeInterface::format() plus the backslash
     * itself (PHP's escape character).
     */
    private const PHP_ESCAPE_CHARS = 'dDjlNSwzWFmMntLoXxYyaABgGhHisuveIOPpTZcrU\\';

    /**
     * Attempt to convert a UTS #35 date format string to a PHP DateTime format string.
     *
     * As there are major differences between the formats, if there is no applicable conversion for
     * a token, it is simply omitted. Otherwise a closest match (or exact match) is used.
     *
     * Important: In anything but simple cases this will not be a 1:1 match, so should not be used
     * wherever an exact format is required.
     *
     * Format references:
     * - https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns
     * - https://www.php.net/manual/en/datetime.format.php
     *
     * @param string $utsString The UTS #35 pattern, e.g. "yyyy-MM-dd'T'HH:mm".
     * @return string The PHP format string, with surrounding whitespace trimmed.
     */
    public function convertUtsToPhp(string $utsString): string
    {
        // Compare against '' rather than using empty(): "0" is a valid (literal) pattern.
        if ($utsString === '') {
            return '';
        }

        $phpString = '';
        $length = strlen($utsString);
        $i = 0;

        // Every branch below advances $i by exactly the number of bytes it consumed.
        while ($i < $length) {
            $char = $utsString[$i];

            if ($char === "'") {
                // Two consecutive single quotes always resolve to one literal single quote.
                if ($i + 1 < $length && $utsString[$i + 1] === "'") {
                    $phpString .= "'";
                    $i += 2;
                    continue;
                }

                // A lone quote at the very end opens an empty escape sequence; ignore it.
                if ($i === $length - 1) {
                    $i++;
                    continue;
                }

                // Match anything up until the next *single* single quote, or the end of the string,
                // to find the full escaped sequence. Group 1 is the quoted text without the
                // closing quote; group 0 additionally includes the closing quote when present.
                $matched = preg_match(
                    "/\\G((?:[^']|'')*?)(?:'(?!')|\$)/",
                    $utsString,
                    $matches,
                    0,
                    $i + 1,
                );

                if ($matched !== 1) {
                    // Should not happen for this pattern; skip the quote rather than loop forever.
                    $i++;
                    continue;
                }

                $phpString .= $this->handleEscapedSegment($matches[1]);
                // Skip the opening quote plus everything the pattern consumed.
                $i += 1 + strlen($matches[0]);
                continue;
            }

            if (! $this->isAsciiLetter($char)) {
                // Non-letters are literal in UTS #35. A backslash must be doubled because it is
                // PHP's escape character and would otherwise swallow the following character.
                $phpString .= $char === '\\' ? '\\\\' : $char;
                $i++;
                continue;
            }

            $token = $this->getTokenAtIndex($utsString, $i);
            $phpString .= $this->convertUtsTokenToPhp($token);
            $i += strlen($token);
        }

        // Omitted tokens can leave dangling separators such as leading/trailing spaces.
        return trim($phpString);
    }

    /**
     * Find the full UTS #35 token starting at the provided index.
     *
     * All tokens are simply repetitions of the same char, so the token is the run of
     * that char beginning at $index.
     *
     * @param string $string
     * @param int $index Byte offset of the token's first character; must be within $string.
     * @return string
     */
    private function getTokenAtIndex(string $string, int $index): string
    {
        $runLength = strspn($string, $string[$index], $index);

        return substr($string, $index, $runLength);
    }

    /**
     * Convert the provided UTS #35 token to its PHP counterpart using the format map.
     *
     * Exact keys win; otherwise the first "open-ended" key (trailing +) for the same letter
     * whose minimum length is satisfied is used. Unknown tokens convert to an empty string.
     *
     * @param string $utsToken A run of one repeated pattern letter, e.g. "yyyy".
     * @return string
     */
    private function convertUtsTokenToPhp(string $utsToken): string
    {
        if ($utsToken === '') {
            return '';
        }

        if (array_key_exists($utsToken, self::FORMAT_MAP)) {
            return self::FORMAT_MAP[$utsToken];
        }

        $tokenLength = strlen($utsToken);

        foreach (self::FORMAT_MAP as $pattern => $phpToken) {
            $pattern = (string) $pattern;

            if ($pattern[0] !== $utsToken[0] || ! str_ends_with($pattern, '+')) {
                continue;
            }

            // The key's length minus the trailing + is the minimum token length it covers.
            if ($tokenLength >= strlen($pattern) - 1) {
                return $phpToken;
            }
        }

        return '';
    }

    /**
     * Convert the text of a UTS #35 quoted (escaped) segment to its PHP counterpart.
     *
     * The text is emitted as-is, except that doubled single quotes collapse to one and every
     * character PHP would interpret (format characters and the backslash) is backslash-escaped.
     *
     * @param string $segment The text between the delimiting quotes, delimiters excluded.
     * @return string
     */
    private function handleEscapedSegment(string $segment): string
    {
        // Even within escaped text, two consecutive single quotes resolve to a single quote.
        $literal = str_replace("''", "'", $segment);

        return addcslashes($literal, self::PHP_ESCAPE_CHARS);
    }

    /**
     * Whether the given single byte is an ASCII letter (A-Z or a-z).
     *
     * Used instead of ctype_alpha(), which depends on the current locale and could
     * misclassify bytes of multi-byte characters as pattern letters.
     *
     * @param string $char
     * @return bool
     */
    private function isAsciiLetter(string $char): bool
    {
        $code = ord($char);

        return ($code >= 65 && $code <= 90) || ($code >= 97 && $code <= 122);
    }
}
<?php
use App\Support\I18n\DateFormatConverter;
// A fresh converter per test keeps the cases independent of one another.
beforeEach(function () {
    $this->converter = new DateFormatConverter;
});

// Each dataset is [UTS #35 pattern, expected PHP date() format string].
it('correctly converts uts formats to php formats', function (string $uts, string $expected) {
    $php = $this->converter->convertUtsToPhp($uts);

    expect($php)->toBe($expected);
})->with([
    // Common date / time patterns.
    ['yyyy-MM-dd', 'Y-m-d'],
    ['dd/MM/yyyy', 'd/m/Y'],
    ['MM/dd/yyyy', 'm/d/Y'],
    ['HH:mm:ss', 'H:i:s'],
    ['hh:mm a', 'h:i a'],
    ['HH:mm', 'H:i'],
    ['yyyy-MM-dd HH:mm:ss', 'Y-m-d H:i:s'],
    ['dd/MM/yyyy hh:mm a', 'd/m/Y h:i a'],
    ['MMMM d, yyyy', 'F j, Y'],
    ['MMM d, yyyy', 'M j, Y'],
    ['M/d/yyyy', 'n/j/Y'],
    ['EEEE, MMMM d, yyyy', 'l, F j, Y'],
    ['EEE, MMM d, yyyy', 'D, M j, Y'],
    // Individual tokens.
    ['w', 'W'],
    ['ww', 'W'],
    ['yy', 'y'],
    ['yyyy', 'Y'],
    ['YYYY', 'o'],
    ['h', 'g'],
    ['hh', 'h'],
    ['H', 'G'],
    ['HH', 'H'],
    ['h a', 'g a'],
    ['ZZZZ', '\G\M\TP'],
    ['ZZZZZ', 'P'],
    ['zzzz', 'e'],
    ['X', 'p'],
    ['XXXX', 'p'],
    // Open-ended ("+") map keys match any token at least as long as the key.
    ['yyy', 'Y'],
    ['yyyy', 'Y'],
    ['yyyyy', 'Y'],
    // Quoted literal text.
    ["'Year: 'yyyy", '\\Y\\e\\a\\r: Y'],
    ["MMMM 'in the year' yyyy", 'F \\i\\n \\t\\h\\e \\y\\e\\a\\r Y'],
    ["'T'HH:mm:ss", '\\TH:i:s'],
    ["'Date:'yyyy-MM-dd", '\\D\\a\\t\\e:Y-m-d'],
    ["'o''clock' HH:mm", "\\o'\\c\\l\\o\\ck H:i"],
    ["yyyy'W'ww", 'Y\\WW'],
    ["yyyy-MM-dd'T'HH:mm", 'Y-m-d\\TH:i'],
    // Empty input and quote edge cases.
    ['', ''],
    ["'", ''],
    ["''", "'"],
    ["'''", "'"],
    // UTS #35: '' is a literal quote inside or outside quoted text, so three pairs
    // yield three quotes (the previous expectation of two dropped a character).
    ["''''''", "'''"],
    ["'test", '\\t\\e\\s\\t'],
    // Tokens without a PHP equivalent are omitted.
    ['GG yyyy', 'Y'],
    ['yyyy QQQ', 'Y'],
    ['yyyy-MM-dd BBBBB', 'Y-m-d'],
    ['YYYY-MM-dd UU', 'o-m-d'],
    ['yyyy-MM-dd CC', 'Y-m-d'],
    // ISO 8601 style patterns.
    ['yyyy-MM-dd\'T\'HH:mm:ssXXX', 'Y-m-d\\TH:i:sp'],
    ['yyyy-MM-dd\'T\'HH:mm:ss.SSSXXX', 'Y-m-d\\TH:i:s.up'],
    ['dd-MMM-yyyy', 'd-M-Y'],
    ['MM/dd/yyyy hh:mm:ss a', 'm/d/Y h:i:s a'],
    // Only characters PHP would interpret are escaped inside literal text.
    ["'Calendar'", 'C\\a\\l\\e\\n\\d\\a\\r'],
    ["'Monday'", '\\M\\o\\n\\d\\a\\y'],
    ["'Time'", '\\T\\i\\m\\e'],
    ["'Zone'", '\\Z\\o\\n\\e'],
    ["'PHP'", '\\P\\H\\P'],
    ["'Format'", '\\F\\o\\r\\m\\a\\t'],
    ["'Test-_/'", '\\T\\e\\s\\t-_/'],
    [
        "'dDjlNSwzWFmMntLoXxYyaABgGhHisuveIOPpTZcru'",
        '\\d\\D\\j\\l\\N\\S\\w\\z\\W\\F\\m\\M\\n\\t\\L\\o\\X\\x\\Y\\y\\a\\A\\B\\g\\G\\h\\H\\i\\s\\u\\v\\e\\I\\O\\P\\p\\T\\Z\\c\\r\\u',
    ],
]);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment