<?php /** * Lithium: the most rad php framework * * @copyright Copyright 2012, Union of RAD (http://union-of-rad.org) * @license http://opensource.org/licenses/bsd-license.php The BSD License */ namespace lithium\g11n; use BadMethodCallException; use InvalidArgumentException; use lithium\action\Request as ActionRequest; use lithium\console\Request as ConsoleRequest; /** * The `Locale` class provides methods to deal with locale identifiers. The locale * (here: _locale identifier_) is used to distinguish among different sets of common * preferences. * * In order to avoid unnecessary overhead all methods throughout the framework accepting * a locale require it to be well-formed according to the structure laid out below. For * assuring the correct format use `Locale::canonicalize()` once on the locale. * * However the methods within this class will also work with not-so-well-formed locales. * They accept both underscores and hyphens as separators between and don't care about the * case of the individual tags. * * The identifier used by Lithium is based in its structure upon Unicode's * language identifier and is compliant to BCP 47. * * `language[_Script][_TERRITORY][_VARIANT]` * - `language` The spoken language, here represented by an ISO 639-1 code, * where not available ISO 639-3 and ISO 639-5 codes are allowed too) tag. * The tag should be lower-cased and is required. * - `Script` The tag should have it's first character capitalized, all others * lower-cased. The tag is optional. * - `TERRITORY` A geographical area, here represented by an ISO 3166-1 code. * Should be all upper-cased and is optional. * - `VARIANT` Should be all upper-cased and is optional. * * @link http://www.unicode.org/reports/tr35/tr35-12.html#Identifiers * @link http://www.rfc-editor.org/rfc/bcp/bcp47.txt * @link http://www.iana.org/assignments/language-subtag-registry */ class Locale extends \lithium\core\StaticObject { /** * Properties for locale tags. * * @var array */ protected static $_tags = array( 'language' => array('formatter' => 'strtolower'), 'script' => array('formatter' => array('strtolower', 'ucfirst')), 'territory' => array('formatter' => 'strtoupper'), 'variant' => array('formatter' => 'strtoupper') ); /** * Magic method enabling `language`, `script`, `territory` and `variant` * methods to parse and retrieve individual tags from a locale. * * {{{ * Locale::language('en_US'); // returns 'en' * Locale::territory('en_US'); // returns 'US' * }}} * * @see lithium\g11n\Locale::$_tags * @see lithium\g11n\Locale::decompose() * @param string $method * @param array $params * @return mixed */ public static function __callStatic($method, $params = array()) { $tags = static::invokeMethod('decompose', $params); if (!isset(static::$_tags[$method])) { throw new BadMethodCallException("Invalid locale tag `{$method}`."); } return isset($tags[$method]) ? $tags[$method] : null; } /** * Composes a locale from locale tags. This is the pendant to `Locale::decompose()`. * * @param array $tags An array as obtained from `Locale::decompose()`. * @return string A locale with tags separated by underscores or `null` * if none of the passed tags could be used to compose a locale. */ public static function compose($tags) { $result = array(); foreach (static::$_tags as $name => $tag) { if (isset($tags[$name])) { $result[] = $tags[$name]; } } if ($result) { return implode('_', $result); } } /** * Parses a locale into locale tags. This is the pendant to `Locale::compose()``. * * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`). * @return array Parsed language, script, territory and variant tags. * @throws InvalidArgumentException */ public static function decompose($locale) { $regex = '(?P<language>[a-z]{2,3})'; $regex .= '(?:[_-](?P<script>[a-z]{4}))?'; $regex .= '(?:[_-](?P<territory>[a-z]{2}))?'; $regex .= '(?:[_-](?P<variant>[a-z]{5,}))?'; if (!preg_match("/^{$regex}$/i", $locale, $matches)) { throw new InvalidArgumentException("Locale `{$locale}` could not be parsed."); } return array_filter(array_intersect_key($matches, static::$_tags)); } /** * Returns a locale in its canonical form with tags formatted properly. * * @param string $locale A locale in an arbitrary form (i.e. `'ZH-HANS-HK_REVISED'`). * @return string A locale in it's canonical form (i.e. `'zh_Hans_HK_REVISED'`). */ public static function canonicalize($locale) { $tags = static::decompose($locale); foreach ($tags as $name => &$tag) { foreach ((array) static::$_tags[$name]['formatter'] as $formatter) { $tag = $formatter($tag); } } return static::compose($tags); } /** * Cascades a locale. * * Usage: * {{{ * Locale::cascade('en_US'); * // returns array('en_US', 'en', 'root') * * Locale::cascade('zh_Hans_HK_REVISED'); * // returns array('zh_Hans_HK_REVISED', 'zh_Hans_HK', 'zh_Hans', 'zh', 'root') * }}} * * @link http://www.unicode.org/reports/tr35/tr35-13.html#Locale_Inheritance * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`). * @return array Indexed array of locales (starting with the most specific one). */ public static function cascade($locale) { $locales[] = $locale; if ($locale === 'root') { return $locales; } $tags = static::decompose($locale); while (count($tags) > 1) { array_pop($tags); $locales[] = static::compose($tags); } $locales[] = 'root'; return $locales; } /** * Searches an array of locales for the best match to a locale. The locale * is iteratively simplified until either it matches one of the locales * in the list or the locale can't be further simplified. * * This method partially implements the lookup matching scheme as described * in RFC 4647, section 3.4 and thus does not strictly conform to the * specification. * * Differences to specification: * - No support for wildcards in the to-be-matched locales. * - No support for locales with private subtags. * - No support for a default return value. * - Passed locales are required to be in canonical form (i.e. `'ja_JP'`). * * @link http://www.ietf.org/rfc/rfc4647.txt * @param array $locales Locales to match against `$locale`. * @param string $locale A locale in it's canonical form (i.e. `'zh_Hans_HK_REVISED'`). * @return string The matched locale. */ public static function lookup($locales, $locale) { $tags = static::decompose($locale); $count = count($tags); while ($count > 0) { if (($key = array_search(static::compose($tags), $locales)) !== false) { return $locales[$key]; } elseif ($count == 1) { foreach ($locales as $currentLocale) { if (strpos($currentLocale, current($tags) . '_') === 0) { return $currentLocale; } } } if (($key = array_search(static::compose($tags), $locales)) !== false) { return $locales[$key]; } array_pop($tags); $count = count($tags); } } /** * Determines the preferred locale from a request or array. Optionally negotiates * the preferred locale with available locales. * * @see lithium\g11n\Locale::_preferredAction() * @see lithium\g11n\Locale::_preferredConsole() * @see lithium\g11n\Locale::lookup() * @param object|array $request An action or console request object or an array of locales. * @param array $available A list of locales to negotiate the preferred locale with. * @return string The preferred locale in it's canonical form (i.e. `'fr_CA'`). * @todo Rewrite this to remove hard-coded class names. */ public static function preferred($request, $available = null) { if (is_array($request)) { $result = $request; } elseif ($request instanceof ActionRequest) { $result = static::_preferredAction($request); } elseif ($request instanceof ConsoleRequest) { $result = static::_preferredConsole($request); } else { return null; } if (!$available) { return array_shift($result); } foreach ((array) $result as $locale) { if ($match = static::lookup($available, $locale)) { return $match; } } } /** * Detects preferred locales from an action request by looking at the * `'Accept-Language'` header as described by RFC 2616, section 14.4. * * @link http://www.ietf.org/rfc/rfc2616.txt * @param object $request An instance of `lithium\action\Request`. * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`). */ protected static function _preferredAction($request) { $result = array(); $regex = "/^\s*(?P<locale>\w\w(?:[-]\w\w)?)(?:;q=(?P<quality>(0|1|0\.\d+)))?\s*$/"; foreach (explode(',', $request->env('HTTP_ACCEPT_LANGUAGE')) as $part) { if (preg_match($regex, $part, $matches)) { $locale = static::canonicalize($matches['locale']); $quality = isset($matches['quality']) ? $matches['quality'] : 1; $result[$quality][] = $locale; } } krsort($result); $return = array(); foreach ($result as $locales) { $return = array_merge($return, array_values($locales)); } return $return; } /** * Detects preferred locales from a console request by looking at certain * environment variables. The environment variables may be present or not * depending on your system. If multiple variables are present the following * hierarchy is used: `'LANGUAGE'`, `'LC_ALL'`, `'LANG'`. * * The locales of the `'LC_ALL'` and the `'LANG'` are formatted according * to the posix standard: `language(_territory)(.encoding)(@modifier)`. * Locales having such a format are automatically canonicalized and transformed * into the `Locale` class' format. * * @link http://www.linux.com/archive/feature/53781 * @param object $request An instance of `lithium\console\Request`. * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`). */ protected static function _preferredConsole($request) { $regex = '(?P<locale>[\w\_]+)(\.|@|$)+'; $result = array(); if ($value = $request->env('LANGUAGE')) { return explode(':', $value); } foreach (array('LC_ALL', 'LANG') as $variable) { $value = $request->env($variable); if (!$value || $value == 'C' || $value == 'POSIX') { continue; } if (preg_match("/{$regex}/", $value, $matches)) { return (array) $matches['locale']; } } return $result; } } ?>