Created
November 5, 2010 05:08
-
-
Save aerith/663666 to your computer and use it in GitHub Desktop.
Ruby のソースほぼまるパク
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
namespace URI\RegExp\Pattern {
    #
    # Regular-expression fragments used to parse URIs.
    #
    #   RFC 2396 (URI Generic Syntax)
    #   RFC 2732 (IPv6 Literal Addresses in URL's)
    #   RFC 2373 (IPv6 Addressing Architecture)
    #
    # Every constant below is a PCRE fragment WITHOUT delimiters or modifiers.
    # Conventions used throughout this namespace:
    #   - ALPHA, ALNUM, HEX, UNRESERVED and RESERVED hold the *contents* of a
    #     character class and are only meaningful inside "[...]".
    #   - All other constants are complete sub-patterns, wrapped in "(?:...)"
    #     wherever they contain an alternation so they can be concatenated.
    #   - "/" is always written "\/" so a fragment can be placed between "/"
    #     delimiters without further escaping.
    #   - Constants are created with define() under their fully-qualified name;
    #     the unqualified names passed to sprintf() resolve to this namespace.
    #

    # alpha         = lowalpha | upalpha
    define('URI\RegExp\Pattern\ALPHA', 'a-zA-Z');
    # alphanum      = alpha | digit
    define('URI\RegExp\Pattern\ALNUM', sprintf('%s\d', ALPHA));

    # hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
    #                         "a" | "b" | "c" | "d" | "e" | "f"
    define('URI\RegExp\Pattern\HEX', 'a-fA-F\d');
    # escaped       = "%" hex hex
    # ("%%" is a literal percent sign in the sprintf() format.)
    define('URI\RegExp\Pattern\ESCAPED', sprintf('%%[%s]{2}', HEX));
    # mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    #                 "(" | ")"
    # unreserved    = alphanum | mark
    # The leading "-" keeps the hyphen literal when this is the first item
    # pasted into a character class.
    define('URI\RegExp\Pattern\UNRESERVED', sprintf('-_\.!~\*\'()%s', ALNUM));
    # reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    #                 "$" | ","
    # "[" and "]" are added on top of that list (RFC 2732).
    define('URI\RegExp\Pattern\RESERVED', ';\/?:@&=+$,\[\]');

    # uric          = reserved | unreserved | escaped
    define('URI\RegExp\Pattern\URIC', sprintf('(?:[%s%s]|%s)', UNRESERVED, RESERVED, ESCAPED));
    # uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
    #                 "&" | "=" | "+" | "$" | ","
    define('URI\RegExp\Pattern\URIC_NO_SLASH', sprintf('(?:[%s;?:@&=+$,]|%s)', UNRESERVED, ESCAPED));
    # query         = *uric
    define('URI\RegExp\Pattern\QUERY', sprintf('%s*', URIC));
    # fragment      = *uric
    define('URI\RegExp\Pattern\FRAGMENT', sprintf('%s*', URIC));

    # domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
    define('URI\RegExp\Pattern\DOMLABEL', sprintf('(?:[%s](?:[-%s]*[%s])?)', ALNUM, ALNUM, ALNUM));
    # toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
    define('URI\RegExp\Pattern\TOPLABEL', sprintf('(?:[%s](?:[-%s]*[%s])?)', ALPHA, ALNUM, ALNUM));
    # hostname      = *( domainlabel "." ) toplabel [ "." ]
    define('URI\RegExp\Pattern\HOSTNAME', sprintf('(?:%s\.)*%s\.?', DOMLABEL, TOPLABEL));

    # RFC 2373, APPENDIX B:
    # IPv6address = hexpart [ ":" IPv4address ]
    # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    # hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
    # hexseq      = hex4 *( ":" hex4)
    # hex4        = 1*4HEXDIG
    #
    # XXX: This definition has a flaw. "::" + IPv4address must be
    # allowed too.  Here is a replacement.
    #
    # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    define('URI\RegExp\Pattern\IPV4ADDR', '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}');
    # hex4        = 1*4HEXDIG
    define('URI\RegExp\Pattern\HEX4', sprintf('[%s]{1,4}', HEX));
    # lastpart    = hex4 | IPv4address
    define('URI\RegExp\Pattern\LASTPART', sprintf('(?:%s|%s)', HEX4, IPV4ADDR));
    # hexseq1     = *( hex4 ":" ) hex4
    define('URI\RegExp\Pattern\HEXSEQ1', sprintf('(?:%s:)*%s', HEX4, HEX4));
    # hexseq2     = *( hex4 ":" ) lastpart
    define('URI\RegExp\Pattern\HEXSEQ2', sprintf('(?:%s:)*%s', HEX4, LASTPART));
    # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
    define('URI\RegExp\Pattern\IPV6ADDR', sprintf('(?:%s|(?:%s)?::(?:%s)?)', HEXSEQ2, HEXSEQ1, HEXSEQ2));
    # IPv6prefix  = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
    # unused

    # ipv6reference = "[" IPv6address "]" (RFC 2732)
    define('URI\RegExp\Pattern\IPV6REF', sprintf('\[%s\]', IPV6ADDR));

    # host          = hostname | IPv4address
    # host          = hostname | IPv4address | IPv6reference (RFC 2732)
    define('URI\RegExp\Pattern\HOST', sprintf('(?:%s|%s|%s)', HOSTNAME, IPV4ADDR, IPV6REF));
    # port          = *digit
    define('URI\RegExp\Pattern\PORT', '\d*');
    # hostport      = host [ ":" port ]
    define('URI\RegExp\Pattern\HOSTPORT', sprintf('%s(?::%s)?', HOST, PORT));

    # userinfo      = *( unreserved | escaped |
    #                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
    define('URI\RegExp\Pattern\USERINFO', sprintf('(?:[%s;:&=+$,]|%s)*', UNRESERVED, ESCAPED));

    # pchar         = unreserved | escaped |
    #                 ":" | "@" | "&" | "=" | "+" | "$" | ","
    define('URI\RegExp\Pattern\PCHAR', sprintf('(?:[%s:@&=+$,]|%s)', UNRESERVED, ESCAPED));
    # param         = *pchar
    define('URI\RegExp\Pattern\PARAM', sprintf('%s*', PCHAR));
    # segment       = *pchar *( ";" param )
    define('URI\RegExp\Pattern\SEGMENT', sprintf('%s*(?:;%s)*', PCHAR, PARAM));
    # path_segments = segment *( "/" segment )
    define('URI\RegExp\Pattern\PATH_SEGMENTS', sprintf('%s(?:\/%s)*', SEGMENT, SEGMENT));

    # server        = [ [ userinfo "@" ] hostport ]
    define('URI\RegExp\Pattern\SERVER', sprintf('(?:%s@)?%s', USERINFO, HOSTPORT));
    # reg_name      = 1*( unreserved | escaped | "$" | "," |
    #                     ";" | ":" | "@" | "&" | "=" | "+" )
    define('URI\RegExp\Pattern\REG_NAME', sprintf('(?:[%s$,;:@&=+]|%s)+', UNRESERVED, ESCAPED));
    # authority     = server | reg_name
    define('URI\RegExp\Pattern\AUTHORITY', sprintf('(?:%s|%s)', SERVER, REG_NAME));

    # rel_segment   = 1*( unreserved | escaped |
    #                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
    define('URI\RegExp\Pattern\REL_SEGMENT', sprintf('(?:[%s;@&=+$,]|%s)+', UNRESERVED, ESCAPED));

    # scheme        = alpha *( alpha | digit | "+" | "-" | "." )
    define('URI\RegExp\Pattern\SCHEME', sprintf('[%s][-+.%s\d]*', ALPHA, ALPHA));

    # abs_path      = "/" path_segments
    define('URI\RegExp\Pattern\ABS_PATH', sprintf('\/%s', PATH_SEGMENTS));
    # rel_path      = rel_segment [ abs_path ]
    define('URI\RegExp\Pattern\REL_PATH', sprintf('%s(?:%s)?', REL_SEGMENT, ABS_PATH));
    # net_path      = "//" authority [ abs_path ]
    define('URI\RegExp\Pattern\NET_PATH', sprintf('\/\/%s(?:%s)?', AUTHORITY, ABS_PATH));

    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    define('URI\RegExp\Pattern\HIER_PART', sprintf('(?:%s|%s)(?:\?(?:%s))?', NET_PATH, ABS_PATH, QUERY));
    # opaque_part   = uric_no_slash *uric
    define('URI\RegExp\Pattern\OPAQUE_PART', sprintf('%s%s*', URIC_NO_SLASH, URIC));

    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    define('URI\RegExp\Pattern\ABS_URI', sprintf('%s:(?:%s|%s)', SCHEME, HIER_PART, OPAQUE_PART));
    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    define('URI\RegExp\Pattern\REL_URI', sprintf('(?:%s|%s|%s)(?:\?%s)?', NET_PATH, ABS_PATH, REL_PATH, QUERY));

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    define('URI\RegExp\Pattern\URI_REF', sprintf('(?:%s|%s)?(?:\#%s)?', ABS_URI, REL_URI, FRAGMENT));

    # XXX:
    # Capturing variant of absoluteURI [ "#" fragment ].
    # The template spans several lines and carries inline "(?# ...)" comments,
    # so it must be compiled with the PCRE "x" (extended) modifier; the
    # whitespace and line breaks are layout only under that modifier.
    # Capture groups: 1 scheme, 2 opaque, 3 userinfo, 4 host, 5 port,
    #                 6 registry, 7 path, 8 query, 9 fragment.
    define(
        'URI\RegExp\Pattern\X_ABS_URI',
        sprintf(
            '
                (%s):                                   (?# 1: scheme)
                (?:
                    (%s)                                (?# 2: opaque)
                |
                    (?:(?:
                        \/\/(?:
                            (?:(?:(%s)@)?               (?# 3: userinfo)
                                (?:(%s)(?:\:(\d*))?))?  (?# 4: host, 5: port)
                        |
                            (%s)                        (?# 6: registry)
                        )
                    |
                        (?!\/\/))                       (?# XXX: \'\/\/\' is the mark for hostport)
                        (%s)?                           (?# 7: path)
                    )(?:\?(%s))?                        (?# 8: query)
                )
                (?:\#(%s))?                             (?# 9: fragment)
            ',
            SCHEME,
            OPAQUE_PART,
            USERINFO,
            HOST,
            REG_NAME,
            ABS_PATH,
            QUERY,
            FRAGMENT
        )
    );

    # Capturing variant of relativeURI [ "#" fragment ]; like X_ABS_URI it
    # must be compiled with the "x" modifier.
    # Capture groups: 1 userinfo, 2 host, 3 port, 4 registry, 5 rel_segment,
    #                 6 abs_path, 7 query, 8 fragment.
    define(
        'URI\RegExp\Pattern\X_REL_URI',
        sprintf(
            '
                (?:
                    (?:
                        \/\/
                        (?:
                            (?:(%s)@)?                  (?# 1: userinfo)
                            (%s)?(?::(\d*))?            (?# 2: host, 3: port)
                        |
                            (%s)                        (?# 4: registry)
                        )
                    )
                |
                    (%s)                                (?# 5: rel_segment)
                )?
                (%s)?                                   (?# 6: abs_path)
                (?:\?(%s))?                             (?# 7: query)
                (?:\#(%s))?                             (?# 8: fragment)
            ',
            USERINFO,
            HOST,
            REG_NAME,
            REL_SEGMENT,
            ABS_PATH,
            QUERY,
            FRAGMENT
        )
    );
}
namespace URI\RegExp {
    // Complete PCRE patterns (delimiters and modifiers included) assembled
    // from the fragments in the URI\RegExp\Pattern namespace.  "Pattern\X"
    // below resolves to URI\RegExp\Pattern\X.
    //
    // NOTE(review): every anchored pattern here uses "^...$" without the "D"
    // modifier, so "$" also matches just before a single trailing newline.
    // Confirm whether callers rely on that before tightening to \A...\z.

    // for URI::split
    // The X_* templates are multi-line with inline comments, hence the "x"
    // modifier.
    define('URI\RegExp\ABS_URI', '/^' . Pattern\X_ABS_URI . '$/x');
    define('URI\RegExp\REL_URI', '/^' . Pattern\X_REL_URI . '$/x');

    // for URI::extract
    // NOTE(review): these are anchored at both ends, so they can only match a
    // whole subject string, and ABS_URI_REF / REL_URI_REF are identical to
    // ABS_URI / REL_URI above.  Extracting URIs from surrounding text would
    // need unanchored patterns - verify against the caller.
    define('URI\RegExp\URI_REF', '/^' . Pattern\URI_REF . '$/x');
    define('URI\RegExp\ABS_URI_REF', '/^' . Pattern\X_ABS_URI . '$/x');
    define('URI\RegExp\REL_URI_REF', '/^' . Pattern\X_REL_URI . '$/x');

    // for URI::escape and URI::unescape
    // ESCAPED matches one percent-encoded octet ("%" hex hex).  It must be
    // built from Pattern\ESCAPED: Pattern\URI_REF can match the empty string
    // and would therefore match at every position of any subject.
    define('URI\RegExp\ESCAPED', '/' . Pattern\ESCAPED . '/');
    // UNSAFE matches any single character that is neither unreserved nor
    // reserved.
    define('URI\RegExp\UNSAFE', sprintf('/[^%s%s]/', Pattern\UNRESERVED, Pattern\RESERVED));

    // Whole-string checks for the individual URI components.
    define('URI\RegExp\SCHEME', '/^' . Pattern\SCHEME . '$/');
    define('URI\RegExp\USERINFO', '/^' . Pattern\USERINFO . '$/');
    define('URI\RegExp\HOST', '/^' . Pattern\HOST . '$/');
    define('URI\RegExp\PORT', '/^' . Pattern\PORT . '$/');
    define('URI\RegExp\OPAQUE', '/^' . Pattern\OPAQUE_PART . '$/');
    define('URI\RegExp\REGISTRY', '/^' . Pattern\REG_NAME . '$/');
    define('URI\RegExp\ABS_PATH', '/^' . Pattern\ABS_PATH . '$/');
    define('URI\RegExp\REL_PATH', '/^' . Pattern\REL_PATH . '$/');
    define('URI\RegExp\QUERY', '/^' . Pattern\QUERY . '$/');
    define('URI\RegExp\FRAGMENT', '/^' . Pattern\FRAGMENT . '$/');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment