Skip to content

Instantly share code, notes, and snippets.

@aerith
Created November 5, 2010 05:08
Show Gist options
  • Save aerith/663666 to your computer and use it in GitHub Desktop.
Save aerith/663666 to your computer and use it in GitHub Desktop.
Ruby のソースほぼまるパク
<?php
namespace URI\RegExp\Pattern {
    //
    // Regular-expression fragments used to parse URIs.
    //
    // Grammar sources:
    //   RFC 2396 (URI Generic Syntax)
    //   RFC 2732 (IPv6 Literal Addresses in URLs)
    //   RFC 2373 (IPv6 Addressing Architecture)
    //
    // Every constant is registered under this namespace. Fragments whose value
    // is a bare character list (ALPHA, ALNUM, HEX, UNRESERVED, RESERVED) are
    // meant to be placed inside a [...] character class by later definitions;
    // the remaining fragments are complete sub-patterns.
    //

    // alpha = lowalpha | upalpha
    define(__NAMESPACE__ . '\ALPHA', 'a-zA-Z');

    // alphanum = alpha | digit
    define(__NAMESPACE__ . '\ALNUM', ALPHA . '\d');

    // hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
    //               "a" | "b" | "c" | "d" | "e" | "f"
    define(__NAMESPACE__ . '\HEX', 'a-fA-F\d');

    // escaped = "%" hex hex
    define(__NAMESPACE__ . '\ESCAPED', '%[' . HEX . ']{2}');

    // mark       = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    // unreserved = alphanum | mark
    // The leading "-" keeps the hyphen literal when this list opens a class.
    define(__NAMESPACE__ . '\UNRESERVED', '-_\.!~\*\'()' . ALNUM);

    // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
    // (this list additionally carries "[" and "]")
    define(__NAMESPACE__ . '\RESERVED', ';\/?:@&=+$,\[\]');

    // uric = reserved | unreserved | escaped
    define(__NAMESPACE__ . '\URIC', '(?:[' . UNRESERVED . RESERVED . ']|' . ESCAPED . ')');

    // uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
    //                 "&" | "=" | "+" | "$" | ","
    define(__NAMESPACE__ . '\URIC_NO_SLASH', '(?:[' . UNRESERVED . ';?:@&=+$,]|' . ESCAPED . ')');

    // query = *uric
    define(__NAMESPACE__ . '\QUERY', URIC . '*');

    // fragment = *uric
    define(__NAMESPACE__ . '\FRAGMENT', URIC . '*');

    // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
    define(__NAMESPACE__ . '\DOMLABEL', '(?:[' . ALNUM . '](?:[-' . ALNUM . ']*[' . ALNUM . '])?)');

    // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
    define(__NAMESPACE__ . '\TOPLABEL', '(?:[' . ALPHA . '](?:[-' . ALNUM . ']*[' . ALNUM . '])?)');

    // hostname = *( domainlabel "." ) toplabel [ "." ]
    define(__NAMESPACE__ . '\HOSTNAME', '(?:' . DOMLABEL . '\.)*' . TOPLABEL . '\.?');

    // RFC 2373, APPENDIX B:
    //   IPv6address = hexpart [ ":" IPv4address ]
    //   IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
    //   hexseq      = hex4 *( ":" hex4)
    //   hex4        = 1*4HEXDIG
    //
    // XXX: That definition has a flaw: "::" + IPv4address must be allowed
    // too. The definitions below are the replacement.

    // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    define(__NAMESPACE__ . '\IPV4ADDR', '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}');

    // hex4 = 1*4HEXDIG
    define(__NAMESPACE__ . '\HEX4', '[' . HEX . ']{1,4}');

    // lastpart = hex4 | IPv4address
    define(__NAMESPACE__ . '\LASTPART', '(?:' . HEX4 . '|' . IPV4ADDR . ')');

    // hexseq1 = *( hex4 ":" ) hex4
    define(__NAMESPACE__ . '\HEXSEQ1', '(?:' . HEX4 . ':)*' . HEX4);

    // hexseq2 = *( hex4 ":" ) lastpart
    define(__NAMESPACE__ . '\HEXSEQ2', '(?:' . HEX4 . ':)*' . LASTPART);

    // IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ]
    define(__NAMESPACE__ . '\IPV6ADDR', '(?:' . HEXSEQ2 . '|(?:' . HEXSEQ1 . ')?::(?:' . HEXSEQ2 . ')?)');

    // IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT
    // (unused, therefore not defined)

    // ipv6reference = "[" IPv6address "]" (RFC 2732)
    define(__NAMESPACE__ . '\IPV6REF', '\[' . IPV6ADDR . '\]');

    // host = hostname | IPv4address                  (RFC 2396)
    // host = hostname | IPv4address | IPv6reference  (RFC 2732)
    define(__NAMESPACE__ . '\HOST', '(?:' . HOSTNAME . '|' . IPV4ADDR . '|' . IPV6REF . ')');

    // port = *digit
    define(__NAMESPACE__ . '\PORT', '\d*');

    // hostport = host [ ":" port ]
    define(__NAMESPACE__ . '\HOSTPORT', HOST . '(?::' . PORT . ')?');

    // userinfo = *( unreserved | escaped |
    //               ";" | ":" | "&" | "=" | "+" | "$" | "," )
    define(__NAMESPACE__ . '\USERINFO', '(?:[' . UNRESERVED . ';:&=+$,]|' . ESCAPED . ')*');

    // pchar = unreserved | escaped |
    //         ":" | "@" | "&" | "=" | "+" | "$" | ","
    define(__NAMESPACE__ . '\PCHAR', '(?:[' . UNRESERVED . ':@&=+$,]|' . ESCAPED . ')');

    // param = *pchar
    define(__NAMESPACE__ . '\PARAM', PCHAR . '*');

    // segment = *pchar *( ";" param )
    define(__NAMESPACE__ . '\SEGMENT', PCHAR . '*(?:;' . PARAM . ')*');

    // path_segments = segment *( "/" segment )
    define(__NAMESPACE__ . '\PATH_SEGMENTS', SEGMENT . '(?:\/' . SEGMENT . ')*');

    // server = [ [ userinfo "@" ] hostport ]
    define(__NAMESPACE__ . '\SERVER', '(?:' . USERINFO . '@)?' . HOSTPORT);

    // reg_name = 1*( unreserved | escaped | "$" | "," |
    //                ";" | ":" | "@" | "&" | "=" | "+" )
    define(__NAMESPACE__ . '\REG_NAME', '(?:[' . UNRESERVED . '$,;:@&=+]|' . ESCAPED . ')+');

    // authority = server | reg_name
    define(__NAMESPACE__ . '\AUTHORITY', '(?:' . SERVER . '|' . REG_NAME . ')');

    // rel_segment = 1*( unreserved | escaped |
    //                   ";" | "@" | "&" | "=" | "+" | "$" | "," )
    define(__NAMESPACE__ . '\REL_SEGMENT', '(?:[' . UNRESERVED . ';@&=+$,]|' . ESCAPED . ')+');

    // scheme = alpha *( alpha | digit | "+" | "-" | "." )
    define(__NAMESPACE__ . '\SCHEME', '[' . ALPHA . '][-+.' . ALPHA . '\d]*');

    // abs_path = "/" path_segments
    define(__NAMESPACE__ . '\ABS_PATH', '\/' . PATH_SEGMENTS);

    // rel_path = rel_segment [ abs_path ]
    define(__NAMESPACE__ . '\REL_PATH', REL_SEGMENT . '(?:' . ABS_PATH . ')?');

    // net_path = "//" authority [ abs_path ]
    define(__NAMESPACE__ . '\NET_PATH', '\/\/' . AUTHORITY . '(?:' . ABS_PATH . ')?');

    // hier_part = ( net_path | abs_path ) [ "?" query ]
    define(__NAMESPACE__ . '\HIER_PART', '(?:' . NET_PATH . '|' . ABS_PATH . ')(?:\?(?:' . QUERY . '))?');

    // opaque_part = uric_no_slash *uric
    define(__NAMESPACE__ . '\OPAQUE_PART', URIC_NO_SLASH . URIC . '*');

    // absoluteURI = scheme ":" ( hier_part | opaque_part )
    define(__NAMESPACE__ . '\ABS_URI', SCHEME . ':(?:' . HIER_PART . '|' . OPAQUE_PART . ')');

    // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
    define(__NAMESPACE__ . '\REL_URI', '(?:' . NET_PATH . '|' . ABS_PATH . '|' . REL_PATH . ')(?:\?' . QUERY . ')?');

    // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    define(__NAMESPACE__ . '\URI_REF', '(?:' . ABS_URI . '|' . REL_URI . ')?(?:\#' . FRAGMENT . ')?');

    // XXX: capturing variant of the absolute-URI pattern.
    // The value is a multi-line pattern holding literal newlines, spaces and
    // inline (?# ...) comments, so it has to be compiled with the "x"
    // (extended) modifier. Capture groups: 1 scheme, 2 opaque, 3 userinfo,
    // 4 host, 5 port, 6 registry, 7 path, 8 query, 9 fragment.
    define(
        __NAMESPACE__ . '\X_ABS_URI',
        "\n"
        . '(' . SCHEME . '): (?# 1: scheme)' . "\n"
        . '(?:' . "\n"
        . '(' . OPAQUE_PART . ') (?# 2: opaque)' . "\n"
        . '|' . "\n"
        . '(?:(?:' . "\n"
        . '\/\/(?:' . "\n"
        . '(?:(?:(' . USERINFO . ')@)? (?# 3: userinfo)' . "\n"
        . '(?:(' . HOST . ')(?:\:(\d*))?))? (?# 4: host, 5: port)' . "\n"
        . '|' . "\n"
        . '(' . REG_NAME . ') (?# 6: registry)' . "\n"
        . ')' . "\n"
        . '|' . "\n"
        . '(?!\/\/)) (?# XXX: \'\/\/\' is the mark for hostport)' . "\n"
        . '(' . ABS_PATH . ')? (?# 7: path)' . "\n"
        . ')(?:\?(' . QUERY . '))? (?# 8: query)' . "\n"
        . ')' . "\n"
        . '(?:\#(' . FRAGMENT . '))? (?# 9: fragment)' . "\n"
    );

    // Capturing variant of the relative-URI pattern; like X_ABS_URI it relies
    // on the "x" modifier. Capture groups: 1 userinfo, 2 host, 3 port,
    // 4 registry, 5 rel_segment, 6 abs_path, 7 query, 8 fragment.
    define(
        __NAMESPACE__ . '\X_REL_URI',
        "\n"
        . '(?:' . "\n"
        . '(?:' . "\n"
        . '\/\/' . "\n"
        . '(?:' . "\n"
        . '(?:(' . USERINFO . ')@)? (?# 1: userinfo)' . "\n"
        . '(' . HOST . ')?(?::(\d*))? (?# 2: host, 3: port)' . "\n"
        . '|' . "\n"
        . '(' . REG_NAME . ') (?# 4: registry)' . "\n"
        . ')' . "\n"
        . ')' . "\n"
        . '|' . "\n"
        . '(' . REL_SEGMENT . ') (?# 5: rel_segment)' . "\n"
        . ')?' . "\n"
        . '(' . ABS_PATH . ')? (?# 6: abs_path)' . "\n"
        . '(?:\?(' . QUERY . '))? (?# 7: query)' . "\n"
        . '(?:\#(' . FRAGMENT . '))? (?# 8: fragment)' . "\n"
    );
}
namespace URI\RegExp {
    //
    // Ready-to-use PCRE patterns (delimited with "/") assembled from the
    // fragments in URI\RegExp\Pattern. Patterns built from the multi-line
    // X_* fragments carry the "x" modifier because those fragments contain
    // layout whitespace and inline (?# ...) comments.
    //

    // for URI::split
    define('URI\RegExp\ABS_URI', '/^' . Pattern\X_ABS_URI .'$/x');
    define('URI\RegExp\REL_URI', '/^' . Pattern\X_REL_URI .'$/x');

    // for URI::extract
    // NOTE(review): all three are anchored with ^...$, and ABS_URI_REF /
    // REL_URI_REF are identical to ABS_URI / REL_URI above, so they only
    // match when the whole subject is a URI. If they are meant to locate
    // URIs inside longer text the anchors probably need to go -- confirm
    // against the callers before changing.
    define('URI\RegExp\URI_REF', '/^' . Pattern\URI_REF .'$/x');
    define('URI\RegExp\ABS_URI_REF', '/^' . Pattern\X_ABS_URI .'$/x');
    define('URI\RegExp\REL_URI_REF', '/^' . Pattern\X_REL_URI .'$/x');

    // for URI::escape and URI::unescape
    // ESCAPED matches a single percent-encoded octet ("%" hex hex). It is
    // built from Pattern\ESCAPED; building it from Pattern\URI_REF (as was
    // done before) yields a pattern that also matches the empty string.
    define('URI\RegExp\ESCAPED', '/' . Pattern\ESCAPED .'/');
    // UNSAFE matches any single character that is neither unreserved nor
    // reserved.
    define('URI\RegExp\UNSAFE', sprintf('/[^%s%s]/', Pattern\UNRESERVED, Pattern\RESERVED));

    // Whole-string (^...$) matchers for the individual URI components.
    define('URI\RegExp\SCHEME', '/^' . Pattern\SCHEME . '$/');
    define('URI\RegExp\USERINFO', '/^' . Pattern\USERINFO . '$/');
    define('URI\RegExp\HOST', '/^' . Pattern\HOST . '$/');
    define('URI\RegExp\PORT', '/^' . Pattern\PORT . '$/');
    define('URI\RegExp\OPAQUE', '/^' . Pattern\OPAQUE_PART . '$/');
    define('URI\RegExp\REGISTRY', '/^' . Pattern\REG_NAME . '$/');
    define('URI\RegExp\ABS_PATH', '/^' . Pattern\ABS_PATH . '$/');
    define('URI\RegExp\REL_PATH', '/^' . Pattern\REL_PATH . '$/');
    define('URI\RegExp\QUERY', '/^' . Pattern\QUERY . '$/');
    define('URI\RegExp\FRAGMENT', '/^' . Pattern\FRAGMENT . '$/');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment