Created
June 28, 2012 12:25
-
-
Save agentgt/3011049 to your computer and use it in GitHub Desktop.
Commons Httpclient Encoding tools that are missing from Commons Http Components
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.snaphop.util; | |
/* | |
* $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.27 2004/05/05 20:34:01 olegk Exp $ | |
* $Revision: 507321 $ | |
* $Date: 2007-02-14 01:10:51 +0100 (Wed, 14 Feb 2007) $ | |
* | |
* ==================================================================== | |
* | |
* Licensed to the Apache Software Foundation (ASF) under one or more | |
* contributor license agreements. See the NOTICE file distributed with | |
* this work for additional information regarding copyright ownership. | |
* The ASF licenses this file to You under the Apache License, Version 2.0 | |
* (the "License"); you may not use this file except in compliance with | |
* the License. You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* ==================================================================== | |
* | |
* This software consists of voluntary contributions made by many | |
* individuals on behalf of the Apache Software Foundation. For more | |
* information on the Apache Software Foundation, please see | |
* <http://www.apache.org/>. | |
* | |
*/ | |
import java.util.BitSet; | |
import org.apache.commons.codec.DecoderException; | |
import org.apache.commons.codec.net.URLCodec; | |
import org.apache.http.util.EncodingUtils; | |
/** | |
* The URI escape and character encoding and decoding utility. | |
* It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather | |
* than {@link org.apache.commons.httpclient.URI}. | |
* | |
* @author <a href="mailto:[email protected]">Sung-Gu</a> | |
* @version $Revision: 507321 $ $Date: 2002/03/14 15:14:01 | |
*/ | |
public class HttpClientURIUtil { | |
// ----------------------------------------------------- Instance variables | |
protected static final BitSet empty = new BitSet(1); | |
// ---------------------------------------------------------- URI utilities | |
/** | |
* Get the basename of an URI. It's possibly an empty string. | |
* | |
* @param uri a string regarded an URI | |
* @return the basename string; an empty string if the path ends with slash | |
*/ | |
public static String getName(String uri) { | |
if (uri == null || uri.length() == 0) { return uri; } | |
String path = getPath(uri); | |
int at = path.lastIndexOf("/"); | |
int to = path.length(); | |
return (at >= 0) ? path.substring(at + 1, to) : path; | |
} | |
/** | |
* Get the query of an URI. | |
* | |
* @param uri a string regarded an URI | |
* @return the query string; <code>null</code> if empty or undefined | |
*/ | |
public static String getQuery(String uri) { | |
if (uri == null || uri.length() == 0) { return null; } | |
// consider of net_path | |
int at = uri.indexOf("//"); | |
int from = uri.indexOf( | |
"/", | |
at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 | |
); | |
// the authority part of URI ignored | |
int to = uri.length(); | |
// reuse the at and from variables to consider the query | |
at = uri.indexOf("?", from); | |
if (at >= 0) { | |
from = at + 1; | |
} else { | |
return null; | |
} | |
// check the fragment | |
if (uri.lastIndexOf("#") > from) { | |
to = uri.lastIndexOf("#"); | |
} | |
// get the path and query. | |
return (from < 0 || from == to) ? null : uri.substring(from, to); | |
} | |
/** | |
* Get the path of an URI. | |
* | |
* @param uri a string regarded an URI | |
* @return the path string | |
*/ | |
public static String getPath(String uri) { | |
if (uri == null) { | |
return null; | |
} | |
// consider of net_path | |
int at = uri.indexOf("//"); | |
int from = uri.indexOf( | |
"/", | |
at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 | |
); | |
// the authority part of URI ignored | |
int to = uri.length(); | |
// check the query | |
if (uri.indexOf('?', from) != -1) { | |
to = uri.indexOf('?', from); | |
} | |
// check the fragment | |
if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) { | |
to = uri.lastIndexOf("#"); | |
} | |
// get only the path. | |
return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); | |
} | |
/** | |
* Get the path and query of an URI. | |
* | |
* @param uri a string regarded an URI | |
* @return the path and query string | |
*/ | |
public static String getPathQuery(String uri) { | |
if (uri == null) { | |
return null; | |
} | |
// consider of net_path | |
int at = uri.indexOf("//"); | |
int from = uri.indexOf( | |
"/", | |
at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 | |
); | |
// the authority part of URI ignored | |
int to = uri.length(); | |
// Ignore the '?' mark so to ignore the query. | |
// check the fragment | |
if (uri.lastIndexOf("#") > from) { | |
to = uri.lastIndexOf("#"); | |
} | |
// get the path and query. | |
return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); | |
} | |
/** | |
* Get the path of an URI and its rest part. | |
* | |
* @param uri a string regarded an URI | |
* @return the string from the path part | |
*/ | |
public static String getFromPath(String uri) { | |
if (uri == null) { | |
return null; | |
} | |
// consider of net_path | |
int at = uri.indexOf("//"); | |
int from = uri.indexOf( | |
"/", | |
at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 | |
); | |
// get the path and its rest. | |
return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from); | |
} | |
// ----------------------------------------------------- Encoding utilities | |
/** | |
* Get the fully escaped and encoded string with the default protocol charset.
* It's the same function to use <code>encode(String unescaped, Bitset | |
* empty, "UTF-8")</code>. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodeAll(String unescaped) throws URIException { | |
return encodeAll(unescaped, "UTF-8"); | |
} | |
/** | |
* Get the all escaped and encoded string with a given charset. | |
* It's the same function to use <code>encode(String unescaped, Bitset | |
* empty, String charset)</code>. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodeAll(String unescaped, String charset) | |
throws URIException { | |
return encode(unescaped, empty, charset); | |
} | |
/** | |
* Escape and encode a string regarded as within the authority component of | |
* an URI with the default protocol charset. | |
* Within the authority component, the characters ";", ":", "@", "?", and | |
* "/" are reserved. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodeWithinAuthority(String unescaped) | |
throws URIException { | |
return encodeWithinAuthority(unescaped, "UTF-8"); | |
} | |
/** | |
* Escape and encode a string regarded as within the authority component of | |
* an URI with a given charset. | |
* Within the authority component, the characters ";", ":", "@", "?", and | |
* "/" are reserved. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodeWithinAuthority(String unescaped, String charset) | |
throws URIException { | |
return encode(unescaped, allowed_within_authority, charset); | |
} | |
/** | |
* Escape and encode a string regarded as the path and query components of | |
* an URI with the default protocol charset. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodePathQuery(String unescaped) throws URIException { | |
return encodePathQuery(unescaped, "UTF-8"); | |
} | |
/** | |
* Escape and encode a string regarded as the path and query components of | |
* an URI with a given charset. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodePathQuery(String unescaped, String charset) | |
throws URIException { | |
int at = unescaped.indexOf('?'); | |
if (at < 0) { | |
return encode(unescaped, allowed_abs_path, charset); | |
} | |
// else | |
return encode(unescaped.substring(0, at), allowed_abs_path, charset) | |
+ '?' + encode(unescaped.substring(at + 1), allowed_query, charset); | |
} | |
/** | |
* Escape and encode a string regarded as within the path component of an | |
* URI with the default protocol charset. | |
* The path may consist of a sequence of path segments separated by a | |
* single slash "/" character. Within a path segment, the characters | |
* "/", ";", "=", and "?" are reserved. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodeWithinPath(String unescaped) | |
throws URIException { | |
return encodeWithinPath(unescaped, "UTF-8"); | |
} | |
/** | |
* Escape and encode a string regarded as within the path component of an | |
* URI with a given charset. | |
* The path may consist of a sequence of path segments separated by a | |
* single slash "/" character. Within a path segment, the characters | |
* "/", ";", "=", and "?" are reserved. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodeWithinPath(String unescaped, String charset) | |
throws URIException { | |
return encode(unescaped, allowed_within_path, charset); | |
} | |
/** | |
* Escape and encode a string regarded as the path component of an URI with | |
* the default protocol charset. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodePath(String unescaped) throws URIException { | |
return encodePath(unescaped, "UTF-8"); | |
} | |
/** | |
* Escape and encode a string regarded as the path component of an URI with | |
* a given charset. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodePath(String unescaped, String charset) | |
throws URIException { | |
return encode(unescaped, allowed_abs_path, charset); | |
} | |
/** | |
* Escape and encode a string regarded as within the query component of an | |
* URI with the default protocol charset. | |
* When a query comprise the name and value pairs, it is used in order | |
* to encode each name and value string. The reserved special characters | |
* within a query component are being included in encoding the query. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodeWithinQuery(String unescaped) | |
throws URIException { | |
return encodeWithinQuery(unescaped, "UTF-8"); | |
} | |
/** | |
* Escape and encode a string regarded as within the query component of an | |
* URI with a given charset. | |
* When a query comprise the name and value pairs, it is used in order | |
* to encode each name and value string. The reserved special characters | |
* within a query component are being included in encoding the query. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodeWithinQuery(String unescaped, String charset) | |
throws URIException { | |
return encode(unescaped, allowed_within_query, charset); | |
} | |
/** | |
* Escape and encode a string regarded as the query component of an URI with | |
* the default protocol charset. | |
* When a query string is not misunderstood the reserved special characters | |
* ("&", "=", "+", ",", and "$") within a query component, this method | |
* is recommended to use in encoding the whole query. | |
* | |
* @param unescaped an unescaped string | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
* @see #encode | |
*/ | |
public static String encodeQuery(String unescaped) throws URIException { | |
return encodeQuery(unescaped, "UTF-8"); | |
} | |
/** | |
* Escape and encode a string regarded as the query component of an URI with | |
* a given charset. | |
* When a query string is not misunderstood the reserved special characters | |
* ("&", "=", "+", ",", and "$") within a query component, this method | |
* is recommended to use in encoding the whole query. | |
* | |
* @param unescaped an unescaped string | |
* @param charset the charset | |
* @return the escaped string | |
* | |
* @throws URIException if the charset is not supported | |
* | |
* @see #encode | |
*/ | |
public static String encodeQuery(String unescaped, String charset) | |
throws URIException { | |
return encode(unescaped, allowed_query, charset); | |
} | |
/** | |
* Escape and encode a given string with allowed characters not to be | |
* escaped and the default protocol charset. | |
* | |
* @param unescaped a string | |
* @param allowed allowed characters not to be escaped | |
* @return the escaped string | |
* | |
* @throws URIException if the default protocol charset is not supported | |
* | |
* @see URI#getDefaultProtocolCharset | |
*/ | |
public static String encode(String unescaped, BitSet allowed) | |
throws URIException { | |
return encode(unescaped, allowed, "UTF-8"); | |
} | |
/** | |
* Escape and encode a given string with allowed characters not to be | |
* escaped and a given charset. | |
* | |
* @param unescaped a string | |
* @param allowed allowed characters not to be escaped | |
* @param charset the charset | |
* @return the escaped string | |
*/ | |
public static String encode(String unescaped, BitSet allowed, | |
String charset) throws URIException { | |
byte[] rawdata = URLCodec.encodeUrl(allowed, | |
EncodingUtils.getBytes(unescaped, charset)); | |
return EncodingUtils.getAsciiString(rawdata); | |
} | |
/** | |
* Unescape and decode a given string regarded as an escaped string with the | |
* default protocol charset. | |
* | |
* @param escaped a string | |
* @return the unescaped string | |
* | |
* @throws URIException if the string cannot be decoded (invalid) | |
* | |
* @see URI#getDefaultProtocolCharset | |
*/ | |
public static String decode(String escaped) throws URIException { | |
try { | |
byte[] rawdata = URLCodec.decodeUrl(EncodingUtils.getAsciiBytes(escaped)); | |
return EncodingUtils.getString(rawdata, "UTF-8"); | |
} catch (DecoderException e) { | |
throw new URIException(e.getMessage()); | |
} | |
} | |
/** | |
* The percent "%" character always has the reserved purpose of being the | |
* escape indicator, it must be escaped as "%25" in order to be used as | |
* data within a URI.
*/ | |
protected static final BitSet percent = new BitSet(256);
// Static initializer for percent: the single escape-indicator character.
static {
    percent.set('%');
}
/** | |
* BitSet for digit. | |
* <p><blockquote><pre> | |
* digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | | |
* "8" | "9" | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet digit = new BitSet(256);
// Static initializer for digit
static {
    // BitSet.set(from, to) excludes "to", hence the + 1 to include '9'.
    digit.set('0', '9' + 1);
}
/** | |
* BitSet for alpha. | |
* <p><blockquote><pre> | |
* alpha = lowalpha | upalpha | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet alpha = new BitSet(256);
// Static initializer for alpha
static {
    // BitSet.set(from, to) excludes "to", hence the + 1 on the upper bounds.
    alpha.set('a', 'z' + 1); // lowalpha
    alpha.set('A', 'Z' + 1); // upalpha
}
/** | |
* BitSet for alphanum (join of alpha & digit). | |
* <p><blockquote><pre> | |
* alphanum = alpha | digit | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet alphanum = new BitSet(256);
// Static initializer for alphanum: union of letters and digits.
static {
    alphanum.or(alpha);
    alphanum.or(digit);
}
/** | |
* BitSet for hex. | |
* <p><blockquote><pre> | |
* hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | | |
* "a" | "b" | "c" | "d" | "e" | "f" | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet hex = new BitSet(256);
// Static initializer for hex: digits plus a-f in both cases.
static {
    hex.or(digit);
    // BitSet.set(from, to) excludes "to", hence the + 1 on the upper bounds.
    hex.set('a', 'f' + 1);
    hex.set('A', 'F' + 1);
}
/** | |
* BitSet for escaped. | |
* <p><blockquote><pre> | |
* escaped = "%" hex hex | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet escaped = new BitSet(256);
// Static initializer for escaped: every character that can occur in a
// "%" hex hex triplet.
static {
    escaped.or(percent);
    escaped.or(hex);
}
/** | |
* BitSet for mark. | |
* <p><blockquote><pre> | |
* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | | |
* "(" | ")" | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet mark = new BitSet(256);
// Static initializer for mark
static {
    // The nine mark characters of the grammar, in the documented order.
    for (char c : "-_.!~*'()".toCharArray()) {
        mark.set(c);
    }
}
/** | |
* Data characters that are allowed in a URI but do not have a reserved | |
* purpose are called unreserved. | |
* <p><blockquote><pre> | |
* unreserved = alphanum | mark | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet unreserved = new BitSet(256);
// Static initializer for unreserved: alphanum | mark.
static {
    unreserved.or(alphanum);
    unreserved.or(mark);
}
/** | |
* BitSet for reserved. | |
* <p><blockquote><pre> | |
* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | | |
* "$" | "," | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet reserved = new BitSet(256);
// Static initializer for reserved
static {
    // The ten reserved characters of the grammar, in the documented order.
    for (char c : ";/?:@&=+$,".toCharArray()) {
        reserved.set(c);
    }
}
/** | |
* BitSet for uric. | |
* <p><blockquote><pre> | |
* uric = reserved | unreserved | escaped | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet uric = new BitSet(256);
// Static initializer for uric: reserved | unreserved | escaped.
static {
    uric.or(reserved);
    uric.or(unreserved);
    uric.or(escaped);
}
/** | |
* BitSet for fragment (alias for uric). | |
* <p><blockquote><pre> | |
* fragment = *uric | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet fragment = uric; | |
/** | |
* BitSet for query (alias for uric). | |
* <p><blockquote><pre> | |
* query = *uric | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet query = uric; | |
/** | |
* BitSet for pchar. | |
* <p><blockquote><pre> | |
* pchar = unreserved | escaped | | |
* ":" | "@" | "&" | "=" | "+" | "$" | "," | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet pchar = new BitSet(256);
// Static initializer for pchar
static {
    pchar.or(unreserved);
    pchar.or(escaped);
    // The extra punctuation the grammar permits in a path character.
    for (char c : ":@&=+$,".toCharArray()) {
        pchar.set(c);
    }
}
/** | |
* BitSet for param (alias for pchar). | |
* <p><blockquote><pre> | |
* param = *pchar | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet param = pchar; | |
/** | |
* BitSet for segment. | |
* <p><blockquote><pre> | |
* segment = *pchar *( ";" param ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet segment = new BitSet(256);
// Static initializer for segment: *pchar *( ";" param )
static {
    segment.or(pchar);
    segment.set(';');
    segment.or(param); // param aliases pchar; kept to mirror the grammar
}
/** | |
* BitSet for path segments. | |
* <p><blockquote><pre> | |
* path_segments = segment *( "/" segment ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet path_segments = new BitSet(256);
// Static initializer for path_segments: segment characters plus the
// "/" separator.
static {
    path_segments.set('/');
    path_segments.or(segment);
}
/** | |
* URI absolute path. | |
* <p><blockquote><pre> | |
* abs_path = "/" path_segments | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet abs_path = new BitSet(256);
// Static initializer for abs_path: "/" path_segments
static {
    abs_path.set('/');
    abs_path.or(path_segments);
}
/** | |
* URI bitset for encoding typical non-slash characters. | |
* <p><blockquote><pre> | |
* uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | | |
* "&" | "=" | "+" | "$" | "," | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet uric_no_slash = new BitSet(256);
// Static initializer for uric_no_slash
static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    // ':' is part of the documented grammar; a duplicated set(';') used to
    // stand here, leaving ':' out of the set.
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
/** | |
* URI bitset that combines uric_no_slash and uric. | |
* <p><blockquote><pre> | |
* opaque_part = uric_no_slash *uric | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet opaque_part = new BitSet(256);
// Static initializer for opaque_part
static {
    // Generous: the grammar forbids a slash as the first character, but a
    // per-character bitset cannot express position, so uric is merged in.
    opaque_part.or(uric_no_slash);
    opaque_part.or(uric);
}
/** | |
* URI bitset that combines absolute path and opaque part. | |
* <p><blockquote><pre> | |
* path = [ abs_path | opaque_part ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet path = new BitSet(256);
// Static initializer for path: [ abs_path | opaque_part ]
static {
    path.or(abs_path);
    path.or(opaque_part);
}
/** | |
* Port, a logical alias for digit. | |
*/ | |
protected static final BitSet port = digit; | |
/** | |
* Bitset that combines digit and dot for IPv4address.
* <p><blockquote><pre> | |
* IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet IPv4address = new BitSet(256);
// Static initializer for IPv4address: digits and the "." separator.
static {
    IPv4address.or(digit);
    IPv4address.set('.');
}
/** | |
* RFC 2373. | |
* <p><blockquote><pre> | |
* IPv6address = hexpart [ ":" IPv4address ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet IPv6address = new BitSet(256);
// Static initializer for IPv6address: hexpart [ ":" IPv4address ]
static {
    IPv6address.or(hex); // hexpart
    IPv6address.set(':');
    IPv6address.or(IPv4address);
}
/** | |
* RFC 2732, 2373. | |
* <p><blockquote><pre> | |
* IPv6reference = "[" IPv6address "]" | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet IPv6reference = new BitSet(256);
// Static initializer for IPv6reference: "[" IPv6address "]"
static {
    IPv6reference.set('[');
    IPv6reference.or(IPv6address);
    IPv6reference.set(']');
}
/** | |
* BitSet for toplabel. | |
* <p><blockquote><pre> | |
* toplabel = alpha | alpha *( alphanum | "-" ) alphanum | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet toplabel = new BitSet(256);
// Static initializer for toplabel: alphanumerics and "-". Positional rules
// of the grammar are not expressible in a per-character set.
static {
    toplabel.or(alphanum);
    toplabel.set('-');
}
/** | |
* BitSet for domainlabel. | |
* <p><blockquote><pre> | |
* domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet domainlabel = toplabel; | |
/** | |
* BitSet for hostname. | |
* <p><blockquote><pre> | |
* hostname = *( domainlabel "." ) toplabel [ "." ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet hostname = new BitSet(256);
// Static initializer for hostname
static {
    // domainlabel is an alias of toplabel, so one or() covers both.
    hostname.or(toplabel);
    hostname.set('.');
}
/** | |
* BitSet for host. | |
* <p><blockquote><pre> | |
* host = hostname | IPv4address | IPv6reference | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet host = new BitSet(256);
// Static initializer for host: hostname | IPv4address | IPv6reference
static {
    host.or(hostname);
    // IPv6reference is built from IPv6address, which already merges in
    // IPv4address, so no separate or(IPv4address) is needed.
    host.or(IPv6reference);
}
/** | |
* BitSet for hostport. | |
* <p><blockquote><pre> | |
* hostport = host [ ":" port ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet hostport = new BitSet(256);
// Static initializer for hostport: host [ ":" port ]
static {
    hostport.or(host);
    hostport.set(':');
    hostport.or(port);
}
/** | |
* Bitset for userinfo. | |
* <p><blockquote><pre> | |
* userinfo = *( unreserved | escaped | | |
* ";" | ":" | "&" | "=" | "+" | "$" | "," ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet userinfo = new BitSet(256);
// Static initializer for userinfo
static {
    userinfo.or(unreserved);
    userinfo.or(escaped);
    // Extra punctuation the grammar permits in userinfo.
    for (char c : ";:&=+$,".toCharArray()) {
        userinfo.set(c);
    }
}
/** | |
* BitSet for within the userinfo component like user and password. | |
*/ | |
public static final BitSet within_userinfo = new BitSet(256);
// Static initializer for within_userinfo
static {
    within_userinfo.or(userinfo);
    // Remove the characters that are reserved within the authority.
    for (char c : ";:@?/".toCharArray()) {
        within_userinfo.clear(c);
    }
}
/** | |
* Bitset for server. | |
* <p><blockquote><pre> | |
* server = [ [ userinfo "@" ] hostport ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet server = new BitSet(256);
// Static initializer for server: [ [ userinfo "@" ] hostport ]
static {
    server.or(userinfo);
    server.set('@');
    server.or(hostport);
}
/** | |
* BitSet for reg_name. | |
* <p><blockquote><pre> | |
* reg_name = 1*( unreserved | escaped | "$" | "," | | |
* ";" | ":" | "@" | "&" | "=" | "+" ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet reg_name = new BitSet(256);
// Static initializer for reg_name
static {
    reg_name.or(unreserved);
    reg_name.or(escaped);
    // Extra punctuation the grammar permits in a registry-based name.
    for (char c : "$,;:@&=+".toCharArray()) {
        reg_name.set(c);
    }
}
/** | |
* BitSet for authority. | |
* <p><blockquote><pre> | |
* authority = server | reg_name | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet authority = new BitSet(256);
// Static initializer for authority: server | reg_name
static {
    authority.or(server);
    authority.or(reg_name);
}
/** | |
* BitSet for scheme. | |
* <p><blockquote><pre> | |
* scheme = alpha *( alpha | digit | "+" | "-" | "." ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet scheme = new BitSet(256);
// Static initializer for scheme: letters, digits and "+", "-", ".".
static {
    scheme.or(alpha);
    scheme.or(digit);
    scheme.set('+');
    scheme.set('-');
    scheme.set('.');
}
/** | |
* BitSet for rel_segment. | |
* <p><blockquote><pre> | |
* rel_segment = 1*( unreserved | escaped | | |
* ";" | "@" | "&" | "=" | "+" | "$" | "," ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet rel_segment = new BitSet(256);
// Static initializer for rel_segment
static {
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
    // Extra punctuation the grammar permits in a relative segment.
    for (char c : ";@&=+$,".toCharArray()) {
        rel_segment.set(c);
    }
}
/** | |
* BitSet for rel_path. | |
* <p><blockquote><pre> | |
* rel_path = rel_segment [ abs_path ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet rel_path = new BitSet(256);
// Static initializer for rel_path: rel_segment [ abs_path ]
static {
    rel_path.or(rel_segment);
    rel_path.or(abs_path);
}
/** | |
* BitSet for net_path. | |
* <p><blockquote><pre> | |
* net_path = "//" authority [ abs_path ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet net_path = new BitSet(256);
// Static initializer for net_path: "//" authority [ abs_path ]
static {
    net_path.set('/');
    net_path.or(authority);
    net_path.or(abs_path);
}
/** | |
* BitSet for hier_part. | |
* <p><blockquote><pre> | |
* hier_part = ( net_path | abs_path ) [ "?" query ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet hier_part = new BitSet(256);
// Static initializer for hier_part: ( net_path | abs_path ) [ "?" query ]
static {
    hier_part.or(net_path);
    hier_part.or(abs_path);
    // No explicit set('?'): query is uric, which already contains '?'.
    hier_part.or(query);
}
/** | |
* BitSet for relative | |
* <p><blockquote><pre> | |
* relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet relativeURI = new BitSet(256);
// Static initializer for relativeURI:
// ( net_path | abs_path | rel_path ) [ "?" query ]
static {
    relativeURI.or(net_path);
    relativeURI.or(abs_path);
    relativeURI.or(rel_path);
    // No explicit set('?'): query is uric, which already contains '?'.
    relativeURI.or(query);
}
/** | |
* BitSet for absoluteURI. | |
* <p><blockquote><pre> | |
* absoluteURI = scheme ":" ( hier_part | opaque_part ) | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet absoluteURI = new BitSet(256);
// Static initializer for absoluteURI: scheme ":" ( hier_part | opaque_part )
static {
    absoluteURI.or(scheme);
    absoluteURI.set(':');
    absoluteURI.or(hier_part);
    absoluteURI.or(opaque_part);
}
/** | |
* BitSet for URI-reference. | |
* <p><blockquote><pre> | |
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] | |
* </pre></blockquote><p> | |
*/ | |
protected static final BitSet URI_reference = new BitSet(256);
// Static initializer for URI_reference:
// [ absoluteURI | relativeURI ] [ "#" fragment ]
static {
    URI_reference.or(absoluteURI);
    URI_reference.or(relativeURI);
    URI_reference.set('#');
    URI_reference.or(fragment);
}
// ---------------------------- Characters disallowed within the URI syntax | |
// The excluded US-ASCII characters are the control, space, delims and unwise sets.
/**
 * BitSet of the control characters: codes 0x00 through 0x1F, plus 0x7F.
 */
public static final BitSet control = new BitSet(256);
// Populate control with the low range and the single high code.
static {
    // set(from, to) excludes the upper bound, so this marks 0x00..0x1F.
    control.set(0x00, 0x20);
    control.set(0x7F);
}
/**
 * BitSet holding only the space character (0x20).
 */
public static final BitSet space = new BitSet(256);
// Populate space with its single member.
static {
    space.set(' ');
}
/**
 * BitSet of the delims characters: '&lt;', '&gt;', '#', '%' and '"'.
 */
public static final BitSet delims = new BitSet(256);
// Populate delims from a literal list of its members.
static {
    final String members = "<>#%\"";
    for (int i = 0; i < members.length(); i++) {
        delims.set(members.charAt(i));
    }
}
/**
 * BitSet of the unwise characters:
 * '{', '}', '|', '\', '^', '[', ']' and '`'.
 */
public static final BitSet unwise = new BitSet(256);
// Populate unwise from a literal list of its members.
static {
    final String members = "{}|\\^[]`";
    for (int i = 0; i < members.length(); i++) {
        unwise.set(members.charAt(i));
    }
}
/**
 * Characters disallowed in rel_path before escaping: every member of
 * uric that is not also a member of rel_path.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);
// Compute the difference (uric minus rel_path) on a copy, then store it.
static {
    final BitSet difference = (BitSet) uric.clone();
    difference.andNot(rel_path);
    disallowed_rel_path.or(difference);
}
/**
 * Characters disallowed in opaque_part before escaping: every member of
 * uric that is not also a member of opaque_part.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);
// Compute the difference (uric minus opaque_part) on a copy, then store it.
static {
    final BitSet difference = (BitSet) uric.clone();
    difference.andNot(opaque_part);
    disallowed_opaque_part.or(difference);
}
// ----------------------- Characters allowed within and for each component | |
/**
 * Characters allowed for the authority component: the authority set
 * with '%' removed.
 */
public static final BitSet allowed_authority = new BitSet(256);
// Copy authority, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) authority.clone();
    chars.clear('%');
    allowed_authority.or(chars);
}
/**
 * Characters allowed for the opaque_part: the opaque_part set with '%'
 * removed.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);
// Copy opaque_part, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) opaque_part.clone();
    chars.clear('%');
    allowed_opaque_part.or(chars);
}
/**
 * Characters allowed for the reg_name: the reg_name set with '%'
 * removed.
 */
public static final BitSet allowed_reg_name = new BitSet(256);
// Copy reg_name, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) reg_name.clone();
    chars.clear('%');
    allowed_reg_name.or(chars);
}
/**
 * Characters allowed for the userinfo component: the userinfo set with
 * '%' removed.
 */
public static final BitSet allowed_userinfo = new BitSet(256);
// Copy userinfo, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) userinfo.clone();
    chars.clear('%');
    allowed_userinfo.or(chars);
}
/**
 * Characters allowed within the userinfo component: the within_userinfo
 * set with '%' removed.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);
// Copy within_userinfo, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) within_userinfo.clone();
    chars.clear('%');
    allowed_within_userinfo.or(chars);
}
/**
 * Characters allowed for the IPv6reference component: the IPv6reference
 * set with the enclosing bracket characters '[' and ']' removed.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);
// Start from IPv6reference, then strip only the two bracket characters.
static {
    allowed_IPv6reference.or(IPv6reference);
    allowed_IPv6reference.clear(']');
    allowed_IPv6reference.clear('[');
}
/**
 * Characters allowed for the host component: the union of hostname and
 * allowed_IPv6reference. The IPv6 contribution comes from
 * allowed_IPv6reference, not from IPv6reference itself.
 */
public static final BitSet allowed_host = new BitSet(256);
// Union of the two host alternatives; the order of the calls is irrelevant.
static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}
/**
 * Characters allowed within the authority component: the union of
 * server and reg_name with ';', ':', '@', '?' and '/' removed.
 */
public static final BitSet allowed_within_authority = new BitSet(256);
// Build the union first, then strip the listed characters.
static {
    allowed_within_authority.or(reg_name);
    allowed_within_authority.or(server);
    final String removed = ";:@?/";
    for (int i = 0; i < removed.length(); i++) {
        allowed_within_authority.clear(removed.charAt(i));
    }
}
/**
 * Characters allowed for the abs_path: the abs_path set with the members
 * of percent and the '+' character removed.
 */
public static final BitSet allowed_abs_path = new BitSet(256);
// Start from abs_path, then apply the two independent removals.
static {
    allowed_abs_path.or(abs_path);
    // '/' is not set explicitly: the original author notes it is already included.
    allowed_abs_path.clear('+');
    allowed_abs_path.andNot(percent);
}
/**
 * Characters allowed for the rel_path: the rel_path set with '%' and '+'
 * removed.
 */
public static final BitSet allowed_rel_path = new BitSet(256);
// Copy rel_path, drop the two excluded characters, and store the result.
static {
    final BitSet chars = (BitSet) rel_path.clone();
    chars.clear('+');
    chars.clear('%');
    allowed_rel_path.or(chars);
}
/**
 * Characters allowed within the path: the abs_path set with '/', ';',
 * '=' and '?' removed.
 */
public static final BitSet allowed_within_path = new BitSet(256);
// Start from abs_path, then strip the listed characters.
static {
    allowed_within_path.or(abs_path);
    final String removed = "/;=?";
    for (int i = 0; i < removed.length(); i++) {
        allowed_within_path.clear(removed.charAt(i));
    }
}
/**
 * Characters allowed for the query component: the uric set with '%'
 * removed.
 */
public static final BitSet allowed_query = new BitSet(256);
// Copy uric, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) uric.clone();
    chars.clear('%');
    allowed_query.or(chars);
}
/**
 * Characters allowed within the query component: the members of
 * allowed_query that are not in the reserved set.
 */
public static final BitSet allowed_within_query = new BitSet(256);
// Compute (allowed_query minus reserved) on a copy, then store it.
static {
    final BitSet chars = (BitSet) allowed_query.clone();
    chars.andNot(reserved);
    allowed_within_query.or(chars);
}
/**
 * Characters allowed for the fragment component: the uric set with '%'
 * removed.
 */
public static final BitSet allowed_fragment = new BitSet(256);
// Copy uric, drop '%', and store the result.
static {
    final BitSet chars = (BitSet) uric.clone();
    chars.clear('%');
    allowed_fragment.or(chars);
}
public static class URIException extends RuntimeException { | |
/** | |
* | |
*/ | |
private static final long serialVersionUID = 1L; | |
public URIException() { | |
super(); | |
// TODO Auto-generated constructor stub | |
} | |
public URIException(String message, Throwable cause) { | |
super(message, cause); | |
// TODO Auto-generated constructor stub | |
} | |
public URIException(String message) { | |
super(message); | |
// TODO Auto-generated constructor stub | |
} | |
public URIException(Throwable cause) { | |
super(cause); | |
// TODO Auto-generated constructor stub | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment