Last active
February 13, 2022 05:17
-
-
Save aparx/1e37f55c8ce65e2bf368d69aa089eb80 to your computer and use it in GitHub Desktop.
Escapes regex characters in a string relatively fast.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package io.github.sauranbone.plang; | |
import org.apache.commons.lang3.StringUtils; | |
import java.util.regex.Pattern; | |
/**
 * Static helpers for backslash-escaping regular expression special
 * characters in a plain string.
 *
 * @author Vinzent Zeband
 * @version 06:07 CET, 13.02.2022
 * @since 1.0
 */
public class RegexEscaping {

    /**
     * All special characters of regular expression.
     */
    public static final String REGEX_SPECIAL_CHARACTERS = "<([{\\^-=$!|]})?*+.>";

    /**
     * This is equivalent to commons-lang StringUtils#EMPTY.
     */
    private static final String EMPTY_STRING = "";

    /**
     * The character that is put in front of every special character.
     */
    private static final char ESCAPE_CHARACTER = '\\';

    /**
     * Smallest character contained in {@link #REGEX_SPECIAL_CHARACTERS}.
     */
    private static final char LOWEST_SPECIAL_CHARACTER = '!';

    /**
     * Largest character contained in {@link #REGEX_SPECIAL_CHARACTERS}.
     */
    private static final char HIGHEST_SPECIAL_CHARACTER = '}';

    /**
     * Escapes every character in {@code str} that is one of regular
     * expression special characters and returns the regex escaped string.
     * <p>If {@code str} is null or empty, an empty string is returned.
     * <p>If {@code str} contains no special character at all, {@code str}
     * itself is returned without any allocation.
     *
     * @param str the string to be escaped, may be null
     * @return the escaped string, never null
     * @apiNote Alternative to {@link Pattern#quote(String)}.
     * @see #REGEX_SPECIAL_CHARACTERS
     */
    public static String escapeRegex(String str) {
        if (str == null || str.isEmpty()) {
            return EMPTY_STRING;
        }
        final int n = str.length();

        // Locate the first character that needs escaping, so that strings
        // without any special character are returned as they are.
        int first = 0;
        while (first < n && !isSpecialRegexCharacter(str.charAt(first))) {
            first++;
        }
        if (first == n) {
            return str;
        }

        // At least one escape character is going to be inserted, so leave
        // some headroom beyond the input length to avoid an early resize.
        StringBuilder builder = new StringBuilder(n + 8);
        // Everything in front of the first special character is copied as is
        builder.append(str, 0, first);
        for (int i = first; i < n; i++) {
            char ch = str.charAt(i);
            if (isSpecialRegexCharacter(ch)) {
                // Append escaping to this character
                builder.append(ESCAPE_CHARACTER);
            }
            builder.append(ch);
        }
        return builder.toString();
    }

    /**
     * Returns true if {@code ch} is one of the regular expression special
     * characters that are defined in {@link #REGEX_SPECIAL_CHARACTERS}.
     *
     * @param ch the character to test
     * @return true if {@code ch} is a special character that is escaped by
     * {@link #escapeRegex(String)}, false otherwise
     * @see #REGEX_SPECIAL_CHARACTERS
     */
    public static boolean isSpecialRegexCharacter(char ch) {
        // Cheap range check first: every special character lies between
        // '!' and '}', anything outside cannot be contained in the constant.
        if (ch < LOWEST_SPECIAL_CHARACTER || ch > HIGHEST_SPECIAL_CHARACTER) {
            return false;
        }
        return REGEX_SPECIAL_CHARACTERS.indexOf(ch) != -1;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment