Skip to content

Instantly share code, notes, and snippets.

@juanplopes
Created October 25, 2012 20:44
Show Gist options
  • Save juanplopes/3955273 to your computer and use it in GitHub Desktop.
Save juanplopes/3955273 to your computer and use it in GitHub Desktop.
package net.intelie.lognit.util.regex;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Wrapper around {@link Matcher} that resolves group names through the
 * {@link NamedPattern} it was created from.
 *
 * <p>An instance is either <em>live</em> (backed by a real {@link Matcher}, created from an
 * input sequence) or a <em>snapshot</em> (backed only by a {@link MatchResult}, as returned by
 * {@link #toMatchResult()}). Snapshots support the read-only group/start/end accessors;
 * operations that need a live matcher throw {@link IllegalStateException} on them.
 */
public class NamedMatcher implements NamedMatchResult {
    /** Live matcher, or {@code null} when this instance is a detached snapshot. */
    private Matcher matcher;
    /** Source of group/start/end data: the live matcher itself or a detached snapshot. */
    private MatchResult result;
    private NamedPattern parentPattern;

    NamedMatcher() {
    }

    /**
     * Wraps an existing match result.
     *
     * <p>The result is only treated as a live matcher when it really is a {@link Matcher}.
     * {@link Matcher#toMatchResult()} on recent JDKs (9+) returns an immutable result that is
     * not a {@code Matcher}, so an unconditional cast would fail with ClassCastException.
     */
    NamedMatcher(NamedPattern parentPattern, MatchResult matcher) {
        this.parentPattern = parentPattern;
        this.result = matcher;
        this.matcher = (matcher instanceof Matcher) ? (Matcher) matcher : null;
    }

    /** Creates a live matcher for {@code input} using the pattern compiled by {@code parentPattern}. */
    NamedMatcher(NamedPattern parentPattern, CharSequence input) {
        this.parentPattern = parentPattern;
        this.matcher = parentPattern.pattern().matcher(input);
        this.result = this.matcher;
    }

    /** Returns the plain {@link Pattern} (group names stripped) used for matching. */
    public Pattern standardPattern() {
        return matcher != null ? matcher.pattern() : parentPattern.pattern();
    }

    /** Returns the {@link NamedPattern} used to resolve group names. */
    public NamedPattern namedPattern() {
        return parentPattern;
    }

    /** Switches both the name table and the underlying matcher to {@code newPattern}. */
    public NamedMatcher usePattern(NamedPattern newPattern) {
        this.parentPattern = newPattern;
        live().usePattern(newPattern.pattern());
        return this;
    }

    public NamedMatcher reset() {
        live().reset();
        return this;
    }

    public NamedMatcher reset(CharSequence input) {
        live().reset(input);
        return this;
    }

    public boolean matches() {
        return live().matches();
    }

    /**
     * Returns a detached copy of the current match state that still resolves group names.
     * Calling this on a snapshot wraps the same underlying result again.
     */
    public NamedMatchResult toMatchResult() {
        MatchResult snapshot = (matcher != null) ? matcher.toMatchResult() : result;
        return new NamedMatcher(this.parentPattern, snapshot);
    }

    public boolean find() {
        return live().find();
    }

    public boolean find(int start) {
        return live().find(start);
    }

    public boolean lookingAt() {
        return live().lookingAt();
    }

    public NamedMatcher appendReplacement(StringBuffer sb, String replacement) {
        live().appendReplacement(sb, replacement);
        return this;
    }

    public StringBuffer appendTail(StringBuffer sb) {
        return live().appendTail(sb);
    }

    public String group() {
        return result.group();
    }

    public String group(int group) {
        return result.group(group);
    }

    public int groupCount() {
        return result.groupCount();
    }

    /**
     * Returns the text captured by the group called {@code groupName}.
     *
     * @throws IllegalArgumentException if the pattern has no group with that name
     */
    public String group(String groupName) {
        return group(groupIndex(groupName));
    }

    /**
     * Returns the captured text of every named group, in pattern order. Groups that did not
     * participate in the match are present with a {@code null} value; unnamed groups are skipped.
     */
    public Map<String, String> namedGroups() {
        Map<String, String> captured = new LinkedHashMap<String, String>();
        List<String> names = parentPattern.groupNames();
        int limit = Math.min(groupCount(), names.size());
        for (int i = 0; i < limit; i++) {
            String name = names.get(i);
            if (name == null) {
                continue; // unnamed capturing group
            }
            // The name list is zero-based, regex group numbers start at 1.
            captured.put(name, result.group(i + 1));
        }
        return captured;
    }

    /**
     * Translates a group name into the one-based regex group number.
     *
     * <p>An unknown name used to map to {@code -1 + 1 == 0}, i.e. silently to the whole match;
     * it is rejected instead, as {@code java.util.regex.Matcher#group(String)} does.
     */
    private int groupIndex(String groupName) {
        int zeroBased = parentPattern.groupIndex(groupName);
        if (zeroBased < 0) {
            throw new IllegalArgumentException("No group with name <" + groupName + ">");
        }
        return zeroBased + 1;
    }

    public int start() {
        return result.start();
    }

    public int start(int group) {
        return result.start(group);
    }

    /** @throws IllegalArgumentException if the pattern has no group with that name */
    public int start(String groupName) {
        return start(groupIndex(groupName));
    }

    public int end() {
        return result.end();
    }

    public int end(int group) {
        return result.end(group);
    }

    /** @throws IllegalArgumentException if the pattern has no group with that name */
    public int end(String groupName) {
        return end(groupIndex(groupName));
    }

    public NamedMatcher region(int start, int end) {
        live().region(start, end);
        return this;
    }

    public int regionEnd() {
        return live().regionEnd();
    }

    public int regionStart() {
        return live().regionStart();
    }

    public boolean hitEnd() {
        return live().hitEnd();
    }

    public boolean requireEnd() {
        return live().requireEnd();
    }

    public boolean hasAnchoringBounds() {
        return live().hasAnchoringBounds();
    }

    public boolean hasTransparentBounds() {
        return live().hasTransparentBounds();
    }

    public String replaceAll(String replacement) {
        return live().replaceAll(replacement);
    }

    public String replaceFirst(String replacement) {
        return live().replaceFirst(replacement);
    }

    public NamedMatcher useAnchoringBounds(boolean b) {
        live().useAnchoringBounds(b);
        return this;
    }

    public NamedMatcher useTransparentBounds(boolean b) {
        live().useTransparentBounds(b);
        return this;
    }

    @Override
    public String toString() {
        return result.toString();
    }

    /** Returns the wrapped {@link Matcher}, or {@code null} when this instance is a snapshot. */
    public Matcher underlyingMatcher() {
        return matcher;
    }

    /** Returns the live matcher or fails when this instance only holds a detached snapshot. */
    private Matcher live() {
        if (matcher == null) {
            throw new IllegalStateException("Operation requires a live matcher, not a match snapshot");
        }
        return matcher;
    }
}
package net.intelie.lognit.util.regex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Compiled regular expression that supports {@code (?<name>...)} named groups by translating
 * them to plain capturing groups and remembering which group number each name maps to.
 *
 * <p>Group names consist of ASCII letters, digits and underscores. The regex is scanned
 * independently of the compile flags, so constructs whose meaning depends on flags (for example
 * comments mode) are not interpreted specially.
 */
public class NamedPattern {
    private static final Logger logger = LoggerFactory.getLogger(NamedPattern.class);

    private static final String NAMED_GROUP_OPEN = "(?<";

    private final Pattern pattern;
    private final String namedPattern;
    private final List<String> groupNames;
    private final Map<String, Integer> groupIndices;

    /** Compiles {@code regex} with no flags. */
    public static NamedPattern compile(String regex) {
        return new NamedPattern(regex, 0);
    }

    /**
     * Compiles {@code regex} with the given {@link Pattern} flags.
     *
     * @throws java.util.regex.PatternSyntaxException if the translated expression is invalid
     */
    public static NamedPattern compile(String regex, int flags) {
        return new NamedPattern(regex, flags);
    }

    private NamedPattern(String regex, int flags) {
        namedPattern = regex;
        pattern = buildStandardPattern(regex, flags);
        groupNames = extractGroupNames(regex);
        groupIndices = extractGroupIndices(groupNames);
    }

    public int flags() {
        return pattern.flags();
    }

    public NamedMatcher matcher(CharSequence input) {
        return new NamedMatcher(this, input);
    }

    Pattern pattern() {
        return pattern;
    }

    /** Returns the translated expression, i.e. the original with group names removed. */
    public String standardPattern() {
        return pattern.pattern();
    }

    /** Returns the original expression, including group names. */
    public String namedPattern() {
        return namedPattern;
    }

    /**
     * Returns one entry per capturing group, in group order: the group's name, or {@code null}
     * for an unnamed capturing group.
     */
    public List<String> groupNames() {
        return groupNames;
    }

    /**
     * Returns the zero-based position of the capturing group called {@code name}
     * (regex group number minus one), or {@code -1} when there is no such group.
     */
    public int groupIndex(String name) {
        Integer index = groupIndices.get(name);
        return index == null ? -1 : index;
    }

    public String[] split(CharSequence input, int limit) {
        return pattern.split(input, limit);
    }

    public String[] split(CharSequence input) {
        return pattern.split(input);
    }

    @Override
    public String toString() {
        return namedPattern;
    }

    /**
     * Lists the capturing groups of {@code namedPattern} in order, using {@code null} for
     * unnamed ones. Only real capturing groups are counted: escaped parentheses, parentheses
     * inside character classes or {@code \Q..\E} quotes, and non-capturing constructs such as
     * {@code (?:..)}, look-arounds and inline flags are ignored.
     */
    static List<String> extractGroupNames(String namedPattern) {
        List<String> names = new ArrayList<String>();
        translate(namedPattern, names);
        return names;
    }

    /** Maps each entry of {@code groups} to its position; on duplicates the last position wins. */
    static Map<String, Integer> extractGroupIndices(List<String> groups) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (int i = 0; i < groups.size(); i++) {
            map.put(groups.get(i), i);
        }
        return map;
    }

    /** Compiles {@code namedPattern} after turning every {@code (?<name>} into a plain {@code (}. */
    static Pattern buildStandardPattern(String namedPattern, int flags) {
        return Pattern.compile(translate(namedPattern, new ArrayList<String>()), flags);
    }

    /**
     * Single pass over {@code regex} that strips group names and records capturing groups.
     *
     * @param regex expression that may contain {@code (?<name>...)} groups
     * @param names receives one entry per capturing group ({@code null} when unnamed)
     * @return the expression with every named-group prefix replaced by {@code (}
     */
    private static String translate(String regex, List<String> names) {
        int length = regex.length();
        StringBuilder standard = new StringBuilder(length);
        int classDepth = 0; // > 0 while inside [...]; classes may nest
        int i = 0;
        while (i < length) {
            char c = regex.charAt(i);
            int next = i + 1;
            if (c == '\\') {
                if (next < length && regex.charAt(next) == 'Q') {
                    // Quoted literal text runs up to the matching \E (or the end of the regex).
                    int close = regex.indexOf("\\E", next + 1);
                    next = close < 0 ? length : close + 2;
                } else {
                    // Escape sequence: the following character is never a group opener.
                    next = Math.min(i + 2, length);
                }
            } else if (c == '[') {
                classDepth++;
                if (next < length && regex.charAt(next) == '^') {
                    next++;
                }
                // A ']' directly after '[' or '[^' is a literal, not the end of the class.
                if (next < length && regex.charAt(next) == ']') {
                    next++;
                }
            } else if (c == ']') {
                if (classDepth > 0) {
                    classDepth--;
                }
            } else if (c == '(' && classDepth == 0) {
                int nameEnd = namedGroupNameEnd(regex, i);
                if (nameEnd > 0) {
                    names.add(regex.substring(i + NAMED_GROUP_OPEN.length(), nameEnd));
                    standard.append('(');
                    i = nameEnd + 1; // continue right after '>'
                    continue;
                }
                // "(?" starts a non-capturing construct; any other "(" is an unnamed group.
                if (next >= length || regex.charAt(next) != '?') {
                    names.add(null);
                }
            }
            standard.append(regex, i, next);
            i = next;
        }
        return standard.toString();
    }

    /**
     * Returns the index of the {@code '>'} closing the group name when a named group
     * {@code (?<name>} starts at {@code open}, or {@code -1} otherwise (which includes the
     * look-behind forms {@code (?<=} and {@code (?<!}).
     */
    private static int namedGroupNameEnd(String regex, int open) {
        if (!regex.startsWith(NAMED_GROUP_OPEN, open)) {
            return -1;
        }
        int nameStart = open + NAMED_GROUP_OPEN.length();
        int i = nameStart;
        while (i < regex.length() && isWordChar(regex.charAt(i))) {
            i++;
        }
        boolean hasName = i > nameStart;
        boolean closed = i < regex.length() && regex.charAt(i) == '>';
        return hasName && closed ? i : -1;
    }

    /** ASCII letter, digit or underscore. */
    private static boolean isWordChar(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '_';
    }
}
package net.intelie.lognit.util.regex;
import org.junit.Test;
import static org.fest.assertions.Assertions.assertThat;
/**
 * End-to-end examples of matching with {@link NamedPattern} and {@link NamedMatcher}:
 * each test compiles a pattern, runs {@code find()} and checks the indexed and named groups.
 */
public class NamedPatternExamplesTest {
    @Test
    public void whenExtractingUsingPatternWithNoNamedGroup() {
        NamedPattern pattern = NamedPattern.compile("a(bcde)");
        NamedMatcher matcher = pattern.matcher("abcde");

        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0, 5, "abcde");
        assertIndexed(1, matcher, 1, 5, "bcde");
        assertThat(matcher.groupCount()).isEqualTo(1);
        assertThat(matcher.namedGroups()).isEmpty();
        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithOneNamedGroup() {
        NamedPattern pattern = NamedPattern.compile("(a)(?<test>bcde)");
        NamedMatcher matcher = pattern.matcher("abcde");

        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0, 5, "abcde");
        assertIndexed(1, matcher, 0, 1, "a");
        assertIndexed(2, matcher, 1, 5, "bcde");
        assertNamed("test", matcher, 1, 5, "bcde");
        assertThat(matcher.namedGroups().size()).isEqualTo(1);
        assertThat(matcher.namedGroups().get("test")).isEqualTo("bcde");
        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithOneRepeatingNamedGroup() {
        NamedPattern pattern = NamedPattern.compile("(a)(?<test>bcd[ef])*");
        NamedMatcher matcher = pattern.matcher("abcdebcdf");

        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0, 9, "abcdebcdf");
        assertIndexed(1, matcher, 0, 1, "a");
        // A repeated group reports its last iteration.
        assertIndexed(2, matcher, 5, 9, "bcdf");
        assertNamed("test", matcher, 5, 9, "bcdf");
        assertThat(matcher.namedGroups().size()).isEqualTo(1);
        assertThat(matcher.namedGroups().get("test")).isEqualTo("bcdf");
        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithMoreThanOneNamedGroup() {
        NamedPattern pattern = NamedPattern.compile("(a)(?<test>bcde)");
        NamedMatcher matcher = pattern.matcher("abcdeabcde");

        // First occurrence.
        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0, 5, "abcde");
        assertIndexed(1, matcher, 0, 1, "a");
        assertIndexed(2, matcher, 1, 5, "bcde");
        assertNamed("test", matcher, 1, 5, "bcde");
        assertThat(matcher.namedGroups().size()).isEqualTo(1);
        assertThat(matcher.namedGroups().get("test")).isEqualTo("bcde");

        // Second occurrence, shifted by the length of the first one.
        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0 + 5, 5 + 5, "abcde");
        assertIndexed(1, matcher, 0 + 5, 1 + 5, "a");
        assertIndexed(2, matcher, 1 + 5, 5 + 5, "bcde");
        assertNamed("test", matcher, 1 + 5, 5 + 5, "bcde");
        assertThat(matcher.namedGroups().size()).isEqualTo(1);
        assertThat(matcher.namedGroups().get("test")).isEqualTo("bcde");

        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithMixedNamedAndUnamedGroups() {
        NamedPattern pattern = NamedPattern.compile("(a)(?<test1>b)(c)d(?<test2>e)");
        NamedMatcher matcher = pattern.matcher("abcde");

        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0, 5, "abcde");
        assertIndexed(1, matcher, 0, 1, "a");
        assertIndexed(2, matcher, 1, 2, "b");
        assertIndexed(3, matcher, 2, 3, "c");
        assertIndexed(4, matcher, 4, 5, "e");
        assertNamed("test1", matcher, 1, 2, "b");
        assertNamed("test2", matcher, 4, 5, "e");
        assertThat(matcher.namedGroups().size()).isEqualTo(2);
        assertThat(matcher.namedGroups().get("test1")).isEqualTo("b");
        assertThat(matcher.namedGroups().get("test2")).isEqualTo("e");
        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithNestedNamedAndUnamedGroups() {
        NamedPattern pattern = NamedPattern.compile("(a(?<test1>b(c)d(?<test2>e)))");
        NamedMatcher matcher = pattern.matcher("abcde");

        assertThat(matcher.find()).isTrue();
        assertDefault(matcher, 0, 5, "abcde");
        assertIndexed(1, matcher, 0, 5, "abcde");
        assertIndexed(2, matcher, 1, 5, "bcde");
        assertIndexed(3, matcher, 2, 3, "c");
        assertIndexed(4, matcher, 4, 5, "e");
        assertNamed("test1", matcher, 1, 5, "bcde");
        assertNamed("test2", matcher, 4, 5, "e");
        assertThat(matcher.namedGroups().size()).isEqualTo(2);
        assertThat(matcher.namedGroups().get("test1")).isEqualTo("bcde");
        assertThat(matcher.namedGroups().get("test2")).isEqualTo("e");
        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithSquareBrackets() {
        NamedPattern pattern = NamedPattern.compile("(?<foo>[4]2)");

        NamedMatcher matcher = pattern.matcher("4");
        assertThat(matcher.find()).isFalse();

        matcher = pattern.matcher("2");
        assertThat(matcher.find()).isFalse();

        matcher = pattern.matcher("42");
        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().get("foo")).isEqualTo("42");
    }

    @Test
    public void whenExtractingUsingPatternWithPlusQuantifier() {
        NamedPattern pattern = NamedPattern.compile("(?<foo>[4]2)+");

        NamedMatcher matcher = pattern.matcher("424242");
        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().get("foo")).isEqualTo("42");

        matcher = pattern.matcher("");
        assertThat(matcher.find()).isFalse();
    }

    @Test
    public void whenExtractingUsingPatternWithStarQuantifier() {
        NamedPattern pattern = NamedPattern.compile("(?<foo>[4])*");

        NamedMatcher matcher = pattern.matcher("");
        assertThat(matcher.find()).isTrue();
        // The group matched zero times: it is still listed, with a null value.
        // (The previous statement here called assertThat(...) without any check.)
        assertThat(matcher.namedGroups().get("foo")).isNull();

        matcher = pattern.matcher("44444");
        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().get("foo")).isEqualTo("4");
    }

    @Test
    public void whenExtractingUsingPatternWithDot() {
        NamedPattern pattern = NamedPattern.compile("(?<foo>fo.)");

        NamedMatcher matcher = pattern.matcher("foC");
        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().get("foo")).isEqualTo("foC");

        matcher = pattern.matcher("foX");
        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().get("foo")).isEqualTo("foX");
    }

    @Test
    public void whenExtractingUsingPatternWithDelimiters() {
        NamedPattern pattern = NamedPattern.compile("^.*GET /(?<filename>(.*)).gif HTTP/(?<httpversion>[0-9][.][0-9])\" (?<code>[0-9]{3}).*$");
        NamedMatcher matcher = pattern.matcher("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326");

        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().get("filename")).isEqualTo("apache_pb");
        assertThat(matcher.namedGroups().get("httpversion")).isEqualTo("1.0");
        assertThat(matcher.namedGroups().get("code")).isEqualTo("200");
    }

    @Test
    public void whenExtractingUsingPatternWithDanglingBrackets() {
        NamedPattern pattern = NamedPattern.compile("ab<cde");
        NamedMatcher matcher = pattern.matcher("ab<cde");

        assertThat(matcher.find()).isTrue();
    }

    @Test
    public void whenExtractingGroupNamesFromRegexWithEscapedParenthesis() {
        NamedPattern pattern = NamedPattern.compile("\\((?<prefix>\\?<(?<name>\\w+)>)?");
        NamedMatcher matcher = pattern.matcher("(?<test1>a)(?<test2>b)(c)(d)");

        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().size()).isEqualTo(2);
        assertThat(matcher.namedGroups().get("prefix")).isEqualTo("?<test1>");
        assertThat(matcher.namedGroups().get("name")).isEqualTo("test1");

        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().size()).isEqualTo(2);
        assertThat(matcher.namedGroups().get("prefix")).isEqualTo("?<test2>");
        assertThat(matcher.namedGroups().get("name")).isEqualTo("test2");

        // The two unnamed groups match only the opening parenthesis; both named groups are null.
        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().size()).isEqualTo(2);
        assertThat(matcher.namedGroups().get("prefix")).isEqualTo(null);
        assertThat(matcher.namedGroups().get("name")).isEqualTo(null);

        assertThat(matcher.find()).isTrue();
        assertThat(matcher.namedGroups().size()).isEqualTo(2);
        assertThat(matcher.namedGroups().get("prefix")).isEqualTo(null);
        assertThat(matcher.namedGroups().get("name")).isEqualTo(null);

        assertThat(matcher.find()).isFalse();
    }

    /** Checks text and bounds of the whole current match. */
    private void assertDefault(NamedMatcher matcher, int start, int end, String value) {
        assertThat(matcher.group()).isEqualTo(value);
        assertThat(matcher.start()).isEqualTo(start);
        assertThat(matcher.end()).isEqualTo(end);
    }

    /** Checks text and bounds of the group with the given number. */
    private void assertIndexed(int index, NamedMatcher matcher, int start, int end, String value) {
        assertThat(matcher.group(index)).isEqualTo(value);
        assertThat(matcher.start(index)).isEqualTo(start);
        assertThat(matcher.end(index)).isEqualTo(end);
    }

    /** Checks text and bounds of the group with the given name. */
    private void assertNamed(String name, NamedMatcher matcher, int start, int end, String value) {
        assertThat(matcher.group(name)).isEqualTo(value);
        assertThat(matcher.start(name)).isEqualTo(start);
        assertThat(matcher.end(name)).isEqualTo(end);
    }
}
package net.intelie.lognit.util.regex;
import org.junit.Test;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import static org.fest.assertions.Assertions.assertThat;
/**
 * Unit tests for {@link NamedPattern}: group-name extraction, index lookup, translation to a
 * standard pattern, flag propagation and splitting.
 */
public class NamedPatternTest {
    /** Expression with one named and one unnamed group, shared by several tests. */
    private static final String NAMED_THEN_UNNAMED = "abcde(?<test>bla)f(qwe)";

    @Test
    public void whenCreatingPatternWithoutGroupsItShouldHaveNoGroups() {
        assertThat(NamedPattern.compile("abcde").groupNames()).isEmpty();
    }

    @Test
    public void whenCreatingPatternWithOneGroupItShouldHaveOneGroup() {
        assertThat(NamedPattern.compile("abcde(?<test>bla)").groupNames()).containsExactly("test");
    }

    @Test
    public void whenCreatingPatternWithOnlyOneNamedGroupItShouldIncludeUnnamedGroupsAlso() {
        NamedPattern compiled = NamedPattern.compile("abc(d)e(?<test>bla)a(b)");

        // Unnamed capturing groups show up as null entries.
        assertThat(compiled.groupNames()).containsExactly(null, "test", null);
    }

    @Test
    public void whenGettingGroupIndex() {
        NamedPattern compiled = NamedPattern.compile("abc(d)e(?<test>bla)a(b)");

        assertThat(compiled.groupIndex(null)).isEqualTo(2);
        assertThat(compiled.groupIndex("test")).isEqualTo(1);
        assertThat(compiled.groupIndex("notfound")).isEqualTo(-1);
    }

    @Test
    public void toStringReturnsNamedPattern() {
        String regex = "abcde(?<test>bla)";

        assertThat(NamedPattern.compile(regex).toString()).isEqualTo("abcde(?<test>bla)");
    }

    @Test
    public void willNotDetectEscapedParethesisGroups() {
        assertThat(NamedPattern.compile("abcde\\(?<test>bla\\)").groupNames()).isEmpty();
    }

    @Test
    public void whenCreatingPatternWithOneNamedGroupItShouldHaveOneGroup() {
        assertThat(NamedPattern.compile(NAMED_THEN_UNNAMED).groupNames()).containsSequence("test");
    }

    @Test
    public void underlyingPatternShouldBeEqualToParameterWithoutGroupNames() {
        NamedPattern compiled = caseInsensitive(NAMED_THEN_UNNAMED);

        assertThat(compiled.pattern().pattern()).isEqualTo("abcde(bla)f(qwe)");
    }

    @Test
    public void shouldKeepOriginalNamedPattern() {
        NamedPattern compiled = caseInsensitive(NAMED_THEN_UNNAMED);

        assertThat(compiled.namedPattern()).isEqualTo("abcde(?<test>bla)f(qwe)");
        assertThat(compiled.standardPattern()).isEqualTo("abcde(bla)f(qwe)");
    }

    @Test
    public void whenSplitingWithNamedGroup() {
        String[] expected = {"qqq", "qqq", "zzz"};

        assertThat(caseInsensitive("a(?<test>b)c").split("qqqabcqqqabczzz")).isEqualTo(expected);
    }

    @Test
    public void whenSplitingWithNamedGroupWithLimit() {
        String[] expected = {"qqq", "qqqabczzz"};

        assertThat(caseInsensitive("a(?<test>b)c").split("qqqabcqqqabczzz", 2)).isEqualTo(expected);
    }

    @Test
    public void whenCreatingPatternWithFlagsUnderlyingPatternShouldHaveSameFlags() {
        NamedPattern compiled = caseInsensitive(NAMED_THEN_UNNAMED);

        assertThat(compiled.flags()).isEqualTo(Pattern.CASE_INSENSITIVE);
        assertThat(compiled.pattern().flags()).isEqualTo(Pattern.CASE_INSENSITIVE);
    }

    @Test(expected = PatternSyntaxException.class)
    public void whenCreatingMalformedPattern() {
        // Unbalanced parenthesis: compilation itself must fail.
        NamedPattern.compile("ab(cde");
    }

    @Test
    public void whenCreatingPatternWithDanglingBrackets() {
        assertThat(NamedPattern.compile("ab<cde").groupNames()).isEmpty();
    }

    /** Compiles {@code regex} with {@link Pattern#CASE_INSENSITIVE}. */
    private static NamedPattern caseInsensitive(String regex) {
        return NamedPattern.compile(regex, Pattern.CASE_INSENSITIVE);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment