Created
October 25, 2012 20:44
-
-
Save juanplopes/3955273 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.intelie.lognit.util.regex; | |
import java.util.LinkedHashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.regex.MatchResult; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
public class NamedMatcher implements NamedMatchResult { | |
private Matcher matcher; | |
private NamedPattern parentPattern; | |
NamedMatcher() { | |
} | |
NamedMatcher(NamedPattern parentPattern, MatchResult matcher) { | |
this.parentPattern = parentPattern; | |
this.matcher = (Matcher) matcher; | |
} | |
NamedMatcher(NamedPattern parentPattern, CharSequence input) { | |
this.parentPattern = parentPattern; | |
this.matcher = parentPattern.pattern().matcher(input); | |
} | |
public Pattern standardPattern() { | |
return matcher.pattern(); | |
} | |
public NamedPattern namedPattern() { | |
return parentPattern; | |
} | |
public NamedMatcher usePattern(NamedPattern newPattern) { | |
this.parentPattern = newPattern; | |
matcher.usePattern(newPattern.pattern()); | |
return this; | |
} | |
public NamedMatcher reset() { | |
matcher.reset(); | |
return this; | |
} | |
public NamedMatcher reset(CharSequence input) { | |
matcher.reset(input); | |
return this; | |
} | |
public boolean matches() { | |
return matcher.matches(); | |
} | |
public NamedMatchResult toMatchResult() { | |
return new NamedMatcher(this.parentPattern, matcher.toMatchResult()); | |
} | |
public boolean find() { | |
return matcher.find(); | |
} | |
public boolean find(int start) { | |
return matcher.find(start); | |
} | |
public boolean lookingAt() { | |
return matcher.lookingAt(); | |
} | |
public NamedMatcher appendReplacement(StringBuffer sb, String replacement) { | |
matcher.appendReplacement(sb, replacement); | |
return this; | |
} | |
public StringBuffer appendTail(StringBuffer sb) { | |
return matcher.appendTail(sb); | |
} | |
public String group() { | |
return matcher.group(); | |
} | |
public String group(int group) { | |
return matcher.group(group); | |
} | |
public int groupCount() { | |
return matcher.groupCount(); | |
} | |
public String group(String groupName) { | |
return group(groupIndex(groupName)); | |
} | |
public Map<String, String> namedGroups() { | |
Map<String, String> result = new LinkedHashMap<String, String>(); | |
List<String> groupNames = parentPattern.groupNames(); | |
for (int i = 0; i < groupCount() && i < groupNames.size(); i++) { | |
String groupName = groupNames.get(i); | |
if (groupName == null) continue; | |
String groupValue = matcher.group(i + 1); | |
result.put(groupName, groupValue); | |
} | |
return result; | |
} | |
private int groupIndex(String groupName) { | |
return parentPattern.groupIndex(groupName) + 1; | |
} | |
public int start() { | |
return matcher.start(); | |
} | |
public int start(int group) { | |
return matcher.start(group); | |
} | |
public int start(String groupName) { | |
return start(groupIndex(groupName)); | |
} | |
public int end() { | |
return matcher.end(); | |
} | |
public int end(int group) { | |
return matcher.end(group); | |
} | |
public int end(String groupName) { | |
return end(groupIndex(groupName)); | |
} | |
public NamedMatcher region(int start, int end) { | |
matcher.region(start, end); | |
return this; | |
} | |
public int regionEnd() { | |
return matcher.regionEnd(); | |
} | |
public int regionStart() { | |
return matcher.regionStart(); | |
} | |
public boolean hitEnd() { | |
return matcher.hitEnd(); | |
} | |
public boolean requireEnd() { | |
return matcher.requireEnd(); | |
} | |
public boolean hasAnchoringBounds() { | |
return matcher.hasAnchoringBounds(); | |
} | |
public boolean hasTransparentBounds() { | |
return matcher.hasTransparentBounds(); | |
} | |
public String replaceAll(String replacement) { | |
return matcher.replaceAll(replacement); | |
} | |
public String replaceFirst(String replacement) { | |
return matcher.replaceFirst(replacement); | |
} | |
public NamedMatcher useAnchoringBounds(boolean b) { | |
matcher.useAnchoringBounds(b); | |
return this; | |
} | |
public NamedMatcher useTransparentBounds(boolean b) { | |
matcher.useTransparentBounds(b); | |
return this; | |
} | |
public String toString() { | |
return matcher.toString(); | |
} | |
public Matcher underlyingMatcher() { | |
return matcher; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.intelie.lognit.util.regex; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
public class NamedPattern { | |
private static final Logger logger = LoggerFactory.getLogger(NamedPattern.class); | |
private static final Pattern NAMED_GROUP_PATTERN = Pattern.compile("(?<!\\\\)\\((\\?<(\\w+)>)?"); | |
private Pattern pattern; | |
private String namedPattern; | |
private List<String> groupNames; | |
private Map<String, Integer> groupIndices; | |
public static NamedPattern compile(String regex) { | |
return new NamedPattern(regex, 0); | |
} | |
public static NamedPattern compile(String regex, int flags) { | |
return new NamedPattern(regex, flags); | |
} | |
private NamedPattern(String regex, int flags) { | |
namedPattern = regex; | |
pattern = buildStandardPattern(regex, flags); | |
groupNames = extractGroupNames(regex); | |
groupIndices = extractGroupIndices(groupNames); | |
} | |
public int flags() { | |
return pattern.flags(); | |
} | |
public NamedMatcher matcher(CharSequence input) { | |
return new NamedMatcher(this, input); | |
} | |
Pattern pattern() { | |
return pattern; | |
} | |
public String standardPattern() { | |
return pattern.pattern(); | |
} | |
public String namedPattern() { | |
return namedPattern; | |
} | |
public List<String> groupNames() { | |
return groupNames; | |
} | |
public int groupIndex(String name) { | |
Integer index = groupIndices.get(name); | |
if (index == null) index = -1; | |
return index; | |
} | |
public String[] split(CharSequence input, int limit) { | |
return pattern.split(input, limit); | |
} | |
public String[] split(CharSequence input) { | |
return pattern.split(input); | |
} | |
public String toString() { | |
return namedPattern; | |
} | |
static List<String> extractGroupNames(String namedPattern) { | |
List<String> groupNames = new ArrayList<String>(); | |
Matcher matcher = NAMED_GROUP_PATTERN.matcher(namedPattern); | |
while (matcher.find()) { | |
groupNames.add(matcher.group(2)); | |
} | |
return groupNames; | |
} | |
static Map<String, Integer> extractGroupIndices(List<String> groups) { | |
Map<String, Integer> map = new HashMap<String, Integer>(); | |
for (int i = 0; i < groups.size(); i++) | |
map.put(groups.get(i), i); | |
return map; | |
} | |
static Pattern buildStandardPattern(String namedPattern, int flags) { | |
return Pattern.compile(NAMED_GROUP_PATTERN.matcher(namedPattern).replaceAll("("), flags); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.intelie.lognit.util.regex; | |
import org.junit.Test; | |
import static org.fest.assertions.Assertions.assertThat; | |
public class NamedPatternExamplesTest { | |
@Test | |
public void whenExtractingUsingPatternWithNoNamedGroup() { | |
NamedPattern pattern = NamedPattern.compile("a(bcde)"); | |
NamedMatcher matcher = pattern.matcher("abcde"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0, 5, "abcde"); | |
assertIndexed(1, matcher, 1, 5, "bcde"); | |
assertThat(matcher.groupCount()).isEqualTo(1); | |
assertThat(matcher.namedGroups()).isEmpty(); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithOneNamedGroup() { | |
NamedPattern pattern = NamedPattern.compile("(a)(?<test>bcde)"); | |
NamedMatcher matcher = pattern.matcher("abcde"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0, 5, "abcde"); | |
assertIndexed(1, matcher, 0, 1, "a"); | |
assertIndexed(2, matcher, 1, 5, "bcde"); | |
assertNamed("test", matcher, 1, 5, "bcde"); | |
assertThat(matcher.namedGroups().size()).isEqualTo(1); | |
assertThat(matcher.namedGroups().get("test")).isEqualTo("bcde"); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithOneRepeatingNamedGroup() { | |
NamedPattern pattern = NamedPattern.compile("(a)(?<test>bcd[ef])*"); | |
NamedMatcher matcher = pattern.matcher("abcdebcdf"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0, 9, "abcdebcdf"); | |
assertIndexed(1, matcher, 0, 1, "a"); | |
assertIndexed(2, matcher, 5, 9, "bcdf"); | |
assertNamed("test", matcher, 5, 9, "bcdf"); | |
assertThat(matcher.namedGroups().size()).isEqualTo(1); | |
assertThat(matcher.namedGroups().get("test")).isEqualTo("bcdf"); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithMoreThanOneNamedGroup() { | |
NamedPattern pattern = NamedPattern.compile("(a)(?<test>bcde)"); | |
NamedMatcher matcher = pattern.matcher("abcdeabcde"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0, 5, "abcde"); | |
assertIndexed(1, matcher, 0, 1, "a"); | |
assertIndexed(2, matcher, 1, 5, "bcde"); | |
assertNamed("test", matcher, 1, 5, "bcde"); | |
assertThat(matcher.namedGroups().size()).isEqualTo(1); | |
assertThat(matcher.namedGroups().get("test")).isEqualTo("bcde"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0 + 5, 5 + 5, "abcde"); | |
assertIndexed(1, matcher, 0 + 5, 1 + 5, "a"); | |
assertIndexed(2, matcher, 1 + 5, 5 + 5, "bcde"); | |
assertNamed("test", matcher, 1 + 5, 5 + 5, "bcde"); | |
assertThat(matcher.namedGroups().size()).isEqualTo(1); | |
assertThat(matcher.namedGroups().get("test")).isEqualTo("bcde"); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithMixedNamedAndUnamedGroups() { | |
NamedPattern pattern = NamedPattern.compile("(a)(?<test1>b)(c)d(?<test2>e)"); | |
NamedMatcher matcher = pattern.matcher("abcde"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0, 5, "abcde"); | |
assertIndexed(1, matcher, 0, 1, "a"); | |
assertIndexed(2, matcher, 1, 2, "b"); | |
assertIndexed(3, matcher, 2, 3, "c"); | |
assertIndexed(4, matcher, 4, 5, "e"); | |
assertNamed("test1", matcher, 1, 2, "b"); | |
assertNamed("test2", matcher, 4, 5, "e"); | |
assertThat(matcher.namedGroups().size()).isEqualTo(2); | |
assertThat(matcher.namedGroups().get("test1")).isEqualTo("b"); | |
assertThat(matcher.namedGroups().get("test2")).isEqualTo("e"); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithNestedNamedAndUnamedGroups() { | |
NamedPattern pattern = NamedPattern.compile("(a(?<test1>b(c)d(?<test2>e)))"); | |
NamedMatcher matcher = pattern.matcher("abcde"); | |
assertThat(matcher.find()).isTrue(); | |
assertDefault(matcher, 0, 5, "abcde"); | |
assertIndexed(1, matcher, 0, 5, "abcde"); | |
assertIndexed(2, matcher, 1, 5, "bcde"); | |
assertIndexed(3, matcher, 2, 3, "c"); | |
assertIndexed(4, matcher, 4, 5, "e"); | |
assertNamed("test1", matcher, 1, 5, "bcde"); | |
assertNamed("test2", matcher, 4, 5, "e"); | |
assertThat(matcher.namedGroups().size()).isEqualTo(2); | |
assertThat(matcher.namedGroups().get("test1")).isEqualTo("bcde"); | |
assertThat(matcher.namedGroups().get("test2")).isEqualTo("e"); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithSquareBrackets() { | |
NamedPattern pattern = NamedPattern.compile("(?<foo>[4]2)"); | |
NamedMatcher matcher = pattern.matcher("4"); | |
assertThat(matcher.find()).isFalse(); | |
matcher = pattern.matcher("2"); | |
assertThat(matcher.find()).isFalse(); | |
matcher = pattern.matcher("42"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().get("foo")).isEqualTo("42"); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithPlusQuantifier() { | |
NamedPattern pattern = NamedPattern.compile("(?<foo>[4]2)+"); | |
NamedMatcher matcher = pattern.matcher("424242"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().get("foo")).isEqualTo("42"); | |
matcher = pattern.matcher(""); | |
assertThat(matcher.find()).isFalse(); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithStarQuantifier() { | |
NamedPattern pattern = NamedPattern.compile("(?<foo>[4])*"); | |
NamedMatcher matcher = pattern.matcher(""); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().isEmpty()); | |
matcher = pattern.matcher("44444"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().get("foo")).isEqualTo("4"); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithDot() { | |
NamedPattern pattern = NamedPattern.compile("(?<foo>fo.)"); | |
NamedMatcher matcher = pattern.matcher("foC"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().get("foo")).isEqualTo("foC"); | |
matcher = pattern.matcher("foX"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().get("foo")).isEqualTo("foX"); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithDelimiters() { | |
NamedPattern pattern = NamedPattern.compile("^.*GET /(?<filename>(.*)).gif HTTP/(?<httpversion>[0-9][.][0-9])\" (?<code>[0-9]{3}).*$"); | |
NamedMatcher matcher = pattern.matcher("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().get("filename")).isEqualTo("apache_pb"); | |
assertThat(matcher.namedGroups().get("httpversion")).isEqualTo("1.0"); | |
assertThat(matcher.namedGroups().get("code")).isEqualTo("200"); | |
} | |
@Test | |
public void whenExtractingUsingPatternWithDanglingBrackets() { | |
NamedPattern pattern = NamedPattern.compile("ab<cde"); | |
NamedMatcher matcher = pattern.matcher("ab<cde"); | |
assertThat(matcher.find()).isTrue(); | |
} | |
@Test | |
public void whenExtractingGroupNamesFromRegexWithEscapedParenthesis() { | |
NamedPattern pattern = NamedPattern.compile("\\((?<prefix>\\?<(?<name>\\w+)>)?"); | |
NamedMatcher matcher = pattern.matcher("(?<test1>a)(?<test2>b)(c)(d)"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().size()).isEqualTo(2); | |
assertThat(matcher.namedGroups().get("prefix")).isEqualTo("?<test1>"); | |
assertThat(matcher.namedGroups().get("name")).isEqualTo("test1"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().size()).isEqualTo(2); | |
assertThat(matcher.namedGroups().get("prefix")).isEqualTo("?<test2>"); | |
assertThat(matcher.namedGroups().get("name")).isEqualTo("test2"); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().size()).isEqualTo(2); | |
assertThat(matcher.namedGroups().get("prefix")).isEqualTo(null); | |
assertThat(matcher.namedGroups().get("name")).isEqualTo(null); | |
assertThat(matcher.find()).isTrue(); | |
assertThat(matcher.namedGroups().size()).isEqualTo(2); | |
assertThat(matcher.namedGroups().get("prefix")).isEqualTo(null); | |
assertThat(matcher.namedGroups().get("name")).isEqualTo(null); | |
assertThat(matcher.find()).isFalse(); | |
} | |
private void assertDefault(NamedMatcher matcher, int start, int end, String value) { | |
assertThat(matcher.group()).isEqualTo(value); | |
assertThat(matcher.start()).isEqualTo(start); | |
assertThat(matcher.end()).isEqualTo(end); | |
} | |
private void assertIndexed(int index, NamedMatcher matcher, int start, int end, String value) { | |
assertThat(matcher.group(index)).isEqualTo(value); | |
assertThat(matcher.start(index)).isEqualTo(start); | |
assertThat(matcher.end(index)).isEqualTo(end); | |
} | |
private void assertNamed(String name, NamedMatcher matcher, int start, int end, String value) { | |
assertThat(matcher.group(name)).isEqualTo(value); | |
assertThat(matcher.start(name)).isEqualTo(start); | |
assertThat(matcher.end(name)).isEqualTo(end); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.intelie.lognit.util.regex; | |
import org.junit.Test; | |
import java.util.regex.Pattern; | |
import java.util.regex.PatternSyntaxException; | |
import static org.fest.assertions.Assertions.assertThat; | |
public class NamedPatternTest { | |
@Test | |
public void whenCreatingPatternWithoutGroupsItShouldHaveNoGroups() { | |
NamedPattern pattern = NamedPattern.compile("abcde"); | |
assertThat(pattern.groupNames()).isEmpty(); | |
} | |
@Test | |
public void whenCreatingPatternWithOneGroupItShouldHaveOneGroup() { | |
NamedPattern pattern = NamedPattern.compile("abcde(?<test>bla)"); | |
assertThat(pattern.groupNames()).containsExactly("test"); | |
} | |
@Test | |
public void whenCreatingPatternWithOnlyOneNamedGroupItShouldIncludeUnnamedGroupsAlso() { | |
NamedPattern pattern = NamedPattern.compile("abc(d)e(?<test>bla)a(b)"); | |
assertThat(pattern.groupNames()).containsExactly(null, "test", null); | |
} | |
@Test | |
public void whenGettingGroupIndex() { | |
NamedPattern pattern = NamedPattern.compile("abc(d)e(?<test>bla)a(b)"); | |
assertThat(pattern.groupIndex(null)).isEqualTo(2); | |
assertThat(pattern.groupIndex("test")).isEqualTo(1); | |
assertThat(pattern.groupIndex("notfound")).isEqualTo(-1); | |
} | |
@Test | |
public void toStringReturnsNamedPattern() { | |
NamedPattern pattern = NamedPattern.compile("abcde(?<test>bla)"); | |
assertThat(pattern.toString()).isEqualTo("abcde(?<test>bla)"); | |
} | |
@Test | |
public void willNotDetectEscapedParethesisGroups() { | |
NamedPattern pattern = NamedPattern.compile("abcde\\(?<test>bla\\)"); | |
assertThat(pattern.groupNames()).isEmpty(); | |
} | |
@Test | |
public void whenCreatingPatternWithOneNamedGroupItShouldHaveOneGroup() { | |
NamedPattern pattern = NamedPattern.compile("abcde(?<test>bla)f(qwe)"); | |
assertThat(pattern.groupNames()).containsSequence("test"); | |
} | |
@Test | |
public void underlyingPatternShouldBeEqualToParameterWithoutGroupNames() { | |
NamedPattern pattern = NamedPattern.compile("abcde(?<test>bla)f(qwe)", Pattern.CASE_INSENSITIVE); | |
assertThat(pattern.pattern().pattern()).isEqualTo("abcde(bla)f(qwe)"); | |
} | |
@Test | |
public void shouldKeepOriginalNamedPattern() { | |
NamedPattern pattern = NamedPattern.compile("abcde(?<test>bla)f(qwe)", Pattern.CASE_INSENSITIVE); | |
assertThat(pattern.namedPattern()).isEqualTo("abcde(?<test>bla)f(qwe)"); | |
assertThat(pattern.standardPattern()).isEqualTo("abcde(bla)f(qwe)"); | |
} | |
@Test | |
public void whenSplitingWithNamedGroup() { | |
NamedPattern pattern = NamedPattern.compile("a(?<test>b)c", Pattern.CASE_INSENSITIVE); | |
assertThat(pattern.split("qqqabcqqqabczzz")).isEqualTo(new String[]{"qqq", "qqq", "zzz"}); | |
} | |
@Test | |
public void whenSplitingWithNamedGroupWithLimit() { | |
NamedPattern pattern = NamedPattern.compile("a(?<test>b)c", Pattern.CASE_INSENSITIVE); | |
assertThat(pattern.split("qqqabcqqqabczzz", 2)).isEqualTo(new String[]{"qqq", "qqqabczzz"}); | |
} | |
@Test | |
public void whenCreatingPatternWithFlagsUnderlyingPatternShouldHaveSameFlags() { | |
NamedPattern pattern = NamedPattern.compile("abcde(?<test>bla)f(qwe)", Pattern.CASE_INSENSITIVE); | |
assertThat(pattern.flags()).isEqualTo(Pattern.CASE_INSENSITIVE); | |
assertThat(pattern.pattern().flags()).isEqualTo(Pattern.CASE_INSENSITIVE); | |
} | |
@Test(expected = PatternSyntaxException.class) | |
public void whenCreatingMalformedPattern() { | |
NamedPattern pattern = NamedPattern.compile("ab(cde"); | |
} | |
@Test | |
public void whenCreatingPatternWithDanglingBrackets() { | |
NamedPattern pattern = NamedPattern.compile("ab<cde"); | |
assertThat(pattern.groupNames()).isEmpty(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment