Last active
August 19, 2019 16:57
-
-
Save tcw165/18e495073bd70fd14ff9aa7fc07b85f7 to your computer and use it in GitHub Desktop.
An automata matcher inspired by https://leetcode.com/problems/regular-expression-matching/. However, the problem is not a typical Regex problem.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Solution { | |
// Example: | |
// Input: | |
// search string: "missia" | |
// pattern string: "mis*is*p*." | |
// Output: | |
// Regex DFA graph: ["m", "i", "s*", "i", "s*", "p*", "."] | |
// [0]=m vs m | |
// [1]=i vs i | |
// [2]=s vs s* // optional 's' | |
// [3]=s vs s* | |
// [4]=i vs s* | |
// [4]=i vs i | |
// [5]=a vs s* | |
// [5]=a vs p* // optional 'p' | |
// [5]=a vs . | |
// Pattern match completes for [0..6], state=7 | |
// ans: matched! | |
// Note: This solution doesn't pass the test because the solution tries to | |
// match PARTIAL string with the pattern. | |
// However, the problem requires to match the ENTIRE string. | |
public boolean isMatch(String s, String p) { | |
final List<String> graph = createDFAGraph(p); | |
System.out.println(String.format("Regex DFA graph: %s", graph)); | |
final MatchResult result = traverseWithDFA(s, 0, graph); | |
return (result.matched && | |
result.endAt >= s.length()); | |
} | |
/** | |
* Represent the graph as a list of expression string. | |
* e.g., ["m","i","s*","."] | |
*/ | |
private List<String> createDFAGraph( | |
String p | |
) { | |
final List<String> graph = new ArrayList<String>(); | |
int end = p.length(); | |
final int lastIndex = p.length() - 1; | |
for (int i = lastIndex; i >= 0; --i) { | |
final char c = p.charAt(i); | |
if (c >= 'a' && c <= 'z' || c == '.') { | |
graph.add(0, p.substring(i, end)); | |
end = i; | |
} else if (c == '*') { | |
// No-op | |
} else { | |
throw new IllegalArgumentException(); | |
} | |
} | |
return graph; | |
} | |
private MatchResult traverseWithDFA( | |
String s, | |
int start, | |
List<String> graph | |
) { | |
// Use an integer to represent the state. | |
int state = 0; | |
// The cursor to the search string. | |
int i = start; | |
while (i < s.length() && | |
state < graph.size()) { | |
final char c = s.charAt(i); | |
final String expr = graph.get(state); | |
final char actualExpr = expr.charAt(0); | |
System.out.println(String.format("[%d]=%s vs %s", i, c, expr)); | |
if (expr.length() == 1) { | |
// Case as an alphabet. | |
if (actualExpr == '.' || actualExpr == c) { | |
++i; | |
// Transition to next state. | |
++state; | |
} else { | |
break; | |
} | |
} else { | |
// Case as optional alphabet, e.g., "p*" or ".*". | |
if (actualExpr == '.' || actualExpr == c) { | |
++i; | |
if (i >= s.length()) { | |
// Advance the state as we visit all the characters of the | |
// string to denote we actually consume the final state! | |
++state; | |
} | |
} else { | |
// Not found, transition to next state since this expression | |
// is optional. | |
++state; | |
} | |
} | |
} | |
// Check if we consume all the state in the DFA graph. | |
final boolean matched = state >= graph.size(); | |
final int endAt = i; | |
System.out.println(String.format("Pattern match completes for [%d..%d], state=%d", start, endAt, state)); | |
return new MatchResult(matched, endAt); | |
} | |
private static class MatchResult { | |
final boolean matched; | |
final int endAt; | |
MatchResult( | |
boolean matched, | |
int endAt | |
) { | |
this.matched = matched; | |
this.endAt = endAt; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment