Advent of Code 2020 Day 19 - Implementation using an Earley parser - https://adventofcode.com/2020/day/19
package main

import (
	"advent-of-code/common"
	"log"
	"strings"
)

const inputFile = "day19/input.txt"

// We're using an Earley parser:
// - https://en.wikipedia.org/wiki/Earley_parser
// - http://www.brawer.ch/prolog/earley.pdf
// -> https://loup-vaillant.fr/tutorials/earley-parsing/
// We could improve the parser by handling empty rules, but our input won't contain any:
// https://loup-vaillant.fr/tutorials/earley-parsing/empty-rules

// A Symbol is either a terminal (a literal character) or the name of another rule
type Symbol struct {
	text     string
	terminal bool
}

func (s Symbol) equal(other Symbol) bool {
	return s.terminal == other.terminal && s.text == other.text
}

// A Rule maps a name to one sequence of symbols (one Rule value per alternative)
type Rule struct {
	name    string
	symbols []Symbol
}

type Grammar struct {
	rules     []Rule
	startRule string
}
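
// Illustration (a hypothetical value, not used by the solver): the puzzle rule
// `3: 4 5 | 5 4` is stored as two Rule values sharing the name "3", one per
// alternative, while `4: "a"` and `5: "b"` each become a single terminal symbol.
var exampleGrammar = Grammar{
	rules: []Rule{
		{name: "3", symbols: []Symbol{{text: "4"}, {text: "5"}}},
		{name: "3", symbols: []Symbol{{text: "5"}, {text: "4"}}},
		{name: "4", symbols: []Symbol{{text: "a", terminal: true}}},
		{name: "5", symbols: []Symbol{{text: "b", terminal: true}}},
	},
	startRule: "3",
}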

// An EarleyItem is a dotted rule: an index into the grammar's rules, the position
// of the dot within that rule (next) and the state set where the rule application started (start)
type EarleyItem struct {
	rule  int
	next  int
	start int
}

// Compares two items for equality (needed for the safe append)
func (i EarleyItem) equal(other EarleyItem) bool {
	return i.rule == other.rule && i.start == other.start && i.next == other.next
}

type EarleySet []EarleyItem

// Adds an item at the end of the Earley set
func (set EarleySet) unsafeAppend(item EarleyItem) EarleySet {
	return append(set, item)
}

// Adds an item at the end of the Earley set, unless it's already present
func (set EarleySet) append(item EarleyItem) EarleySet {
	for _, setItem := range set {
		if item.equal(setItem) {
			return set
		}
	}
	return append(set, item)
}

type EarleyParser struct {
	grammar *Grammar
	input   []Symbol
}

// Recognizes reports whether the whole input can be derived from the grammar's start rule
func (e *EarleyParser) Recognizes() bool {
	S := e.buildItems()
	// e.print(S)
	// e.diagnose(S)
	return e.hasCompleteParse(S)
}

// buildItems runs the Earley recognizer and returns the chain of state sets
func (e *EarleyParser) buildItems() []EarleySet {
	// Earley sets
	S := make([]EarleySet, 1)
	for i := range S {
		S[i] = make(EarleySet, 0)
	}
	// put start item(s) in S[0]
	for i := 0; i < len(e.grammar.rules); i++ {
		rule := e.grammar.rules[i]
		if rule.name == e.grammar.startRule {
			S[0] = S[0].unsafeAppend(EarleyItem{rule: i, start: 0, next: 0})
		}
	}
	// populate the rest of S[i]
	for i := 0; i < len(S); i++ {
		for j := 0; j < len(S[i]); j++ {
			symbol, next := e.nextSymbol(S[i][j])
			if !next {
				S = e.complete(S, i, j)
			} else if symbol.terminal {
				S = e.scan(S, i, j, *symbol)
			} else {
				S = e.predict(S, i, *symbol)
			}
		}
	}
	return S
}

// Prediction: for the expected non-terminal, add an item for every alternative of that rule to the current set
func (e *EarleyParser) predict(S []EarleySet, i int, symbol Symbol) []EarleySet {
	for ruleIndex, rule := range e.grammar.rules {
		if rule.name == symbol.text {
			S[i] = S[i].append(EarleyItem{rule: ruleIndex, next: 0, start: i})
		}
	}
	return S
}

// Scanning: if the expected terminal matches the current input symbol, advance the item into the next set
func (e *EarleyParser) scan(S []EarleySet, i, j int, symbol Symbol) []EarleySet {
	item := S[i][j]
	if i < len(e.input) && symbol.equal(e.input[i]) {
		if len(S) <= i+1 {
			S = append(S, make(EarleySet, 0))
		}
		S[i+1] = S[i+1].unsafeAppend(EarleyItem{rule: item.rule, next: item.next + 1, start: item.start})
	}
	return S
}

// Completion: a rule has been fully matched, so advance every item that was waiting for it in the set where it started
func (e *EarleyParser) complete(S []EarleySet, i, j int) []EarleySet {
	item := S[i][j]
	for _, oldItem := range S[item.start] {
		symbol, exists := e.nextSymbol(oldItem)
		if exists && symbol.text == e.name(item) {
			S[i] = S[i].append(EarleyItem{rule: oldItem.rule, next: oldItem.next + 1, start: oldItem.start})
		}
	}
	return S
}

// Returns the symbol after the dot in the item's rule, if there is one
func (e *EarleyParser) nextSymbol(item EarleyItem) (*Symbol, bool) {
	symbols := e.grammar.rules[item.rule].symbols
	next := item.next < len(symbols)
	if next {
		return &symbols[item.next], next
	}
	return nil, false
}

// Gets the name of the rule pointed to by the item
func (e *EarleyParser) name(item EarleyItem) string {
	return e.grammar.rules[item.rule].name
}

// Checks whether the first i input symbols form a complete parse of the start rule
func (e *EarleyParser) hasPartialParse(S []EarleySet, i int) bool {
	if len(S) <= i {
		return false
	}
	for _, item := range S[i] {
		rule := e.grammar.rules[item.rule]
		if rule.name == e.grammar.startRule && item.next == len(rule.symbols) && item.start == 0 {
			return true
		}
	}
	return false
}

// Checks if there is an item in the final state set which is
// * complete
// * has started at the beginning (state set 0)
// * has the same name as the start rule
func (e *EarleyParser) hasCompleteParse(S []EarleySet) bool {
	return e.hasPartialParse(S, len(e.input))
}

// Returns the last input position up to which a partial parse exists, or -1 if there is none
func (e *EarleyParser) lastPartialParse(S []EarleySet) int {
	for i := len(S) - 1; i >= 0; i-- {
		if e.hasPartialParse(S, i) {
			return i
		}
	}
	return -1
}

// Logs whether the input was recognised and, if not, how far the parser got
func (e *EarleyParser) diagnose(S []EarleySet) {
	if e.hasCompleteParse(S) {
		log.Printf("the input has been recognised, congratulations!")
	} else {
		if len(S) == len(e.input) {
			log.Printf("the input may be incomplete")
		} else {
			log.Printf("the input has stopped making sense at character %v", len(S))
		}
		lpp := e.lastPartialParse(S)
		if lpp >= 0 {
			log.Printf("the first %v characters of the input have been recognised", lpp)
		} else {
			log.Printf("the beginning of the input couldn't be parsed")
		}
	}
}

// Print the sets of Earley states
func (e *EarleyParser) print(S []EarleySet) {
	for i, set := range S {
		log.Printf("=== %v ===", i)
		for _, item := range set {
			rule := e.grammar.rules[item.rule]
			ruleStr := ""
			for j, symbol := range rule.symbols {
				if j == item.next {
					ruleStr = ruleStr + "• "
				}
				if !symbol.terminal {
					ruleStr = ruleStr + symbol.text + " "
				} else {
					ruleStr = ruleStr + "'" + symbol.text + "' "
				}
			}
			if item.next == len(rule.symbols) {
				ruleStr = ruleStr + "•"
			}
			log.Printf("%12v -> %-30v (%v)", rule.name, ruleStr, item.start)
		}
		log.Println()
	}
}

// ParseGrammar parses the rule section of the input (everything up to the first blank line);
// rule "0" is the start rule
func ParseGrammar(lines []string) *Grammar {
	for i, line := range lines {
		if line == "" {
			lines = lines[:i]
			break
		}
	}
	rules := make([]Rule, 0, 2*len(lines))
	for _, line := range lines {
		splitNameRules := strings.SplitN(line, ":", 2)
		name := splitNameRules[0]
		splitRules := strings.Split(splitNameRules[1], "|")
		for _, ruleString := range splitRules {
			fields := strings.Fields(ruleString)
			symbols := make([]Symbol, 0, len(fields))
			for _, field := range fields {
				if strings.HasPrefix(field, "\"") {
					symbols = append(symbols, Symbol{text: strings.Trim(field, "\""), terminal: true})
				} else {
					symbols = append(symbols, Symbol{text: field, terminal: false})
				}
			}
			rules = append(rules, Rule{name: name, symbols: symbols})
		}
	}
	return &Grammar{
		rules:     rules,
		startRule: "0",
	}
}

// Turns every rune of the string into its own terminal symbol
func EveryRuneAsTerminalSymbol(s string) []Symbol {
	runes := []rune(s)
	symbols := make([]Symbol, len(runes))
	for i, r := range runes {
		symbols[i] = Symbol{
			text:     string([]rune{r}),
			terminal: true,
		}
	}
	return symbols
}

// Returns the message lines, i.e. everything after the blank line that separates rules and messages
func FilterMessages(lines []string) []string {
	for i, line := range lines {
		if line == "" {
			return lines[i+1:]
		}
	}
	return lines
}

// For part 2, rules 8 and 11 have to be replaced with recursive versions
func UpdateRules(lines []string) []string {
	cpy := make([]string, len(lines))
	for i, line := range lines {
		if strings.HasPrefix(line, "8:") {
			cpy[i] = "8: 42 | 42 8"
		} else if strings.HasPrefix(line, "11:") {
			cpy[i] = "11: 42 31 | 42 11 31"
		} else {
			cpy[i] = line
		}
	}
	return cpy
}

// Counts how many of the messages are recognized by the grammar
func CountAccepted(messages []string, grammar *Grammar) int {
	sum := 0
	for _, line := range messages {
		parser := EarleyParser{
			grammar: grammar,
			input:   EveryRuneAsTerminalSymbol(line),
		}
		if parser.Recognizes() {
			sum++
		}
	}
	return sum
}

func main() {
	lines, err := common.ReadInputLines(inputFile)
	common.AbortIfInputErr(err, inputFile)

	grammar := ParseGrammar(lines)
	messages := FilterMessages(lines)
	accepted := CountAccepted(messages, grammar)
	log.Printf("part 1: %v strings were accepted", accepted)

	newGrammar := ParseGrammar(UpdateRules(lines))
	newAccepted := CountAccepted(messages, newGrammar)
	log.Printf("part 2: %v strings were accepted", newAccepted)
}
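
// Note: ReadInputLines and AbortIfInputErr come from the author's own
// "advent-of-code/common" package, which is not included in this gist. A minimal
// sketch of what they might look like (an assumption, for reference only):
//
//	func ReadInputLines(path string) ([]string, error) {
//		data, err := os.ReadFile(path)
//		if err != nil {
//			return nil, err
//		}
//		return strings.Split(strings.TrimRight(string(data), "\n"), "\n"), nil
//	}
//
//	func AbortIfInputErr(err error, path string) {
//		if err != nil {
//			log.Fatalf("failed to read input file %v: %v", path, err)
//		}
//	}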

package main

import (
	"strconv"
	"strings"
	"testing"
)

// TS builds a terminal symbol
func TS(s string) Symbol {
	return Symbol{
		text:     s,
		terminal: true,
	}
}

// NTS builds a non-terminal symbol
func NTS(s string) Symbol {
	return Symbol{
		text:     s,
		terminal: false,
	}
}

// GrammarOfTutorial builds the arithmetic grammar from the Earley parsing tutorial
// (https://loup-vaillant.fr/tutorials/earley-parsing/)
func GrammarOfTutorial() *Grammar {
	rules := make([]Rule, 0)
	sum := "Sum"
	product := "Product"
	factor := "Factor"
	number := "Number"
	plusMinus := "PlusMinus"
	mulDiv := "MulDiv"
	digit := "Digit"
	rules = append(rules, Rule{name: sum, symbols: []Symbol{NTS(sum), NTS(plusMinus), NTS(product)}})
	rules = append(rules, Rule{name: sum, symbols: []Symbol{NTS(product)}})
	rules = append(rules, Rule{name: product, symbols: []Symbol{NTS(product), NTS(mulDiv), NTS(factor)}})
	rules = append(rules, Rule{name: product, symbols: []Symbol{NTS(factor)}})
	rules = append(rules, Rule{name: factor, symbols: []Symbol{TS("("), NTS(sum), TS(")")}})
	rules = append(rules, Rule{name: factor, symbols: []Symbol{NTS(number)}})
	rules = append(rules, Rule{name: number, symbols: []Symbol{NTS(digit), NTS(number)}})
	rules = append(rules, Rule{name: number, symbols: []Symbol{NTS(digit)}})
	rules = append(rules, Rule{name: plusMinus, symbols: []Symbol{TS("+")}})
	rules = append(rules, Rule{name: plusMinus, symbols: []Symbol{TS("-")}})
	rules = append(rules, Rule{name: mulDiv, symbols: []Symbol{TS("*")}})
	rules = append(rules, Rule{name: mulDiv, symbols: []Symbol{TS("/")}})
	for i := 0; i < 10; i++ {
		rules = append(rules, Rule{name: digit, symbols: []Symbol{TS(strconv.Itoa(i))}})
	}
	return &Grammar{
		rules:     rules,
		startRule: sum,
	}
}

func TestEarleyParser_Recognizes(t *testing.T) {
	type fields struct {
		grammar *Grammar
		input   []Symbol
	}
	tests := []struct {
		name   string
		fields fields
		want   bool
	}{
		{name: "Example 1", fields: fields{
			grammar: GrammarOfTutorial(),
			input:   EveryRuneAsTerminalSymbol("1+(2*3+4)"),
		}, want: true},
		{name: "Example 2", fields: fields{
			grammar: GrammarOfTutorial(),
			input:   EveryRuneAsTerminalSymbol("1+(2*3+"),
		}, want: false},
		{name: "Example 3", fields: fields{
			grammar: GrammarOfTutorial(),
			input:   EveryRuneAsTerminalSymbol("2*3*4("),
		}, want: false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			e := &EarleyParser{
				grammar: tt.fields.grammar,
				input:   tt.fields.input,
			}
			if got := e.Recognizes(); got != tt.want {
				t.Errorf("Recognizes() = %v, want %v", got, tt.want)
			}
		})
	}
}
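
// A small sketch (a hypothetical test, not part of the original gist) of the
// diagnostic helpers: the broken input "1+(2*3+" has no complete parse, but its
// prefix "1" is already a valid Sum, so a partial parse up to position 1 exists.
func TestEarleyParser_PartialParse_Sketch(t *testing.T) {
	e := &EarleyParser{
		grammar: GrammarOfTutorial(),
		input:   EveryRuneAsTerminalSymbol("1+(2*3+"),
	}
	S := e.buildItems()
	if e.hasCompleteParse(S) {
		t.Errorf("did not expect a complete parse for an unterminated expression")
	}
	if lpp := e.lastPartialParse(S); lpp < 1 {
		t.Errorf("expected the prefix \"1\" to yield a partial parse, got lastPartialParse = %v", lpp)
	}
}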

func TestCountAccepted(t *testing.T) {
	type args struct {
		lines   []string
		grammar *Grammar
	}
	tests := []struct {
		name string
		args args
		want int
	}{
		{name: "Example Part 1", args: args{
			lines:   FilterMessages(strings.Split(examplePart1, "\n")),
			grammar: ParseGrammar(strings.Split(examplePart1, "\n")),
		}, want: 2},
		{name: "Example Part 2", args: args{
			lines:   FilterMessages(strings.Split(examplePart2, "\n")),
			grammar: ParseGrammar(UpdateRules(strings.Split(examplePart2, "\n"))),
		}, want: 12},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := CountAccepted(tt.args.lines, tt.args.grammar); got != tt.want {
				t.Errorf("CountAccepted() = %v, want %v", got, tt.want)
			}
		})
	}
}

const examplePart1 = `0: 4 1 5
1: 2 3 | 3 2
2: 4 4 | 5 5
3: 4 5 | 5 4
4: "a"
5: "b"

ababbb
bababa
abbbab
aaabbb
aaaabbb`

const examplePart2 = `42: 9 14 | 10 1
9: 14 27 | 1 26
10: 23 14 | 28 1
1: "a"
11: 42 31
5: 1 14 | 15 1
19: 14 1 | 14 14
12: 24 14 | 19 1
16: 15 1 | 14 14
31: 14 17 | 1 13
6: 14 14 | 1 14
2: 1 24 | 14 4
0: 8 11
13: 14 3 | 1 12
15: 1 | 14
17: 14 2 | 1 7
23: 25 1 | 22 14
28: 16 1
4: 1 1
20: 14 14 | 1 15
3: 5 14 | 16 1
27: 1 6 | 14 18
14: "b"
21: 14 1 | 1 14
25: 1 1 | 1 14
22: 14 14
8: 42
26: 14 22 | 1 20
18: 15 15
7: 14 5 | 1 21
24: 14 1

abbbbbabbbaaaababbaabbbbabababbbabbbbbbabaaaa
bbabbbbaabaabba
babbbbaabbbbbabbbbbbaabaaabaaa
aaabbbbbbaaaabaababaabababbabaaabbababababaaa
bbbbbbbaaaabbbbaaabbabaaa
bbbababbbbaaaaaaaabbababaaababaabab
ababaaaaaabaaab
ababaaaaabbbaba
baabbaaaabbaaaababbaababb
abbbbabbbbaaaababbbbbbaaaababb
aaaaabbaabaaaaababaa
aaaabbaaaabbaaa
aaaabbaabbaaaaaaabbbabbbaaabbaabaaa
babaaabbbaaabaababbaabababaaab
aabbbbbaabbbaaaaaabbbbbababaaaaabbaaabba`
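
// A small sketch (a hypothetical test, not part of the original gist) showing
// ParseGrammar and EarleyParser together on a tiny grammar written in the
// puzzle's rule syntax.
func TestParseGrammar_Sketch(t *testing.T) {
	grammar := ParseGrammar([]string{`0: 1 1 | 1 0`, `1: "a"`})
	parser := EarleyParser{
		grammar: grammar,
		input:   EveryRuneAsTerminalSymbol("aaa"),
	}
	if !parser.Recognizes() {
		t.Errorf("expected \"aaa\" to be accepted by the sketch grammar")
	}
}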