Created
November 10, 2024 22:40
-
-
Save empijei/b99546270bddd8c73b64f13c3d44bcc9 to your computer and use it in GitHub Desktop.
Accepting inputs talk 2024 code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"errors" | |
"io" | |
"iter" | |
"strings" | |
"unicode/utf8" | |
) | |
func main() {} | |
var states = []rune{'A', 'B', 'C'} | |
func CheckABCImperative(input string) bool { | |
var state, wantCount, curCount int | |
for _, r := range input { | |
switch { | |
case r == states[0]: | |
wantCount++ | |
case r == states[state]: | |
curCount++ | |
default: | |
if (state != 0 && curCount != wantCount) || | |
state+1 >= len(states) || | |
r != states[state+1] { | |
return false | |
} | |
state++ | |
curCount = 1 | |
} | |
} | |
return len(input) == wantCount*len(states) | |
} | |
func CheckABCTokens(input string) bool { | |
if len(input) == 0 { | |
return true | |
} | |
type token struct { | |
r rune | |
l int | |
} | |
tokenize := func(input string) (result []token) { | |
var t token | |
for i, r := range input { | |
if i == 0 { | |
t = token{r, 1} | |
continue | |
} | |
if r != t.r { | |
result = append(result, t) | |
t = token{r, 1} | |
} else { | |
t.l++ | |
} | |
} | |
result = append(result, t) | |
return result | |
} | |
tokens := tokenize(input) | |
if len(tokens) != len(states) { | |
return false | |
} | |
for i, t := range tokens { | |
if t.l != tokens[0].l || | |
t.r != states[i] { | |
return false | |
} | |
} | |
return true | |
} | |
func CheckABCWithScanner(input string) bool { | |
s := bufio.NewScanner(strings.NewReader(input)) | |
s.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { | |
var pr rune | |
for i, r := range string(data) { | |
if pr == 0 { | |
pr = r | |
} | |
if r != pr { | |
return i, data[:i], nil | |
} | |
} | |
if atEOF && len(data) != 0 { | |
return len(data), data, bufio.ErrFinalToken | |
} | |
return 0, nil, nil | |
}) | |
var wantLen, c int | |
for ; s.Scan(); c++ { | |
tok := s.Text() | |
if c == 0 { | |
wantLen = len(tok) | |
continue | |
} | |
r, _ := utf8.DecodeRune([]byte(s.Bytes())) | |
if len(tok) != wantLen || | |
c >= len(states) || | |
r != states[c] { | |
return false | |
} | |
} | |
return c == 0 || c == len(states) | |
} | |
type cursor struct { | |
in string | |
count, pos int | |
} | |
type stateFn func(*cursor) (next stateFn, ok bool) | |
func stateA(c *cursor) (next stateFn, ok bool) { | |
for i, r := range c.in { | |
c.pos = i | |
if r == 'A' { | |
c.count++ | |
continue | |
} | |
if r != 'B' { | |
return nil, false | |
} | |
return stateB, true | |
} | |
return nil, false | |
} | |
func stateB(c *cursor) (next stateFn, ok bool) { | |
base := c.pos | |
gotCount := 0 | |
for i, r := range c.in[base:] { | |
c.pos = base + i | |
if r == 'B' { | |
gotCount++ | |
continue | |
} | |
if gotCount != c.count || | |
r != 'C' { | |
return nil, false | |
} | |
return stateC, true | |
} | |
return nil, false | |
} | |
func stateC(c *cursor) (next stateFn, ok bool) { | |
gotCount := 0 | |
for _, r := range c.in[c.pos:] { | |
if r == 'C' { | |
gotCount++ | |
continue | |
} | |
return nil, false | |
} | |
if gotCount == c.count { | |
return nil, true | |
} | |
return nil, false | |
} | |
func CheckABCWithStates(input string) bool { | |
if len(input) == 0 { | |
return true | |
} | |
c := cursor{in: input} | |
state := stateA | |
var ok bool | |
for state != nil { | |
state, ok = state(&c) | |
if !ok { | |
return false | |
} | |
} | |
return true | |
} | |
func CheckABCWithStatesStreamed(input string) bool { | |
if len(input) == 0 { | |
return true | |
} | |
in := bufio.NewReader(strings.NewReader(input)) | |
type stateFn func() (next stateFn, ok bool) | |
var ( | |
count = 0 | |
a, b, c stateFn | |
) | |
a = func() (next stateFn, ok bool) { | |
for { | |
r, _, err := in.ReadRune() | |
if err != nil { | |
return nil, false | |
} | |
if r == states[0] { | |
count++ | |
continue | |
} | |
if r != states[1] { | |
return nil, false | |
} | |
in.UnreadRune() | |
return b, true | |
} | |
} | |
b = func() (next stateFn, ok bool) { | |
gotCount := 0 | |
for { | |
r, _, err := in.ReadRune() | |
if err != nil { | |
return nil, false | |
} | |
if r == states[1] { | |
gotCount++ | |
continue | |
} | |
if gotCount != count || | |
r != states[2] { | |
return nil, false | |
} | |
in.UnreadRune() | |
return c, true | |
} | |
} | |
c = func() (next stateFn, ok bool) { | |
gotCount := 0 | |
for { | |
r, _, err := in.ReadRune() | |
if err != nil { | |
if errors.Is(err, io.EOF) { | |
break | |
} | |
return nil, false | |
} | |
if r == states[2] { | |
gotCount++ | |
if gotCount > count { | |
return nil, false | |
} | |
continue | |
} | |
return nil, false | |
} | |
if gotCount != count { | |
return nil, false | |
} | |
return nil, true | |
} | |
state := a | |
ok := true | |
for state != nil { | |
state, ok = state() | |
if !ok { | |
return false | |
} | |
} | |
return true | |
} | |
type Message struct { | |
Action string | |
Content string | |
} | |
var errInvalidMsg = errors.New("invalid message") | |
func ParseSSE(input io.Reader) iter.Seq2[*Message, error] { | |
return func(yield func(*Message, error) bool) { | |
// Take the individual double-return separated lines | |
s := bufio.NewScanner(input) | |
s.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { | |
if atEOF && len(data) == 0 { | |
return 0, nil, nil | |
} | |
if i := bytes.Index(data, []byte("\n\n")); i >= 0 { | |
return i + 2, data[0:i], nil | |
} | |
if atEOF { | |
return len(data), data, nil | |
} | |
return 0, nil, nil | |
}) | |
// Parse each message | |
parse := func(data []byte) (*Message, error) { | |
p := bytes.IndexByte(data, ':') | |
if p < 0 { | |
return nil, errInvalidMsg | |
} | |
return &Message{Action: string(data[0:p]), | |
Content: string(data[p+1:])}, nil | |
} | |
// Iterate | |
for s.Scan() { | |
m, err := parse(s.Bytes()) | |
if !yield(m, err) { | |
return | |
} | |
} | |
if s.Err() != nil { | |
_ = yield(nil, s.Err()) | |
} else { | |
_ = yield(nil, io.EOF) | |
} | |
} | |
} | |
func ParseSSEIgnoreComments(input io.Reader, ignoreComments bool) iter.Seq2[*Message, error] { | |
return func(yield func(*Message, error) bool) { | |
// Take the individual double-return separated lines | |
s := bufio.NewScanner(input) | |
s.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { | |
if atEOF && len(data) == 0 { | |
return 0, nil, nil | |
} | |
if i := bytes.Index(data, []byte("\n\n")); i >= 0 { | |
return i + 2, data[0:i], nil | |
} | |
if atEOF { | |
return len(data), data, nil | |
} | |
return 0, nil, nil | |
}) | |
// Parse each message | |
parse := func(data []byte) (*Message, error) { | |
p := bytes.IndexByte(data, ':') | |
if p < 0 { | |
return nil, errInvalidMsg | |
} | |
return &Message{Action: string(data[0:p]), | |
Content: string(data[p+1:])}, nil | |
} | |
// Iterate | |
for s.Scan() { | |
m, err := parse(s.Bytes()) | |
if err == nil && m.Action == "" && ignoreComments { | |
continue | |
} | |
if !yield(m, err) { | |
return | |
} | |
} | |
if s.Err() != nil { | |
_ = yield(nil, s.Err()) | |
} else { | |
_ = yield(nil, io.EOF) | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"errors" | |
"fmt" | |
"io" | |
"strings" | |
"testing" | |
) | |
var benchstrings = func() []string { | |
qtt := []int{10, 100, 1000, 10000} | |
var res []string | |
for _, q := range qtt { | |
var sb strings.Builder | |
for _, s := range states { | |
for range q { | |
sb.WriteRune(s) | |
} | |
} | |
res = append(res, sb.String()) | |
} | |
return res | |
}() | |
var ABCInputs = []struct { | |
in string | |
want bool | |
}{ | |
{"AAABBBCCC", true}, | |
{"AABBCC", true}, | |
{"ABC", true}, | |
{"", true}, | |
{"AABBCCDD", false}, | |
{"AABCC", false}, | |
{"BCC", false}, | |
{"ABBCAC", false}, | |
{"AABB", false}, | |
{"AABBC", false}, | |
{"AACCBB", false}, | |
{"ABACBC", false}, | |
{"A", false}, | |
} | |
var ABCFuncs = []struct { | |
n string | |
f func(string) bool | |
}{ | |
{"CheckABCImperative", CheckABCImperative}, | |
{"CheckABCTokens", CheckABCTokens}, | |
{"CheckABCWithScanner", CheckABCWithScanner}, | |
{"CheckABCWithStates", CheckABCWithStates}, | |
{"CheckABCWithStatesStreamed", CheckABCWithStatesStreamed}, | |
} | |
func TestABC(t *testing.T) { | |
for _, f := range ABCFuncs { | |
t.Run(f.n, func(t *testing.T) { | |
for _, tt := range ABCInputs { | |
if got := f.f(tt.in); got != tt.want { | |
t.Errorf("%s(%q): got %v want %v", f.n, tt.in, got, tt.want) | |
} | |
} | |
}) | |
} | |
} | |
func BenchmarkABC(b *testing.B) { | |
for _, tt := range benchstrings { | |
for _, f := range ABCFuncs { | |
b.Run(fmt.Sprintf("%s(len=%v)", f.n, len(tt)), func(b *testing.B) { | |
for range b.N { | |
if !f.f(tt) { | |
b.Error("got false, want true") | |
} | |
} | |
}) | |
} | |
} | |
} | |
/* | |
cpu: AMD Ryzen 5 2600X Six-Core Processor | |
BenchmarkABC | |
BenchmarkABC/CheckABCImperative(len=30)-12 25934058 42.07 ns/op 0 B/op 0 allocs/op | |
BenchmarkABC/CheckABCTokens(len=30)-12 5917611 201.3 ns/op 112 B/op 3 allocs/op | |
BenchmarkABC/CheckABCWithScanner(len=30)-12 1000000 1039 ns/op 4128 B/op 2 allocs/op | |
BenchmarkABC/CheckABCWithStates(len=30)-12 6392533 191.1 ns/op 120 B/op 5 allocs/op | |
BenchmarkABC/CheckABCWithStatesStreamed(len=30)-12 924762 1356 ns/op 4296 B/op 7 allocs/op | |
BenchmarkABC/CheckABCImperative(len=300)-12 3143652 377.4 ns/op 0 B/op 0 allocs/op | |
BenchmarkABC/CheckABCTokens(len=300)-12 2223444 540.9 ns/op 112 B/op 3 allocs/op | |
BenchmarkABC/CheckABCWithScanner(len=300)-12 618186 1707 ns/op 5216 B/op 9 allocs/op | |
BenchmarkABC/CheckABCWithStates(len=300)-12 2199685 538.2 ns/op 120 B/op 5 allocs/op | |
BenchmarkABC/CheckABCWithStatesStreamed(len=300)-12 568323 2266 ns/op 4296 B/op 7 allocs/op | |
BenchmarkABC/CheckABCImperative(len=3000)-12 328704 3628 ns/op 0 B/op 0 allocs/op | |
BenchmarkABC/CheckABCTokens(len=3000)-12 304002 3919 ns/op 112 B/op 3 allocs/op | |
BenchmarkABC/CheckABCWithScanner(len=3000)-12 233472 5228 ns/op 14368 B/op 9 allocs/op | |
BenchmarkABC/CheckABCWithStates(len=3000)-12 300710 4033 ns/op 120 B/op 5 allocs/op | |
BenchmarkABC/CheckABCWithStatesStreamed(len=3000)-12 93996 12652 ns/op 4296 B/op 7 allocs/op | |
BenchmarkABC/CheckABCImperative(len=30000)-12 34312 34881 ns/op 0 B/op 0 allocs/op | |
BenchmarkABC/CheckABCTokens(len=30000)-12 32022 37365 ns/op 112 B/op 3 allocs/op | |
BenchmarkABC/CheckABCWithScanner(len=30000)-12 20097 60156 ns/op 138016 B/op 15 allocs/op | |
BenchmarkABC/CheckABCWithStates(len=30000)-12 31648 38353 ns/op 120 B/op 5 allocs/op | |
BenchmarkABC/CheckABCWithStatesStreamed(len=30000)-12 9690 126543 ns/op 4296 B/op 7 allocs/op | |
*/ | |
// All | |
func FuzzDifferential(f *testing.F) { | |
for _, i := range ABCInputs { | |
f.Add(i.in) | |
} | |
f.Fuzz(func(t *testing.T, input string) { | |
var ( | |
gott = CheckABCTokens(input) | |
goti = CheckABCImperative(input) | |
gots = CheckABCWithScanner(input) | |
gotm = CheckABCWithStates(input) | |
) | |
if gott != goti || goti != gots || gots != gotm { | |
t.Errorf("Check(%q): tokens: %v imperative: %v scanner %v states %v", input, gott, goti, gots, gotm) | |
} | |
if gott && len(input)%3 != 0 { | |
t.Errorf("Check(%q): got true want false", input) | |
} | |
}) | |
} | |
func TestParseSSE(t *testing.T) { | |
in := `: initial comment | |
event: event1 | |
data: data1 | |
event: event2 | |
data: data2 | |
:keep alive | |
this is invalid | |
data:data3 | |
last:non terminated` | |
want := []struct { | |
msg *Message | |
err error | |
}{ | |
{&Message{"", " initial comment"}, nil}, | |
{&Message{"event", " event1"}, nil}, | |
{&Message{"data", " data1"}, nil}, | |
{&Message{"event", " event2"}, nil}, | |
{&Message{"data", " data2"}, nil}, | |
{&Message{"", "keep alive"}, nil}, | |
{nil, errInvalidMsg}, | |
{&Message{"data", "data3"}, nil}, | |
{&Message{"last", "non terminated"}, nil}, | |
{nil, io.EOF}, | |
} | |
eq := func(a, b *Message) bool { | |
if a == nil && b == nil { | |
return true | |
} | |
if a == nil || b == nil { | |
return false | |
} | |
return a.Action == b.Action && a.Content == b.Content | |
} | |
i := 0 | |
for gotMsg, gotErr := range ParseSSE(strings.NewReader(in)) { | |
if len(want) <= i { | |
t.Fatalf("Too many messages") | |
} | |
want := want[i] | |
if !eq(gotMsg, want.msg) { | |
t.Errorf("iteration %v: got msg %#v want %#v", i, gotMsg, want.msg) | |
} | |
if !errors.Is(gotErr, want.err) { | |
t.Errorf("iteration %v: got err %v want %v", i, gotErr, want.err) | |
} | |
i++ | |
} | |
} | |
func TestParseSSEIgnoreComments(t *testing.T) { | |
in := `: initial comment | |
event: event1 | |
data: data1 | |
event: event2 | |
data: data2 | |
this is invalid | |
:keep alive | |
data:data3 | |
last:non terminated` | |
want := []struct { | |
msg *Message | |
err error | |
}{ | |
{&Message{"event", " event1"}, nil}, | |
{&Message{"data", " data1"}, nil}, | |
{&Message{"event", " event2"}, nil}, | |
{&Message{"data", " data2"}, nil}, | |
{nil, errInvalidMsg}, | |
{&Message{"data", "data3"}, nil}, | |
{&Message{"last", "non terminated"}, nil}, | |
{nil, io.EOF}, | |
} | |
eq := func(a, b *Message) bool { | |
if a == nil && b == nil { | |
return true | |
} | |
if a == nil || b == nil { | |
return false | |
} | |
return a.Action == b.Action && a.Content == b.Content | |
} | |
i := 0 | |
for gotMsg, gotErr := range ParseSSEIgnoreComments(strings.NewReader(in), true) { | |
if len(want) <= i { | |
t.Fatalf("Too many messages") | |
} | |
want := want[i] | |
if !eq(gotMsg, want.msg) { | |
t.Errorf("iteration %v: got msg %#v want %#v", i, gotMsg, want.msg) | |
} | |
if !errors.Is(gotErr, want.err) { | |
t.Errorf("iteration %v: got err %v want %v", i, gotErr, want.err) | |
} | |
i++ | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment