Last active
February 28, 2021 06:14
-
-
Save itarato/534df2a9ed888fda128e37d16dabadae to your computer and use it in GitHub Desktop.
Parser combinator trial
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'pp'
require "test/unit"
# Convenience extensions so that plain strings can be used directly as parser
# input and as token parsers.
class String
  # Wraps this string in a ParseResponse that has no collected tokens yet.
  def parse_response
    ParseResponse.new(self)
  end

  # Builds a parser (via for_tokens) that matches exactly this string.
  def token
    for_tokens(self)
  end
end
# Sample input written in the hash-like syntax that the parsers are tried on.
# NOTE(review): nothing else in the visible file reads this top-level value;
# test_json builds its own input.
raw = <<-JSON
{
"foo" => "bar"
}
JSON
# Outcome of a parsing step: either a success carrying a value or a failure
# carrying an error description.
class Result
  attr_reader :err_value, :ok_value

  class << self
    # Builds a successful result wrapping +value+.
    def ok(value)
      new(ok_value: value, is_ok: true)
    end

    # Builds a failed result wrapping +value+. The failure flag is passed
    # explicitly so that a nil error payload still yields a failure.
    def err(value)
      new(err_value: value, is_ok: false)
    end
  end

  # @param ok_value  payload exposed through #ok_value on success
  # @param err_value payload exposed through #err_value on failure
  # @param is_ok     whether this is a success; when omitted, the result is a
  #                  success exactly when no (non-nil) err_value was given
  def initialize(ok_value: nil, err_value: nil, is_ok: err_value.nil?)
    @is_ok = is_ok ? true : false
    # Only the payload matching the outcome is kept; the other reader is nil.
    @ok_value = @is_ok ? ok_value : nil
    @err_value = @is_ok ? nil : err_value
  end

  # @return [Boolean] true for a success
  def ok?
    @is_ok
  end

  # @return [Boolean] true for a failure
  def err?
    !@is_ok
  end

  # Chains a further step: yields the success value and returns whatever the
  # block returns; a failure is returned unchanged and the block is not run.
  def and_then
    return self if err?

    yield @ok_value
  end
end
# Parser state: the input that is still unconsumed (+rest+) together with the
# tokens collected so far (+args+).
#
# Instances are never modified by #progress or #drop; both return a new
# ParseResponse. A combinator that fails part-way through therefore cannot
# leave the caller's input partially consumed.
class ParseResponse
  attr_reader :rest, :args

  # @param rest the unconsumed input
  # @param args tokens collected so far
  def initialize(rest, *args)
    @rest = rest
    @args = args
  end

  # Returns a new state with the first +size+ elements of the input consumed
  # and +added_tokens+ appended to the collected tokens.
  def progress(size, *added_tokens)
    self.class.new(@rest[size..-1], *@args, *added_tokens)
  end

  # Returns a new state with the last +n+ collected tokens discarded.
  def drop(n = 1)
    kept = @args.dup
    kept.pop(n) # pop on the copy keeps Array#pop's handling of oversized n
    self.class.new(@rest, *kept)
  end
end
# Builds a parser that matches the literal +token+ at the start of the input.
#
# The returned lambda takes a parser state (responding to +rest+ and
# +progress+) and returns:
# * Result.err(:token_too_short) when less input remains than the token needs,
# * Result.err([:token_mismatch, token, input]) when the input starts with
#   something else,
# * Result.ok(state) with the token consumed and appended otherwise.
def for_tokens(token)
  lambda do |input|
    remaining = input.rest
    return Result.err(:token_too_short) if remaining.size < token.size
    return Result.err([:token_mismatch, token, input]) unless remaining.start_with?(token)

    Result.ok(input.progress(token.size, token))
  end
end
# Builds a parser that consumes the longest leading run of characters for
# which the given block returns a truthy value.
#
# @param min_length minimum number of characters the run must contain
# @yield [char] each leading character of the remaining input, in order
# @raise [ArgumentError] when no block is given
# @return [Proc] lambda taking a parser state; yields
#   Result.err(:sequence_too_short) when the run is shorter than +min_length+,
#   otherwise Result.ok(state) with the run consumed and appended as one token
def for_sequence(min_length = 1, &block)
  raise ArgumentError, 'for_sequence requires a block' unless block

  lambda do |input|
    # each_char lets take_while stop at the first rejected character instead
    # of first splitting the whole remaining input into an array.
    seq = input.rest.each_char.take_while(&block).join
    return Result.err(:sequence_too_short) if seq.size < min_length

    Result.ok(input.progress(seq.size, seq))
  end
end
# Builds a parser that runs +parsers+ one after another, feeding the state
# produced by each into the next.
#
# The returned lambda starts from Result.ok(input) and chains every parser
# with Result#and_then, so the first failing parser's result is returned and
# the remaining parsers are not run. With no parsers it returns
# Result.ok(input).
def attach(*parsers)
  lambda do |input|
    parsers.reduce(Result.ok(input)) { |result, parser| result.and_then(&parser) }
  end
end
# Wraps the parser given as a block so that, when it succeeds, the last +n+
# collected tokens are discarded from the resulting state (by calling +drop+
# on it). Failures from the wrapped parser are returned unchanged.
#
# @param n number of trailing tokens to discard
# @yield [input] the wrapped parser; must return a Result
# @raise [ArgumentError] when no block is given
# @return [Proc] lambda taking a parser state and returning a Result
def drop(n = 1, &parser)
  raise ArgumentError, 'drop requires a parser block' unless parser

  lambda do |input|
    parser.call(input).and_then { |parsed| Result.ok(parsed.drop(n)) }
  end
end
# Builds a parser that applies the parser given as a block repeatedly, each
# time on the state produced by the previous success, until it fails.
#
# Repetition also stops after a success that leaves the amount of remaining
# input unchanged: such a parser would otherwise succeed forever and the loop
# would never end. That success is still counted.
#
# @param min minimum number of successful applications required
# @yield [state] the repeated parser; must return a Result
# @raise [ArgumentError] when no block is given
# @return [Proc] lambda taking a parser state; yields
#   Result.err(:iteration_too_short) when fewer than +min+ applications
#   succeeded, otherwise the last successful Result (Result.ok(input) when
#   nothing matched and +min+ is 0)
def iterate(min = 1, &parser)
  raise ArgumentError, 'iterate requires a parser block' unless parser

  # Size of the unconsumed input, or nil when the state does not expose +rest+.
  remaining = ->(state) { state.rest.size if state.respond_to?(:rest) }

  lambda do |input|
    last_ok = Result.ok(input)
    completed = 0

    loop do
      before = remaining.call(last_ok.ok_value)
      attempt = parser.call(last_ok.ok_value)
      break unless attempt.ok?

      completed += 1
      last_ok = attempt
      # No input consumed: repeating would give the same success again.
      break if before && remaining.call(attempt.ok_value) == before
    end

    return Result.err(:iteration_too_short) if completed < min

    last_ok
  end
end
# Unit tests for the parser combinators (for_tokens, for_sequence, attach,
# drop, iterate) and the String helpers built on them.
class TestParser < Test::Unit::TestCase
  # A literal token is consumed from the front of the input and recorded.
  def test_for_tokens
    start_tag = for_tokens('<')

    assert(start_tag.call('<'.parse_response).ok?)
    assert(start_tag.call('<abc'.parse_response).ok?)
    assert_equal(['<'], start_tag.call('<'.parse_response).ok_value.args)
    assert_equal('', start_tag.call('<'.parse_response).ok_value.rest)
    assert_equal('abc', start_tag.call('<abc'.parse_response).ok_value.rest)
  end

  # The longest leading run of matching characters becomes a single token.
  def test_for_sequence
    string_seq = for_sequence { |s| /[a-z]{1}/ =~ s }

    assert(string_seq.call('ab123'.parse_response).ok?)
    assert_equal('123', string_seq.call('ab123'.parse_response).ok_value.rest)
    assert_equal(['ab'], string_seq.call('ab123'.parse_response).ok_value.args)
  end

  # Parsers run in order; any failing step fails the whole sequence.
  def test_attach
    start_tag = for_tokens('<')
    name = for_sequence { |s| /[a-z]{1}/ =~ s }
    end_tag = for_tokens('/>')
    full_tag = attach(start_tag, name, end_tag)

    res_ok = full_tag.call('<hello/>inner'.parse_response)
    assert(res_ok.ok?)
    assert_equal('inner', res_ok.ok_value.rest)
    assert_equal(['<', 'hello', '/>'], res_ok.ok_value.args)

    assert(full_tag.call('a<hello>else'.parse_response).err?)
    assert(full_tag.call('a<hello1>else'.parse_response).err?)
    assert(full_tag.call('<hello>else'.parse_response).err?)
  end

  # Repetition stops at the first failure and keeps the last good state.
  def test_iterate
    foo_word = for_tokens('foo')
    one_or_more = iterate(&foo_word)

    res_ok = one_or_more.call('foofoofoofobar'.parse_response)
    assert(res_ok.ok?)
    assert_equal('fobar', res_ok.ok_value.rest)
    assert_equal(['foo', 'foo', 'foo'], res_ok.ok_value.args)
  end

  # End-to-end: a small hash-like document built from the combinators.
  # Whitespace and quotes are wrapped in drop, so they are consumed but do
  # not show up among the collected tokens.
  def test_json
    raw = <<~JSON
      {
      "foo" => "bar",
      "bar" => "zoo"
      }
    JSON

    whitespace_seq = for_sequence { |c| /[\n\t\r ]{1}/ =~ c }
    whitespace_one = drop(&whitespace_seq)
    whitespace = iterate(0, &whitespace_one)

    open_brace = '{'.token
    close_brace = '}'.token

    quote_seq = '"'.token
    quote = drop(&quote_seq)
    ident = for_sequence { |c| /[a-z]{1}/ =~ c }
    string = attach(quote, ident, quote)

    key_assign = '=>'.token
    comma = ','.token
    optional_commas = iterate(0, &comma) # zero or more separators

    key_value = attach(whitespace, string, whitespace, key_assign, whitespace, string, optional_commas, whitespace)
    key_values = iterate(0, &key_value)

    json = attach(
      whitespace,
      open_brace,
      whitespace,
      key_values,
      whitespace,
      close_brace,
      whitespace,
    )

    result = json.call(raw.parse_response)
    assert(result.ok?)
    assert_equal('', result.ok_value.rest)
    assert_equal(['{', 'foo', '=>', 'bar', ',', 'bar', '=>', 'zoo', '}'], result.ok_value.args)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment