Last active
August 29, 2015 14:05
-
-
Save rummelonp/b3d9c496169baacc859d to your computer and use it in GitHub Desktop.
Chinese numeral parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/.bundle/
/.yardoc
/Gemfile.lock
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
*.bundle
*.so
*.o
*.a
mkmf.log
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--color
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Resolve gems from the public RubyGems index.
source 'https://rubygems.org'

# Take the dependency list from the gemspec that sits next to this Gemfile.
gemspec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# Gem specification for `kansuji`, a Chinese numeral parser.
#
# All sources live flat in the repository root, which is why `require_paths`
# is '.' and the test file is referenced without a directory.
Gem::Specification.new do |spec|
  spec.name          = 'kansuji'
  spec.version       = '0.0.1'
  spec.authors       = ['Kazuya Takeshima']
  # NOTE(review): this value looks like an address masked by e-mail
  # obfuscation; confirm the real contact address before releasing.
  spec.email         = ['[email protected]']
  spec.summary       = 'Chinese numeral parser'
  spec.homepage      = 'https://gist.github.com/mitukiii/b3d9c496169baacc859d'
  spec.license       = 'MIT'

  # Package every file tracked by git. `git ls-files` prints one path per
  # line, so split on a literal newline instead of the mutable `$/` global.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = []
  spec.test_files    = ['kansuji_spec.rb']
  spec.require_paths = ['.']

  spec.add_runtime_dependency 'parslet'

  spec.add_development_dependency 'bundler', '~> 1.7'
  # The Rakefile requires rake and rspec, and the spec uses the RSpec 3
  # `RSpec.describe` entry point; declare both so `bundle exec rake` can
  # load them (the Gemfile pulls its dependencies from this gemspec only).
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
require 'parslet' | |
# Parser for numbers written with kanji numerals (kansuji).
#
# A numeral is read as a chain of groups. Each group is built from the
# characters in NUMBERS (1-9) and DIGITS (10, 100, 1000) and may be followed
# by one of the UNITS (powers of 10,000) that scales the whole group.
#
#   Kansuji.parse('二十一万')  # => 210_000
#
# Known limitation: the grammar does not check that digits or units appear in
# descending order, so e.g. '十百' is accepted and evaluates to 110.
module Kansuji
  # Kanji for the values 1 through 9, in ascending order.
  NUMBERS = %w[一 二 三 四 五 六 七 八 九].freeze
  # Multipliers inside one group: 10, 100, 1000 (10 ** (index + 1)).
  DIGITS = %w[十 百 千].freeze
  # Group units: 10_000 ** (index + 1), i.e. 万 = 10**4, 億 = 10**8, ...
  UNITS = %w[万 億 兆 京 垓 𥝱 穣 溝 澗 正 載 極 恒河沙 阿僧祇 那由他 不可思議 無量大数].freeze

  # @return [Kansuji::Parser] the memoized grammar instance
  def self.parser
    @parser ||= Parser.new
  end

  # @return [Kansuji::Transform] the memoized tree transformer
  def self.transform
    @transform ||= Transform.new
  end

  # Converts a kanji numeral into an Integer.
  #
  # @param text [String] the numeral, e.g. '一万二千'
  # @return [Integer] the numeric value
  # @raise [Parslet::ParseFailed] if +text+ is not accepted by the grammar
  def self.parse(text)
    transform.apply(parser.parse(text))
  end

  # Grammar producing a nested hash tree with the keys :expressions,
  # :expression, :number, :digit and :unit.
  class Parser < Parslet::Parser
    root(:expressions)

    # One group, optionally followed by a unit and then further groups.
    # Further groups are only allowed after a unit: without that restriction
    # an input such as '二三' would parse into a tree that no transform rule
    # matches, and Kansuji.parse would return a Hash instead of failing.
    rule(:expressions) {
      (
        expression >> (unit >> expressions.maybe).maybe
      ).as(:expressions)
    }

    # A value below 10,000: either a bare number, or an optional number
    # followed by a digit multiplier and an optional remainder.
    rule(:expression) {
      (
        number.maybe >> digit >> expression.maybe |
        number
      ).as(:expression)
    }

    rule(:number) {
      alternative_str(NUMBERS).as(:number)
    }

    rule(:digit) {
      alternative_str(DIGITS).as(:digit)
    }

    rule(:unit) {
      alternative_str(UNITS).as(:unit)
    }

    # Builds a parslet that matches any one of the given literal strings.
    #
    # @param values [Array<String>] the literals to accept
    # @return [Parslet::Atoms::Base] the alternatives joined with `|`
    def alternative_str(values)
      values.map { |value| str(value) }.reduce { |alternatives, parslet| alternatives | parslet }
    end
  end

  # Folds the parse tree bottom-up into a single Integer.
  class Transform < Parslet::Transform
    rule(expressions: simple(:expressions)) {
      expressions
    }

    rule(expression: simple(:expression)) {
      expression
    }

    rule(number: simple(:number)) {
      Number.new(number).to_i
    }

    # A digit without a leading number counts once (十 = 10).
    rule(digit: simple(:digit)) {
      Digit.new(digit).to_i
    }

    rule(number: simple(:number), digit: simple(:digit)) {
      Number.new(number).to_i * Digit.new(digit).to_i
    }

    rule(digit: simple(:digit), expression: simple(:expression)) {
      Digit.new(digit).to_i + expression
    }

    rule(number: simple(:number), digit: simple(:digit), expression: simple(:expression)) {
      Number.new(number).to_i * Digit.new(digit).to_i + expression
    }

    rule(expression: simple(:expression), unit: simple(:unit)) {
      expression * Unit.new(unit).to_i
    }

    rule(expression: simple(:expression), unit: simple(:unit), expressions: simple(:expressions)) {
      expression * Unit.new(unit).to_i + expressions
    }
  end

  # Wraps one of NUMBERS and converts it to 1..9.
  Number = Struct.new(:number) do
    def to_i
      NUMBERS.index(number) + 1
    end
  end

  # Wraps one of DIGITS and converts it to 10, 100 or 1000.
  Digit = Struct.new(:digit) do
    def to_i
      10**(DIGITS.index(digit) + 1)
    end
  end

  # Wraps one of UNITS and converts it to the matching power of 10,000.
  Unit = Struct.new(:unit) do
    def to_i
      10_000**(UNITS.index(unit) + 1)
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# Make the directory containing this spec loadable, since kansuji.rb sits
# next to it rather than under lib/.
lib = File.expand_path('../', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'kansuji'

RSpec.describe Kansuji do
  describe '.parse' do
    # Table of numeral => expected Integer; one example is generated per row.
    {
      '一'             => 1,
      '十'             => 10,
      '十一'           => 11,
      '二十'           => 20,
      '二十一'         => 21,
      '百一'           => 101,
      '百十'           => 110,
      '百十一'         => 111,
      '百二十'         => 120,
      '百二十一'       => 121,
      '二百'           => 200,
      '千一'           => 1_001,
      '千十'           => 1_010,
      '千十一'         => 1_011,
      '千二十'         => 1_020,
      '千二十一'       => 1_021,
      '千百一'         => 1_101,
      '千百十'         => 1_110,
      '千百十一'       => 1_111,
      '千百二十'       => 1_120,
      '千百二十一'     => 1_121,
      '二千'           => 2_000,
      '一万一'         => 10_001,
      '一万十'         => 10_010,
      '一万十一'       => 10_011,
      '一万二十'       => 10_020,
      '一万二十一'     => 10_021,
      '一万百一'       => 10_101,
      '一万百十'       => 10_110,
      '一万百十一'     => 10_111,
      '一万百二十'     => 10_120,
      '一万百二十一'   => 10_121,
      '一万二百'       => 10_200,
      '一万千一'       => 11_001,
      '一万千十'       => 11_010,
      '一万千十一'     => 11_011,
      '一万千二十'     => 11_020,
      '一万千二十一'   => 11_021,
      '一万千百一'     => 11_101,
      '一万千百十'     => 11_110,
      '一万千百十一'   => 11_111,
      '一万千百二十'   => 11_120,
      '一万千百二十一' => 11_121,
      '一万二千'       => 12_000,
      '十万'           => 100_000,
      '二十万'         => 200_000,
      '二十一万'       => 210_000,
      '一億'           => 100_000_000,
      '十億'           => 1_000_000_000,
      '十一億'         => 1_100_000_000,
      '二十億'         => 2_000_000_000,
      '二十一億'       => 2_100_000_000,
      '一兆'           => 10 ** 12,
      '一京'           => 10 ** 16,
      '一垓'           => 10 ** 20,
      '一𥝱'           => 10 ** 24,
      '一穣'           => 10 ** 28,
      '一溝'           => 10 ** 32,
      '一澗'           => 10 ** 36,
      '一正'           => 10 ** 40,
      '一載'           => 10 ** 44,
      '一極'           => 10 ** 48,
      '一恒河沙'       => 10 ** 52,
      '一阿僧祇'       => 10 ** 56,
      '一那由他'       => 10 ** 60,
      '一不可思議'     => 10 ** 64,
      '一無量大数'     => 10 ** 68,
    }.each do |text, number|
      it ".parse(#{text.inspect}) = #{number.inspect}" do
        expect(Kansuji.parse(text)).to eql(number)
      end
    end

    context 'parse failed' do
      %w(万 一万億).each do |text|
        it ".parse(#{text.inspect}) should raise error" do
          # Name the error class: a bare `raise_error` would also pass on an
          # unrelated exception such as a NameError from a typo.
          expect { Kansuji.parse(text) }.to raise_error(Parslet::ParseFailed)
        end
      end
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rake tasks for the kansuji gem: Bundler's gem tasks plus an RSpec task,
# which is also the default task.

# NOTE(review): presumably read by bundler/gem_tasks to skip pushing to
# RubyGems on `rake release` - confirm against the Bundler version in use.
ENV['gem_push'] = 'off'
# NOTE(review): presumably read by RSpec's rake task to select the spec file
# (the spec lives in the repository root, not under spec/) - confirm.
ENV['SPEC'] = 'kansuji_spec.rb'

require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Defines the `spec` task; the name is spelled out so that the dependency of
# the default task below is visible at a glance.
RSpec::Core::RakeTask.new(:spec)

task default: :spec
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment