Skip to content

Instantly share code, notes, and snippets.

@rummelonp
Last active August 29, 2015 14:05
Show Gist options
  • Save rummelonp/b3d9c496169baacc859d to your computer and use it in GitHub Desktop.
Save rummelonp/b3d9c496169baacc859d to your computer and use it in GitHub Desktop.
Chinese numeral parser
/.bundle/
/.yardoc
/Gemfile.lock
/_yardoc/
/coverage/
/doc/
/pkg/
/spec/reports/
/tmp/
*.bundle
*.so
*.o
*.a
mkmf.log
# Resolve gems from the public RubyGems index.
source 'https://rubygems.org'
# Take the dependency list from the gemspec that sits next to this Gemfile.
gemspec
# coding: utf-8
# Gem specification for kansuji, a Chinese numeral parser built on parslet.
Gem::Specification.new do |spec|
  spec.name     = 'kansuji'
  spec.version  = '0.0.1'
  spec.authors  = ['Kazuya Takeshima']
  spec.email    = ['[email protected]']
  spec.summary  = 'Chinese numeral parser'
  spec.homepage = 'https://gist.github.com/mitukiii/b3d9c496169baacc859d'
  spec.license  = 'MIT'

  # NUL-separated listing: file names containing newlines survive, and the
  # result no longer depends on the mutable `$/` global.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = []
  spec.test_files    = ['kansuji_spec.rb']
  # The gem root itself is the require path.
  spec.require_paths = ['.']

  spec.add_runtime_dependency 'parslet'

  spec.add_development_dependency 'bundler', '~> 1.7'
  # The Rakefile loads rake tasks and rspec/core/rake_task, and the spec file
  # uses the RSpec 3 `RSpec.describe` entry point, so both must be in the bundle.
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
# coding: utf-8
require 'parslet'
module Kansuji
# Numerals for 1 through 9; Number#to_i maps a numeral to its position plus one.
NUMBERS = %w[一 二 三 四 五 六 七 八 九].freeze
# Multipliers inside one group; Digit#to_i maps them to 10, 100 and 1000.
DIGITS = %w[十 百 千].freeze
# Group units; Unit#to_i maps the entry at position i to 10_000 ** (i + 1).
UNITS = %w[万 億 兆 京 垓 𥝱 穣 溝 澗 正 載 極 恒河沙 阿僧祇 那由他 不可思議 無量大数].freeze
# Returns the shared Parser instance, creating it the first time it is asked for.
def self.parser
  return @parser if @parser

  @parser = Parser.new
end
# Returns the shared Transform instance, creating it the first time it is asked for.
def self.transform
  return @transform if @transform

  @transform = Transform.new
end
# Parses +text+ with the shared parser and reduces the resulting tree with
# the shared transform.
#
# @param text [String] the numeral text to convert
# @return whatever the transform produces for the parsed tree
def self.parse(text)
  # Fetch the transform first so both collaborators are obtained in the same
  # order as a nested `transform.apply(parser.parse(text))` call would.
  reducer = transform
  tree = parser.parse(text)
  reducer.apply(tree)
end
# Parslet grammar for kanji numerals.
#
# The tree it produces nests :expressions and :expression hashes whose leaves
# are tagged :number, :digit and :unit.
class Parser < Parslet::Parser
  root :expressions

  # One group, optionally followed by a unit and by further groups.
  rule(:expressions) do
    grouped = expression >> unit.maybe >> expressions.maybe
    (grouped | expression).as(:expressions)
  end

  # An optional numeral in front of a digit, optionally followed by a nested
  # expression; or a lone numeral.
  rule(:expression) do
    positional = number.maybe >> digit >> expression.maybe
    (positional | number).as(:expression)
  end

  # Any single entry of NUMBERS.
  rule(:number) do
    alternative_str(NUMBERS).as(:number)
  end

  # Any single entry of DIGITS.
  rule(:digit) do
    alternative_str(DIGITS).as(:digit)
  end

  # Any single entry of UNITS.
  rule(:unit) do
    alternative_str(UNITS).as(:unit)
  end

  # Builds one alternation out of literal matchers, tried in the order given.
  #
  # @param values [Array<String>] literals to match
  # @return the combined parslet (nil for an empty list)
  def alternative_str(values)
    values.map { |value| str(value) }.reduce(:|)
  end
end
# Reduces the parse tree to an integer, one hash shape per rule.
class Transform < Parslet::Transform
  # Wrapper nodes whose content is already reduced: pass the value through.
  rule(expressions: simple(:expressions)) do
    expressions
  end

  rule(expression: simple(:expression)) do
    expression
  end

  # A lone numeral.
  rule(number: simple(:number)) do
    Number.new(number).to_i
  end

  # A lone digit, with no numeral in front of it.
  rule(digit: simple(:digit)) do
    Digit.new(digit).to_i
  end

  # Numeral times digit.
  rule(number: simple(:number), digit: simple(:digit)) do
    multiplier = Number.new(number).to_i
    place = Digit.new(digit).to_i
    multiplier * place
  end

  # Digit plus the already-reduced remainder.
  rule(digit: simple(:digit), expression: simple(:expression)) do
    place = Digit.new(digit).to_i
    place + expression
  end

  # Numeral times digit, plus the already-reduced remainder.
  rule(number: simple(:number), digit: simple(:digit), expression: simple(:expression)) do
    multiplier = Number.new(number).to_i
    place = Digit.new(digit).to_i
    multiplier * place + expression
  end

  # A group scaled by its unit.
  rule(expression: simple(:expression), unit: simple(:unit)) do
    scale = Unit.new(unit).to_i
    expression * scale
  end

  # A group scaled by its unit, plus the already-reduced following groups.
  rule(expression: simple(:expression), unit: simple(:unit), expressions: simple(:expressions)) do
    scale = Unit.new(unit).to_i
    expression * scale + expressions
  end
end
# Wraps one numeral token and converts it to its integer value.
#
# Assigned from Struct.new rather than subclassing it, so no anonymous
# intermediate class is created.
Number = Struct.new(:number) do
  # @return [Integer] position of the token in NUMBERS, plus one
  # @raise [ArgumentError] if the token is not listed in NUMBERS
  def to_i
    # Array#index compares with ==, so string-like tokens match too.
    position = NUMBERS.index(number)
    raise ArgumentError, "unknown number: #{number.inspect}" unless position

    position + 1
  end
end
# Wraps one digit token and converts it to the power of ten it stands for.
#
# Assigned from Struct.new rather than subclassing it, so no anonymous
# intermediate class is created.
Digit = Struct.new(:digit) do
  # @return [Integer] 10 raised to (position of the token in DIGITS, plus one)
  # @raise [ArgumentError] if the token is not listed in DIGITS
  def to_i
    # Array#index compares with ==, so string-like tokens match too.
    position = DIGITS.index(digit)
    raise ArgumentError, "unknown digit: #{digit.inspect}" unless position

    10 ** (position + 1)
  end
end
# Wraps one unit token and converts it to the power of 10_000 it stands for.
#
# Assigned from Struct.new rather than subclassing it, so no anonymous
# intermediate class is created.
Unit = Struct.new(:unit) do
  # @return [Integer] 10_000 raised to (position of the token in UNITS, plus one)
  # @raise [ArgumentError] if the token is not listed in UNITS
  def to_i
    # Array#index compares with ==, so string-like tokens match too.
    position = UNITS.index(unit)
    raise ArgumentError, "unknown unit: #{unit.inspect}" unless position

    10_000 ** (position + 1)
  end
end
end
# coding: utf-8
lib = File.expand_path('../', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'kansuji'
RSpec.describe Kansuji do
  describe '.parse' do
    # Input text => expected integer.
    {
      '一' => 1,
      '十' => 10,
      '十一' => 11,
      '二十' => 20,
      '二十一' => 21,
      '百一' => 101,
      '百十' => 110,
      '百十一' => 111,
      '百二十' => 120,
      '百二十一' => 121,
      '二百' => 200,
      '千一' => 1_001,
      '千十' => 1_010,
      '千十一' => 1_011,
      '千二十' => 1_020,
      '千二十一' => 1_021,
      '千百一' => 1_101,
      '千百十' => 1_110,
      '千百十一' => 1_111,
      '千百二十' => 1_120,
      '千百二十一' => 1_121,
      '二千' => 2_000,
      '一万一' => 10_001,
      '一万十' => 10_010,
      '一万十一' => 10_011,
      '一万二十' => 10_020,
      '一万二十一' => 10_021,
      '一万百一' => 10_101,
      '一万百十' => 10_110,
      '一万百十一' => 10_111,
      '一万百二十' => 10_120,
      '一万百二十一' => 10_121,
      '一万二百' => 10_200,
      '一万千一' => 11_001,
      '一万千十' => 11_010,
      '一万千十一' => 11_011,
      '一万千二十' => 11_020,
      '一万千二十一' => 11_021,
      '一万千百一' => 11_101,
      '一万千百十' => 11_110,
      '一万千百十一' => 11_111,
      '一万千百二十' => 11_120,
      '一万千百二十一' => 11_121,
      '一万二千' => 12_000,
      '十万' => 100_000,
      '二十万' => 200_000,
      '二十一万' => 210_000,
      '一億' => 100_000_000,
      '十億' => 1_000_000_000,
      '十一億' => 1_100_000_000,
      '二十億' => 2_000_000_000,
      '二十一億' => 2_100_000_000,
      '一兆' => 10 ** 12,
      '一京' => 10 ** 16,
      '一垓' => 10 ** 20,
      '一𥝱' => 10 ** 24,
      '一穣' => 10 ** 28,
      '一溝' => 10 ** 32,
      '一澗' => 10 ** 36,
      '一正' => 10 ** 40,
      '一載' => 10 ** 44,
      '一極' => 10 ** 48,
      '一恒河沙' => 10 ** 52,
      '一阿僧祇' => 10 ** 56,
      '一那由他' => 10 ** 60,
      '一不可思議' => 10 ** 64,
      '一無量大数' => 10 ** 68,
    }.each do |text, number|
      it ".parse(#{text.inspect}) = #{number.inspect}" do
        expect(Kansuji.parse(text)).to eql(number)
      end
    end

    context 'parse failed' do
      %w(万 一万億).each do |text|
        it ".parse(#{text.inspect}) should raise error" do
          # Name the error class: a bare `raise_error` also passes on
          # NoMethodError, NameError and the like, hiding real bugs.
          expect { Kansuji.parse(text) }.to raise_error(Parslet::ParseFailed)
        end
      end
    end
  end
end
# NOTE(review): presumably read by bundler/gem_tasks to skip pushing the gem
# to RubyGems when releasing — confirm against Bundler's gem helper.
ENV['gem_push'] = 'off'
# NOTE(review): presumably picked up by RSpec::Core::RakeTask as the spec
# file to run, since the spec is not in a spec/ directory — confirm.
ENV['SPEC'] = 'kansuji_spec.rb'
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
# Registers the RSpec rake task under its default name; the `default` task
# below expects that name to be :spec.
RSpec::Core::RakeTask.new
# Make a bare `rake` invoke the :spec task.
task default: :spec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment