Created
July 22, 2011 20:13
-
-
Save apeiros/1100314 to your computer and use it in GitHub Desktop.
Parse literals to ruby objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'bigdecimal'
require 'date'
require 'strscan'
require 'time'
# Parses a string containing exactly one Ruby-style literal into the
# corresponding Ruby object, without evaluating arbitrary code.
#
# This is copied and slightly refactored from BareTest::TabularData
#
# Example
#   LiteralParser.parse("nil")  # => nil
#   LiteralParser.parse(":foo") # => :foo
#   LiteralParser.parse("123")  # => 123
#
# Options (second argument, a Hash)
#   :constant_base    Prefix under which constants are resolved (nil means toplevel)
#   :use_big_decimal  When true (default), plain decimals like 12.5 become BigDecimal,
#                     when false they become Float
#
# Beware
#   12.5 is NOT parsed as a float, but as a BigDecimal! Use 12.5f or 12.5e0 to
#   have it parsed as a float (12.5e0 is equivalent to 1.25e1).
#
# Recognizes constants and the following literals:
#   nil                      # nil
#   true                     # true
#   false                    # false
#   -123                     # Integer (decimal)
#   0b1011                   # Integer (binary)
#   0755                     # Integer (octal)
#   0xff                     # Integer (hexadecimal)
#   120.30                   # BigDecimal
#   1.5f                     # Float
#   1.5e0                    # Float
#   "foo"                    # String, no interpolation, but \t etc. work
#   'foo'                    # String, only \\ and \' are escaped
#   /foo/                    # Regexp
#   :foo                     # Symbol
#   :"foo"                   # Symbol
#   2010-05-23               # Date
#   2010-05-23T06:45:00      # Time (local time zone)
#   2010-05-23T06:45:00Z     # Time (explicit zone: Z, an abbreviation or +HHMM/-HHMM)
#   06:45:00                 # Time (today's date)
#   [Any, Literals, Here]    # Array
#   {Any => Literals}        # Hash
#
# Raises SyntaxError when the input is not a recognized literal or when data
# is left over after the literal.
#
# TODO
# * ruby with 32bit and version < 1.9.2 raises RangeError for too big/small Time
#   instances, should we degrade to DateTime for those?
# * Implement %-literals (String: %, %Q, %q, Symbol: %s; Regexp: %r; Array: %W, %w)
class LiteralParser
  RArrayBegin     = /\[/
  RArrayVoid      = /\s*/
  RArraySeparator = /#{RArrayVoid},#{RArrayVoid}/
  RArrayEnd       = /\]/
  RHashBegin      = /\{/
  RHashVoid       = /\s*/
  RHashSeparator  = /#{RHashVoid},#{RHashVoid}/
  RHashArrow      = /#{RHashVoid}=>#{RHashVoid}/
  RHashEnd        = /\}/
  RConstant       = /[A-Z]\w*(?:::[A-Z]\w*)*/
  RNil            = /nil/
  RFalse          = /false/
  RTrue           = /true/
  RInteger        = /[+-]?\d[\d_]*/
  RBinaryInteger  = /[+-]?0b[01][01_]*/
  RHexInteger     = /[+-]?0x[A-Fa-f\d][A-Fa-f\d_]*/
  # Only digits and underscores may follow; a comma must stay available as the
  # array/hash separator.
  ROctalInteger   = /[+-]?0[0-7][0-7_]*/
  RBigDecimal     = /#{RInteger}\.\d+/
  RFloat          = /#{RBigDecimal}(?:f|e#{RInteger})/
  RSString        = /'(?:[^\\']+|\\.)*'/
  RDString        = /"(?:[^\\"]+|\\.)*"/
  RRegexp         = %r{/((?:[^\\/]+|\\.)*)/([imxnNeEsSuU]*)}
  RSymbol         = /:\w+|:#{RSString}|:#{RDString}/
  RDate           = /(\d{4})-(\d{2})-(\d{2})/
  RTimeZone       = /(Z|[A-Z]{3,4}|[+-]\d{4})/
  # Captures hour, minute, second and (optionally) the time zone.
  RTime           = /(\d{2}):(\d{2}):(\d{2})#{RTimeZone}?/
  RDateTime       = /#{RDate}T#{RTime}/
  RSeparator      = /[^A-Z\#nft\d:'"\/+-]+|$/
  RTerminator     = /\s*(?:\#.*)?(?:\n|\r\n?|\Z)/
  RIdentifier     = /[A-Za-z_]\w*/

  # Matches a single escape sequence inside a double quoted string.
  RDStringEscape  = /\\(?:[0-3]?\d\d?|x[A-Fa-f\d]{2}|.)/

  # Maps escape sequences (as written in the source text) to the character
  # they stand for in a double quoted string.
  DStringEscapes = {
    '\\\\' => "\\",
    "\\'"  => "'",
    '\\"'  => '"',
    '\t'   => "\t",
    '\f'   => "\f",
    '\r'   => "\r",
    '\n'   => "\n",
    '\a'   => "\a",
    '\b'   => "\b",
    '\e'   => "\e",
    '\v'   => "\v",
    '\s'   => " ",
  }
  # Octal (\7, \007) and hexadecimal (\x0a, \x0A) byte escapes.
  256.times do |i|
    DStringEscapes["\\%o" % i]    = i.chr
    DStringEscapes["\\%03o" % i]  = i.chr
    DStringEscapes["\\x%02x" % i] = i.chr
    DStringEscapes["\\x%02X" % i] = i.chr
  end

  # Parses +string+ and returns the resulting object.
  # See LiteralParser#initialize for the arguments and raised errors.
  def self.parse(string, opt=nil)
    new(string, opt).value
  end

  # The object the parsed literal evaluated to.
  attr_reader :value

  # The prefix under which constants are resolved, nil means toplevel.
  attr_reader :constant_base

  # Whether plain decimals are parsed as BigDecimal (true) or Float (false).
  attr_reader :use_big_decimal

  # string:: The text to parse, must consist of exactly one literal.
  # opt::    Optional Hash with the keys :constant_base and :use_big_decimal.
  #          The hash passed in is not modified.
  #
  # Raises SyntaxError if the literal is not recognized or if there is data
  # left after it.
  def initialize(string, opt=nil)
    opt              = opt ? opt.dup : {}
    @constant_base   = opt[:constant_base] # nil means toplevel
    @use_big_decimal = opt.delete(:use_big_decimal) { true }
    @string          = string
    @scanner         = StringScanner.new(string)
    @value           = scan_value
    raise SyntaxError, "Unexpected superfluous data: #{@scanner.rest.inspect}" unless @scanner.eos?
  end

  private

  # Scans a single literal at the current scanner position and returns its
  # value. The order of the branches matters: more specific patterns
  # (e.g. date-time, float) must be tried before the more general ones
  # (date, integer) that would match a prefix of them.
  def scan_value
    case
    when @scanner.scan(RArrayBegin)    then scan_array_rest
    when @scanner.scan(RHashBegin)     then scan_hash_rest
    when @scanner.scan(RConstant)      then resolve_constant(@scanner[0])
    when @scanner.scan(RNil)           then nil
    when @scanner.scan(RTrue)          then true
    when @scanner.scan(RFalse)         then false
    when @scanner.scan(RDateTime)
      build_time(@scanner[1], @scanner[2], @scanner[3], @scanner[4], @scanner[5], @scanner[6], @scanner[7])
    when @scanner.scan(RDate)
      Date.civil(@scanner[1].to_i, @scanner[2].to_i, @scanner[3].to_i)
    when @scanner.scan(RTime)
      now = Time.now
      build_time(now.year, now.month, now.day, @scanner[1], @scanner[2], @scanner[3], @scanner[4])
    when @scanner.scan(RFloat)         then Float(@scanner.matched.delete('^0-9.e-'))
    when @scanner.scan(RBigDecimal)
      data = @scanner.matched.delete('^0-9.-')
      @use_big_decimal ? BigDecimal(data) : Float(data)
    when @scanner.scan(ROctalInteger)  then Integer(@scanner.matched.delete('^0-9-'))
    when @scanner.scan(RHexInteger)    then Integer(@scanner.matched.delete('^xX0-9A-Fa-f-'))
    when @scanner.scan(RBinaryInteger) then Integer(@scanner.matched.delete('^bB01-'))
    when @scanner.scan(RInteger)       then @scanner.matched.delete('^0-9-').to_i
    when @scanner.scan(RRegexp)        then build_regexp(@scanner[1], @scanner[2])
    when @scanner.scan(RSymbol)        then symbol_from(@scanner.matched)
    when @scanner.scan(RSString)       then unescape_single_quoted(@scanner.matched[1..-2])
    when @scanner.scan(RDString)       then unescape_double_quoted(@scanner.matched[1..-2])
    else raise SyntaxError, "Unrecognized pattern: #{@scanner.rest.inspect}"
    end
  end

  # Scans the remainder of an array literal, the opening bracket has already
  # been consumed. Returns the Array.
  def scan_array_rest
    elements = []
    @scanner.scan(RArrayVoid)
    return elements if @scanner.scan(RArrayEnd)

    elements << scan_value
    elements << scan_value while @scanner.scan(RArraySeparator)
    @scanner.scan(RArrayVoid)
    raise SyntaxError, "Expected ]" unless @scanner.scan(RArrayEnd)
    elements
  end

  # Scans the remainder of a hash literal, the opening brace has already been
  # consumed. Returns the Hash.
  def scan_hash_rest
    pairs = {}
    @scanner.scan(RHashVoid)
    return pairs if @scanner.scan(RHashEnd)

    scan_hash_pair(pairs)
    scan_hash_pair(pairs) while @scanner.scan(RHashSeparator)
    @scanner.scan(RHashVoid)
    raise SyntaxError, "Expected }" unless @scanner.scan(RHashEnd)
    pairs
  end

  # Scans one "key => value" pair and stores it in +pairs+.
  def scan_hash_pair(pairs)
    key = scan_value
    raise SyntaxError, "Expected =>" unless @scanner.scan(RHashArrow)
    pairs[key] = scan_value
  end

  # Resolves a (possibly nested) constant name relative to constant_base.
  #
  # NOTE(review): this uses eval. +name+ is safe because RConstant only
  # matches constant paths, but @constant_base is interpolated verbatim, so
  # it must never come from untrusted input.
  def resolve_constant(name)
    eval("#{@constant_base}::#{name}") # yes, I know it's evil, but it's sane due to the regex, also it's less annoying than deep_const_get
  end

  # Builds a Time from the scanned components. Without a zone the local time
  # zone is used. With a zone, the offset is looked up via Time.zone_offset
  # and a SyntaxError is raised if the zone is unknown.
  def build_time(year, month, day, hour, minute, second, zone)
    parts = [year, month, day, hour, minute, second].map { |part| part.to_i }
    return Time.mktime(*parts) unless zone

    offset = Time.zone_offset(zone)
    raise SyntaxError, "Unknown time zone: #{zone.inspect}" unless offset
    Time.new(*(parts + [offset]))
  end

  # Builds a Regexp from its source and the flag characters following the
  # closing slash. Of the encoding flags (n, e, s, u) the last one given
  # wins, and only n/N (no encoding) has an effect.
  def build_regexp(source, flag_chars)
    flag_chars = flag_chars.to_s
    flags      = 0
    flags |= Regexp::IGNORECASE if flag_chars.include?('i')
    flags |= Regexp::MULTILINE  if flag_chars.include?('m')
    flags |= Regexp::EXTENDED   if flag_chars.include?('x')
    encoding_flag = flag_chars.delete('^nNeEsSuU')[-1, 1]
    flags |= Regexp::NOENCODING if encoding_flag && encoding_flag.downcase == 'n'
    Regexp.new(source, flags)
  end

  # Converts a scanned symbol literal (:foo, :"foo" or :'foo') to a Symbol.
  def symbol_from(literal)
    case literal[1, 1]
    when '"' then unescape_double_quoted(literal[2..-2]).to_sym
    when "'" then unescape_single_quoted(literal[2..-2]).to_sym
    else          literal[1..-1].to_sym
    end
  end

  # Resolves the escapes of a single quoted string body: only \\ and \' are
  # escapes, every other backslash is kept literally.
  def unescape_single_quoted(body)
    body.gsub(/\\([\\'])/) { $1 }
  end

  # Resolves the escapes of a double quoted string body. Escapes that are not
  # listed in DStringEscapes evaluate to the escaped character(s) themselves
  # (e.g. \q becomes q), like in ruby.
  def unescape_double_quoted(body)
    body.gsub(RDStringEscape) { |escape| DStringEscapes.fetch(escape) { escape[1..-1] } }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment