Created
November 28, 2010 15:17
-
-
Save matthewd/719008 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| From 87bf16defc57148ba2014c7b523eda90b59affa1 Mon Sep 17 00:00:00 2001 | |
| From: Matthew Draper <matthew@trebex.net> | |
| Date: Mon, 29 Nov 2010 01:30:11 +1030 | |
| Subject: [PATCH 1/2] Compile sprintf strings to rbx bytecode. | |
| With appropriate caching, this gets our performance into the same | |
| ballpark as MRI. With perfect caching (code choosing to explicitly | |
| pre-compile an expression once, say), this outperforms MRI. | |
| --- | |
| benchmark/rubinius/bm_sprintf.rb | 34 ++ | |
| kernel/common/capi.rb | 2 +- | |
| kernel/common/io.rb | 2 +- | |
| kernel/common/kernel.rb | 4 +- | |
| kernel/common/load_order.txt | 2 +- | |
| kernel/common/sprinter.rb | 851 ++++++++++++++++++++++++++++++++++++++ | |
| kernel/common/sprintf.rb | 372 ----------------- | |
| kernel/common/string.rb | 4 +- | |
| 8 files changed, 892 insertions(+), 379 deletions(-) | |
| create mode 100644 benchmark/rubinius/bm_sprintf.rb | |
| create mode 100644 kernel/common/sprinter.rb | |
| delete mode 100644 kernel/common/sprintf.rb | |
| diff --git a/benchmark/rubinius/bm_sprintf.rb b/benchmark/rubinius/bm_sprintf.rb | |
| new file mode 100644 | |
| index 0000000..698be20 | |
| --- /dev/null | |
| +++ b/benchmark/rubinius/bm_sprintf.rb | |
| @@ -0,0 +1,34 @@ | |
require 'benchmark'

# Iterations per report; set TOTAL in the environment to override.
iterations = (ENV['TOTAL'] || 1000000).to_i

# Non-sprintf baseline for the '%1$d' report.
def int_alternative(n)
  Integer(n).to_s
end

# Non-sprintf baseline for the 'Hello %s, from %4.2f' report. When Float
# answers to :to_s_formatted we call it directly; otherwise we fall back
# to String#%.
if (1.0).respond_to? :to_s_formatted
  def str_alternative(s, n)
    amount = n.send(:to_s_formatted, '%4.2f')
    "Hello #{s}, from #{amount}"
  end
else
  def str_alternative(s, n)
    # This isn't representative, but it's less interesting on MRI
    amount = '%4.2f' % n
    "Hello #{s}, from #{amount}"
  end
end

Benchmark.bmbm do |bench|
  bench.report("10.to_s") { iterations.times { int_alternative(10) } }
  bench.report("'%1$d'") { iterations.times { sprintf('%1$d', 10) } }

  bench.report('"Hello #{s}, from #{f}"') do
    iterations.times { str_alternative('world', 123.4567) }
  end
  bench.report("'Hello %s, from %4.2f'") do
    iterations.times { sprintf('Hello %s, from %4.2f', 'world', 123.4567) }
  end
end
| diff --git a/kernel/common/capi.rb b/kernel/common/capi.rb | |
| index 0dd01a3..f396cbf 100644 | |
| --- a/kernel/common/capi.rb | |
| +++ b/kernel/common/capi.rb | |
| @@ -25,7 +25,7 @@ module Rubinius | |
| end | |
| def self.sprintf(format, args) | |
| - Rubinius::Sprintf.new(format, *args).parse | |
| + ::Rubinius::Sprinter.get(format).call(*args) | |
| end | |
| end | |
| end | |
| diff --git a/kernel/common/io.rb b/kernel/common/io.rb | |
| index 69ec554..48f6d92 100644 | |
| --- a/kernel/common/io.rb | |
| +++ b/kernel/common/io.rb | |
| @@ -1097,7 +1097,7 @@ class IO | |
| # Formats and writes to ios, converting parameters under | |
| # control of the format string. See Kernel#sprintf for details. | |
| def printf(fmt, *args) | |
| - write Rubinius::Sprintf.new(fmt, *args).parse | |
| + write ::Rubinius::Sprinter.get(fmt).call(*args) | |
| end | |
| ## | |
| diff --git a/kernel/common/kernel.rb b/kernel/common/kernel.rb | |
| index be03630..3d366a1 100644 | |
| --- a/kernel/common/kernel.rb | |
| +++ b/kernel/common/kernel.rb | |
| @@ -166,7 +166,7 @@ module Kernel | |
| if target.kind_of? IO | |
| target.printf(*args) | |
| elsif target.kind_of? String | |
| - $stdout << Rubinius::Sprintf.new(target, *args).parse | |
| + $stdout << Rubinius::Sprinter.get(target).call(*args) | |
| else | |
| raise TypeError, "The first arg to printf should be an IO or a String" | |
| end | |
| @@ -175,7 +175,7 @@ module Kernel | |
| module_function :printf | |
| def sprintf(str, *args) | |
| - Rubinius::Sprintf.new(str, *args).parse | |
| + ::Rubinius::Sprinter.get(str).call(*args) | |
| end | |
| alias_method :format, :sprintf | |
| module_function :sprintf | |
| diff --git a/kernel/common/load_order.txt b/kernel/common/load_order.txt | |
| index e18a887..b10aaea 100644 | |
| --- a/kernel/common/load_order.txt | |
| +++ b/kernel/common/load_order.txt | |
| @@ -67,7 +67,7 @@ process.rbc | |
| random.rbc | |
| regexp.rbc | |
| signal.rbc | |
| -sprintf.rbc | |
| +sprinter.rbc | |
| symbol.rbc | |
| taskprobe.rbc | |
| thread.rbc | |
| diff --git a/kernel/common/sprinter.rb b/kernel/common/sprinter.rb | |
| new file mode 100644 | |
| index 0000000..8a135db | |
| --- /dev/null | |
| +++ b/kernel/common/sprinter.rb | |
| @@ -0,0 +1,851 @@ | |
| +module Rubinius | |
| + class Sprinter | |
| + class << self | |
# Returns a Sprinter compiled for +format+, reusing a cached instance
# when that is safe.
#
# format - the format String, or any object convertible with #to_str
#          (the Sprintf class this replaces called #to_str on its
#          format, so callers may rely on that conversion).
#
# Raises TypeError if +format+ is neither a String nor responds to
# #to_str.
#
# NOTE(review): the cache is never pruned, so a program that builds
# many distinct format strings keeps one compiled Sprinter per string.
def get(format)
  @cache ||= {}

  unless format.kind_of? String
    unless format.respond_to? :to_str
      raise TypeError, "can't convert #{format.class} into String"
    end
    format = format.to_str
  end

  # An alternative to the below would be to have the Builder
  # ignore the taint of the format, and instead have sprintf check
  # the format upon invocation, and taint the result there.
  if format.tainted?
    # The Builder reads the format's taint while compiling, so a
    # tainted format is compiled fresh and never shared via the cache.
    new(format)
  else
    @cache[format] ||= new(format)
  end
end
| + end | |
| + | |
# Compiles +format+ by handing this instance to a Builder; the Builder
# attaches the generated method (named :call) to the object it is given.
def initialize(format)
  builder = Builder.new(self, format)
  builder.build
end
| + | |
| + class Builder | |
# code   - the object the compiled method will be attached to.
# format - the format string to compile.
def initialize(code, format)
  @code = code
  @format = format

  # Change next line to 'true' for debug output. Can't really use
  # $DEBUG, because of its special meaning to sprintf.
  @verbose = false
  if @verbose
    # Dump each distinct format only the first time it is built.
    @@seen ||= {}
    if @@seen[format]
      @verbose = false
    else
      @@seen[format] = true
    end
  end

  @g = ::Rubinius::Generator.new
  @g.name = :call
  @g.file = :sprintf
  @g.set_line 1
end
| + | |
# Parses the format (emitting the method body), finalises the method's
# argument/local layout, packages it as a CompiledMethod and attaches it
# to @code under the generator's name.
def build
  parse

  arity = @arg_count
  @g.required_args = arity

  @g.total_args = arity + 1
  # We won't use it, but we accept a splat; our semantics require
  # that we ignore any excess arguments provided.
  @g.splat_index = arity

  @g.local_count = arity + 1
  arg_names =
    if @index_mode == :absolute
      # Positional arguments are named "1$", "2$", ...
      Array.new(arity) { |i| :"#{i + 1}$" }
    else
      Array.new(arity)
    end
  @g.local_names = arg_names + [:splat]

  @g.ret
  @g.close

  @g.encode
  compiled = @g.package(::Rubinius::CompiledMethod)

  # Careful with this: CM::Instruction#to_s uses String#%
  if @verbose
    puts
    puts @format.inspect
    puts compiled.decode
    puts
  end

  scope = ::Rubinius::StaticScope.new(Object)
  ::Rubinius.attach_method @g.name, compiled, scope, @code
end
| + | |
# Emits a meta_send_op_minus, looking up the :- literal only on first use.
def meta_op_minus
  @op_minus = @g.find_literal(:-) unless @op_minus
  @g.meta_send_op_minus @op_minus
end
| + | |
# Emits code that pushes 0, swaps it beneath the current top of stack
# and subtracts, i.e. computes (0 - top).
def invert
  @g.meta_push_0
  @g.swap

  # Same emission as meta_op_minus, written out in place.
  @op_minus ||= @g.find_literal(:-)
  @g.meta_send_op_minus @op_minus
end
| + | |
# Emits code that compares the top of the stack against 0 with <.
def is_negative
  @g.meta_push_0
  less_than = @g.find_literal(:<)
  @g.meta_send_op_lt less_than
end
| + | |
# Emits a justification call (+direction+ is :ljust or :rjust) taking
# one argument from the stack.
#
# When the width may be negative and the requested direction is not
# already :ljust, the emitted code tests the width at run time: a
# negative width is inverted and applied with :ljust instead.
def justify(direction, may_be_negative=true)
  return @g.send(direction, 1) unless may_be_negative && direction != :ljust

  done = @g.new_label

  @g.dup
  is_negative
  if_false do
    # Width is not negative: justify as requested.
    @g.send direction, 1
    @g.goto done
  end

  # Width is negative: flip its sign and left-justify.
  invert
  @g.send :ljust, 1

  done.set!
end
| + | |
# Numeric base for each integer format code (looked up by the
# downcased code).
RADIX = { 'd' => 10, 'i' => 10, 'u' => 10, 'x' => 16, 'o' => 8, 'b' => 2 }.freeze

# Prefix added by the '#' flag for each format code that has one.
PREFIX = { 'o' => '0', 'x' => '0x', 'X' => '0X', 'b' => '0b', 'B' => '0B' }.freeze
| + | |
# Allocates the local-variable slot for the next argument reference.
#
# specified - the digits of an explicit "N$" reference, or nil for an
#             implicit (sequential) reference.
#
# Returns the zero-based argument index. Raises ArgumentError for index
# 0 and when numbered and unnumbered references are mixed.
def next_index(specified=nil)
  unless specified
    raise ArgumentError, "unnumbered mixed with numbered" if @index_mode == :absolute
    @index_mode = :relative
    slot = @arg_count
    @arg_count += 1
    return slot
  end

  position = specified.to_i
  raise ArgumentError, "invalid positional index" if position == 0
  raise ArgumentError, "unnumbered mixed with numbered" if @index_mode == :relative
  @index_mode = :absolute
  # The method needs as many arguments as the highest position used.
  @arg_count = position if position > @arg_count
  position - 1
end
| + | |
# Tokenizer for format strings; each match is one of:
#   1      a run of plain text (or a '%' followed by newline, NUL or
#          end of string),
#   2-13   a complete directive: flags, optional "N$" field reference,
#          width, precision and the format code,
#   14     a literal '%' ("%%", or a trailing dangling "...$" directive),
#   15     a '%' that starts nothing valid.
#
# The static precision (group 11) accepts any run of digits, including
# "0" and leading zeros, so "%.0f" is a valid directive. Width keeps
# [1-9][0-9]* because a leading "0" there is the zero-padding flag.
RE = /
  ([^%]+|%(?:[\n\0]|\z)) # 1
  |
  %
  ( # 2
    ([0# +-]*) # 3
    (?:([0-9]+)\$)? # 4
    ([0# +-]*) # 5
    (?:
      (\*(?:([0-9]+)\$)?|([1-9][0-9]*))? # 6 7 8
      (?:\.(\*(?:([0-9]+)\$)?|([0-9]+))?)? # 9 10 11
    )
    (?:([0-9]+)\$)? # 12
    ([BbcdEefGgiopsuXx]) # 13
  )
  |
  (%)(?:%|[-+0-9# *.$]+\$[0-9.]*\z) # 14
  |
  (%) # 15
/x
| + | |
# Emits code appending the literal +str+ to the result; a literal never
# needs the taint check.
def append_literal(str)
  @g.push_unique_literal(str)
  append_str(false)
end
| + | |
# Emits code that folds the string on top of the stack into the result
# being accumulated.
#
# taint - when true, the emitted code also sends :tainted? to a value
#         the caller left on the stack below the new string and, if it
#         answers true, sends :taint to the result.
#
# The first piece appended becomes the result (a string_dup of it, sent
# :taint when the format itself was tainted); later pieces are joined
# with string_append.
def append_str(taint)
  if @has_content
    if taint
      # Move the extra value the caller pushed out of the way, so the
      # two strings are adjacent for string_append.
      @g.swap
      @g.move_down 2
    end

    @g.swap
    @g.string_append
  else
    @g.string_dup
    @has_content = true

    if @pre_tainted
      @g.send :taint, 0
    end
  end

  if taint
    @g.swap
    @g.send :tainted?, 0
    if_true do
      @g.send :taint, 0
    end
  end
end
| + | |
# One parsed directive (flags, width, precision, format code) plus the
# bytecode-emitting helpers that depend on them.
#
# b is the owning Builder (used for argument indexes and shared
# emitters) and g is the generator the instructions are written to.
class Atom
  def initialize(b, g, format_code, flags)
    @b, @g = b, g
    @format_code, @flags = format_code, flags

    @f_alt = flags.index(?#)
    @f_zero = flags.index(?0)
    @f_plus = flags.index(?+)
    @f_ljust = flags.index(?-)
    @f_space = flags.index(?\s)

    @just_dir = @f_ljust ? :ljust : :rjust

    @prefix = PREFIX[@format_code] if @f_alt

    # Characters that will be placed in front of the digits (prefix
    # and sign); widths are reduced by this when padding.
    @full_leader_size = @prefix ? @prefix.size : 0
    @full_leader_size += 1 if @f_plus || @f_space
  end

  # Emits code prepending the '#'-flag prefix, if this atom has one.
  def prepend_prefix
    if @prefix
      @g.push_literal @prefix
      @g.string_dup
      @g.string_append
    end
  end

  # Reserves the argument slot holding the value to format.
  def set_value(ref)
    @field_index = @b.next_index(ref)
  end

  # full   - the whole width token (nil when no width was given).
  # ref    - digits of an explicit "N$" after '*', or nil.
  # static - digits of a literal width, or nil.
  def set_width(full, ref, static)
    @width_static = static && static.to_i
    if full && !static
      # '*' width: taken from an argument at run time.
      @width_index = @b.next_index(ref)
    end

    @has_width = @width_static || @width_index
  end

  # Same parameters as set_width, for the precision.
  def set_precision(full, ref, static)
    @prec_static = static && static.to_i
    if full && !static
      @prec_index = @b.next_index(ref)
    end

    # '%#g' without an explicit precision is given precision 4.
    if @format_code == 'g' && @f_alt && !full
      @prec_static = 4
    end

    @has_precision = @prec_static || @prec_index
  end

  def push_value
    @g.push_local @field_index
  end

  # Emits code pushing the field width.
  #
  # adjust - when true, the width is reduced by the size of the leader
  #          (prefix and sign).
  #
  # If a block is given it is yielded to before anything is emitted and
  # must leave a boolean on top of the stack; when that boolean is true
  # at run time the width is reduced by one more.
  def push_width(adjust=true)
    yield if block_given?
    if @width_static
      raise ArgumentError, "width too big" unless @width_static.class == Fixnum
      if adjust && @full_leader_size > 0
        @g.push(@width_static - @full_leader_size)
      else
        @g.push @width_static
      end

      if block_given?
        @g.swap
        @b.if_true do
          @g.meta_push_1
          @b.meta_op_minus
        end
      end

    elsif @width_index
      @g.push_local @width_index
      coerce_top_to_fixnum "width too big"

      n = adjust ? @full_leader_size : 0
      if block_given?
        adjusted = @g.new_label

        @g.swap
        @b.if_true do
          @g.push n + 1
          @b.meta_op_minus
          if n > 0
            @g.goto adjusted
          end
        end

        if n > 0
          @g.push n
          @b.meta_op_minus
          adjusted.set!
        end

      elsif n > 0
        @g.push n
        @b.meta_op_minus
      end

    else
      raise "push without a width"

    end
  end

  # Emits code pushing the precision. A given block is handled as in
  # push_width: yielded to first, and the boolean it leaves decides
  # whether one is subtracted from the precision at run time.
  def push_precision
    yield if block_given?
    if @prec_static
      raise ArgumentError, "precision too big" unless @prec_static.class == Fixnum
      @g.push @prec_static

    elsif @prec_index
      @g.push_local @prec_index
      coerce_top_to_fixnum "precision too big"

    else
      raise "push without a precision"

    end

    if block_given?
      @g.swap
      @b.if_true do
        @g.meta_push_1
        @b.meta_op_minus
      end
    end
  end

  # Emits code pushing the format string handed to Float's
  # to_s_formatted. With only static width/precision it is a single
  # literal; otherwise the pieces are pushed and joined with
  # string_append at run time.
  def push_format_string
    float_format_code = @format_code
    float_format_code = 'f' if @format_code == 'g' && @f_alt

    leader = "%#{@flags}"
    if !@width_index && !@prec_index
      leader << @width_static.to_s if @width_static
      leader << ".#{@prec_static}" if @prec_static
      @g.push_literal "#{leader}#{float_format_code}"
    else
      # Number of string_append instructions needed to join the pieces.
      format_parts = 1
      if @prec_static
        @g.push_literal ".#{@prec_static}#{float_format_code}"
      else
        @g.push_literal(float_format_code)
        if @prec_index
          push_precision
          @g.send :to_s, 0
          format_parts += 1

          if @width_index
            @g.push_literal "."
            format_parts += 1
          end
        end
      end

      if @width_static
        leader << @width_static.to_s
      elsif @width_index
        push_width
        @g.send :to_s, 0
        format_parts += 1
      end
      leader << "." if @prec_index && !@width_index

      @g.push_literal leader
      @g.string_dup
      format_parts.times do
        @g.string_append
      end
    end
  end

  # The sign character for non-negative numbers: '+', ' ' or ''.
  def positive_sign
    if @f_plus
      '+'
    elsif @f_space
      ' '
    else
      ''
    end
  end

  # Emits justification to the field width, if a width was given. Only
  # a run-time ('*') width is treated as possibly negative.
  def justify_width(adjust=true)
    if @has_width
      push_width adjust
      @b.justify @just_dir, @width_static.nil?
    end
  end

  def zero_pad?
    @has_precision || (@has_width && @f_zero)
  end

  # Emits an rjust with +pad+ to the precision or, failing that, to the
  # width when the '0' flag is set. The block is forwarded to
  # push_precision / push_width.
  def zero_pad(pad="0", &readjust)
    if @has_precision
      push_precision(&readjust)
      @g.push_literal pad
      @g.send :rjust, 2
    elsif @has_width && @f_zero
      push_width(true, &readjust)
      @g.push_literal pad
      @g.send :rjust, 2
    end
  end

  def width?
    @has_width
  end

  def precision?
    @has_precision
  end

  private

  # Emits the shared "coerce to Fixnum or raise" sequence for the value
  # on top of the stack: values that are not already a Fixnum go through
  # Integer(), and the emitted code raises ArgumentError with +message+
  # if the converted value is still not a Fixnum.
  def coerce_top_to_fixnum(message)
    @b.force_type :Fixnum, :Integer do
      # If we had to do a conversion, we check we ended up
      # with a Fixnum
      @g.dup
      @b.push_Fixnum
      @g.swap
      @g.kind_of

      @b.if_false do
        @b.raise_ArgumentError message
      end
    end
  end
end
| + | |
# Emits a push_const_fast for Kernel, creating its two literal slots on
# first use.
def push_Kernel
  @lit_Kernel = @g.add_literal(:Kernel) unless @lit_Kernel
  @slot_Kernel = @g.add_literal(nil) unless @slot_Kernel

  @g.push_const_fast(@lit_Kernel, @slot_Kernel)
end
| + | |
# Emits a push_const_fast for Fixnum, creating its two literal slots on
# first use.
def push_Fixnum
  @lit_Fixnum = @g.add_literal(:Fixnum) unless @lit_Fixnum
  @slot_Fixnum = @g.add_literal(nil) unless @slot_Fixnum

  @g.push_const_fast(@lit_Fixnum, @slot_Fixnum)
end
| + | |
# Emits code that builds ArgumentError.new(msg) and raises it.
def raise_ArgumentError(msg)
  @lit_ArgumentError = @g.add_literal(:ArgumentError) unless @lit_ArgumentError
  @slot_ArgumentError = @g.add_literal(nil) unless @slot_ArgumentError

  @lit_new = @g.add_literal(:new) unless @lit_new

  @g.push_const_fast(@lit_ArgumentError, @slot_ArgumentError)
  @g.push_unique_literal(msg)
  @g.send_stack(@lit_new, 1)
  @g.raise_exc
end
| + | |
# Emits code ensuring the value on top of the stack is a +klass+: when
# the kind_of test fails at run time, the value is passed to +method+
# (sent to self, e.g. Integer(value)). A given block is yielded to
# inside that conversion branch only, so it can emit extra checks on
# the converted value.
def force_type(klass, method=klass)
  @g.dup
  @g.push_const klass
  @g.swap
  @g.kind_of

  # Skip the conversion when the value already has the right type.
  already_typed = @g.new_label
  @g.git already_typed

  @g.push_self
  @g.swap
  @g.send method, 1, true

  yield if block_given?

  already_typed.set!
end
| + | |
# Emits a gif (goto-if-false) around whatever the block emits, so that
# code only runs when the top of the stack is true.
def if_true
  skip = @g.new_label
  @g.gif(skip)
  yield
  skip.set!
end
| + | |
# Emits a git (goto-if-true) around whatever the block emits, so that
# code only runs when the top of the stack is false.
def if_false
  skip = @g.new_label
  @g.git(skip)
  yield
  skip.set!
end
| + | |
# Walks @format with RE and emits the body of the compiled method: one
# append per literal run and per directive. Sets @arg_count and
# @index_mode as a side effect (via next_index).
#
# Raises ArgumentError for malformed directives.
def parse
  @arg_count = 0
  @index_mode = nil

  @pre_tainted = @format.tainted?

  pos = 0
  while match = RE.match_start(@format, pos)
    pos = match.end(0)

    _,
    plain_string,
    whole_format,
    flags_a,
    field_ref_a,
    flags_b,
    width_full, width_ref, width_static,
    prec_full, prec_ref, prec_static,
    field_ref_b,
    format_code,
    literal_char,
    invalid_format = *match

    if plain_string
      append_literal plain_string
    elsif literal_char
      append_literal literal_char
    elsif invalid_format || (field_ref_a && field_ref_b)
      raise ArgumentError, "malformed format string"
    else
      field_ref = field_ref_a || field_ref_b
      flags = "#{flags_a}#{flags_b}"

      zero = flags.index(?0)
      plus = flags.index(?+)
      space = flags.index(?\s)

      atom = Atom.new(self, @g, format_code, flags)
      atom.set_width width_full, width_ref, width_static
      atom.set_precision prec_full, prec_ref, prec_static
      atom.set_value field_ref

      case format_code
      when 's', 'p', 'c'
        atom.push_value

        # Unless the format itself is tainted, a copy of the value is
        # kept on the stack so append_str can test it with tainted?.
        case format_code
        when 's'
          unless @pre_tainted
            @g.dup
          end

          force_type :String

        when 'c'
          unless @pre_tainted
            @g.dup
          end

          force_type :Fixnum, :Integer

          chr_range_ok = @g.new_label

          # Values outside 0..255 are reduced modulo 256 before chr.
          @g.dup
          @g.push 256
          @g.meta_send_op_lt @g.find_literal(:<)
          if_true do
            @g.dup
            @g.meta_push_neg_1
            @g.meta_send_op_gt @g.find_literal(:>)
            @g.git chr_range_ok
          end

          @g.push 256
          @g.send :%, 1

          chr_range_ok.set!
          @g.send :chr, 0
        when 'p'
          @g.send :inspect, 0

          unless @pre_tainted
            @g.dup
          end
        end

        atom.justify_width

        if atom.precision?
          # Precision truncates: result[0, precision]
          @g.meta_push_0
          atom.push_precision
          @g.send :[], 2
        end

        append_str !@pre_tainted

      when 'e', 'E', 'f', 'g', 'G'

        atom.push_value
        force_type :Float

        format_done = @g.new_label

        @g.dup
        @g.send :finite?, 0

        if_true do
          atom.push_format_string
          @g.send :to_s_formatted, 1, true

          @g.goto format_done
        end

        formatted_non_finite = @g.new_label

        @g.dup
        @g.send :nan?, 0

        if_false do
          is_negative

          if_false do
            @g.push_literal "#{atom.positive_sign}Inf"
            @g.goto formatted_non_finite
          end
          @g.push_literal '-Inf'
          @g.goto formatted_non_finite
        end

        @g.pop
        @g.push_literal 'NaN'

        formatted_non_finite.set!
        atom.justify_width false

        format_done.set!

        append_str false

      when 'd', 'i', 'u', 'B', 'b', 'o', 'X', 'x'
        radix = RADIX[format_code.downcase]

        atom.push_value

        # Bignum is obviously also perfectly acceptable. But we
        # just address the most common case by avoiding the call
        # if we've been given a Fixnum. The call is enough
        # overhead to bother, but not something to panic about.
        force_type :Fixnum, :Integer

        # NOTE(review): a negative value with a precision but none of
        # the '+', ' ' or '0' flags (e.g. '%.3d') reaches zero_pad
        # below with its '-' still in the string, which looks like it
        # would pad to "0-5" rather than "-005" -- confirm with a spec.
        if plus || space || (zero && radix == 10 && format_code != 'u')
          @g.dup

          # stash away whether it's negative
          is_negative
          @g.dup
          @g.move_down 2

          if_true do
            # but treat it as positive for now
            invert
          end

          if radix == 10
            @g.send :to_s, 0
          else
            @g.push radix
            @g.send :to_s, 1
          end
        elsif radix == 10 && format_code != 'u'
          @g.send :to_s, 0
        else
          have_formatted = @g.new_label

          @g.dup
          is_negative

          if_false do
            if radix == 10
              @g.send :to_s, 0
            else
              @g.push radix
              @g.send :to_s, 1
            end
            @g.goto have_formatted
          end

          if format_code == 'u'
            # Now we need to find how many bits we need to
            # represent the number, starting with a native int,
            # then incrementing by 32 each round.

            more_bits_loop = @g.new_label
            got_enough_bits = @g.new_label

            # Push a positive version of the number ($N)
            @g.dup
            invert

            # Push the baseline ($B), starting from a native int:
            # 2**32 or 2**64, as appropriate
            @g.meta_push_1
            l_native = @g.find_literal(2.size * 8)
            @g.push_literal_at l_native
            @g.send :<<, 1

            # Switch to $N
            @g.swap
            # For the first time, because it's what we've used
            # above, we'll shift it by our native int size
            @g.push_literal_at l_native

            more_bits_loop.set!
            # Throw out the bits from $N that $B can offset
            @g.send :>>, 1

            # Check whether $N == 0
            @g.dup
            @g.meta_push_0
            @g.meta_send_op_equal @g.find_literal(:==)
            @g.git got_enough_bits

            # Switch to $B
            @g.swap
            l_32 = @g.find_literal(32)
            @g.push_literal_at l_32
            # Add 32 bits
            @g.send :<<, 1
            # Switch to $N
            @g.swap
            # We'll throw out 32 bits this time
            @g.push_literal_at l_32
            @g.goto more_bits_loop

            got_enough_bits.set!
            # Pop the spare copy of $N, which is 0
            @g.pop


            # Now we're left with $B; we can now use it, by adding
            # it to the (negative) number still on the stack from
            # earlier.

            # $B is a Bignum; no point using meta_send_op_plus.
            @g.send :+, 1
            @g.send :to_s, 0

            padding = "."

          else
            # (num + radix ** num.to_s(radix).size).to_s(radix)
            @g.push radix
            @g.dup_many 2
            @g.send :to_s, 1
            @g.send :size, 0
            @g.send :**, 1
            @g.meta_send_op_plus @g.find_literal(:+)
            @g.push radix
            @g.send :to_s, 1

            padding = (radix - 1).to_s(radix)
          end

          if atom.zero_pad?
            atom.zero_pad padding

          elsif !atom.precision? && !zero
            @g.push_literal ".."
            @g.string_dup
            @g.string_append
          end

          have_formatted.set!
        end

        # 'B' also returns an uppercase string, but there, the
        # only alpha character is in the prefix -- and that's
        # already uppercase
        if format_code == 'X'
          @g.send :upcase, 0
        end

        if !(plus || space) && (zero && radix == 10 && format_code != 'u')
          atom.zero_pad do
            # If it decides to do any padding, zero_pad will yield
            # before it modifies the stack, and we must ensure the
            # top of the stack is a boolean indicating whether to
            # subtract one from the requested width (for a minus
            # sign to be prepended below), followed by the string-
            # in-progress.

            @g.swap
            @g.dup
            @g.move_down 2
          end
        else
          atom.zero_pad
        end

        atom.prepend_prefix

        if plus || space
          append_sign = @g.new_label

          @g.swap
          if_true do
            @g.push_literal '-'

            @g.goto append_sign
          end

          @g.push_literal atom.positive_sign

          append_sign.set!
          @g.string_dup
          @g.string_append

        elsif zero && radix == 10 && format_code != 'u'

          @g.swap
          if_true do
            @g.push_literal '-'

            @g.string_dup
            @g.string_append
          end

        end


        if atom.precision? || !zero
          atom.justify_width false
        end

        append_str false

      else
        # RE only captures the format codes handled above, so this is
        # a safety net.
        raise ArgumentError, "bad format character: #{format_code}"
      end
    end
  end

  unless @has_content
    append_literal ''
  end

  if @index_mode != :absolute
    no_exception = @g.new_label

    # If we've used relative arguments, and $DEBUG is true, we
    # throw an exception if passed more arguments than we need.

    # Check this first; it's much faster, and generally false
    @g.passed_arg @arg_count
    @g.gif no_exception

    ::Rubinius::AST::GlobalVariableAccess.new(0, :$DEBUG).bytecode(@g)
    @g.gif no_exception

    raise_ArgumentError "too many arguments for format string"

    no_exception.set!
  end
end
| + end | |
| + end | |
| +end | |
| diff --git a/kernel/common/sprintf.rb b/kernel/common/sprintf.rb | |
| deleted file mode 100644 | |
| index a76a9a3..0000000 | |
| --- a/kernel/common/sprintf.rb | |
| +++ /dev/null | |
| @@ -1,372 +0,0 @@ | |
| -module Rubinius | |
| - class Sprintf | |
| - | |
| - attr_accessor :fmt | |
| - attr_accessor :args | |
| - attr_accessor :flags | |
| - | |
| - RADIXES = {"b" => 2, "o" => 8, "d" => 10, "x" => 16} | |
| - ALTERNATIVES = {"o" => "0", "b" => "0b", "B" => "0B", "x" => "0x", "X" => "0X"} | |
| - PrecisionMax = 1048576 # Totally random value | |
| - | |
| - def initialize(fmt, *args) | |
| - @tainted = fmt.tainted? | |
| - @fmt, @args, @arg_position = fmt.to_str, args, 0 | |
| - end | |
| - | |
| - def parse | |
| - start = 0 | |
| - ret = "" | |
| - width = nil | |
| - precision = nil | |
| - @positional = false | |
| - @relative = false | |
| - @arg_position = 0 | |
| - | |
| - while (match = /%/.match_from(fmt, start)) | |
| - | |
| - @flags = {:space => nil, :position => nil, :alternative => nil, :plus => nil, | |
| - :minus => nil, :zero => nil, :star => nil} | |
| - @width = @precision = @type = nil | |
| - | |
| - ret << match.pre_match_from(start) | |
| - start = match.begin(0) + 1 | |
| - | |
| - # Special case: %% prints out as "%" | |
| - if [?\n, 0].include?(@fmt[start]) | |
| - ret << "%" << @fmt[start] | |
| - start += 1 | |
| - next | |
| - elsif [?%, nil].include?(@fmt[start]) | |
| - ret << "%" | |
| - start += 1 | |
| - next | |
| - elsif @fmt[start, 3] =~ /[1-9]\$/ && !@fmt[start + 2] | |
| - ret << "%" | |
| - start = @fmt.size | |
| - break | |
| - end | |
| - | |
| - # FLAG STATE | |
| - while token = /\G( |[1-9]\$|#|\+|\-|0|\*)/.match_from(fmt, start) | |
| - case token[0] | |
| - # Special case: if we get two [1-9]\$, it means that we're outside of flag-land | |
| - when /[1-9]\$/ | |
| - raise ArgumentError, "value given twice - #{token[0]}" if flags[:position] | |
| - @flags[:position] = token[0][0].chr.to_i | |
| - start += 1 | |
| - when " " | |
| - @flags[:space] = true | |
| - when "#" | |
| - @flags[:alternative] = true | |
| - when "+" | |
| - @flags[:plus] = true | |
| - when "-" | |
| - @flags[:minus] = true | |
| - when "0" | |
| - @flags[:zero] = true | |
| - when "*" | |
| - raise ArgumentError, "width given twice" if flags[:star] | |
| - if width_dollar_match = /\G[1-9]\$/.match_from(fmt, start + 1) | |
| - @width = Slot.new('*' << width_dollar_match[0]) | |
| - start += 2 | |
| - end | |
| - @flags[:star] = true | |
| - end | |
| - start += 1 | |
| - end | |
| - | |
| - # WIDTH STATE | |
| - if !flags[:star] && width_match = /\G([1-9]\$|\*|\d+)/.match_from(fmt, start) | |
| - @width = Slot.new(width_match[0]) | |
| - start += width_match[0].size | |
| - end | |
| - | |
| - # PRECISION DETERMINATION STATE | |
| - if /\G\./.match_from(fmt, start) | |
| - start += 1 | |
| - # PRECISION STATE | |
| - if /\G\*/.match_from(fmt, start) | |
| - if precision_dollar_match = /\G[1-9]\$/.match_from(fmt, start + 1) | |
| - @precision = Slot.new('*' << precision_dollar_match[0]) | |
| - start += 3 | |
| - else | |
| - @precision = Slot.new('*') | |
| - start += 1 | |
| - end | |
| - elsif precision_match = /\G([1-9]\$|\d+)/.match_from(fmt, start) | |
| - @precision = Slot.new(precision_match[0]) | |
| - start += precision_match[0].size | |
| - else | |
| - @precision = Slot.new("0") | |
| - end | |
| - | |
| - # check for positional value again, after the optional '.' | |
| - if positional_match = /\G[1-9]\$/.match_from(fmt, start) | |
| - raise ArgumentError, "value given twice - #{token[0]}" if flags[:position] | |
| - @flags[:position] = positional_match[0][0].chr.to_i | |
| - start += 2 | |
| - end | |
| - end | |
| - | |
| - # TYPE STATE | |
| - unless type = /\G[bcdEefGgiopsuXx]/i.match_from(fmt, start) | |
| - raise ArgumentError, "malformed format string - missing field type" | |
| - else | |
| - @type = type[0] | |
| - start += 1 | |
| - end | |
| - | |
| - # Next: Use the parsed values to format some stuff :) | |
| - f = format | |
| - ret << f if f | |
| - end | |
| - if $DEBUG == true && !@positional | |
| - raise ArgumentError, "you need to use all the arguments" unless @arg_position == args.size | |
| - end | |
| - ret << @fmt[start..-1] if start < @fmt.size | |
| - ret.taint if @tainted | |
| - ret | |
| - end | |
| - | |
| - def format | |
| - # GET VALUE | |
| - if flags[:position] | |
| - val = Slot.new("#{flags[:position]}$") | |
| - val = get_arg(val) | |
| - end | |
| - | |
| - # GET WIDTH | |
| - @width = Slot.new("*") if flags[:star] && !@width | |
| - width = get_arg(@width) | |
| - width = width.to_int if width.respond_to?(:to_int) | |
| - if width && width < 0 | |
| - width = width.abs | |
| - flags[:minus] = true | |
| - end | |
| - | |
| - # GET PRECISION | |
| - precision = get_arg(@precision) | |
| - precision = precision.to_int if precision.respond_to?(:to_int) | |
| - | |
| - unless flags[:position] | |
| - val = Slot.new("*") | |
| - val = get_arg(val) | |
| - end | |
| - | |
| - case @type | |
| - when "e", "E", "f", "g", "G" | |
| - if @type.downcase == "g" && flags[:alternative] | |
| - @old_type = "g" | |
| - @type = "f" | |
| - precision = 4 unless precision | |
| - end | |
| - val = Float(val) | |
| - if val.finite? | |
| - ret = val.send(:to_s_formatted, build_format_string(width, precision)) | |
| - ret = plus_char + ret if val >= 0 && @old_type | |
| - else | |
| - ret = (val < 0 ? "-Inf" : "Inf") if val.infinite? | |
| - ret = "NaN" if val.nan? | |
| - ret = plus_char + ret if val > 0 | |
| - flags[:zero] = flags[:space] = flags[:plus] = nil | |
| - ret = pad(ret, width, precision) | |
| - end | |
| - when "u" | |
| - val = get_number(val) | |
| - if val < 0 | |
| - unless val.kind_of?(Fixnum) | |
| - raise ArgumentError, "invalid type (only Fixnum allowed)" | |
| - end | |
| - | |
| - plus_or_space = flags[:space] || flags[:plus] | |
| - unless plus_or_space | |
| - val = (1 << (2.size * 8)) + val | |
| - end | |
| - unless flags[:zero] or precision or plus_or_space | |
| - ret = "..#{pad(val, width, precision)}" | |
| - else | |
| - ret = pad(val, width, precision) | |
| - end | |
| - else | |
| - ret = pad(val, width, precision) | |
| - end | |
| - when "d", "i" | |
| - val = get_number(val) | |
| - ret = pad(val, width, precision) | |
| - when "c" | |
| - val = val.to_int if val.respond_to?(:to_int) | |
| - raise TypeError, "cannot convert #{val.class} into Integer" unless val.respond_to?(:chr) && val.respond_to?(:%) | |
| - val = (val % 256).chr | |
| - ret = pad(val, width, precision) | |
| - when "s" | |
| - flags[:zero] = flags[:space] = flags[:plus] = nil | |
| - ret = pad(val, width, precision) | |
| - ret.taint if val.tainted? | |
| - when "p" | |
| - flags[:zero] = flags[:space] = flags[:plus] = nil | |
| - ret = pad(val.inspect, width, precision) | |
| - when "o", "x", "X", "b", "B" | |
| - val = get_number(val) | |
| - unless flags[:space] || flags[:plus] | |
| - ret = Number.new(val, RADIXES[@type.downcase]).rep | |
| - chr = val < 0 ? (RADIXES[@type.downcase] - 1).to_s(RADIXES[@type.downcase]) : 0.to_s | |
| - ret = pad(ret, width, precision, chr) | |
| - ret = ALTERNATIVES[@type].to_s + ret if flags[:alternative] | |
| - else | |
| - flags[:plus] = nil if val < 0 | |
| - ret = val.to_s(RADIXES[@type.downcase]) | |
| - ret.gsub!(/^(\-?)/, "\1#{ALTERNATIVES[@type]}") if flags[:alternative] | |
| - ret = pad(ret, width, precision) | |
| - ret.gsub!(/ \-/, "-") | |
| - end | |
| - ret = ret.downcase if @type == "x" | |
| - ret = ret.upcase if @type == "X" | |
| - end | |
| - ret | |
| - end | |
| - | |
| - def get_number(val) | |
| - unless val.respond_to?(:full_to_i) | |
| - if val.respond_to?(:to_int) | |
| - val = val.to_int | |
| - elsif val.respond_to?(:to_i) | |
| - val = val.to_i | |
| - end | |
| - end | |
| - val = val.full_to_i if val.respond_to?(:full_to_i) | |
| - val = 0 if val.nil? | |
| - val | |
| - end | |
| - | |
| - def build_format_string(width, precision) | |
| - ret = "%#{make_flags}#{width}" | |
| - ret << ".#{precision}" if precision | |
| - ret << @type | |
| - ret | |
| - end | |
| - | |
| - def make_flags | |
| - ret = "" | |
| - ret << " " if flags[:space] | |
| - ret << "#" if flags[:alternative] | |
| - ret << "+" if flags[:plus] | |
| - ret << "-" if flags[:minus] | |
| - ret << "0" if flags[:zero] | |
| - ret | |
| - end | |
| - | |
| - def get_arg(slot) | |
| - return nil unless slot | |
| - | |
| - case | |
| - when slot.position == :next | |
| - raise ArgumentError, "unnumbered mixed with numbered" if @positional | |
| - @relative = true | |
| - raise ArgumentError, "you ran out of arguments" if @arg_position >= args.size | |
| - ret = args[@arg_position] | |
| - @arg_position += 1 | |
| - when slot.pos | |
| - raise ArgumentError, "unnumbered mixed with numbered" if @relative | |
| - @positional = true | |
| - ret = args[slot.position - 1] | |
| - when slot.value | |
| - @relative = true | |
| - ret = slot.value | |
| - else | |
| - raise ArgumentError, "argument position does not exist: #{slot.str}" | |
| - end | |
| - | |
| - ret | |
| - end | |
| - | |
| - def pad(val, width, precision, pad_override = nil) | |
| - direction = flags[:minus] ? :ljust : :rjust | |
| - ret = val.to_s | |
| - modded_width = width.to_i + (flags[:plus] ? 1 : 0) | |
| - width = nil if modded_width <= val.to_s.size | |
| - if ret[0] != ?- | |
| - sign = plus_char | |
| - else | |
| - sign = "" | |
| - end | |
| - | |
| - if precision || flags[:zero] | |
| - ret.gsub!("..", "") | |
| - end | |
| - if precision | |
| - if precision > PrecisionMax | |
| - raise ArgumentError, "precision too big" | |
| - end | |
| - ret = sign + ret.send(direction, precision, pad_override || "0") | |
| - flags[:zero] = flags[:plus] = flags[:space] = nil | |
| - end | |
| - if width | |
| - if pad_char != " " && ret[0] == ?- | |
| - ret.slice!(0) | |
| - sign = "-" | |
| - width -= 1 | |
| - ret = ret.rjust(width, pad_char) | |
| - else | |
| - ret = ret.send(direction, width, pad_char) | |
| - ret[0] = sign unless sign.empty? | |
| - return ret | |
| - end | |
| - end | |
| - sign + ret | |
| - end | |
| - | |
| - def pad_char | |
| - flags[:zero] ? "0" : " " | |
| - end | |
| - | |
| - def plus_char | |
| - return "+" if flags[:plus] | |
| - return " " if flags[:space] | |
| - "" | |
| - end | |
| - | |
| - class Slot | |
| - | |
| - # pos means it got a N$ position | |
| - attr_reader :pos | |
| - attr_reader :position | |
| - attr_reader :value | |
| - attr_reader :str | |
| - | |
| - def initialize(str) | |
| - @pos = false | |
| - @str = str | |
| - if str.size == 3 && /\*\d\$/.match(str) | |
| - @pos = true | |
| - @position = str[1..1].to_i | |
| - elsif str.size == 2 && str[1] == ?$ | |
| - @pos = true | |
| - @position = str[0..0].to_i | |
| - elsif str == "*" | |
| - @position = :next | |
| - else | |
| - @value = str.to_i | |
| - end | |
| - end | |
| - end | |
| - | |
| - class Number | |
| - | |
| - def initialize(number, radix) | |
| - @number = number | |
| - @radix = radix | |
| - @pad = (radix - 1).to_s(radix) | |
| - end | |
| - | |
| - def rep | |
| - return @number.to_s(@radix) if(@number >= 0) || @radix == 10 | |
| - strlen = (@number.to_s(@radix)).size | |
| - max = (@pad * strlen).to_i(@radix) | |
| - ".." + (max + @number + 1).to_s(@radix) | |
| - end | |
| - | |
| - end | |
| - | |
| - end | |
| -end | |
| diff --git a/kernel/common/string.rb b/kernel/common/string.rb | |
| index cce3a97..37941e5 100644 | |
| --- a/kernel/common/string.rb | |
| +++ b/kernel/common/string.rb | |
| @@ -58,9 +58,9 @@ class String | |
| # "%-5s: %08x" % [ "ID", self.id ] #=> "ID : 200e14d6" | |
| def %(args) | |
| if args.is_a? String # Fixes "%s" % "" | |
| - Rubinius::Sprintf.new(self, args).parse | |
| + ::Rubinius::Sprinter.get(self).call(args) | |
| else | |
| - Rubinius::Sprintf.new(self, *args).parse | |
| + ::Rubinius::Sprinter.get(self).call(*args) | |
| end | |
| end | |
| -- | |
| 1.7.2.3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment