Skip to content

Instantly share code, notes, and snippets.

@matthewd
Created November 28, 2010 15:17
Show Gist options
  • Select an option

  • Save matthewd/719008 to your computer and use it in GitHub Desktop.

Select an option

Save matthewd/719008 to your computer and use it in GitHub Desktop.
From 87bf16defc57148ba2014c7b523eda90b59affa1 Mon Sep 17 00:00:00 2001
From: Matthew Draper <matthew@trebex.net>
Date: Mon, 29 Nov 2010 01:30:11 +1030
Subject: [PATCH 1/2] Compile sprintf strings to rbx bytecode.
With appropriate caching, this gets our performance into the same
ballpark as MRI. With perfect caching (code choosing to explicitly
pre-compile an expression once, say), this outperforms MRI.
---
benchmark/rubinius/bm_sprintf.rb | 34 ++
kernel/common/capi.rb | 2 +-
kernel/common/io.rb | 2 +-
kernel/common/kernel.rb | 4 +-
kernel/common/load_order.txt | 2 +-
kernel/common/sprinter.rb | 851 ++++++++++++++++++++++++++++++++++++++
kernel/common/sprintf.rb | 372 -----------------
kernel/common/string.rb | 4 +-
8 files changed, 892 insertions(+), 379 deletions(-)
create mode 100644 benchmark/rubinius/bm_sprintf.rb
create mode 100644 kernel/common/sprinter.rb
delete mode 100644 kernel/common/sprintf.rb
diff --git a/benchmark/rubinius/bm_sprintf.rb b/benchmark/rubinius/bm_sprintf.rb
new file mode 100644
index 0000000..698be20
--- /dev/null
+++ b/benchmark/rubinius/bm_sprintf.rb
@@ -0,0 +1,34 @@
+require 'benchmark'
+
+total = (ENV['TOTAL'] || 1000000).to_i
+
+def int_alternative(n)
+ Integer(n).to_s
+end
+
+if (1.0).respond_to? :to_s_formatted
+ def str_alternative(s, n)
+ "Hello #{s}, from #{n.send :to_s_formatted, '%4.2f'}"
+ end
+else
+ def str_alternative(s, n)
+ # This isn't representative, but it's less interesting on MRI
+ "Hello #{s}, from #{'%4.2f' % n}"
+ end
+end
+
+Benchmark.bmbm do |x|
+ x.report("10.to_s") do
+ total.times { int_alternative(10) }
+ end
+ x.report("'%1$d'") do
+ total.times { sprintf('%1$d', 10) }
+ end
+
+ x.report('"Hello #{s}, from #{f}"') do
+ total.times { str_alternative('world', 123.4567) }
+ end
+ x.report("'Hello %s, from %4.2f'") do
+ total.times { sprintf('Hello %s, from %4.2f', 'world', 123.4567) }
+ end
+end
diff --git a/kernel/common/capi.rb b/kernel/common/capi.rb
index 0dd01a3..f396cbf 100644
--- a/kernel/common/capi.rb
+++ b/kernel/common/capi.rb
@@ -25,7 +25,7 @@ module Rubinius
end
def self.sprintf(format, args)
- Rubinius::Sprintf.new(format, *args).parse
+ ::Rubinius::Sprinter.get(format).call(*args)
end
end
end
diff --git a/kernel/common/io.rb b/kernel/common/io.rb
index 69ec554..48f6d92 100644
--- a/kernel/common/io.rb
+++ b/kernel/common/io.rb
@@ -1097,7 +1097,7 @@ class IO
# Formats and writes to ios, converting parameters under
# control of the format string. See Kernel#sprintf for details.
def printf(fmt, *args)
- write Rubinius::Sprintf.new(fmt, *args).parse
+ write ::Rubinius::Sprinter.get(fmt).call(*args)
end
##
diff --git a/kernel/common/kernel.rb b/kernel/common/kernel.rb
index be03630..3d366a1 100644
--- a/kernel/common/kernel.rb
+++ b/kernel/common/kernel.rb
@@ -166,7 +166,7 @@ module Kernel
if target.kind_of? IO
target.printf(*args)
elsif target.kind_of? String
- $stdout << Rubinius::Sprintf.new(target, *args).parse
+ $stdout << Rubinius::Sprinter.get(target).call(*args)
else
raise TypeError, "The first arg to printf should be an IO or a String"
end
@@ -175,7 +175,7 @@ module Kernel
module_function :printf
def sprintf(str, *args)
- Rubinius::Sprintf.new(str, *args).parse
+ ::Rubinius::Sprinter.get(str).call(*args)
end
alias_method :format, :sprintf
module_function :sprintf
diff --git a/kernel/common/load_order.txt b/kernel/common/load_order.txt
index e18a887..b10aaea 100644
--- a/kernel/common/load_order.txt
+++ b/kernel/common/load_order.txt
@@ -67,7 +67,7 @@ process.rbc
random.rbc
regexp.rbc
signal.rbc
-sprintf.rbc
+sprinter.rbc
symbol.rbc
taskprobe.rbc
thread.rbc
diff --git a/kernel/common/sprinter.rb b/kernel/common/sprinter.rb
new file mode 100644
index 0000000..8a135db
--- /dev/null
+++ b/kernel/common/sprinter.rb
@@ -0,0 +1,851 @@
+module Rubinius
+ class Sprinter
+ class << self
+ def get(format)
+ @cache ||= {}
+
+ # An alternative to the below would be to have the Builder
+ # ignore the taint of the format, and instead have sprintf check
+ # the format upon invocation, and taint the result there.
+ if format.tainted?
+ new(format)
+ else
+ @cache[format] ||
+ (@cache[format] = new(format))
+ end
+ end
+ end
+
+ def initialize(format)
+ Builder.new(self, format).build
+ end
+
+ class Builder
+ def initialize(code, format)
+ @code, @format = code, format
+
+ # Change next line to 'true' for debug output. Can't really use
+ # $DEBUG, because of its special meaning to sprintf.
+ if @verbose = false
+ @@seen ||= {}
+ if @@seen[format]
+ @verbose = false
+ else
+ @@seen[format] = true
+ end
+ end
+
+ @g = ::Rubinius::Generator.new
+ @g.name = :call
+ @g.file = :sprintf
+ @g.set_line 1
+ end
+
+ def build
+ self.parse
+
+ @g.required_args = @arg_count
+
+ @g.total_args = @arg_count + 1
+ # We won't use it, but we accept a splat; our semantics require
+ # that we ignore any excess arguments provided.
+ @g.splat_index = @arg_count
+
+ @g.local_count = @arg_count + 1
+ if @index_mode == :absolute
+ @g.local_names = (0...@arg_count).map {|i| :"#{i + 1}$" } + [:splat]
+ else
+ @g.local_names = (0...@arg_count).map {|i| nil } + [:splat]
+ end
+
+ @g.ret
+ @g.close
+
+ @g.encode
+ cm = @g.package ::Rubinius::CompiledMethod
+
+ # Careful with this: CM::Instruction#to_s uses String#%
+ if @verbose
+ puts
+ puts @format.inspect
+ puts cm.decode
+ puts
+ end
+
+ ss = ::Rubinius::StaticScope.new Object
+ ::Rubinius.attach_method @g.name, cm, ss, @code
+ end
+
+ def meta_op_minus
+ @op_minus ||= @g.find_literal(:-)
+ @g.meta_send_op_minus @op_minus
+ end
+
+ def invert
+ @g.meta_push_0
+ @g.swap
+ meta_op_minus
+ end
+
+ def is_negative
+ @g.meta_push_0
+ @g.meta_send_op_lt @g.find_literal(:<)
+ end
+
+ def justify(direction, may_be_negative=true)
+ if may_be_negative && direction != :ljust
+ width_done = @g.new_label
+
+ @g.dup
+ is_negative
+ if_false do
+ @g.send direction, 1
+ @g.goto width_done
+ end
+
+ invert
+ @g.send :ljust, 1
+
+ width_done.set!
+ else
+ @g.send direction, 1
+ end
+ end
+
+ RADIX = { 'd' => 10, 'i' => 10, 'u' => 10, 'x' => 16, 'o' => 8, 'b' => 2 }
+ PREFIX = { 'o' => '0', 'x' => '0x', 'X' => '0X', 'b' => '0b', 'B' => '0B' }
+
+ def next_index(specified=nil)
+ if specified
+ specified = specified.to_i
+ raise ArgumentError, "invalid positional index" if specified == 0
+ raise ArgumentError, "unnumbered mixed with numbered" if @index_mode == :relative
+ @index_mode = :absolute
+ @arg_count = specified if specified > @arg_count
+ specified - 1
+ else
+ raise ArgumentError, "unnumbered mixed with numbered" if @index_mode == :absolute
+ @index_mode = :relative
+ (@arg_count += 1) - 1
+ end
+ end
+
+ RE = /
+ ([^%]+|%(?:[\n\0]|\z)) # 1
+ |
+ %
+ ( # 2
+ ([0# +-]*) # 3
+ (?:([0-9]+)\$)? # 4
+ ([0# +-]*) # 5
+ (?:
+ (\*(?:([0-9]+)\$)?|([1-9][0-9]*))? # 6 7 8
+ (?:\.(\*(?:([0-9]+)\$)?|([1-9][0-9]*))?)? # 9 10 11
+ )
+ (?:([0-9]+)\$)? # 12
+ ([BbcdEefGgiopsuXx]) # 13
+ )
+ |
+ (%)(?:%|[-+0-9# *.$]+\$[0-9.]*\z) # 14
+ |
+ (%) # 15
+ /x
+
+ def append_literal(str)
+ @g.push_unique_literal str
+ append_str false
+ end
+
+ def append_str taint
+ if @has_content
+ if taint
+ @g.swap
+ @g.move_down 2
+ end
+
+ @g.swap
+ @g.string_append
+ else
+ @g.string_dup
+ @has_content = true
+
+ if @pre_tainted
+ @g.send :taint, 0
+ end
+ end
+
+ if taint
+ @g.swap
+ tainting_done = @g.new_label
+ @g.send :tainted?, 0
+ if_true do
+ @g.send :taint, 0
+ end
+ end
+ end
+
+ class Atom
+ def initialize(b, g, format_code, flags)
+ @b, @g = b, g
+ @format_code, @flags = format_code, flags
+
+ @f_alt = flags.index(?#)
+ @f_zero = flags.index(?0)
+ @f_plus = flags.index(?+)
+ @f_ljust = flags.index(?-)
+ @f_space = flags.index(?\ )
+
+ @just_dir = @f_ljust ? :ljust : :rjust
+
+ @prefix = PREFIX[@format_code] if @f_alt
+
+ @full_leader_size = @prefix ? @prefix.size : 0
+ @full_leader_size += 1 if @f_plus || @f_space
+
+ end
+ def prepend_prefix
+ if @prefix
+ @g.push_literal @prefix
+ @g.string_dup
+ @g.string_append
+ end
+ end
+
+ def set_value(ref)
+ @field_index = @b.next_index(ref)
+ end
+ def set_width(full, ref, static)
+ @width_static = static && static.to_i
+ if full && !static
+ @width_index = @b.next_index(ref)
+ end
+
+ @has_width = @width_static || @width_index
+ end
+ def set_precision(full, ref, static)
+ @prec_static = static && static.to_i
+ if full && !static
+ @prec_index = @b.next_index(ref)
+ end
+
+ if @format_code == 'g' && @f_alt && !full
+ @prec_static = 4
+ end
+
+ @has_precision = @prec_static || @prec_index
+ end
+
+ def push_value
+ @g.push_local @field_index
+ end
+
+ def push_width(adjust=true)
+ yield if block_given?
+ if @width_static
+ raise ArgumentError, "width too big" unless @width_static.class == Fixnum
+ if adjust && @full_leader_size > 0
+ @g.push(@width_static - @full_leader_size)
+ else
+ @g.push @width_static
+ end
+
+ if block_given?
+ @g.swap
+ @b.if_true do
+ @g.meta_push_1
+ @b.meta_op_minus
+ end
+ end
+
+ elsif @width_index
+ @g.push_local @width_index
+
+ @b.force_type :Fixnum, :Integer do
+ # If we had to do a conversion, we check we ended up
+ # with a Fixnum
+ @g.dup
+ @b.push_Fixnum
+ @g.swap
+ @g.kind_of
+
+ @b.if_false do
+ @b.raise_ArgumentError "width too big"
+ end
+ end
+
+ n = adjust ? @full_leader_size : 0
+ if block_given?
+ adjusted = @g.new_label
+
+ @g.swap
+ @b.if_true do
+ @g.push n + 1
+ @b.meta_op_minus
+ if n > 0
+ @g.goto adjusted
+ end
+ end
+
+ if n > 0
+ @g.push n
+ @b.meta_op_minus
+ adjusted.set!
+ end
+
+ elsif n > 0
+ @g.push n
+ @b.meta_op_minus
+ end
+
+ else
+ raise "push without a width"
+
+ end
+ end
+
+ def push_precision
+ yield if block_given?
+ if @prec_static
+ raise ArgumentError, "precision too big" unless @prec_static.class == Fixnum
+ @g.push @prec_static
+
+ elsif @prec_index
+ @g.push_local @prec_index
+
+ @b.force_type :Fixnum, :Integer do
+ # If we had to do a conversion, we check we ended up
+ # with a Fixnum
+ @g.dup
+ @b.push_Fixnum
+ @g.swap
+ @g.kind_of
+
+ @b.if_false do
+ @b.raise_ArgumentError "precision too big"
+ end
+ end
+
+ else
+ raise "push without a precision"
+
+ end
+
+ if block_given?
+ @g.swap
+ @b.if_true do
+ @g.meta_push_1
+ @b.meta_op_minus
+ end
+ end
+ end
+
+ def push_format_string
+ float_format_code = @format_code
+ float_format_code = 'f' if @format_code == 'g' && @f_alt
+
+ leader = "%#{@flags}"
+ if !@width_index && !@prec_index
+ leader << @width_static.to_s if @width_static
+ leader << ".#{@prec_static}" if @prec_static
+ @g.push_literal "#{leader}#{float_format_code}"
+ else
+ format_parts = 1
+ if @prec_static
+ @g.push_literal ".#{@prec_static}#{float_format_code}"
+ else
+ @g.push_literal(float_format_code)
+ if @prec_index
+ push_precision
+ @g.send :to_s, 0
+ format_parts += 1
+
+ if @width_index
+ @g.push_literal "."
+ format_parts += 1
+ end
+ end
+ end
+
+ if @width_static
+ leader << @width_static.to_s
+ elsif @width_index
+ push_width
+ @g.send :to_s, 0
+ format_parts += 1
+ end
+ leader << "." if @prec_index && !@width_index
+
+ @g.push_literal leader
+ @g.string_dup
+ format_parts.times do
+ @g.string_append
+ end
+ end
+ end
+
+ def positive_sign
+ if @f_plus
+ '+'
+ elsif @f_space
+ ' '
+ else
+ ''
+ end
+ end
+
+ def justify_width(adjust=true)
+ if @has_width
+ push_width adjust
+ @b.justify @just_dir, @width_static.nil?
+ end
+ end
+
+ def zero_pad?
+ @has_precision || (@has_width && @f_zero)
+ end
+ def zero_pad(pad="0", &readjust)
+ if @has_precision
+ push_precision &readjust
+ @g.push_literal pad
+ @g.send :rjust, 2
+ elsif @has_width && @f_zero
+ push_width true, &readjust
+ @g.push_literal pad
+ @g.send :rjust, 2
+ end
+ end
+
+ def width?
+ @has_width
+ end
+ def precision?
+ @has_precision
+ end
+ end
+
+ def push_Kernel
+ @lit_Kernel ||= @g.add_literal(:Kernel)
+ @slot_Kernel ||= @g.add_literal(nil)
+
+ @g.push_const_fast @lit_Kernel, @slot_Kernel
+ end
+
+ def push_Fixnum
+ @lit_Fixnum ||= @g.add_literal(:Fixnum)
+ @slot_Fixnum ||= @g.add_literal(nil)
+
+ @g.push_const_fast @lit_Fixnum, @slot_Fixnum
+ end
+
+ def raise_ArgumentError(msg)
+ @lit_ArgumentError ||= @g.add_literal(:ArgumentError)
+ @slot_ArgumentError ||= @g.add_literal(nil)
+
+ @lit_new ||= @g.add_literal(:new)
+
+ @g.push_const_fast @lit_ArgumentError, @slot_ArgumentError
+ @g.push_unique_literal msg
+ @g.send_stack @lit_new, 1
+ @g.raise_exc
+ end
+
+ def force_type(klass, method=klass)
+ @g.dup
+ @g.push_const klass
+ @g.swap
+ @g.kind_of
+ if_false do
+ @g.push_self
+ @g.swap
+ @g.send method, 1, true
+
+ yield if block_given?
+ end
+ end
+
+ def if_true
+ l = @g.new_label
+ @g.gif l
+ yield
+ l.set!
+ end
+
+ def if_false
+ l = @g.new_label
+ @g.git l
+ yield
+ l.set!
+ end
+
+ def parse
+ @arg_count = 0
+ @index_mode = nil
+
+ @pre_tainted = @format.tainted?
+
+ bignum_width = bignum_precision = nil
+
+ pos = 0
+ while match = RE.match_start(@format, pos)
+ pos = match.end(0)
+
+ _,
+ plain_string,
+ whole_format,
+ flags_a,
+ field_ref_a,
+ flags_b,
+ width_full, width_ref, width_static,
+ prec_full, prec_ref, prec_static,
+ field_ref_b,
+ format_code,
+ literal_char,
+ invalid_format = *match
+
+ if plain_string
+ append_literal plain_string
+ elsif literal_char
+ append_literal literal_char
+ elsif invalid_format || (field_ref_a && field_ref_b)
+ raise ArgumentError, "malformed format string"
+ else
+ field_ref = field_ref_a || field_ref_b
+ flags = "#{flags_a}#{flags_b}"
+
+ alt = flags.index(?#)
+ zero = flags.index(?0)
+ plus = flags.index(?+)
+ ljust = flags.index(?-)
+ space = flags.index(?\ )
+
+
+ atom = Atom.new(self, @g, format_code, flags)
+ atom.set_width width_full, width_ref, width_static
+ atom.set_precision prec_full, prec_ref, prec_static
+ atom.set_value field_ref
+
+ case format_code
+ when 's', 'p', 'c'
+ atom.push_value
+
+ case format_code
+ when 's'
+ unless @pre_tainted
+ @g.dup
+ end
+
+ force_type :String
+
+ when 'c'
+ unless @pre_tainted
+ @g.dup
+ end
+
+ force_type :Fixnum, :Integer
+
+ chr_range_ok = @g.new_label
+
+ @g.dup
+ @g.push 256
+ @g.meta_send_op_lt @g.find_literal(:<)
+ if_true do
+ @g.dup
+ @g.meta_push_neg_1
+ @g.meta_send_op_gt @g.find_literal(:>)
+ @g.git chr_range_ok
+ end
+
+ @g.push 256
+ @g.send :%, 1
+
+ chr_range_ok.set!
+ @g.send :chr, 0
+ when 'p'
+ @g.send :inspect, 0
+
+ unless @pre_tainted
+ @g.dup
+ end
+ end
+
+ atom.justify_width
+
+ if atom.precision?
+ @g.meta_push_0
+ atom.push_precision
+ @g.send :[], 2
+ end
+
+ append_str !@pre_tainted
+
+ when 'e', 'E', 'f', 'g', 'G'
+
+ atom.push_value
+ force_type :Float
+
+ format_done = @g.new_label
+
+ @g.dup
+ @g.send :finite?, 0
+
+ if_true do
+ atom.push_format_string
+ @g.send :to_s_formatted, 1, true
+
+ @g.goto format_done
+ end
+
+ formatted_non_finite = @g.new_label
+
+ @g.dup
+ @g.send :nan?, 0
+
+ if_false do
+ is_negative
+
+ if_false do
+ @g.push_literal "#{atom.positive_sign}Inf"
+ @g.goto formatted_non_finite
+ end
+ @g.push_literal '-Inf'
+ @g.goto formatted_non_finite
+ end
+
+ @g.pop
+ @g.push_literal 'NaN'
+
+ formatted_non_finite.set!
+ atom.justify_width false
+
+ format_done.set!
+
+ append_str false
+
+ when 'd', 'i', 'u', 'B', 'b', 'o', 'X', 'x'
+ radix = RADIX[format_code.downcase]
+
+ atom.push_value
+
+ # Bignum is obviously also perfectly acceptable. But we
+ # just address the most common case by avoiding the call
+ # if we've been given a Fixnum. The call is enough
+ # overhead to bother, but not something to panic about.
+ force_type :Fixnum, :Integer
+
+ if plus || space || (zero && radix == 10 && format_code != 'u')
+ @g.dup
+
+ # stash away whether it's negative
+ is_negative
+ @g.dup
+ @g.move_down 2
+
+ if_true do
+ # but treat it as positive for now
+ invert
+ end
+
+ if radix == 10
+ @g.send :to_s, 0
+ else
+ @g.push radix
+ @g.send :to_s, 1
+ end
+ elsif radix == 10 && format_code != 'u'
+ @g.send :to_s, 0
+ else
+ have_formatted = @g.new_label
+
+ @g.dup
+ is_negative
+
+ if_false do
+ if radix == 10
+ @g.send :to_s, 0
+ else
+ @g.push radix
+ @g.send :to_s, 1
+ end
+ @g.goto have_formatted
+ end
+
+ if format_code == 'u'
+ # Now we need to find how many bits we need to
+ # represent the number, starting with a native int,
+ # then incrementing by 32 each round.
+
+ more_bits_loop = @g.new_label
+ got_enough_bits = @g.new_label
+
+ # Push a positive version of the number ($N)
+ @g.dup
+ invert
+
+ # Push the baseline ($B), starting from a native int:
+ # 2**32 or 2**64, as appropriate
+ @g.meta_push_1
+ l_native = @g.find_literal(2.size * 8)
+ @g.push_literal_at l_native
+ @g.send :<<, 1
+
+ # Switch to $N
+ @g.swap
+ # For the first time, because it's what we've used
+ # above, we'll shift it by our native int size
+ @g.push_literal_at l_native
+
+ more_bits_loop.set!
+ # Throw out the bits from $N that $B can offset
+ @g.send :>>, 1
+
+ # Check whether $N == 0
+ @g.dup
+ @g.meta_push_0
+ @g.meta_send_op_equal @g.find_literal(:==)
+ @g.git got_enough_bits
+
+ # Switch to $B
+ @g.swap
+ l_32 = @g.find_literal(32)
+ @g.push_literal_at l_32
+ # Add 32 bits
+ @g.send :<<, 1
+ # Switch to $N
+ @g.swap
+ # We'll throw out 32 bits this time
+ @g.push_literal_at l_32
+ @g.goto more_bits_loop
+
+ got_enough_bits.set!
+ # Pop the spare copy of $N, which is 0
+ @g.pop
+
+
+ # Now we're left with $B; we can now use it, by adding
+ # it to the (negative) number still on the stack from
+ # earlier.
+
+ # $B is a Bignum; no point using meta_send_op_plus.
+ @g.send :+, 1
+ @g.send :to_s, 0
+
+ padding = "."
+
+ else
+ # (num + radix ** num.to_s(radix).size).to_s(radix)
+ @g.push radix
+ @g.dup_many 2
+ @g.send :to_s, 1
+ @g.send :size, 0
+ @g.send :**, 1
+ @g.meta_send_op_plus @g.find_literal(:+)
+ @g.push radix
+ @g.send :to_s, 1
+
+ padding = (radix - 1).to_s(radix)
+ end
+
+ if atom.zero_pad?
+ atom.zero_pad padding
+
+ elsif !atom.precision? && !zero
+ @g.push_literal ".."
+ @g.string_dup
+ @g.string_append
+ end
+
+ have_formatted.set!
+ end
+
+ # 'B' also returns an uppercase string, but there, the
+ # only alpha character is in the prefix -- and that's
+ # already uppercase
+ if format_code == 'X'
+ @g.send :upcase, 0
+ end
+
+ if !(plus || space) && (zero && radix == 10 && format_code != 'u')
+ atom.zero_pad do
+ # If it decides to do any padding, zero_pad will yield
+ # before it modifies the stack, and we must ensure the
+ # top of the stack is a boolean indicating whether to
+ # subtract one from the requested width (for a minus
+ # sign to be prepended below), followed by the string-
+ # in-progress.
+
+ @g.swap
+ @g.dup
+ @g.move_down 2
+ end
+ else
+ atom.zero_pad
+ end
+
+ atom.prepend_prefix
+
+ if plus || space
+ append_sign = @g.new_label
+
+ @g.swap
+ if_true do
+ @g.push_literal '-'
+
+ @g.goto append_sign
+ end
+
+ @g.push_literal atom.positive_sign
+
+ append_sign.set!
+ @g.string_dup
+ @g.string_append
+
+ elsif zero && radix == 10 && format_code != 'u'
+
+ @g.swap
+ if_true do
+ @g.push_literal '-'
+
+ @g.string_dup
+ @g.string_append
+ end
+
+ end
+
+
+ if atom.precision? || !zero
+ atom.justify_width false
+ end
+
+ append_str false
+
+ else
+ raise ArgumentError, "bad format character: #{format_code}"
+ end
+ end
+ end
+
+ unless @has_content
+ append_literal ''
+ end
+
+ if @index_mode != :absolute
+ no_exception = @g.new_label
+
+ # If we've used relative arguments, and $DEBUG is true, we
+ # throw an exception if passed more arguments than we need.
+
+ # Check this first; it's much faster, and generally false
+ @g.passed_arg @arg_count
+ @g.gif no_exception
+
+ ::Rubinius::AST::GlobalVariableAccess.new(0, :$DEBUG).bytecode(@g)
+ @g.gif no_exception
+
+ raise_ArgumentError "too many arguments for format string"
+
+ no_exception.set!
+ end
+ end
+ end
+ end
+end
diff --git a/kernel/common/sprintf.rb b/kernel/common/sprintf.rb
deleted file mode 100644
index a76a9a3..0000000
--- a/kernel/common/sprintf.rb
+++ /dev/null
@@ -1,372 +0,0 @@
-module Rubinius
- class Sprintf
-
- attr_accessor :fmt
- attr_accessor :args
- attr_accessor :flags
-
- RADIXES = {"b" => 2, "o" => 8, "d" => 10, "x" => 16}
- ALTERNATIVES = {"o" => "0", "b" => "0b", "B" => "0B", "x" => "0x", "X" => "0X"}
- PrecisionMax = 1048576 # Totally random value
-
- def initialize(fmt, *args)
- @tainted = fmt.tainted?
- @fmt, @args, @arg_position = fmt.to_str, args, 0
- end
-
- def parse
- start = 0
- ret = ""
- width = nil
- precision = nil
- @positional = false
- @relative = false
- @arg_position = 0
-
- while (match = /%/.match_from(fmt, start))
-
- @flags = {:space => nil, :position => nil, :alternative => nil, :plus => nil,
- :minus => nil, :zero => nil, :star => nil}
- @width = @precision = @type = nil
-
- ret << match.pre_match_from(start)
- start = match.begin(0) + 1
-
- # Special case: %% prints out as "%"
- if [?\n, 0].include?(@fmt[start])
- ret << "%" << @fmt[start]
- start += 1
- next
- elsif [?%, nil].include?(@fmt[start])
- ret << "%"
- start += 1
- next
- elsif @fmt[start, 3] =~ /[1-9]\$/ && !@fmt[start + 2]
- ret << "%"
- start = @fmt.size
- break
- end
-
- # FLAG STATE
- while token = /\G( |[1-9]\$|#|\+|\-|0|\*)/.match_from(fmt, start)
- case token[0]
- # Special case: if we get two [1-9]\$, it means that we're outside of flag-land
- when /[1-9]\$/
- raise ArgumentError, "value given twice - #{token[0]}" if flags[:position]
- @flags[:position] = token[0][0].chr.to_i
- start += 1
- when " "
- @flags[:space] = true
- when "#"
- @flags[:alternative] = true
- when "+"
- @flags[:plus] = true
- when "-"
- @flags[:minus] = true
- when "0"
- @flags[:zero] = true
- when "*"
- raise ArgumentError, "width given twice" if flags[:star]
- if width_dollar_match = /\G[1-9]\$/.match_from(fmt, start + 1)
- @width = Slot.new('*' << width_dollar_match[0])
- start += 2
- end
- @flags[:star] = true
- end
- start += 1
- end
-
- # WIDTH STATE
- if !flags[:star] && width_match = /\G([1-9]\$|\*|\d+)/.match_from(fmt, start)
- @width = Slot.new(width_match[0])
- start += width_match[0].size
- end
-
- # PRECISION DETERMINATION STATE
- if /\G\./.match_from(fmt, start)
- start += 1
- # PRECISION STATE
- if /\G\*/.match_from(fmt, start)
- if precision_dollar_match = /\G[1-9]\$/.match_from(fmt, start + 1)
- @precision = Slot.new('*' << precision_dollar_match[0])
- start += 3
- else
- @precision = Slot.new('*')
- start += 1
- end
- elsif precision_match = /\G([1-9]\$|\d+)/.match_from(fmt, start)
- @precision = Slot.new(precision_match[0])
- start += precision_match[0].size
- else
- @precision = Slot.new("0")
- end
-
- # check for positional value again, after the optional '.'
- if positional_match = /\G[1-9]\$/.match_from(fmt, start)
- raise ArgumentError, "value given twice - #{token[0]}" if flags[:position]
- @flags[:position] = positional_match[0][0].chr.to_i
- start += 2
- end
- end
-
- # TYPE STATE
- unless type = /\G[bcdEefGgiopsuXx]/i.match_from(fmt, start)
- raise ArgumentError, "malformed format string - missing field type"
- else
- @type = type[0]
- start += 1
- end
-
- # Next: Use the parsed values to format some stuff :)
- f = format
- ret << f if f
- end
- if $DEBUG == true && !@positional
- raise ArgumentError, "you need to use all the arguments" unless @arg_position == args.size
- end
- ret << @fmt[start..-1] if start < @fmt.size
- ret.taint if @tainted
- ret
- end
-
- def format
- # GET VALUE
- if flags[:position]
- val = Slot.new("#{flags[:position]}$")
- val = get_arg(val)
- end
-
- # GET WIDTH
- @width = Slot.new("*") if flags[:star] && !@width
- width = get_arg(@width)
- width = width.to_int if width.respond_to?(:to_int)
- if width && width < 0
- width = width.abs
- flags[:minus] = true
- end
-
- # GET PRECISION
- precision = get_arg(@precision)
- precision = precision.to_int if precision.respond_to?(:to_int)
-
- unless flags[:position]
- val = Slot.new("*")
- val = get_arg(val)
- end
-
- case @type
- when "e", "E", "f", "g", "G"
- if @type.downcase == "g" && flags[:alternative]
- @old_type = "g"
- @type = "f"
- precision = 4 unless precision
- end
- val = Float(val)
- if val.finite?
- ret = val.send(:to_s_formatted, build_format_string(width, precision))
- ret = plus_char + ret if val >= 0 && @old_type
- else
- ret = (val < 0 ? "-Inf" : "Inf") if val.infinite?
- ret = "NaN" if val.nan?
- ret = plus_char + ret if val > 0
- flags[:zero] = flags[:space] = flags[:plus] = nil
- ret = pad(ret, width, precision)
- end
- when "u"
- val = get_number(val)
- if val < 0
- unless val.kind_of?(Fixnum)
- raise ArgumentError, "invalid type (only Fixnum allowed)"
- end
-
- plus_or_space = flags[:space] || flags[:plus]
- unless plus_or_space
- val = (1 << (2.size * 8)) + val
- end
- unless flags[:zero] or precision or plus_or_space
- ret = "..#{pad(val, width, precision)}"
- else
- ret = pad(val, width, precision)
- end
- else
- ret = pad(val, width, precision)
- end
- when "d", "i"
- val = get_number(val)
- ret = pad(val, width, precision)
- when "c"
- val = val.to_int if val.respond_to?(:to_int)
- raise TypeError, "cannot convert #{val.class} into Integer" unless val.respond_to?(:chr) && val.respond_to?(:%)
- val = (val % 256).chr
- ret = pad(val, width, precision)
- when "s"
- flags[:zero] = flags[:space] = flags[:plus] = nil
- ret = pad(val, width, precision)
- ret.taint if val.tainted?
- when "p"
- flags[:zero] = flags[:space] = flags[:plus] = nil
- ret = pad(val.inspect, width, precision)
- when "o", "x", "X", "b", "B"
- val = get_number(val)
- unless flags[:space] || flags[:plus]
- ret = Number.new(val, RADIXES[@type.downcase]).rep
- chr = val < 0 ? (RADIXES[@type.downcase] - 1).to_s(RADIXES[@type.downcase]) : 0.to_s
- ret = pad(ret, width, precision, chr)
- ret = ALTERNATIVES[@type].to_s + ret if flags[:alternative]
- else
- flags[:plus] = nil if val < 0
- ret = val.to_s(RADIXES[@type.downcase])
- ret.gsub!(/^(\-?)/, "\1#{ALTERNATIVES[@type]}") if flags[:alternative]
- ret = pad(ret, width, precision)
- ret.gsub!(/ \-/, "-")
- end
- ret = ret.downcase if @type == "x"
- ret = ret.upcase if @type == "X"
- end
- ret
- end
-
- def get_number(val)
- unless val.respond_to?(:full_to_i)
- if val.respond_to?(:to_int)
- val = val.to_int
- elsif val.respond_to?(:to_i)
- val = val.to_i
- end
- end
- val = val.full_to_i if val.respond_to?(:full_to_i)
- val = 0 if val.nil?
- val
- end
-
- def build_format_string(width, precision)
- ret = "%#{make_flags}#{width}"
- ret << ".#{precision}" if precision
- ret << @type
- ret
- end
-
- def make_flags
- ret = ""
- ret << " " if flags[:space]
- ret << "#" if flags[:alternative]
- ret << "+" if flags[:plus]
- ret << "-" if flags[:minus]
- ret << "0" if flags[:zero]
- ret
- end
-
- def get_arg(slot)
- return nil unless slot
-
- case
- when slot.position == :next
- raise ArgumentError, "unnumbered mixed with numbered" if @positional
- @relative = true
- raise ArgumentError, "you ran out of arguments" if @arg_position >= args.size
- ret = args[@arg_position]
- @arg_position += 1
- when slot.pos
- raise ArgumentError, "unnumbered mixed with numbered" if @relative
- @positional = true
- ret = args[slot.position - 1]
- when slot.value
- @relative = true
- ret = slot.value
- else
- raise ArgumentError, "argument position does not exist: #{slot.str}"
- end
-
- ret
- end
-
- def pad(val, width, precision, pad_override = nil)
- direction = flags[:minus] ? :ljust : :rjust
- ret = val.to_s
- modded_width = width.to_i + (flags[:plus] ? 1 : 0)
- width = nil if modded_width <= val.to_s.size
- if ret[0] != ?-
- sign = plus_char
- else
- sign = ""
- end
-
- if precision || flags[:zero]
- ret.gsub!("..", "")
- end
- if precision
- if precision > PrecisionMax
- raise ArgumentError, "precision too big"
- end
- ret = sign + ret.send(direction, precision, pad_override || "0")
- flags[:zero] = flags[:plus] = flags[:space] = nil
- end
- if width
- if pad_char != " " && ret[0] == ?-
- ret.slice!(0)
- sign = "-"
- width -= 1
- ret = ret.rjust(width, pad_char)
- else
- ret = ret.send(direction, width, pad_char)
- ret[0] = sign unless sign.empty?
- return ret
- end
- end
- sign + ret
- end
-
- def pad_char
- flags[:zero] ? "0" : " "
- end
-
- def plus_char
- return "+" if flags[:plus]
- return " " if flags[:space]
- ""
- end
-
- class Slot
-
- # pos means it got a N$ position
- attr_reader :pos
- attr_reader :position
- attr_reader :value
- attr_reader :str
-
- def initialize(str)
- @pos = false
- @str = str
- if str.size == 3 && /\*\d\$/.match(str)
- @pos = true
- @position = str[1..1].to_i
- elsif str.size == 2 && str[1] == ?$
- @pos = true
- @position = str[0..0].to_i
- elsif str == "*"
- @position = :next
- else
- @value = str.to_i
- end
- end
- end
-
- class Number
-
- def initialize(number, radix)
- @number = number
- @radix = radix
- @pad = (radix - 1).to_s(radix)
- end
-
- def rep
- return @number.to_s(@radix) if(@number >= 0) || @radix == 10
- strlen = (@number.to_s(@radix)).size
- max = (@pad * strlen).to_i(@radix)
- ".." + (max + @number + 1).to_s(@radix)
- end
-
- end
-
- end
-end
diff --git a/kernel/common/string.rb b/kernel/common/string.rb
index cce3a97..37941e5 100644
--- a/kernel/common/string.rb
+++ b/kernel/common/string.rb
@@ -58,9 +58,9 @@ class String
# "%-5s: %08x" % [ "ID", self.id ] #=> "ID : 200e14d6"
def %(args)
if args.is_a? String # Fixes "%s" % ""
- Rubinius::Sprintf.new(self, args).parse
+ ::Rubinius::Sprinter.get(self).call(args)
else
- Rubinius::Sprintf.new(self, *args).parse
+ ::Rubinius::Sprinter.get(self).call(*args)
end
end
--
1.7.2.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment