allolex · August 29, 2015 14:06 · allolex · Sep 16, 2014
diff --git a/unindent.rb b/unindent.rb
 TEXT = <<EOF
   See, the interesting thing about this text
     is that while it seems like the first line defines an indent
       it's actually the last line which has the smallest indent

    there are also some blank lines

    both with and without extra spaces in them

    and it just goes on and on

       this text

    and starts to repeat itself
   See, the interesting thing about this text
     is that while it seems like the first line defines an indent
       it's actually the last line which has the smallest indent

    there are also some blank lines

    both with and without extra spaces in them

    and it just goes on and on

       this text

    and starts to repeat itself
   See, the interesting thing about this text
     is that while it seems like the first line defines an indent
       it's actually the last line which has the smallest indent

    there are also some blank lines

    both with and without extra spaces in them

    and it just goes on and on

       this text

    and starts to repeat itself

  The End.
 EOF

 EXPECTED_TEXT = <<EOF
 See, the interesting thing about this text
   is that while it seems like the first line defines an indent
     it's actually the last line which has the smallest indent

  there are also some blank lines

  both with and without extra spaces in them

  and it just goes on and on

     this text

  and starts to repeat itself
 See, the interesting thing about this text
   is that while it seems like the first line defines an indent
     it's actually the last line which has the smallest indent

  there are also some blank lines

  both with and without extra spaces in them

  and it just goes on and on

     this text

  and starts to repeat itself
 See, the interesting thing about this text
   is that while it seems like the first line defines an indent
     it's actually the last line which has the smallest indent

  there are also some blank lines

  both with and without extra spaces in them

  and it just goes on and on

     this text

  and starts to repeat itself

 The End.
 EOF

 require "minitest/autorun"
 require "minitest"
 require "active_support/core_ext/string"
 require "unindent"

 # Experimental String#unindent variants, benchmarked against the unindent
 # gem and ActiveSupport's String#strip_heredoc further down in this file.
 class String

  # Strips the smallest indentation shared by the non-blank lines.
  #
  # One #scan pass records the shortest run of leading tabs/spaces that is
  # followed by a non-whitespace character (empty and whitespace-only lines
  # never match), avoiding a split into an array of lines. #gsub then removes
  # that prefix from the start of every line that begins with it.
  #
  # Returns a new String, or self when the receiver has no non-blank line.
  def unindent_scan
    indent_str = nil
    scan(/^[\t ]*(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    # Without this guard the method fell through and returned nil.
    return self unless indent_str

    gsub(/^#{indent_str}/, "")
  end

  # Same approach as #unindent_scan, but only space indentation is
  # considered (lines whose leading whitespace contains a tab are ignored
  # when looking for the minimum).
  #
  # The pattern must still match non-blank lines with *no* indentation and
  # must skip whitespace-only lines; otherwise the computed minimum is wrong
  # (e.g. "foo\n  bar\n" would lose the indentation of "bar").
  #
  # Returns a new String, or self when no line qualifies.
  def unindent_scan_regex_optimized
    indent_str = nil
    scan(/^ *(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    return self unless indent_str

    gsub(/^#{indent_str}/, "")
  end

  # This version tries to avoid a second traversal of the string by
  # saving a list of offsets. However, it winds up being
  # slower. Perhaps this is a sign of how well-optimized #gsub is?
  #
  # Only non-blank lines are recorded, so whitespace-only lines are left
  # untouched. Returns a new String, or self when there is no non-blank line.
  def unindent_offsets
    min_indent = nil
    offsets    = []
    scan(/^[\t ]*(?=\S)/) do |s|
      # Character offset at which this line starts.
      offsets << Regexp.last_match.begin(0)
      # Compare against the Integer itself; Integer#size is the byte width
      # of the number's machine representation, not its value.
      min_indent = s.size if min_indent.nil? || s.size < min_indent
    end
    return self unless min_indent

    result = dup
    shift  = 0
    offsets.each do |offset|
      # Earlier deletions moved every later line start left by `shift`.
      result[offset - shift, min_indent] = ""
      shift += min_indent
    end
    result
  end
 end

 # Checks that every unindent implementation under comparison turns TEXT
 # into EXPECTED_TEXT.
 #
 # Inherits from Minitest::Test: MiniTest::Unit::TestCase is a deprecated
 # alias (Minitest 5 prints "MiniTest::Unit::TestCase is now Minitest::Test").
 class TestUnindent < Minitest::Test
  # String#unindent as provided by the "unindent" library required above.
  def test_unindent_gem
    assert_equal EXPECTED_TEXT, TEXT.unindent
  end

  # String#strip_heredoc from active_support/core_ext/string.
  def test_activesupport
    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
  end

  def test_scan
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
  end

  def test_scan_regex_optimized
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan_regex_optimized
  end

  def test_offsets
    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
  end
 end

 require "benchmark/ips"

 # Measures iterations per second of each unindent implementation on the
 # same TEXT fixture. Every report block calls its method directly so that
 # no extra dispatch overhead is included in the measurement.
 Benchmark.ips do |x|
  x.report("unindent gem")         { TEXT.unindent                      }
  x.report("activesupport")        { TEXT.strip_heredoc                 }
  x.report("scan")                 { TEXT.unindent_scan                 }
  x.report("scan regex optimized") { TEXT.unindent_scan_regex_optimized }
  x.report("offsets")              { TEXT.unindent_offsets              }
 end

 # MiniTest::Unit::TestCase is now Minitest::Test. From unindent.rb:144:in `<main>'
 # Calculating -------------------------------------
 #         unindent gem       972 i/100ms
 #        activesupport      1221 i/100ms
 #                 scan      1590 i/100ms
 # scan regex optimized      1647 i/100ms
 #              offsets      1059 i/100ms
 # -------------------------------------------------
 #         unindent gem    10637.8 (±9.6%) i/s -      53460 in   5.080267s
 #        activesupport    13888.1 (±10.4%) i/s -      69597 in   5.073358s
 #                 scan    17181.8 (±9.9%) i/s -      85860 in   5.053618s
 # scan regex optimized    17848.0 (±10.1%) i/s -      88938 in   5.042961s
 #              offsets    11629.9 (±10.0%) i/s -      58245 in   5.064342s
 # Run options: --seed 35169
 #
 # # Running:
 #
 # .....
 #
 # Finished in 0.001769s, 2826.4556 runs/s, 2826.4556 assertions/s.
 #
 # 5 runs, 5 assertions, 0 failures, 0 errors, 0 skips
	TEXT = <<EOF
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself

	The End.
	EOF

	EXPECTED_TEXT = <<EOF
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself

	The End.
	EOF

	require "minitest/autorun"
	require "minitest"
	require "active_support/core_ext/string"
	require "unindent"

	# Experimental String#unindent variants, benchmarked against the unindent
	# gem and ActiveSupport's String#strip_heredoc further down in this file.
	class String

	  # Strips the smallest indentation shared by the non-blank lines.
	  #
	  # One #scan pass records the shortest run of leading tabs/spaces that is
	  # followed by a non-whitespace character (empty and whitespace-only lines
	  # never match), avoiding a split into an array of lines. #gsub then
	  # removes that prefix from the start of every line that begins with it.
	  #
	  # Returns a new String, or self when the receiver has no non-blank line.
	  def unindent_scan
	    indent_str = nil
	    scan(/^[\t ]*(?=\S)/) do |s|
	      indent_str = s if indent_str.nil? || s.size < indent_str.size
	    end
	    # Without this guard the method fell through and returned nil.
	    return self unless indent_str

	    gsub(/^#{indent_str}/, "")
	  end

	  # Same approach as #unindent_scan, but only space indentation is
	  # considered (lines whose leading whitespace contains a tab are ignored
	  # when looking for the minimum).
	  #
	  # The pattern must still match non-blank lines with *no* indentation and
	  # must skip whitespace-only lines; otherwise the computed minimum is
	  # wrong (e.g. "foo\n  bar\n" would lose the indentation of "bar").
	  #
	  # Returns a new String, or self when no line qualifies.
	  def unindent_scan_regex_optimized
	    indent_str = nil
	    scan(/^ *(?=\S)/) do |s|
	      indent_str = s if indent_str.nil? || s.size < indent_str.size
	    end
	    return self unless indent_str

	    gsub(/^#{indent_str}/, "")
	  end

	  # This version tries to avoid a second traversal of the string by
	  # saving a list of offsets. However, it winds up being
	  # slower. Perhaps this is a sign of how well-optimized #gsub is?
	  #
	  # Only non-blank lines are recorded, so whitespace-only lines are left
	  # untouched. Returns a new String, or self when there is no non-blank
	  # line.
	  def unindent_offsets
	    min_indent = nil
	    offsets    = []
	    scan(/^[\t ]*(?=\S)/) do |s|
	      # Character offset at which this line starts.
	      offsets << Regexp.last_match.begin(0)
	      # Compare against the Integer itself; Integer#size is the byte width
	      # of the number's machine representation, not its value.
	      min_indent = s.size if min_indent.nil? || s.size < min_indent
	    end
	    return self unless min_indent

	    result = dup
	    shift  = 0
	    offsets.each do |offset|
	      # Earlier deletions moved every later line start left by `shift`.
	      result[offset - shift, min_indent] = ""
	      shift += min_indent
	    end
	    result
	  end
	end

	# Checks that every unindent implementation under comparison turns TEXT
	# into EXPECTED_TEXT.
	#
	# Inherits from Minitest::Test: MiniTest::Unit::TestCase is a deprecated
	# alias (Minitest 5 prints "MiniTest::Unit::TestCase is now Minitest::Test").
	class TestUnindent < Minitest::Test
	  # String#unindent as provided by the "unindent" library required above.
	  def test_unindent_gem
	    assert_equal EXPECTED_TEXT, TEXT.unindent
	  end

	  # String#strip_heredoc from active_support/core_ext/string.
	  def test_activesupport
	    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
	  end

	  def test_scan
	    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
	  end

	  def test_scan_regex_optimized
	    assert_equal EXPECTED_TEXT, TEXT.unindent_scan_regex_optimized
	  end

	  def test_offsets
	    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
	  end
	end

	require "benchmark/ips"

	# Measures iterations per second of each unindent implementation on the
	# same TEXT fixture. The block parameter pipes were backslash-escaped
	# (`\|x\|`), which is not valid Ruby; they are restored here.
	Benchmark.ips do |x|
	  x.report("unindent gem")         { TEXT.unindent }
	  x.report("activesupport")        { TEXT.strip_heredoc }
	  x.report("scan")                 { TEXT.unindent_scan }
	  x.report("scan regex optimized") { TEXT.unindent_scan_regex_optimized }
	  x.report("offsets")              { TEXT.unindent_offsets }
	end

	# MiniTest::Unit::TestCase is now Minitest::Test. From unindent.rb:144:in `<main>'
	# Calculating -------------------------------------
	# unindent gem 972 i/100ms
	# activesupport 1221 i/100ms
	# scan 1590 i/100ms
	# scan regex optimized 1647 i/100ms
	# offsets 1059 i/100ms
	# -------------------------------------------------
	# unindent gem 10637.8 (±9.6%) i/s - 53460 in 5.080267s
	# activesupport 13888.1 (±10.4%) i/s - 69597 in 5.073358s
	# scan 17181.8 (±9.9%) i/s - 85860 in 5.053618s
	# scan regex optimized 17848.0 (±10.1%) i/s - 88938 in 5.042961s
	# offsets 11629.9 (±10.0%) i/s - 58245 in 5.064342s
	# Run options: --seed 35169
	#
	# # Running:
	#
	# .....
	#
	# Finished in 0.001769s, 2826.4556 runs/s, 2826.4556 assertions/s.
	#
	# 5 runs, 5 assertions, 0 failures, 0 errors, 0 skips