-
-
Save wconrad/e64017b55f0474ae5e31 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Input fixture: the heredoc text that the tests and the benchmark below feed
# to each unindent implementation.
# NOTE(review): every line appears flush-left in this copy, yet the text itself
# talks about lines with differing indents -- leading whitespace was presumably
# lost; confirm against the original before relying on this fixture.
TEXT = <<EOF | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
The End. | |
EOF | |
# Expected-output fixture: the string every unindent implementation must
# return for TEXT (TestUnindent compares against it with assert_equal).
# NOTE(review): in this copy the content is indistinguishable from TEXT, so
# the original leading whitespace was presumably lost -- confirm before use.
EXPECTED_TEXT = <<EOF | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
The End. | |
EOF | |
require "minitest/autorun" | |
require "minitest" | |
require "active_support/core_ext/string" | |
require "unindent" | |
# Experimental String extensions that strip the smallest common indentation
# from a multi-line string. Only lines containing non-whitespace content are
# considered when measuring the indent.
class String
  # Unindents by scanning with a block, which avoids building an array of
  # lines, and then stripping the detected prefix with a single #gsub.
  #
  # The indent is the shortest run of leading tabs/spaces found on a line
  # that has non-whitespace content (the first such run wins on ties). That
  # exact prefix is removed from every line that begins with it, which
  # includes whitespace-only lines.
  #
  # @return [String] a new unindented string, or +self+ when the receiver has
  #   no line with non-whitespace content (this case used to return nil)
  def unindent_scan
    indent_str = nil
    scan(/^[\t ]*(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    return self unless indent_str

    # indent_str holds only tabs and spaces, so it is safe to interpolate
    # into the pattern without escaping.
    gsub(/^#{indent_str}/, "")
  end

  # Unindents using a single regexp traversal: the start offset of every
  # line with content is recorded during the scan, and the indent is then
  # deleted at those offsets. The author measured this as slower than
  # #unindent_scan, perhaps a sign of how well-optimized #gsub is.
  #
  # Unlike #unindent_scan, only lines with non-whitespace content are
  # shortened, and exactly +min_indent+ characters are removed from each.
  #
  # @return [String] a new unindented string, or +self+ when the receiver has
  #   no line with non-whitespace content
  def unindent_offsets
    min_indent = nil
    offsets = []
    scan(/^[\t ]*(?=\S)/) do |s|
      offsets << Regexp.last_match.begin(0)
      # min_indent is an Integer: compare against it directly. Integer#size
      # is the byte width of the number, not its value.
      min_indent = s.size if min_indent.nil? || s.size < min_indent
    end
    return self unless min_indent

    result = dup
    # Each deletion moves all later text left, so later offsets must be
    # corrected by the number of characters removed so far.
    shift = 0
    offsets.each do |offset|
      result[offset - shift, min_indent] = ""
      shift += min_indent
    end
    result
  end
end
# Verifies that each unindent implementation turns TEXT into EXPECTED_TEXT.
#
# Inherits from Minitest::Test; the former MiniTest::Unit::TestCase name is a
# deprecated alias (see the warning recorded in this script's own output).
class TestUnindent < Minitest::Test
  # String#unindent -- presumably supplied by the "unindent" gem required above.
  def test_unindent_gem
    assert_equal EXPECTED_TEXT, TEXT.unindent
  end

  # String#strip_heredoc from ActiveSupport's String core extensions.
  def test_activesupport
    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
  end

  # Local implementation: scan for the indent, then strip it with gsub.
  def test_scan
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
  end

  # Local implementation: record line offsets, then delete the indent in place.
  def test_offsets
    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
  end
end
require "benchmark"

# Times every unindent implementation over n runs on TEXT. Benchmark.bmbm
# runs a rehearsal pass before the measured pass; 15 is the label column width.
n = 10_000
Benchmark.bmbm(15) do |x|
  # The return values are deliberately discarded: only the elapsed time matters.
  x.report("unindent gem") { n.times { TEXT.unindent } }
  x.report("activesupport") { n.times { TEXT.strip_heredoc } }
  x.report("scan") { n.times { TEXT.unindent_scan } }
  x.report("offsets") { n.times { TEXT.unindent_offsets } }
end
# ~> MiniTest::Unit::TestCase is now Minitest::Test. From -:130:in `<main>' | |
# >> Rehearsal --------------------------------------------------- | |
# >> unindent gem 0.850000 0.000000 0.850000 ( 0.859678) | |
# >> activesupport 0.630000 0.000000 0.630000 ( 0.638381) | |
# >> scan 0.510000 0.000000 0.510000 ( 0.502865) | |
# >> offsets 0.710000 0.010000 0.720000 ( 0.721911) | |
# >> ------------------------------------------ total: 2.710000sec | |
# >> | |
# >> user system total real | |
# >> unindent gem 0.770000 0.000000 0.770000 ( 0.773135) | |
# >> activesupport 0.620000 0.000000 0.620000 ( 0.627442) | |
# >> scan 0.490000 0.000000 0.490000 ( 0.497729) | |
# >> offsets 0.700000 0.000000 0.700000 ( 0.704367) | |
# >> Run options: --seed 50148 | |
# >> | |
# >> # Running: | |
# >> | |
# >> .... | |
# >> | |
# >> Finished in 0.001476s, 2710.3833 runs/s, 2710.3833 assertions/s. | |
# >> | |
# >> 4 runs, 4 assertions, 0 failures, 0 errors, 0 skips |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment