-
-
Save allolex/4259c9fba53cfec61a43 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Input fixture: the indented text that every unindent implementation in this
# file is run against, both in TestUnindent and in the benchmark.
# NOTE(review): the text describes lines with differing indents and blank
# lines, but no leading whitespace or blank lines are visible in this copy --
# presumably lost in extraction; restore from the original before running.
TEXT = <<EOF | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
The End. | |
EOF | |
# Expected output fixture: the value each unindent implementation must return
# for TEXT; TestUnindent compares every implementation's result against it.
# NOTE(review): any relative indentation and blank lines this heredoc is meant
# to keep are not visible in this copy -- presumably lost in extraction;
# restore from the original before running.
EXPECTED_TEXT = <<EOF | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
See, the interesting thing about this text | |
is that while it seems like the first line defines an indent | |
it's actually the last line which has the smallest indent | |
there are also some blank lines | |
both with and without extra spaces in them | |
and it just goes on and on | |
this text | |
and starts to repeat itself | |
The End. | |
EOF | |
require "minitest/autorun" | |
require "minitest" | |
require "active_support/core_ext/string" | |
require "unindent" | |
# Candidate implementations for stripping the common leading indentation from
# a multi-line string (for example an indented heredoc). They exist to be
# compared against each other for correctness and speed.
class String
  # Strips the smallest indentation (tabs and/or spaces) found on any
  # non-blank line from the start of every line.
  #
  # Tries to optimize by avoiding splitting the string into an array of lines:
  # one #scan pass finds the shortest indent, one #gsub pass removes it.
  #
  # Only lines that begin with exactly the shortest indent string are changed,
  # so a line indented with different whitespace characters (tabs vs. spaces)
  # is left untouched.
  #
  # @return [String] the unindented copy, or +self+ when the string contains
  #   no non-blank line (nothing to measure, nothing to strip)
  def unindent_scan
    indent_str = nil
    # The lookahead restricts matches to lines that contain visible text, so
    # blank and whitespace-only lines never influence the minimum.
    scan(/^[\t ]*(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    return self unless indent_str

    gsub(/^#{Regexp.escape(indent_str)}/, "")
  end

  # Space-only variant of #unindent_scan using a narrower character class.
  #
  # Lines whose indentation starts with a tab are ignored when the minimum is
  # computed. Unindented lines count as indent zero and whitespace-only lines
  # are skipped, matching #unindent_scan.
  #
  # @return [String] the unindented copy, or +self+ when no line qualifies
  def unindent_scan_regex_optimized
    indent_str = nil
    # `*` (not `+`) so a flush-left line registers as an empty indent; the
    # lookahead keeps whitespace-only lines from being measured.
    scan(/^ *(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    return self unless indent_str

    gsub(/^#{indent_str}/, "")
  end

  # Strips the smallest indentation width from every non-blank line.
  #
  # This version tries to avoid a second regex traversal of the string by
  # saving the start offset of each non-blank line during the scan and then
  # deleting the indent at those offsets. However, it winds up being slower.
  # Perhaps this is a sign of how well-optimized #gsub is?
  #
  # Unlike #unindent_scan this removes by width, not by matching the indent
  # string, and it never touches blank or whitespace-only lines.
  #
  # @return [String] the unindented copy, or +self+ when the string contains
  #   no non-blank line
  def unindent_offsets
    min_indent = nil
    offsets = []
    scan(/^[\t ]*(?=\S)/) do |s|
      offsets << Regexp.last_match.begin(0)
      # min_indent is an Integer width; compare against it directly.
      min_indent = s.size if min_indent.nil? || s.size < min_indent
    end
    return self unless min_indent

    result = dup
    # Every deletion moves the remaining text left, so later offsets must be
    # corrected by the total number of characters removed so far.
    shift = 0
    offsets.each do |offset|
      result[offset - shift, min_indent] = ""
      shift += min_indent
    end
    result
  end
end
# Verifies that every unindent implementation under comparison turns the TEXT
# fixture into EXPECTED_TEXT, so the benchmark only compares code that
# produces the same result.
#
# Inherits from Minitest::Test: MiniTest::Unit::TestCase is the deprecated
# pre-5.0 name and is no longer defined by default in current Minitest.
class TestUnindent < Minitest::Test
  # String#unindent, called after requiring "unindent".
  def test_unindent_gem
    assert_equal EXPECTED_TEXT, TEXT.unindent
  end

  # String#strip_heredoc, called after requiring ActiveSupport's string extensions.
  def test_activesupport
    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
  end

  # Scan-then-gsub implementation defined in this file.
  def test_scan
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
  end

  # Space-only scan variant defined in this file.
  def test_scan_regex_optimized
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan_regex_optimized
  end

  # Offset-recording implementation defined in this file.
  def test_offsets
    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
  end
end
require "benchmark/ips" | |
# Measure each unindent implementation on the same TEXT fixture. Each report
# calls its method directly so no dispatch overhead is added to the timings.
Benchmark.ips do |bench|
  bench.report("unindent gem") do
    TEXT.unindent
  end

  bench.report("activesupport") do
    TEXT.strip_heredoc
  end

  bench.report("scan") do
    TEXT.unindent_scan
  end

  bench.report("scan regex optimized") do
    TEXT.unindent_scan_regex_optimized
  end

  bench.report("offsets") do
    TEXT.unindent_offsets
  end
end
# MiniTest::Unit::TestCase is now Minitest::Test. From unindent.rb:144:in `<main>' | |
# Calculating ------------------------------------- | |
# unindent gem 972 i/100ms | |
# activesupport 1221 i/100ms | |
# scan 1590 i/100ms | |
# scan regex optimized 1647 i/100ms | |
# offsets 1059 i/100ms | |
# ------------------------------------------------- | |
# unindent gem 10637.8 (±9.6%) i/s - 53460 in 5.080267s | |
# activesupport 13888.1 (±10.4%) i/s - 69597 in 5.073358s | |
# scan 17181.8 (±9.9%) i/s - 85860 in 5.053618s | |
# scan regex optimized 17848.0 (±10.1%) i/s - 88938 in 5.042961s | |
# offsets 11629.9 (±10.0%) i/s - 58245 in 5.064342s | |
# Run options: --seed 35169 | |
# | |
# # Running: | |
# | |
# ..... | |
# | |
# Finished in 0.001769s, 2826.4556 runs/s, 2826.4556 assertions/s. | |
# | |
# 5 runs, 5 assertions, 0 failures, 0 errors, 0 skips |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Interestingly, a slight performance increase can be had by just "fixing" the regex.