Skip to content

Instantly share code, notes, and snippets.

@tannermares
Last active November 27, 2024 20:06
Show Gist options
  • Select an option

  • Save tannermares/8e584c44e5cf4fd24f76088ae107b1a9 to your computer and use it in GitHub Desktop.

Select an option

Save tannermares/8e584c44e5cf4fd24f76088ae107b1a9 to your computer and use it in GitHub Desktop.
ruby regex w/lookahead benchmarks
require 'benchmark'
require 'nokogiri'
# Benchmark fixture: one paragraph wrapped in 450 <div> elements that all carry
# the same attribute text, with no whitespace between tags.
content = [
  '<div class="test" style="margin:10px;">' * 450,
  '<p>Deep content</p>',
  '</div>' * 450
].join
# Pattern for the single-pass strategy. Flags: x (free-spacing, so the
# in-pattern "#" notes are comments) and m (lets "." cross newlines).
# It matches an opening <div>, any further opening <div> tags whose attribute
# text is identical (enforced with the \k<attrs> backreference), the innermost
# such <div> with its content, and then any run of closing </div> tags.
# Named groups: outer_div (the whole match), attrs (attribute text of the
# first <div>), inner_div (innermost <div> through its first "</div>").
# NOTE(review): despite the constant's name, the pattern contains no lookahead
# assertion. The trailing closing tags are consumed without being counted
# against the opening tags, so tag balance is not verified.
DUPLICATE_NESTED_DIV_LOOKAHEAD_REGEX = %r{
(?<outer_div>
<div(?<attrs>[^>]*)>\s* # Match the outermost <div> with its attributes
(?:<div\k<attrs>>\s*)* # Match zero or more nested identical <div>
(?<inner_div><div\k<attrs>>.*?</div>) # Match the innermost <div> and its content
(?:\s*</div>\s*)* # Match all closing </div> tags for the nested structure
)
}xm
# Pattern for the multi-pass strategy: matches one <div> whose content is,
# apart from surrounding whitespace, a <div> with identical attribute text
# (\k<attrs> backreference), followed by the wrapper's closing </div>.
# The inner_div group holds the nested <div>, from its opening tag through the
# first "</div>" that is followed by optional whitespace and another "</div>".
# Flags: x (free-spacing; "#" starts an in-pattern comment) and m ("." also
# matches newlines).
DUPLICATE_NESTED_DIV_REGEX = %r{
<div(?<attrs>[^>]*)>\s* # Match an opening <div> and capture its attributes
(?<inner_div><div\k<attrs>>\s*.*?\s*</div>) # Match a nested <div> with the same attributes and its content
\s*</div> # Match the outer closing </div>
}xm
# Tell whether two nodes carry exactly the same attribute names and values.
#
# @param node1 [#attributes] object whose +attributes+ returns a Hash of
#   name => attribute, where each attribute responds to +value+
# @param node2 [#attributes] second object of the same shape
# @return [Boolean] true when both name => value mappings are equal
def attributes_match?(node1, node2)
  first_values, second_values = [node1, node2].map do |node|
    node.attributes.transform_values(&:value)
  end
  first_values == second_values
end
# Time three ways of collapsing redundant nested <div> wrappers in `content`:
#   Looked - a single gsub pass with DUPLICATE_NESTED_DIV_LOOKAHEAD_REGEX.
#   Looped - repeated gsub passes with DUPLICATE_NESTED_DIV_REGEX until a pass
#            changes nothing.
#   Nokoed - DOM rewriting on a Nokogiri HTML fragment.
# The value returned by each report is kept in an instance variable so the
# timings can be read once the benchmark has finished.
Benchmark.bm(7) do |x|
  @looked = x.report('Looked:') do
    content.gsub(DUPLICATE_NESTED_DIV_LOOKAHEAD_REGEX) { Regexp.last_match[:inner_div] }
  end

  @looped = x.report('Looped:') do
    current = content
    loop do
      # gsub returns a new string and leaves its receiver untouched, so no
      # defensive copies are needed to compare a pass against its input.
      collapsed = current.gsub(DUPLICATE_NESTED_DIV_REGEX) { Regexp.last_match[:inner_div] }
      break if collapsed == current

      current = collapsed
    end
  end

  @nokoed = x.report('Nokoed:') do
    doc = Nokogiri::HTML.fragment(content)
    loop do
      modified = false
      # Walk the matched <div> elements in reverse order.
      doc.css('div').reverse_each do |div|
        # Ignore whitespace-only nodes, then require the wrapper to hold
        # nothing but a single <div>: replacing a wrapper that has any other
        # content would silently discard that content.
        significant = div.children.reject(&:blank?)
        next unless significant.length == 1

        inner = significant.first
        next unless inner.element? && inner.name == 'div' && attributes_match?(inner, div)

        div.replace(inner)
        modified = true
      end
      # Stop after the first pass that leaves the tree unchanged.
      break unless modified
    end
  end
end
# Print each alternative's wall-clock time as a multiple of the single-pass
# regex time, rounded to two decimals.
baseline = @looked.real
ratios = { 'Looped' => @looped.real, 'Nokoed' => @nokoed.real }.transform_values do |elapsed|
  (elapsed / baseline).round(2)
end

puts "\nComparison:"
ratios.each do |label, ratio|
  puts "Looked is #{ratio}x faster than #{label}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment