Last active
November 27, 2024 20:06
-
-
Save tannermares/8e584c44e5cf4fd24f76088ae107b1a9 to your computer and use it in GitHub Desktop.
ruby regex w/lookahead benchmarks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'benchmark' | |
| require 'nokogiri' | |
# Benchmark fixture: a single paragraph wrapped in 450 directly nested <div>
# elements that all carry the same class and style attributes.
nesting_depth = 450
opening_tags = '<div class="test" style="margin:10px;">' * nesting_depth
closing_tags = '</div>' * nesting_depth
content = "#{opening_tags}<p>Deep content</p>#{closing_tags}"
# Matches a whole run of directly nested <div> elements whose attribute text is
# identical, so a single gsub pass can replace the run with its innermost div.
#
# Named groups:
#   attrs     - raw attribute text of the outermost <div>; every nested <div>
#               must repeat it exactly (enforced with the \k<attrs> backreference)
#   inner_div - the innermost matching <div> through its first closing </div>
#   outer_div - the entire match
#
# Flags: x (free-spacing with inline comments) and m (`.` also matches newlines).
#
# NOTE(review): despite the constant's name, the pattern contains no lookahead
# assertion. The final group is greedy and unbounded, so it also consumes
# closing </div> tags that belong to enclosing divs with different attributes.
DUPLICATE_NESTED_DIV_LOOKAHEAD_REGEX = %r{
  (?<outer_div>
    <div(?<attrs>[^>]*)>\s*                # Match the outermost <div> with its attributes
    (?:<div\k<attrs>>\s*)*                 # Match zero or more nested identical <div>
    (?<inner_div><div\k<attrs>>.*?</div>)  # Match the innermost <div> and its content
    (?:\s*</div>\s*)*                      # Match all closing </div> tags for the nested structure
  )
}xm
# Matches exactly one level of duplication: a <div> whose first content (after
# optional whitespace) is another <div> with identical attribute text, followed
# by the outer closing tag. Replacing the match with `inner_div` removes one
# wrapper, so callers must re-apply it until the string stops changing.
#
# Named groups:
#   attrs     - raw attribute text of the outer <div>, repeated via \k<attrs>
#   inner_div - the nested <div> through its first closing </div>
#
# Flags: x (free-spacing with inline comments) and m (`.` also matches newlines).
DUPLICATE_NESTED_DIV_REGEX = %r{
  <div(?<attrs>[^>]*)>\s*                      # Match an opening <div> and capture its attributes
  (?<inner_div><div\k<attrs>>\s*.*?\s*</div>)  # Match a nested <div> with the same attributes and its content
  \s*</div>                                    # Match the outer closing </div>
}xm
# Reports whether two nodes carry the same attributes.
#
# Each node's `attributes` collection is reduced to a name => value mapping
# (by calling `value` on every entry) and the two mappings are compared
# with `==`.
#
# @param node1 [#attributes] first node (presumably a Nokogiri node - confirm against callers)
# @param node2 [#attributes] second node
# @return [Boolean] true when both mappings are equal
def attributes_match?(node1, node2)
  first_values, second_values = [node1, node2].map do |node|
    node.attributes.transform_values(&:value)
  end
  first_values == second_values
end
# Times three strategies for collapsing directly nested <div> elements with
# identical attributes, all applied to the same `content` string.
#
# The value returned by each `x.report` call is kept in an instance variable
# because the comparison printed after this block reads `.real` from it.
Benchmark.bm(7) do |x|
  # Strategy 1: a single gsub pass with the multi-level pattern.
  @looked = x.report('Looked:') do
    content.gsub(DUPLICATE_NESTED_DIV_LOOKAHEAD_REGEX) { Regexp.last_match[:inner_div] }
  end

  # Strategy 2: strip one wrapper level per pass until a pass changes nothing.
  @looped = x.report('Looped:') do
    collapsed = content
    loop do
      previous = collapsed
      # String#gsub returns a new string and never mutates its receiver, so no
      # defensive copies are made here; extra dups would only inflate the timing.
      collapsed = previous.gsub(DUPLICATE_NESTED_DIV_REGEX) { Regexp.last_match[:inner_div] }
      break if collapsed == previous
    end
  end

  # Strategy 3: parse the markup and splice duplicate wrappers out of the tree.
  @nokoed = x.report('Nokoed:') do
    doc = Nokogiri::HTML.fragment(content)
    loop do
      modified = false
      # Walk the matched divs in the reverse of the order `css` returned them.
      doc.css('div').reverse_each do |div|
        first_child_div = div.children.find { |child| child.element? && child.name == 'div' }
        next unless first_child_div && attributes_match?(first_child_div, div)

        # Replace the wrapper with its first <div> child.
        div.replace(first_child_div)
        modified = true
      end
      # Repeat whole passes until one makes no replacement.
      break unless modified
    end
  end
end
# Print how each alternative's wall-clock time compares with the single-pass
# regex: each ratio is that strategy's `real` time divided by the "Looked" time.
baseline_seconds = @looked.real
looped_ratio = @looped.real / baseline_seconds
nokoed_ratio = @nokoed.real / baseline_seconds

puts "\nComparison:",
     "Looked is #{looped_ratio.round(2)}x faster than Looped",
     "Looked is #{nokoed_ratio.round(2)}x faster than Nokoed"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment