Skip to content

Instantly share code, notes, and snippets.

@brasic
Last active August 29, 2015 14:14
Show Gist options
  • Save brasic/f8ee5a90661c56a40800 to your computer and use it in GitHub Desktop.
Save brasic/f8ee5a90661c56a40800 to your computer and use it in GitHub Desktop.
anonymize.rb
# Anonymize an HTTP path, replacing all-digit segments with :num and all-hex segments with :hex
# for example,
# /shipments/12345/2a95f880b65f245b9158fd4240767d91 => /shipments/:num/:hex
# Sample event and the anonymized path expected for it.
event = { "path" => "/shipments/12345/2a95f880b65f245b9158fd4240767d91" }
expected = "/shipments/:num/:hex"
# Matches a string consisting solely of decimal digits. Anchored with \A and \z
# (whole string) rather than ^ and $ (which match at every line boundary), so a
# value such as "123\nabc" is not partially rewritten.
ONLY_NUM_REGEX = /\A\d+\z/
# Placeholders substituted for numeric and hexadecimal path segments.
NUM = ':num'.freeze
HEX = ':hex'.freeze
# Lowercase letters g-z plus underscore: characters that cannot appear in a
# hexadecimal string.
NON_HEX_CHARS = %w[g h i j k l m n o p q r s t u v w x y z _].freeze
# Key under which the request path is stored in the event hash.
PATH = 'path'.freeze
# Separator used to split the path into segments and to join them again.
SLASH = '/'.freeze
# Number of benchmark iterations.
TIMES = 10_000
# Substitutes the first match of ONLY_NUM_REGEX in a path segment with the NUM
# placeholder. A segment without a match comes back as a copy with the same
# content.
REPLACE_NUM = ->(segment) { segment.sub(ONLY_NUM_REGEX, NUM) }
# Matches a non-empty string made up solely of hexadecimal digits (either case).
HEX_ONLY_REGEX = /\A[0-9a-fA-F]+\z/

# Replaces a path segment with the HEX placeholder when the segment is a
# non-empty, all-hexadecimal string. Any other segment, including the empty
# string, is returned unchanged.
#
# The check is a whitelist of hex digits. A blacklist of lowercase g-z and "_"
# would wrongly classify segments such as "XYZ", "a-b" or "." as hex, and would
# also allocate a character array and an intersection array on every call.
REPLACE_HEX = lambda do |x|
  HEX_ONLY_REGEX =~ x ? HEX : x
end
# Benchmark driver: runs the anonymization pipeline TIMES times over the sample
# event, prints the average time and object-count growth per iteration, then
# checks that the last result equals the expected path.
parsed = nil
# Collect once, then disable the GC for the measured section.
GC.start
GC.disable
# Take the object snapshot before starting the clock (and stop the clock before
# the second snapshot) so the cost of counting objects is not billed to the loop.
obj_before = ObjectSpace.count_objects[:TOTAL]
# A monotonic clock is used so wall-clock adjustments cannot distort the result.
time_before = Process.clock_gettime(Process::CLOCK_MONOTONIC)
# hot loop:
TIMES.times do
  parsed = event[PATH]
    .split(SLASH)
    .map(&REPLACE_NUM)
    .map(&REPLACE_HEX)
    .join(SLASH)
end
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - time_before
diff_obj = ObjectSpace.count_objects[:TOTAL] - obj_before
# Seconds per iteration converted to microseconds.
diff_time = (elapsed / TIMES) * 1_000_000
puts "per loop: #{diff_time} microseconds, objs: #{diff_obj / TIMES}"
raise "parsed wrong: #{parsed} != #{expected}" unless parsed == expected
# $ ruby anonymize.rb
# per loop: 48.993399999999994 microseconds, objs: 141
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment