Created
May 15, 2014 16:56
-
-
Save tonyta/78560e024b391af15693 to your computer and use it in GitHub Desktop.
Anagrams with Anne
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'benchmark/ips' | |
def anagram_tony?(str1, str2) | |
str1.chars.sort! == str2.chars.sort! | |
end | |
def anagram_enum?(str1, str2) | |
count = Hash.new {0} | |
str1.each_char {|char| count[char] += 1} | |
str2.each_char {|char| count[char] -= 1} | |
count.values.all?(&:zero?) | |
end | |
def anagram_anne?(str1, str2) | |
letter_count = Hash.new(0) | |
str1.split('').each do |letter| | |
letter_count[letter] += 1 | |
end | |
str2.split('').each do |letter| | |
letter_count[letter] -= 1 | |
end | |
letter_count.values.all? {|v| v == 0} | |
end | |
######### | |
# SETUP # | |
######### | |
POSS_CHARS = ('A'..'Z').to_a + ('a'..'z').to_a | |
RAND_CHARS = Array.new(10_000) {POSS_CHARS.sample} | |
BIG_STRING = RAND_CHARS.join | |
BIG_ANAGRAM = RAND_CHARS.shuffle.join | |
BIG_NOT_ANA = RAND_CHARS[0..-2].shuffle.join | |
LIL_STRING = RAND_CHARS[0...20].join | |
LIL_ANAGRAM = RAND_CHARS[0...20].shuffle.join | |
LIL_NOT_ANA = RAND_CHARS[0...19].shuffle.join | |
######### | |
# TESTS # | |
######### | |
p anagram_anne?(BIG_STRING, BIG_ANAGRAM) == true | |
p anagram_tony?(BIG_STRING, BIG_ANAGRAM) == true | |
p anagram_enum?(BIG_STRING, BIG_ANAGRAM) == true | |
p anagram_anne?(BIG_STRING, BIG_NOT_ANA) == false | |
p anagram_tony?(BIG_STRING, BIG_NOT_ANA) == false | |
p anagram_enum?(BIG_STRING, BIG_NOT_ANA) == false | |
############# | |
# BENCHMARK # | |
############# | |
puts "\nBenchmarking anagrams \#=> true" | |
Benchmark.ips do |x| | |
x.report('LIL anagram_tony?') { anagram_tony?(LIL_STRING, LIL_ANAGRAM) } | |
x.report('LIL anagram_enum?') { anagram_enum?(LIL_STRING, LIL_ANAGRAM) } | |
x.report('LIL anagram_anne?') { anagram_anne?(LIL_STRING, LIL_ANAGRAM) } | |
x.report('BIG anagram_tony?') { anagram_tony?(BIG_STRING, BIG_ANAGRAM) } | |
x.report('BIG anagram_enum?') { anagram_enum?(BIG_STRING, BIG_ANAGRAM) } | |
x.report('BIG anagram_anne?') { anagram_anne?(BIG_STRING, BIG_ANAGRAM) } | |
end | |
puts "\nBenchmarking anagrams \#=> false" | |
Benchmark.ips do |x| | |
x.report('LIL anagram_tony?') { anagram_tony?(LIL_STRING, LIL_NOT_ANA) } | |
x.report('LIL anagram_enum?') { anagram_enum?(LIL_STRING, LIL_NOT_ANA) } | |
x.report('LIL anagram_anne?') { anagram_anne?(LIL_STRING, LIL_NOT_ANA) } | |
x.report('BIG anagram_tony?') { anagram_tony?(BIG_STRING, BIG_NOT_ANA) } | |
x.report('BIG anagram_enum?') { anagram_enum?(BIG_STRING, BIG_NOT_ANA) } | |
x.report('BIG anagram_anne?') { anagram_anne?(BIG_STRING, BIG_NOT_ANA) } | |
end | |
########### | |
# RESULTS # | |
########### | |
# Benchmarking anagrams #=> true | |
# Calculating ------------------------------------- | |
# LIL anagram_tony? 8793 i/100ms | |
# LIL anagram_enum? 3205 i/100ms | |
# LIL anagram_anne? 2083 i/100ms | |
# BIG anagram_tony? 14 i/100ms | |
# BIG anagram_enum? 12 i/100ms | |
# BIG anagram_anne? 6 i/100ms | |
# ------------------------------------------------- | |
# LIL anagram_tony? 90081.5 (±13.0%) i/s - 448443 in 5.071273s | |
# LIL anagram_enum? 34511.7 (±9.9%) i/s - 173070 in 5.068379s | |
# LIL anagram_anne? 20038.4 (±13.6%) i/s - 99984 in 5.096383s | |
# BIG anagram_tony? 153.1 (±13.1%) i/s - 756 in 5.042288s | |
# BIG anagram_enum? 128.0 (±10.9%) i/s - 636 in 5.040368s | |
# BIG anagram_anne? 63.0 (±12.7%) i/s - 312 in 5.080153s | |
# Benchmarking anagrams #=> false | |
# Calculating ------------------------------------- | |
# LIL anagram_tony? 9998 i/100ms | |
# LIL anagram_enum? 2885 i/100ms | |
# LIL anagram_anne? 2025 i/100ms | |
# BIG anagram_tony? 17 i/100ms | |
# BIG anagram_enum? 12 i/100ms | |
# BIG anagram_anne? 6 i/100ms | |
# ------------------------------------------------- | |
# LIL anagram_tony? 114035.4 (±14.7%) i/s - 559888 in 5.049876s | |
# LIL anagram_enum? 36144.3 (±10.0%) i/s - 178870 in 5.004557s | |
# LIL anagram_anne? 20824.8 (±12.9%) i/s - 103275 in 5.049939s | |
# BIG anagram_tony? 174.6 (±15.5%) i/s - 850 in 5.016642s | |
# BIG anagram_enum? 114.9 (±14.8%) i/s - 576 in 5.155270s | |
# BIG anagram_anne? 63.5 (±11.0%) i/s - 318 in 5.076423s | |
# It looks like #anagram_tony? has a performance boost when comparing | |
# non-anagrams. I assume that when comparing the two sorted char-arrays, | |
# #== will enumerate through both arrays and will return `false` as soon | |
# as it encounters a difference, while the other two methods will continue | |
# enumerating through the entire array. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment