Skip to content

Instantly share code, notes, and snippets.

@codatory
Created July 11, 2012 18:32
Show Gist options
  • Save codatory/3092216 to your computer and use it in GitHub Desktop.
Save codatory/3092216 to your computer and use it in GitHub Desktop.
require 'set'
# Walk a word list and report, for each word, whether this is its first
# occurrence. Set#add? returns nil when the element is already present, so its
# return value doubles as the "first time seen?" test.
my_data = %w(cat cat dog dog foo bar bin baz baz bin)
my_set = Set.new
my_data.each do |word|
  puts(my_set.add?(word) ? "#{word} is the first of its kind" : "#{word} is already in the set")
end
require 'set'
# A de-duplicating collection of normalized strings.
#
# Every item is converted with +to_s+, downcased, stripped of surrounding
# whitespace, and has each run of two or more whitespace characters collapsed
# to a single space before it is stored. Items that are blank after cleaning
# are ignored.
class CleanedList
  def initialize
    # A plain Set is used because SortedSet is no longer part of the `set`
    # standard library (Ruby 3.0+); ordering is applied when reading instead.
    @data = Set.new
  end

  # @return [Array<String>] the stored items in ascending order
  def to_a
    @data.sort
  end

  # @return [Set<String>] a new Set built from the items in ascending order;
  #   changing it does not affect this list
  def to_set
    Set.new(to_a)
  end

  # Cleans +item+ and stores it unless it is blank or already present.
  #
  # @param item [#to_s] value to add
  # @return [CleanedList] self, so chained `list << a << b` cleans every item
  def <<(item)
    add?(item)
    self
  end
  alias_method :add, :<<

  # Cleans +item+ and stores it, reporting whether it was new.
  #
  # @param item [#to_s] value to add
  # @return [Set, nil, false] truthy when the item was added, false when it is
  #   blank after cleaning, nil when it was already present
  def add?(item)
    processed = clean(item)
    return false if processed.empty?

    @data.add?(processed)
  end

  private

  # Single normalization pipeline shared by #<< and #add?.
  def clean(item)
    # Collapse (rather than delete) whitespace runs so that "john  doe" and
    # "john doe" are treated as the same entry instead of becoming "johndoe".
    item.to_s.downcase.strip.gsub(/\s{2,}/, ' ')
  end
end
require './0002-cleaned_list.rb'
# Feed words that differ only by letter case through CleanedList and report,
# for each one, whether add? accepted it as new.
my_data = %w(Cat cat dog dOg foo BAR bin baz baz bin)
my_set = CleanedList.new
my_data.each do |word|
  message =
    if my_set.add?(word)
      "#{word} is the first of its kind"
    else
      "#{word} is already in the set"
    end
  puts message
end
# Namespace for a normalized, de-duplicating string list with similarity search.
class FuzzyMatch
  # Thin wrapper around Amatch's Jaro-Winkler matcher.
  class Matcher
    require 'singleton'
    require 'amatch'
    include Singleton

    # Scores +str2+ against +str1+ with Amatch::JaroWinkler.
    #
    # @param str1 [String] pattern the matcher is built from
    # @param str2 [String] string matched against the pattern
    # @return the matcher's score (presumably a Float in 0.0..1.0, confirm
    #   against the amatch documentation)
    def self.get_score(str1, str2)
      Amatch::JaroWinkler.new(str1).match(str2)
    end
  end

  # A set of normalized strings that can be searched for near matches.
  class List
    require 'set'

    # Defaults; any key can be overridden through the hash given to #initialize.
    DEFAULT_OPTIONS = {
      :filter => //,
      :score => 0.8,
      :downcase => true,
      :ascii_only => true,
      :fix_spaces => true
    }.freeze

    # @param options [Hash] overrides for DEFAULT_OPTIONS:
    #   :filter (Regexp removed from every item), :score (threshold a
    #   candidate's score must exceed in #search), :downcase, :ascii_only,
    #   :fix_spaces (booleans toggling the normalization steps)
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
      @data = Set.new
    end

    # @return [Array<String>] the stored, normalized items
    def to_a
      @data.to_a
    end

    # Normalizes +item+ and stores it.
    #
    # @param item [String] string to add; normalized in place when mutable
    # @return [List] self, so chained `list << a << b` normalizes every item
    def <<(item)
      @data << normalize(item)
      self
    end
    alias_method :add, :<<

    # Normalizes +item+ and stores it, reporting whether it was new.
    #
    # @param item [String] string to add; normalized in place when mutable
    # @return [Set, nil] truthy when the item was added, nil when already present
    def add?(item)
      @data.add?(normalize(item))
    end

    # Finds stored items whose score against +string+ exceeds the :score option.
    # +string+ is passed to the matcher as given (it is not normalized here).
    #
    # @param string [String] query
    # @return [Array<String>] matching stored items
    def search(string)
      @data.to_a.select do |candidate|
        Matcher.get_score(string, candidate) > @options[:score]
      end
    end

    private

    # Single normalization pipeline shared by #<< and #add?.
    #
    # Mutable strings are normalized in place, as before, so callers that
    # compare their own string with stored values keep working. Frozen strings
    # are copied first instead of raising FrozenError.
    def normalize(item)
      item = item.dup if item.frozen?
      item.downcase! if @options[:downcase]
      # A-Z is kept so that disabling :downcase no longer strips every
      # uppercase letter; with :downcase on there are none left to keep.
      item.gsub!(/[^a-zA-Z0-9 ]/, '') if @options[:ascii_only]
      # Collapse (rather than delete) whitespace runs so words are not glued
      # together ("john  doe" -> "john doe", not "johndoe").
      item.gsub!(/\s{2,}/, ' ') if @options[:fix_spaces]
      item.gsub!(@options[:filter], '')
      item
    end
  end
end
require './0004-fuzzy_match_class.rb'
# Add each name to a FuzzyMatch::List. For names accepted as new, also list the
# stored entries that score as similar. List#add? normalizes the string it is
# given in place, so the values printed and compared below are the normalized
# forms.
my_data = ["John Doe", "Jon Doe", "Jason Bourne", "Jason Bourn ", " My Face", "My Space"]
my_list = FuzzyMatch::List.new
my_data.each do |name|
  unless my_list.add?(name)
    puts "#{name} is already in the set"
    next
  end

  puts "#{name} is the first of its kind"
  my_list.search(name).each do |match|
    # Skip the entry that was just added; it trivially matches itself.
    next if match == name

    puts "-> #{match} is also quite similar"
  end
end
@janxious
Copy link

Seems like processing in 2 should be its own method since you're doing the same thing in two different methods.

@codatory
Copy link
Author

@janxious Yep, it should. Well, actually I probably should've taken the time to subclass Set...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment