Skip to content

Instantly share code, notes, and snippets.

@jszmajda
Created July 26, 2010 21:27
Show Gist options
  • Save jszmajda/491285 to your computer and use it in GitHub Desktop.
Save jszmajda/491285 to your computer and use it in GitHub Desktop.
# Rules used to link a manifest entry to products.
#
# manifest-entry-field => [ search-method, product-data-source, link-weight ]
#
# manifest-entry-field: a field name, or an array of field names (see the
#   "AND" rule below); search-method and product-data-source are then
#   parallel arrays with one element per field.
# search-method: the name of a method
#   [fuzzy_search | manufacturer_search | exact_search]
#   OR a lambda which receives ( the value of manifest-entry-field,
#   a value from product-data-source, link-weight ) and returns the link
#   score: a number above 0 for a match, 0 for no match.
# product-data-source: the name of a product field/method, OR a lambda which
#   receives the product and returns the value(s) to compare against.
# link-weight: the score handed to search-method.
#
# The hash is frozen (shallowly) so the rule table cannot be modified by
# accident at runtime.
LINKING_COLUMNS = {
  :title => [:fuzzy_search,
             :title,
             5],
  #:description => [:fuzzy_search, :description, 10],
  :manufacturer => [:manufacturer_search,
                    lambda { |product| product.manufacturer },
                    8],
  :model_number => [:exact_search,
                    :model_number,
                    20],
  :part_number => [:exact_search,
                   :part_number,
                   20],
  :original_manifest_upc => [lambda { |search, upc, score| upc.value == search ? score : 0 },
                             lambda { |product| product.upcs },
                             50],
  :upc_id => [lambda { |upc_id, upc, score| upc.id == upc_id ? score : 0 },
              lambda { |product| product.upcs },
              100],
  # This does an AND
  [:manufacturer,
   :part_number] => [[:manufacturer_search, :exact_search],
                     [lambda { |p| p.manufacturer }, :part_number],
                     1000],
  # TODO
  #[:client_sku,
  # :client ] => [[:
}.freeze
# Links this manifest entry to every product that satisfies a
# LINKING_COLUMNS rule.
#
# For each product in Product.all and each rule, every field of the rule is
# scored against the values of its product data source. A field matches when
# at least one of those values scores above zero (so a product with several
# UPCs links when any one of them matches). A rule matches only when every
# one of its fields matches. Each matching rule creates one
# manifest_entry_product_links record with :product_id and :link_strength,
# where :link_strength is the best score of the rule's first field.
def try_and_match_products
  Product.all.each do |product|
    LINKING_COLUMNS.each_pair do |field_set, (linking_method_set, linking_datasource_set, score)|
      # Single-field rules use bare values, multi-field rules use parallel
      # arrays; Array() lets both shapes be handled the same way.
      rule_parts = Array(field_set).zip(Array(linking_method_set), Array(linking_datasource_set))

      field_scores = rule_parts.map do |field, linking_method, linking_datasource|
        best_link_score_for_field(product, field, linking_method, linking_datasource, score)
      end

      next if field_scores.empty?
      next unless field_scores.all? { |field_score| field_score > 0 }

      manifest_entry_product_links.create(:product_id => product.id,
                                          :link_strength => field_scores.first)
    end
  end
end

# Returns the highest score any value of the product's data source achieves
# against this entry's +field+, or 0 when either side has no data.
def best_link_score_for_field(product, field, linking_method, linking_datasource, score)
  product_dataset = get_product_data(product, linking_datasource)
  self_data = get_self_data(field)
  return 0 if product_dataset.nil? || self_data.nil?

  scores = Array(product_dataset).compact.map do |product_data|
    link_score_for(product_data, self_data, linking_method, score)
  end
  scores.max || 0
end
# Reads one value from this object by calling the method named +fieldset+
# on it and returning the result.
def get_self_data(fieldset)
  send(fieldset)
end
# Fetches the data to compare from +product+.
#
# +linking_datasource+ is either something callable, which is invoked with
# the product, or the name of a field/method that is sent to the product.
def get_product_data(product, linking_datasource)
  return linking_datasource.call(product) if linking_datasource.respond_to?(:call)

  product.send(linking_datasource)
end
# Scores one product value against one manifest value.
#
# +linking_method+ is either something callable or the name of a method on
# this object; both are invoked with (manifest_data, product_data, score)
# and their result is returned unchanged.
def link_score_for(product_data, manifest_data, linking_method, score)
  search_args = [manifest_data, product_data, score]
  if linking_method.respond_to?(:call)
    linking_method.call(*search_args)
  else
    send(linking_method, *search_args)
  end
end
# Returns +score+ when both values are equal after lower-casing and
# collapsing runs of spaces into a single space; otherwise returns 0.
def exact_search(manifest_data, product_data, score)
  wanted = manifest_data.downcase.squeeze(" ")
  found = product_data.downcase.squeeze(" ")
  return score if wanted == found

  0
end
# Scores word overlap between two values.
#
# Both values are converted to strings and split into words on runs of
# non-word characters. Every (manifest word, product word) pair that is
# equal adds +score+ to the result, so repeated words count once per pair.
# The comparison is case-sensitive. Returns 0 when no word is shared.
def fuzzy_search(manifest_data, product_data, score)
  # Split on /\W+/ and drop empty tokens: adjacent separators such as ", "
  # would otherwise produce "" on both sides, and those empty tokens would
  # match each other and score unrelated values above zero.
  product_word_counts = Hash.new(0)
  product_data.to_s.split(/\W+/).reject(&:empty?).each do |product_word|
    product_word_counts[product_word] += 1
  end

  manifest_data.to_s.split(/\W+/).reject(&:empty?).inject(0) do |total, manifest_word|
    total + product_word_counts[manifest_word] * score
  end
end
# Returns +score+ when the manifest value equals the manufacturer's name,
# comparing both after lower-casing and collapsing runs of spaces into a
# single space; otherwise returns 0.
def manufacturer_search(manifest_data, manufacturer, score)
  wanted = manifest_data.downcase.squeeze(" ")
  actual = manufacturer.name.downcase.squeeze(" ")
  return score if wanted == actual

  0
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment