Created
July 26, 2010 21:27
-
-
Save jszmajda/491285 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rules used to link a manifest entry to products.
#
#   manifest-entry-field => [ search-method, product-data-source, link-weight ]
#
# search-method: the name of a search method (:fuzzy_search,
#   :manufacturer_search or :exact_search) OR a lambda. Either form is called
#   with ( value of manifest-entry-field, one value from product-data-source,
#   link-weight ) and returns the link score: a positive number on a match,
#   0 otherwise.
# product-data-source: the name of a product attribute/method OR a lambda
#   which receives the product and returns the value(s) to compare against.
# link-weight: the score handed to the search-method for a match.
#
# A key may also be an array of fields; search-method and product-data-source
# are then parallel arrays with one element per field.
LINKING_COLUMNS = {
  :title                 => [:fuzzy_search,
                             :title,
                             5],
  #:description          => [:fuzzy_search, :description, 10],
  :manufacturer          => [:manufacturer_search,
                             lambda { |product| product.manufacturer },
                             8],
  :model_number          => [:exact_search,
                             :model_number,
                             20],
  :part_number           => [:exact_search,
                             :part_number,
                             20],
  :original_manifest_upc => [lambda { |search, upc, score| upc.value == search ? score : 0 },
                             lambda { |product| product.upcs },
                             50],
  :upc_id                => [lambda { |upc_id, upc, score| upc.id == upc_id ? score : 0 },
                             lambda { |product| product.upcs },
                             100],
  # This does an AND
  [:manufacturer,
   :part_number]         => [[:manufacturer_search, :exact_search],
                             [lambda { |p| p.manufacturer }, :part_number],
                             1000],
  # TODO
  #[:client_sku,
  # :client ] => [[:
}.freeze # rules are read-only; freezing guards against accidental mutation
# Creates a manifest_entry_product_links record for every product that
# satisfies a LINKING_COLUMNS rule.
#
# For each product and each rule, every field of the rule is scored against
# the product. A field matches when at least one value from its product data
# source scores above zero, so a product with several UPCs links as soon as
# one of them matches. A rule with several fields only links when all of its
# fields match (an AND). The stored :link_strength is the score of the rule's
# first field.
def try_and_match_products
  # NOTE(review): Product.all loads every product at once; consider batching
  # if the products table is large.
  Product.all.each do |product|
    LINKING_COLUMNS.each_pair do |field_set, (linking_method_set, linking_datasource_set, score)|
      # Single-field rules are stored as scalars; normalise to parallel arrays.
      fields              = Array(field_set)
      linking_methods     = Array(linking_method_set)
      linking_datasources = Array(linking_datasource_set)

      field_scores = fields.zip(linking_methods, linking_datasources).map do |field, linking_method, linking_datasource|
        best_link_score(product, field, linking_method, linking_datasource, score)
      end

      # Every field of the rule has to match before a link is recorded.
      next if field_scores.empty? || field_scores.any? { |field_score| field_score <= 0 }

      manifest_entry_product_links.create(:product_id => product.id, :link_strength => field_scores.first)
    end
  end
end

# Returns the highest score any value of the product's data source achieves
# against this entry's +field+, or 0 when either side has no data.
def best_link_score(product, field, linking_method, linking_datasource, score)
  product_dataset = get_product_data(product, linking_datasource)
  self_data = get_self_data(field)
  return 0 if product_dataset.nil? || self_data.nil?

  scores = Array(product_dataset).compact.map do |product_data|
    link_score_for(product_data, self_data, linking_method, score)
  end
  scores.max || 0 # an empty data source (e.g. no UPCs) never matches
end
private :best_link_score
# Reads this object's own value for a linking field by invoking the method
# named by +fieldset+ (a single field name, despite the parameter's name).
def get_self_data(fieldset)
  send(fieldset)
end
# Fetches the product-side value(s) to compare against.
#
# +linking_datasource+ is either a callable, which is handed the product, or
# the name of a method to invoke on the product.
def get_product_data(product, linking_datasource)
  return linking_datasource.call(product) if linking_datasource.respond_to?(:call)

  # Not callable, so treat it as a product field or method name.
  product.send(linking_datasource)
end
# Scores one product value against one manifest value.
#
# +linking_method+ is either a callable or the name of a method on this
# object; both are invoked with (manifest_data, product_data, score) and
# their result is returned unchanged.
def link_score_for(product_data, manifest_data, linking_method, score)
  arguments = [manifest_data, product_data, score]
  callable = linking_method.respond_to?(:call)
  callable ? linking_method.call(*arguments) : send(linking_method, *arguments)
end
# Returns +score+ when both values are equal after lower-casing and
# collapsing runs of spaces, otherwise 0.
#
# Values are converted with to_s first so non-String data (for example a
# numeric part number) is compared rather than raising NoMethodError; a nil
# on either side is never a match.
def exact_search(manifest_data, product_data, score)
  return 0 if manifest_data.nil? || product_data.nil?

  normalize = lambda { |value| value.to_s.downcase.squeeze(" ") }
  normalize.call(manifest_data) == normalize.call(product_data) ? score : 0
end
# Word-overlap score: returns +score+ for every (manifest word, product word)
# pair that is identical, so repeated words count once per pairing. Returns 0
# when no word is shared. The comparison is case-sensitive.
#
# Both values are converted with to_s and split on runs of non-word
# characters. Empty tokens are discarded; otherwise adjacent punctuation or
# spaces ("Widget - Large") would yield empty strings that match each other
# and report a link between texts with no word in common.
def fuzzy_search(manifest_data, product_data, score)
  manifest_words = manifest_data.to_s.split(/\W+/).reject(&:empty?)
  product_words = product_data.to_s.split(/\W+/).reject(&:empty?)

  matching_pairs = manifest_words.inject(0) do |pairs, manifest_word|
    pairs + product_words.count(manifest_word)
  end
  matching_pairs * score
end
# Returns +score+ when the manifest's manufacturer text equals
# +manufacturer.name+ after lower-casing and collapsing runs of spaces,
# otherwise 0.
#
# A nil manifest value or a manufacturer without a name is never a match, and
# both sides are converted with to_s so non-String values do not raise.
def manufacturer_search(manifest_data, manufacturer, score)
  manufacturer_name = manufacturer.name
  return 0 if manifest_data.nil? || manufacturer_name.nil?

  wanted = manifest_data.to_s.downcase.squeeze(" ")
  actual = manufacturer_name.to_s.downcase.squeeze(" ")
  wanted == actual ? score : 0
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment