Skip to content

Instantly share code, notes, and snippets.

@elliottcordo
Created August 4, 2015 11:25
Show Gist options
  • Save elliottcordo/a883894f1defd5ab578c to your computer and use it in GitHub Desktop.
Save elliottcordo/a883894f1defd5ab578c to your computer and use it in GitHub Desktop.
simple non-fuzzy customer matching in postgres
create index ix_customer_first_last on mstr.d_customer(first_name, last_name);
create index ix_email on mstr.d_customer(email);
drop table cust_matches;
--1. first name matches
create temporary table cust_matches as
select c.customer_key as customer_key_1, c.first_name as first_name_1,c.last_name as last_name_1, c.email as email_1,
c1.customer_key as customer_key_2, c1.first_name as first_name_2, c1.last_name as last_name_2, c1.email as email_2,
.5::float as match_score
from mstr.d_customer c
join mstr.d_customer c1 on c.first_name=c1.first_name and c.last_name = c1.last_name
where c.customer_key <> c1.customer_key
and length(c.last_name) > 3 and length(c.first_name) > 3;
--2. now email
insert into cust_matches
select c.customer_key as customer_key_1, c.first_name as first_name_1,c.last_name as last_name_1, c.email as email_1,
c1.customer_key as customer_key_2, c1.first_name as first_name_2, c1.last_name as last_name_2, c1.email as email_2,
.8::float as match_score
from mstr.d_customer c
join mstr.d_customer c1 on c.email = c1.email
where c.customer_key <> c1.customer_key
and c.email like '%@%'
and c.email not in (
select email from mstr.d_customer
group by email, source
having count(1) > 10); --or some reasonable number
--3. now location (with score of .25)
--4. now company (score of .25) - need to figure out how to do this (D&B append?)
--5. once this is all done we will need to dedup this table as you will have matches duplicated from the right and left side
--6. sum resulting scores
--7. recursively "walk" the table to find ultimate parent (selecting lowest number customer_key)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment