Last active
September 13, 2024 14:09
-
-
Save pontusab/d966d53dfe22822dc78f3637c893c27f to your computer and use it in GitHub Desktop.
Match
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This SQL code defines functions for calculating similarity scores optimized for reconciliation | |
-- between transactions and inbox items (e.g., invoices). It focuses on high precision to avoid incorrect matches. | |
-- Function: calculate_name_similarity_score | |
-- Purpose: Calculates similarity score between transaction and inbox item names | |
-- Input: transaction_name (text), inbox_name (text) | |
-- Output: numeric (0 to 0.6) | |
create or replace function calculate_name_similarity_score( | |
transaction_name text, | |
inbox_name text | |
) returns numeric as $$ | |
declare | |
name_similarity numeric; | |
similarity_score numeric := 0; | |
begin | |
if transaction_name is null or inbox_name is null then | |
return 0; | |
end if; | |
name_similarity := similarity(lower(transaction_name), lower(inbox_name)); | |
similarity_score := 0.4 * name_similarity; -- Base score is 40% of calculated similarity | |
if name_similarity > 0.95 then -- Bonus for very high similarity | |
similarity_score := similarity_score + 0.2; | |
end if; | |
return round(least(similarity_score, 0.6), 2); -- Cap at 0.6 | |
end; | |
$$ language plpgsql; | |
-- Function: calculate_amount_similarity | |
-- Purpose: Calculates similarity score based on transaction and inbox amounts | |
-- Input: transaction_currency (text), inbox_currency (text), transaction_amount (numeric), inbox_amount (numeric) | |
-- Output: numeric (0 to 1) | |
create or replace function calculate_amount_similarity( | |
transaction_currency text, | |
inbox_currency text, | |
transaction_amount numeric, | |
inbox_amount numeric | |
) returns numeric as $$ | |
declare | |
similarity_score numeric := 0; | |
relative_difference numeric; | |
abs_transaction_amount numeric; | |
abs_inbox_amount numeric; | |
begin | |
if transaction_currency = inbox_currency then | |
abs_transaction_amount := abs(transaction_amount); | |
abs_inbox_amount := abs(inbox_amount); | |
relative_difference := abs(abs_transaction_amount - abs_inbox_amount) / greatest(abs_transaction_amount, abs_inbox_amount, 1); | |
if relative_difference < 0.001 then -- Exact match | |
similarity_score := 1; | |
elsif relative_difference < 0.01 then -- Very close match | |
similarity_score := 0.9; | |
else | |
similarity_score := 1 - least(relative_difference, 1); | |
similarity_score := similarity_score * similarity_score * 0.5; -- Quadratic scaling | |
end if; | |
end if; | |
return round(least(similarity_score, 1), 2); | |
end; | |
$$ language plpgsql; | |
-- Function: calculate_date_similarity | |
-- Purpose: Calculates similarity score based on date difference | |
-- Input: transaction_date (date), inbox_date (date) | |
-- Output: numeric (0 to 0.7) | |
create or replace function calculate_date_similarity( | |
transaction_date date, | |
inbox_date date | |
) returns numeric as $$ | |
declare | |
date_difference integer; | |
similarity_score numeric := 0; | |
begin | |
date_difference := abs(transaction_date - inbox_date); | |
case | |
when date_difference = 0 then similarity_score := 0.7 | |
when date_difference <= 1 then similarity_score := 0.5 | |
when date_difference <= 3 then similarity_score := 0.3 | |
when date_difference <= 7 then similarity_score := 0.1 | |
else similarity_score := 0 | |
end case; | |
return similarity_score; | |
end; | |
$$ language plpgsql; | |
-- Function: calculate_overall_similarity | |
-- Purpose: Calculates overall similarity between a transaction and an inbox item | |
-- Input: transaction_record (record), inbox_record (record) | |
-- Output: numeric (0 to 1) | |
create or replace function calculate_overall_similarity(transaction_record record, inbox_record record) | |
returns numeric as $$ | |
declare | |
overall_score numeric := 0; | |
amount_score numeric; | |
date_score numeric; | |
name_score numeric; | |
begin | |
-- Calculate individual scores | |
amount_score := calculate_amount_similarity( | |
transaction_record.currency, | |
inbox_record.currency, | |
transaction_record.amount, | |
inbox_record.amount | |
); | |
date_score := calculate_date_similarity(transaction_record.date, inbox_record.date); | |
name_score := calculate_name_similarity_score(transaction_record.name, inbox_record.display_name); | |
-- Weighted combination of scores (60% amount, 20% date, 20% name) | |
overall_score := (amount_score * 0.6) + (date_score * 0.2) + (name_score * 0.2); | |
-- Bonus for perfect amount match | |
if amount_score = 1 then | |
overall_score := overall_score + 0.1; | |
end if; | |
return least(overall_score, 1); | |
end; | |
$$ language plpgsql; | |
-- Function: find_matching_inbox_item | |
-- Purpose: Finds matching inbox item for a given transaction | |
-- Input: transaction_id (uuid), specific_inbox_id (uuid, optional) | |
-- Output: table (inbox_id, transaction_id, transaction_name, similarity_score, file_name) | |
create or replace function find_matching_inbox_item( | |
transaction_id uuid, | |
specific_inbox_id uuid default null | |
) returns table ( | |
inbox_id uuid, | |
transaction_id uuid, | |
transaction_name text, | |
similarity_score numeric, | |
file_name text | |
) as $$ | |
declare | |
transaction_data record; | |
inbox_data record; | |
calculated_score numeric; | |
similarity_threshold numeric := 0.95; -- High threshold for precision | |
begin | |
-- Fetch transaction data | |
select t.* | |
into transaction_data | |
from transactions t | |
where t.id = transaction_id; | |
if specific_inbox_id is not null then | |
-- Check for a specific inbox item | |
select * | |
into inbox_data | |
from inbox | |
where id = specific_inbox_id | |
and team_id = transaction_data.team_id | |
and status = 'pending'; | |
if inbox_data.id is not null then | |
calculated_score := calculate_overall_similarity(transaction_data, inbox_data); | |
if calculated_score >= similarity_threshold then | |
return query select specific_inbox_id, transaction_id, transaction_data.name, calculated_score, inbox_data.file_name; | |
end if; | |
end if; | |
else | |
-- Find best matching inbox item | |
return query | |
select | |
i.id as inbox_id, | |
transaction_data.id as transaction_id, | |
transaction_data.name as transaction_name, | |
calculate_overall_similarity(transaction_data, i.*) as similarity_score, | |
i.file_name | |
from inbox i | |
where | |
i.team_id = transaction_data.team_id | |
and i.status = 'pending' | |
and calculate_overall_similarity(transaction_data, i.*) >= similarity_threshold | |
order by | |
calculate_overall_similarity(transaction_data, i.*) desc, | |
abs(i.date - transaction_data.date) asc | |
limit 1; -- Return only the best match | |
end if; | |
end; | |
$$ language plpgsql; | |
-- Function: trigger_matching_process | |
-- Purpose: Triggers the matching process for new transactions or inbox items | |
-- Input: None (triggered by INSERT or UPDATE operations) | |
-- Output: trigger | |
create or replace function trigger_matching_process() returns trigger as $$ | |
begin | |
if tg_table_name = 'transactions' then | |
perform find_matching_inbox_item(new.id); | |
elsif tg_table_name = 'inbox' then | |
perform find_matching_inbox_item(null, new.id); | |
end if; | |
return new; | |
end; | |
$$ language plpgsql; | |
-- Trigger: auto_match_new_transaction | |
-- Purpose: Automatically matches new transactions | |
create trigger auto_match_new_transaction | |
after insert on transactions | |
for each row execute function trigger_matching_process(); | |
-- Trigger: auto_match_updated_inbox_item | |
-- Purpose: Automatically matches new or updated inbox items | |
create trigger auto_match_updated_inbox_item | |
after insert or update of amount, date, currency on inbox | |
for each row | |
when (new.amount is not null) | |
execute function trigger_matching_process(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment