Last active
April 6, 2023 14:39
-
-
Save d1manson/8116f031eccecd87b95e87cd06873c15 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% test enum_distribution(model, column_name, expected, allow_other=False, fudge=0.1) %} | |
SELECT | |
COALESCE(expected.enum_val, actual.enum_val) AS enum_val, | |
expected.fraction AS expected_fraction, | |
actual.fraction AS actual_fraction | |
FROM ( | |
{% for enum_val, expected_fraction in expected.items() -%} | |
SELECT '{{enum_val}}' AS enum_val, {{expected_fraction}} AS fraction | |
{%- if not loop.last %} UNION ALL {% endif %} | |
{% endfor -%} | |
) AS expected | |
FULL OUTER JOIN ( | |
SELECT | |
{{column_name}} AS enum_val, | |
count(*) / (SELECT count(*) AS n FROM {{ model }}) AS fraction | |
FROM {{ model }} | |
GROUP BY {{column_name}} | |
) AS actual USING (enum_val) | |
WHERE (expected.fraction = 0 AND actual.fraction IS NOT NULL) -- expect specific vals to be absent even when allow_other is true | |
OR (expected.fraction > 0 AND COALESCE(actual.fraction, 0) | |
NOT BETWEEN expected.fraction * (1 - {{fudge}}) AND expected.fraction * (1 + {{fudge}})) | |
{%- if not allow_other %} | |
OR (expected.fraction IS NULL AND actual.fraction IS NOT NULL) | |
{% endif -%} | |
{% endtest %} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: 2 | |
models: | |
- name: dwh_something | |
description: imagine this is a table with a few million rows and amongst other things, a 'country' column | |
columns: | |
- name: country | |
tests: | |
- enum_distribution: | |
allow_other: true | |
expected: | |
'usa': 0.3 | |
'united kingdom': 0.1 | |
'rhodezia': 0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This test will fail for any of these reasons:
country:usa
or no rows withcountry:united kingdom
country:usa
is not in the range 27%-33%, or number of rows withcountry:united kingdom
is not in the range 9%-11%country:rhodezia
(note therhodezia: 0
overridesallow_other:true
for the specific case of rhodezia)It will not fail for
country:null
or any other country name not mentioned, but such rows do contribute to the total count when calculating percentages.