Created
September 23, 2021 00:41
-
-
Save andrewdoss-bit/45c20cea84dd462bfe532c2a13df82d6 to your computer and use it in GitHub Desktop.
Data tests
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cases_vs_deaths(df):
    """Check that the death count never exceeds the case count.

    Args:
        df: DataFrame with 'deaths' and 'cases' columns.

    Returns:
        True if ``deaths <= cases`` holds on every row, else False.
        A row where either value is NaN compares as False and therefore
        fails the check.
    """
    within_cases = df['deaths'] <= df['cases']
    # bool() converts numpy.bool_ to a built-in bool so callers can rely on
    # identity checks (``is True``) and plain serialization.
    return bool(within_cases.all())
def unique_records(df):
    """Check that every (date, fips) combination appears at most once.

    Args:
        df: DataFrame with 'date' and 'fips' columns.

    Returns:
        True if no two rows share the same 'date' and 'fips' values,
        else False.
    """
    # duplicated() flags every repeat of an earlier (date, fips) pair, so
    # the records are unique exactly when nothing is flagged. This avoids
    # materializing a de-duplicated copy just to count its rows.
    repeats = df.duplicated(subset=['date', 'fips'])
    return not repeats.any()
def no_nulls_test(df):
    """Check that the frame contains no null elements.

    Args:
        df: DataFrame to inspect.

    Returns:
        True if no cell of ``df`` is null (as reported by ``isnull``),
        else False.
    """
    # any() answers the question directly instead of counting every null
    # and comparing the total to zero; ``not`` yields a built-in bool.
    return not df.isnull().values.any()
def range_test(series, min, max):
    """Check that every value in a series lies within [min, max], inclusive.

    Args:
        series: Values to check; must support element-wise ``>=`` / ``<=``
            comparison followed by ``.all()``.
        min: Inclusive lower bound.
        max: Inclusive upper bound.

    Returns:
        True if all values satisfy ``min <= value <= max``, else False.
        NaN values compare as False and therefore fail the check.
    """
    # NOTE: ``min`` and ``max`` shadow the builtins inside this function;
    # the names are kept so existing keyword callers continue to work.
    at_or_above_min = (series >= min).all()
    at_or_below_max = (series <= max).all()
    # bool() converts numpy.bool_ to a built-in bool.
    return bool(at_or_above_min and at_or_below_max)
def cases_range_test(df):
    """Check that all case counts are non-negative and at most 10 million.

    Args:
        df: DataFrame with a 'cases' column.

    Returns:
        The result of ``range_test`` on the 'cases' column with inclusive
        bounds 0 and 10e6.
    """
    # 10e6 == 10,000,000 (ten million), not one million.
    return range_test(df['cases'], 0, 10e6)
def deaths_range_test(df):
    """Check that all death counts are non-negative and at most 100 thousand.

    Args:
        df: DataFrame with a 'deaths' column.

    Returns:
        The result of ``range_test`` on the 'deaths' column with inclusive
        bounds 0 and 1e5.
    """
    # 1e5 == 100,000.
    return range_test(df['deaths'], 0, 1e5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment