Created
October 4, 2024 17:51
-
-
Save bukzor/ff7e600caf5aa8aa2074a750884167f7 to your computer and use it in GitHub Desktop.
Unit testing for bigquery SQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env py.test | |
from __future__ import annotations | |
import subprocess | |
from pathlib import Path | |
from typing import IO | |
from typing import Iterable | |
import pandas | |
from _pytest.python import Metafunc | |
from pytest import fixture | |
from devinfra_metrics.gcp_clients import bigquery | |
# for cleaner pytest tracebacks | |
subprocess.__tracebackhide__ = ( # pyright:ignore[reportAttributeAccessIssue] | |
True | |
) | |
QUERY_TEMPLATE = """\ | |
WITH | |
# input sql | |
{input_sql} | |
# query under test | |
{query} | |
# prevent enormous results | |
LIMIT 100 | |
""" | |
def all_query_under_test() -> Iterable[Path]: | |
return Path(__file__).parent.glob("*.sql") | |
def all_input_sql_for(query_under_test: Path) -> Iterable[Path]: | |
testdata = query_under_test.parent / (query_under_test.name + ".test") | |
for input_sql in testdata.glob("*.input.sql"): | |
input_sql = ( # prettier and git-diff work better w/o symlinks | |
input_sql.resolve() | |
) | |
yield input_sql | |
def get_testid(path: Path): | |
return ( | |
path.name.removesuffix(".sql") | |
.removesuffix(".bq") | |
.removesuffix(".input") | |
) | |
def pytest_generate_tests(metafunc: Metafunc): | |
if ( | |
"query_under_test" in metafunc.fixturenames | |
and "input_sql" in metafunc.fixturenames | |
): | |
metafunc.parametrize( | |
argnames=["query_under_test", "input_sql"], | |
argvalues=[ | |
(query_under_test, input_sql) | |
for query_under_test in all_query_under_test() | |
for input_sql in all_input_sql_for(query_under_test) | |
], | |
ids=get_testid, | |
) | |
@fixture | |
def test_query(input_sql: Path, query_under_test: Path) -> str: | |
query = query_under_test.read_text() | |
before, sep, after = ("\n" + query).partition("\nWITH\n") | |
if sep: | |
query = "," + after | |
else: | |
query = before | |
test_query = QUERY_TEMPLATE.format( | |
input_sql=input_sql.read_text().replace("\n", "\n "), query=query | |
) | |
print(test_query) # very helpful for debug | |
return test_query | |
@fixture | |
def bigquery_results(test_query: str) -> pandas.DataFrame: | |
query_result = bigquery.query(test_query).result() | |
return query_result.to_dataframe() # type:ignore | |
def sh_run( | |
cmd: tuple[str, ...], | |
input: str | None = None, | |
stdout: IO[str] | None = None, | |
): | |
__tracebackhide__ = True | |
proc = subprocess.run( | |
("sh", "-exc", '"$@"', "-") + cmd, | |
input=input, | |
stdout=stdout, | |
encoding="UTF-8", | |
) | |
if proc.returncode != 0: | |
import shlex | |
raise subprocess.CalledProcessError(proc.returncode, shlex.join(cmd)) | |
@fixture | |
def output_json(input_sql: Path, bigquery_results: pandas.DataFrame) -> Path: | |
result = input_sql.with_name( | |
input_sql.name.removesuffix(".input.sql") + ".output.json" | |
) | |
if not result.exists(): | |
result.touch() | |
if result.read_text() == "": | |
# it's too easy to forget, otherwise | |
sh_run(("git", "add", str(result))) | |
with result.open("w") as f: | |
bigquery_results.to_json( # type:ignore | |
f, orient="table", indent=2 | |
) | |
print(bigquery_results) | |
return result | |
def assert_expected(output_json: Path): | |
__tracebackhide__ = True | |
# workaround for: https://github.com/prettier/prettier/issues/16683 | |
sh_run( | |
("prettier", "--parser=json"), | |
input=output_json.read_text(), | |
stdout=output_json.open("w"), | |
) | |
sh_run(( | |
"git", | |
"diff", | |
"--color=always", | |
"--no-relative", | |
"--exit-code", | |
str(output_json), | |
)) | |
class DescribeGocdBigqueryView: | |
def it_works_for_examples(self, output_json: Path): | |
assert_expected(output_json) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment