Skip to content

Instantly share code, notes, and snippets.

@JeremyMcCormick
Created July 25, 2024 18:35
Show Gist options
  • Save JeremyMcCormick/ba779527c1e8d239e283699c894bdc17 to your computer and use it in GitHub Desktop.
Save JeremyMcCormick/ba779527c1e8d239e283699c894bdc17 to your computer and use it in GitHub Desktop.
"""Example of using Great Expectations to validate ci_imsim test
data.
"""
import lsst.daf.butler as dafButler
import great_expectations as gx
from great_expectations.data_context import DataContext
from great_expectations.core.batch import BatchRequest
# --- Get the test data frame from the Butler -------------------------------
# The repository path and run collection are hard-coded for the ci_imsim
# test data set.
butler = dafButler.Butler("./DATA", collections=["LSSTCam-imSim/runs/ci_imsim"])

# Dataset type holding the table to validate.
dataset = "objectTable_tract"
# Not used for data retrieval but useful for finding tables in Felis schema
table_name = "Object"

# The skymap name is read from the data ID of the first dataset returned by
# the registry query. Only the first result is consumed (instead of building
# a full list), and an empty result is reported explicitly rather than
# surfacing as a bare IndexError.
first_ref = next(iter(butler.registry.queryDatasets(datasetType=dataset)), None)
if first_ref is None:
    raise RuntimeError(
        f"No '{dataset}' datasets found in the Butler repository; "
        "cannot determine the skymap."
    )
skymap = first_ref.dataId["skymap"]

dataId = {"instrument": "LSSTCam-imSim", "tract": 0, "skymap": skymap}
df = butler.get(dataset, dataId, storageClass="DataFrame")

# --- Setup and run Great Expectations --------------------------------------
context = gx.get_context()
validator = context.sources.pandas_default.read_dataframe(df)

# Single example expectation: the "xErr" column must not contain nulls.
validator.expect_column_values_to_not_be_null("xErr")
# Keep the expectation in the saved suite even if it failed on this data.
validator.save_expectation_suite(discard_failed_expectations=False)

checkpoint = context.add_or_update_checkpoint(
    name="my_quickstart_checkpoint",
    validator=validator,
)
checkpoint_result = checkpoint.run()

# View results in browser (won't work on SSH connection without X-forwarding)
context.view_validation_result(checkpoint_result)

# --- Save validation results to an HTML file -------------------------------
# CHANGEME: Output directory for Great Expectations validation report
base_directory = "/sdf/group/rubin/user/jeremym/git/ci_imsim/gx_output"

# Point the "local_site" data docs entry at a filesystem store rooted at
# base_directory, then build the docs.
# NOTE(review): this mutates the context config in place; confirm this is the
# supported way to register a site for the installed Great Expectations
# version.
context.config.data_docs_sites["local_site"] = {
    "class_name": "SiteBuilder",
    "store_backend": {
        "class_name": "TupleFilesystemStoreBackend",
        "base_directory": base_directory,
    },
    "site_index_builder": {
        "class_name": "DefaultSiteIndexBuilder",
    },
}
context.build_data_docs()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment