Skip to content

Instantly share code, notes, and snippets.

@philerooski
Last active October 4, 2024 18:52
Show Gist options
  • Save philerooski/39ff929b2027a7a57a617bdb6cede6c4 to your computer and use it in GitHub Desktop.
Save philerooski/39ff929b2027a7a57a617bdb6cede6c4 to your computer and use it in GitHub Desktop.
### Does not work
# Names under which the expectation suite and the checkpoint are registered
# with the data context further down in this script.
expectation_suite_name = "my_expectation_suite"
checkpoint_name = "my_checkpoint"
# Data context shared by every step below (suite, datasource, checkpoint).
context = gx.get_context()
# Initialize expectation suite
def init_expectation_suite():
    """Create or replace the expectation suite, persisted with its expectations.

    The suite is registered under the module-level ``expectation_suite_name``
    and contains three expectations:

    * column ``name`` exists;
    * values of column ``age`` lie between 18 and 65;
    * the mean of column ``salary`` lies between 45000 and 80000.

    Returns:
        The suite object returned by
        ``context.add_or_update_expectation_suite``.
    """
    expectation_configurations = [
        # Expect column 'name' to exist
        ExpectationConfiguration(
            expectation_type="expect_column_to_exist",
            kwargs={"column": "name"},
        ),
        # Expect column 'age' to have values between 18 and 65
        ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_between",
            kwargs={"column": "age", "min_value": 18, "max_value": 65},
        ),
        # Expect column 'salary' to have a mean between 45000 and 80000
        ExpectationConfiguration(
            expectation_type="expect_column_mean_to_be_between",
            kwargs={"column": "salary", "min_value": 45000, "max_value": 80000},
        ),
    ]
    # Hand the expectations to the context when the suite is stored. Adding
    # them to the suite object *after* ``context.add_expectation_suite`` only
    # changes the in-memory object; the stored suite (which the checkpoint
    # looks up by name) stays empty unless it is saved again.
    # ``add_or_update`` also keeps the script re-runnable when a suite with
    # this name already exists, matching ``add_or_update_checkpoint`` below.
    return context.add_or_update_expectation_suite(
        expectation_suite_name=expectation_suite_name,
        expectations=expectation_configurations,
    )


expectation_suite = init_expectation_suite()
# Initialize data
def init_batch_request():
    """Build a batch request for a three-row sample DataFrame.

    The frame has the columns ``name``, ``age`` and ``salary``. It is exposed
    through a pandas datasource named ``my_pandas_datasource`` and a dataframe
    asset named ``my_dataframe_asset``, both registered on the module-level
    ``context``.

    Returns:
        The batch request built by the dataframe asset for the sample frame.
    """
    sample_rows = {
        "name": ["Alice", "Bob", "Charlie"],
        "age": [25, 30, 35],
        "salary": [50000, 60000, 70000],
    }
    frame = pd.DataFrame(sample_rows)
    # Register the datasource, then hang the dataframe asset off it.
    asset = context.sources.add_pandas(
        name="my_pandas_datasource"
    ).add_dataframe_asset(name="my_dataframe_asset")
    return asset.build_batch_request(dataframe=frame)


batch_request = init_batch_request()
# Register (or refresh) the checkpoint that ties the suite to the batch request.
checkpoint_settings = {
    "name": checkpoint_name,
    "expectation_suite_name": expectation_suite_name,
    "batch_request": batch_request,
}
checkpoint = context.add_or_update_checkpoint(**checkpoint_settings)

# Label the run with the current local timestamp, taken just before running.
run_label = f"run_{datetime.now()}"
checkpoint_result = checkpoint.run(run_name=run_label)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment