Last active
September 20, 2020 07:33
-
-
Save AshHimself/6b4478cc43ca1b13ae7c17fdba53a34d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the pandas DataFrame into great_expectations (GE) so that expectations
# can be declared on it and validated together below.
df_ge = ge.dataset.PandasDataset(df)

# Define the fields that should exist and the order they should appear in.
df_ge.expect_table_columns_to_match_ordered_list(
    [
        'flight_number',
        'date',
        'time_utc',
        'booster_version',
        'launch_site',
        'payload',
        'customer',
        'outcome',
    ]
)

# flight_number should ALWAYS be unique: we don't want duplicate data in our
# warehouse.
df_ge.expect_column_values_to_be_unique('flight_number')

# flight_number should never be null.
df_ge.expect_column_values_to_not_be_null('flight_number')

# flight_number should be an integer.
df_ge.expect_column_values_to_be_of_type('flight_number', 'int64')

# Validate the data against the expectations declared above.
validation_results = df_ge.validate()

# validation_results["success"] is expected to be True when the data is valid.
if validation_results["success"]:
    print('All assertions have passed! :)')
else:
    print('Some assertions have failed! :(')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment