Skip to content

Instantly share code, notes, and snippets.

@samueleresca
Last active August 30, 2020 18:17
Show Gist options
  • Save samueleresca/7a5fe4a609db092b2aee881c7bc0fbd0 to your computer and use it in GitHub Desktop.
Save samueleresca/7a5fe4a609db092b2aee881c7bc0fbd0 to your computer and use it in GitHub Desktop.
using deequ;
using deequ.Checks;
using deequ.Extensions;
using Microsoft.Spark.Sql;
namespace DeequExample
{
    /// <summary>
    /// Sample program that loads a JSON data set into a Spark <see cref="DataFrame"/>
    /// and runs a deequ <see cref="VerificationSuite"/> with two groups of checks over it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads <c>inventory.json</c>, prints the data, runs the verification suite and
        /// hands the result to <c>Debug()</c>. The Spark session is stopped on exit,
        /// whether the run completes or throws.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this program.</param>
        static void Main(string[] args)
        {
            SparkSession spark = SparkSession.Builder().GetOrCreate();

            try
            {
                // The path is relative; where it resolves depends on how the job is launched.
                DataFrame data = spark.Read().Json("inventory.json");
                data.Show();

                // Error-level group. NOTE(review): constraint semantics are defined by the
                // deequ library, not visible here — presumably HasSize tests the row count
                // and the remaining calls constrain the named columns; confirm against deequ docs.
                Check integrityChecks = new Check(CheckLevel.Error, "integrity checks")
                    .HasSize(value => value == 5)
                    .IsComplete("id")
                    .IsUnique("id")
                    .IsComplete("productName")
                    .IsContainedIn("priority", new[] { "high", "low" })
                    .IsNonNegative("numViews");

                // Warning-level group. NOTE(review): presumably the predicate receives the
                // fraction of "description" values that contain a URL — confirm.
                Check distributionChecks = new Check(CheckLevel.Warning, "distribution checks")
                    .ContainsURL("description", value => value >= .5);

                VerificationResult verificationResult = new VerificationSuite()
                    .OnData(data)
                    .AddCheck(integrityChecks)
                    .AddCheck(distributionChecks)
                    .Run();

                verificationResult.Debug();
            }
            finally
            {
                // Release the Spark session even if loading or verification fails.
                spark.Stop();
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment