Skip to content

Instantly share code, notes, and snippets.

@samueleresca
Created August 18, 2020 20:52
Show Gist options
  • Save samueleresca/ae0624d30ace040e3cdda41970fa154a to your computer and use it in GitHub Desktop.
Save samueleresca/ae0624d30ace040e3cdda41970fa154a to your computer and use it in GitHub Desktop.
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;
using static Microsoft.Spark.Sql.Functions;
namespace deequ.Analyzers
{
/// <summary>
/// Scan-shareable analyzer, registered under the name "MaxLength", that aggregates
/// the maximum of <c>Length(...)</c> over a column and reports it as a double.
/// </summary>
internal sealed class MaxLength : StandardScanShareableAnalyzer<MaxState>, IFilterableAnalyzer
{
    /// <summary>Name of the column whose values are measured.</summary>
    public readonly string Column;

    /// <summary>
    /// Optional filter expression; it is forwarded to
    /// <c>AnalyzersExt.ConditionalSelection</c> together with the column name.
    /// </summary>
    public readonly Option<string> Where;

    /// <summary>Creates the analyzer for the given column and optional filter.</summary>
    /// <param name="column">Name of the column to analyze.</param>
    /// <param name="where">Optional filter expression.</param>
    public MaxLength(string column, Option<string> where)
        : base("MaxLength", column, Entity.Column)
    {
        Where = where;
        Column = column;
    }

    /// <summary>Exposes the filter this analyzer was constructed with.</summary>
    /// <returns>The value stored in <see cref="Where"/>.</returns>
    public Option<string> FilterCondition()
    {
        return Where;
    }

    /// <summary>
    /// Builds the single aggregation expression used by this analyzer:
    /// <c>Max(Length(selection))</c> cast to "double".
    /// </summary>
    /// <returns>A one-element array holding that aggregation column.</returns>
    public override IEnumerable<Column> AggregationFunctions()
    {
        var selection = AnalyzersExt.ConditionalSelection(Column, Where);
        var longest = Max(Length(selection));
        var longestAsDouble = longest.Cast("double");

        return new[] { longestAsDouble };
    }

    /// <summary>
    /// Converts the aggregated row into a <c>MaxState</c> by reading a double at
    /// <paramref name="offset"/>.
    /// </summary>
    /// <param name="result">Row produced by the aggregation.</param>
    /// <param name="offset">Position in <paramref name="result"/> of this analyzer's value.</param>
    /// <returns>
    /// Whatever <c>AnalyzersExt.IfNoNullsIn</c> yields for the state factory.
    /// NOTE(review): presumably an empty option when the value at the offset is null;
    /// confirm against the helper.
    /// </returns>
    public override Option<MaxState> FromAggregationResult(Row result, int offset)
    {
        return AnalyzersExt.IfNoNullsIn(
            result,
            offset,
            () => new MaxState(result.GetAs<double>(offset)));
    }

    /// <summary>
    /// Schema checks added on top of the base analyzer's preconditions: the
    /// <c>HasColumn</c> check followed by the <c>IsString</c> check, both for
    /// <see cref="Column"/>.
    /// </summary>
    /// <returns>The two precondition actions, in that order.</returns>
    public override IEnumerable<Action<StructType>> AdditionalPreconditions()
    {
        var columnExists = AnalyzersExt.HasColumn(Column);
        var columnIsString = AnalyzersExt.IsString(Column);

        return new[] { columnExists, columnIsString };
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment