Skip to content

Instantly share code, notes, and snippets.

@senaev
Last active August 13, 2022 09:46
Show Gist options
  • Select an option

  • Save senaev/c9e1c833bc68d3aafbadc9832f02c995 to your computer and use it in GitHub Desktop.

Select an option

Save senaev/c9e1c833bc68d3aafbadc9832f02c995 to your computer and use it in GitHub Desktop.
DECLARE $date AS String;
DECLARE $cluster AS String;
DECLARE $yt_pool AS String;
PRAGMA yt.Pool=$yt_pool;
USE yt:$cluster;
pragma yt.PoolTrees = "physical";
pragma yt.TentativePoolTrees = "cloud";
pragma yt.DefaultOperationWeight = "100.0";
pragma SimpleColumns;
pragma yson.DisableStrict;
pragma AnsiInForEmptyOrNullableItemsCollections;
pragma RegexUseRe2 = "true";
$inputDirectoryPath = '//logs/strm-gogol-log/1d';
$outputDirectoryPath = '//home/pcode/video-ads-sdk-stat-graphs/values';
$getString = ($json, $fieldPath) -> {
return Yson::ConvertToString(Yson::YPath(Yson::ParseJson($json), $fieldPath));
};
$getPlatformFromUA = ($useragent) -> {
$parsed = UserAgent::Parse($useragent);
RETURN CASE
WHEN $parsed.isRobot == true THEN "Robot"
WHEN $parsed.isMobile == false and $parsed.isTV == false THEN "Desktop"
WHEN $parsed.isMobile == true and $parsed.OSFamily == "Android" then "Android"
WHEN $parsed.isMobile == true and $parsed.OSFamily == "iOS" then "iOS"
ELSE "Unrecognised"
END
};
$getBundleName = ($data) -> {
return IF(
$getString($data, '/pcodeStats/format') is not null and $getString($data, '/pcodeStats/format') != 'inpage',
'Video_In_TGO',
$getString($data, '/bundleName')
);
};
$getValues = ($eventName, $data) -> {
$valuesDict = Yson::ConvertToDict(Yson::YPath(Yson::ParseJson($data), "/values"));
return ToDict(
ListMap(
DictItems($valuesDict),
($x) -> {
return (ListConcat(['values_', $eventName, '_', $x.0]), $x.1);
}
)
);
};
$allBeforeFlatten = (
select
$getValues(eventName, data) as Values,
$getPlatformFromUA(userAgent) as Platform,
$getBundleName(data) as Bundle
from regexp($inputDirectoryPath, $date)
where service = 'VAS'
);
$all = (
select
Values.0 as ValueName,
Yson::ConvertToDouble(Values.1) as Value,
Bundle,
Platform
from $allBeforeFlatten
flatten dict by Values
);
define subquery $report($table) as
select
ValueName,
PERCENTILE(Value, 0.01) ?? 0 AS p01,
PERCENTILE(Value, 0.05) ?? 0 AS p05,
PERCENTILE(Value, 0.10) ?? 0 AS p10,
PERCENTILE(Value, 0.25) ?? 0 AS p25,
PERCENTILE(Value, 0.50) ?? 0 AS p50,
PERCENTILE(Value, 0.75) ?? 0 AS p75,
PERCENTILE(Value, 0.90) ?? 0 AS p90,
PERCENTILE(Value, 0.95) ?? 0 AS p95,
PERCENTILE(Value, 0.99) ?? 0 AS p99,
count(*) as cnt,
Bundle,
Platform,
$date as EventDate
from $table()
group by ValueName, Bundle, Platform;
end define;
define subquery $by_overall() as
select
ValueName,
Value,
'ALL' as Bundle,
'ALL' as Platform
from $all
end define;
define subquery $by_Platform() as
select
ValueName,
Value,
'ALL' as Bundle,
Platform
from $all
end define;
define subquery $by_Bundle() as
select
ValueName,
Value,
Bundle,
'ALL' as Platform
from $all
end define;
define subquery $by_Platform_Bundle() as
select
ValueName,
Value,
Bundle,
Platform
from $all
end define;
$outputTable = (
select * from $report($by_overall)
union all
select * from $report($by_Platform)
union all
select * from $report($by_Bundle)
union all
select * from $report($by_Platform_Bundle)
);
$outputPath = $outputDirectoryPath || '/' || $date;
INSERT INTO $outputPath WITH TRUNCATE SELECT * FROM $outputTable;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment