Last active
August 13, 2022 09:46
-
-
Save senaev/c9e1c833bc68d3aafbadc9832f02c995 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| DECLARE $date AS String; | |
| DECLARE $cluster AS String; | |
| DECLARE $yt_pool AS String; | |
| PRAGMA yt.Pool=$yt_pool; | |
| USE yt:$cluster; | |
| pragma yt.PoolTrees = "physical"; | |
| pragma yt.TentativePoolTrees = "cloud"; | |
| pragma yt.DefaultOperationWeight = "100.0"; | |
| pragma SimpleColumns; | |
| pragma yson.DisableStrict; | |
| pragma AnsiInForEmptyOrNullableItemsCollections; | |
| pragma RegexUseRe2 = "true"; | |
| $inputDirectoryPath = '//logs/strm-gogol-log/1d'; | |
| $outputDirectoryPath = '//home/pcode/video-ads-sdk-stat-graphs/values'; | |
| $getString = ($json, $fieldPath) -> { | |
| return Yson::ConvertToString(Yson::YPath(Yson::ParseJson($json), $fieldPath)); | |
| }; | |
| $getPlatformFromUA = ($useragent) -> { | |
| $parsed = UserAgent::Parse($useragent); | |
| RETURN CASE | |
| WHEN $parsed.isRobot == true THEN "Robot" | |
| WHEN $parsed.isMobile == false and $parsed.isTV == false THEN "Desktop" | |
| WHEN $parsed.isMobile == true and $parsed.OSFamily == "Android" then "Android" | |
| WHEN $parsed.isMobile == true and $parsed.OSFamily == "iOS" then "iOS" | |
| ELSE "Unrecognised" | |
| END | |
| }; | |
| $getBundleName = ($data) -> { | |
| return IF( | |
| $getString($data, '/pcodeStats/format') is not null and $getString($data, '/pcodeStats/format') != 'inpage', | |
| 'Video_In_TGO', | |
| $getString($data, '/bundleName') | |
| ); | |
| }; | |
| $getValues = ($eventName, $data) -> { | |
| $valuesDict = Yson::ConvertToDict(Yson::YPath(Yson::ParseJson($data), "/values")); | |
| return ToDict( | |
| ListMap( | |
| DictItems($valuesDict), | |
| ($x) -> { | |
| return (ListConcat(['values_', $eventName, '_', $x.0]), $x.1); | |
| } | |
| ) | |
| ); | |
| }; | |
| $allBeforeFlatten = ( | |
| select | |
| $getValues(eventName, data) as Values, | |
| $getPlatformFromUA(userAgent) as Platform, | |
| $getBundleName(data) as Bundle | |
| from regexp($inputDirectoryPath, $date) | |
| where service = 'VAS' | |
| ); | |
| $all = ( | |
| select | |
| Values.0 as ValueName, | |
| Yson::ConvertToDouble(Values.1) as Value, | |
| Bundle, | |
| Platform | |
| from $allBeforeFlatten | |
| flatten dict by Values | |
| ); | |
| define subquery $report($table) as | |
| select | |
| ValueName, | |
| PERCENTILE(Value, 0.01) ?? 0 AS p01, | |
| PERCENTILE(Value, 0.05) ?? 0 AS p05, | |
| PERCENTILE(Value, 0.10) ?? 0 AS p10, | |
| PERCENTILE(Value, 0.25) ?? 0 AS p25, | |
| PERCENTILE(Value, 0.50) ?? 0 AS p50, | |
| PERCENTILE(Value, 0.75) ?? 0 AS p75, | |
| PERCENTILE(Value, 0.90) ?? 0 AS p90, | |
| PERCENTILE(Value, 0.95) ?? 0 AS p95, | |
| PERCENTILE(Value, 0.99) ?? 0 AS p99, | |
| count(*) as cnt, | |
| Bundle, | |
| Platform, | |
| $date as EventDate | |
| from $table() | |
| group by ValueName, Bundle, Platform; | |
| end define; | |
| define subquery $by_overall() as | |
| select | |
| ValueName, | |
| Value, | |
| 'ALL' as Bundle, | |
| 'ALL' as Platform | |
| from $all | |
| end define; | |
| define subquery $by_Platform() as | |
| select | |
| ValueName, | |
| Value, | |
| 'ALL' as Bundle, | |
| Platform | |
| from $all | |
| end define; | |
| define subquery $by_Bundle() as | |
| select | |
| ValueName, | |
| Value, | |
| Bundle, | |
| 'ALL' as Platform | |
| from $all | |
| end define; | |
| define subquery $by_Platform_Bundle() as | |
| select | |
| ValueName, | |
| Value, | |
| Bundle, | |
| Platform | |
| from $all | |
| end define; | |
| $outputTable = ( | |
| select * from $report($by_overall) | |
| union all | |
| select * from $report($by_Platform) | |
| union all | |
| select * from $report($by_Bundle) | |
| union all | |
| select * from $report($by_Platform_Bundle) | |
| ); | |
| $outputPath = $outputDirectoryPath || '/' || $date; | |
| INSERT INTO $outputPath WITH TRUNCATE SELECT * FROM $outputTable; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment