Skip to content

Instantly share code, notes, and snippets.

@johnw86
Last active October 8, 2024 20:06
Show Gist options
  • Save johnw86/c13d100dec49516182f7c5ba6f1ec7b8 to your computer and use it in GitHub Desktop.
Save johnw86/c13d100dec49516182f7c5ba6f1ec7b8 to your computer and use it in GitHub Desktop.
Kusto Queries
// Get top 5 storm types causing any damage in the state of Virginia during april
StormEvents
| where State == "VIRGINIA"
| where StartTime >= datetime(2007, 04, 01) and EndTime < datetime(2007, 05, 01)
| project EventType, Damage=DamageCrops+DamageProperty
| top 5 by Damage
// Count all records
StormEvents
| count
// See schema of table
StormEvents
| getschema
// Count of records grouped by State
StormEvents
| summarize EventCount = count() by State
| sort by EventCount
// Summarize with overall count, count if, and distinct count of different event types, grouped by State
StormEvents
| summarize count(),
EventsWithDamageToCrops = countif(DamageCrops > 0),
Count = dcount(EventType) by State
| sort by Count
// Distinct EventTypes that occurred in Texas
StormEvents
| where State == "TEXAS"
| distinct EventType
| sort by EventType asc
// Visualize results on a chart
// Chart types columnchart, barchart, piechart, scatterchart, pivotchart
StormEvents
| summarize count(),
EventsWithDamageToCrops = countif(DamageCrops > 0),
dcount(EventType) by State
| sort by count_
| render barchart
// Group all events by the StartTime rounded to 7days
StormEvents
| summarize count() by bin(StartTime, 7d)
| render columnchart
// Sum the total damage caused over 7 days
StormEvents
| extend damage = DamageProperty + DamageCrops
| summarize sum(damage) by bin(StartTime, 7d)
| render columnchart
// Summarize total damage by EventType and display in piechart
StormEvents
| extend damage = DamageProperty + DamageCrops
| summarize sum(damage) by EventType
| render piechart
// Extend the data set with a calculated field, filter, summarize the average damage by EventType and sort
StormEvents
| extend damage = DamageProperty + DamageCrops
| where damage > 0
| summarize avg(damage) by EventType
| sort by avg_damage
// Calculate average with a predicate and use round to clean up the average
StormEvents
| extend damage = DamageProperty + DamageCrops
| summarize round(avgif(damage, damage > 0)) by EventType
| sort by avgif_damage
// Calculate min and max damage along with average. Columns renamed for clearer results
// minif, maxif and sumif all available also to provide predicate which must be true
StormEvents
| extend damage = DamageProperty + DamageCrops
| where damage > 0
| summarize MaxDamage=max(damage), MinDamage=min(damage), AverageDamage=avg(damage) by EventType
| sort by MaxDamage
// Percentiles of damage by EventType
StormEvents
| extend damage = DamageProperty + DamageCrops
| where damage > 0
| summarize percentiles(damage, 5, 20, 50, 80, 95) by EventType
| sort by EventType asc
// let scalar values
let MinDamage = 1; // int
let EventLocation = "ARIZONA"; // string
StormEvents
| where State == EventLocation
| where DamageCrops + DamageProperty >= MinDamage
| summarize Damage=round(avg(DamageProperty + DamageCrops)) by EventType
| sort by Damage
// Convert result to scalar variable for use in another query
let MostFrequentEventType = toscalar(
StormEvents
| summarize count() by EventType
| top 1 by count_
| project EventType);
StormEvents
| where EventType == MostFrequentEventType
| summarize count() by startofmonth(StartTime)
| render columnchart
// Create a let variable set and use that to query on
let KillerStorms =
StormEvents
| where DeathsDirect + DeathsIndirect > 0
| project State, EventType, Deaths=DeathsDirect + DeathsIndirect;
KillerStorms
| summarize DistinctKillerEventTypes=dcount(EventType), TotalDeaths=sum(Deaths) by State
| sort by TotalDeaths
// User defined function
let Pcent = (portion: real, total: real) { round(100 * portion / total, 2) };
StormEvents
| extend Damage = DamageCrops + DamageProperty
| summarize TotalEvents = count(), TotalDamagingEvents = countif(Damage > 0) by EventType
| project EventType, TotalDamagingEvents, TotalEvents, Percentage = Pcent(TotalDamagingEvents, TotalEvents)
| sort by EventType asc
// Total damage in Euros by week
let Eur = 1.14; // conversion rate
StormEvents
| where EventType has "flood"
| extend Damage = (DamageProperty + DamageCrops) / Eur
| summarize sum(Damage) by bin(StartTime, 7d)
| render columnchart
// Simple join between tables
Customers
| join kind=inner SalesFact on CustomerKey
| take 10
// Join table to summarize spending from regions
Customers
| join kind=inner SalesFact on CustomerKey
| summarize TotalAmount = round(sum(SalesAmount)) by RegionCountryName
| top 3 by TotalAmount
// Countries with the lowest sales on a monthly basis
Customers
| join kind=inner SalesFact on CustomerKey
| summarize TotalAmount = round(sum(SalesAmount))
by Month = startofmonth(DateKey), RegionCountryName
| summarize arg_min(TotalAmount, RegionCountryName) by Month
| top 12 by Month desc
// Left outer join to get total count (Not performant)
Products
| join kind=leftouter SalesFact on ProductKey
| summarize TotalSales = count() by ProductCategoryName
| order by TotalSales desc
// Right anti join Products and sales to count products not sold by category
SalesFact
| join kind=rightanti Products on ProductKey
| summarize Count = count() by ProductCategoryName
| order by Count desc
// Lookup to combine data into dataset
SalesFact
| lookup Customers on CustomerKey
| take 10
// Lookup and summarize
SalesFact
| lookup Customers on CustomerKey
| summarize TotalSales = count() by RegionCountryName
| order by TotalSales desc
// Lookup query used for same results as left join. (Lookup operator is optimized for this sort calculation)
SalesFact
| lookup Products on ProductKey
| summarize TotalSales = count() by ProductCategoryName
| order by TotalSales desc
// Union using let
let AustraliaSales = SalesFact // Sales facts from Australia
| lookup Customers on CustomerKey
| where RegionCountryName == 'Australia'
| take 10;
let UnitedKingdomSales = SalesFact // Sales facts from United Kingdom
| lookup Customers on CustomerKey
| where RegionCountryName == 'United Kingdom'
| take 10;
let UnitedStatesSales = SalesFact // Sales facts from United States
| lookup Customers on CustomerKey
| where RegionCountryName == 'United States'
| take 10;
AustraliaSales
| union UnitedKingdomSales, UnitedStatesSales
// Use materialize
// Captures the value of a tabular expression for the duration of the query execution so that it can be referenced multiple times by the query without recalculation.
let Pcent = (portion: real, total: real) { round(100 * portion / total, 2) };
let USCustomers = Customers | where RegionCountryName == 'United States';
let USCustomerSales = materialize(
SalesFact
| summarize USTotalCustomerSales = sum(SalesAmount) by CustomerKey
| join kind=inner USCustomers on CustomerKey);
USCustomerSales
| summarize USTotalStateSales = round(sum(USTotalCustomerSales)) by StateProvinceName
| lookup (
USCustomerSales
| summarize arg_max(USTotalCustomerSales, *) by StateProvinceName
)
on StateProvinceName
| top 10 by USTotalStateSales
| project
StateProvinceName,
StateSales = USTotalStateSales,
TopCustomerFirstName = FirstName,
TopCustomerLastName = LastName,
TopCustomerSales = round(USTotalCustomerSales),
TopCustomerPercentage = Pcent(USTotalCustomerSales, USTotalStateSales)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment