Last active
February 15, 2024 01:30
-
-
Save hfleitas/0916b2412a4f1a538df1735e51b7d6ca to your computer and use it in GitHub Desktop.
M06-Demo5-ML.kql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// #connect cluster('igniteadxsource.eastus2').database('Occupancy') | |
#connect cluster('adxpm10774.eastus').database('IoTAnalytics') | |
print("😍 ML 🤖") | |
// Python UnSupervised Learning | |
//Create a cusotm UDF to run K-Means clustering using Python plugin | |
.create-or-alter function with (folder = "Python") kmeans_sf_OccupDetc(tbl:(*),k:int,features:dynamic,cluster_col:string) { | |
let kwargs = pack('k', k, 'features', features, 'cluster_col', cluster_col); | |
let code = | |
'\n' | |
'from sklearn.cluster import KMeans\n' | |
'\n' | |
'k = kargs["k"]\n' | |
'features = kargs["features"]\n' | |
'cluster_col = kargs["cluster_col"]\n' | |
'\n' | |
'km = KMeans(n_clusters=k)\n' | |
'df1 = df[features]\n' | |
'km.fit(df1)\n' | |
'result = df\n' | |
'result[cluster_col] = km.labels_\n'; | |
tbl | |
| evaluate python(typeof(*), code, kwargs) | |
} | |
// Invoke the custom UDF for KMeans clusters | |
Thermostats | |
| where EnqueuedTimeUTC > ago(7d) | |
| extend cluster_id=double(null) | |
| project EnqueuedTimeUTC, DeviceId, Temp, Humidity, cluster_id | |
| invoke kmeans_sf_OccupDetc(3, pack_array("Temp", "Humidity"), "cluster_id") | |
| sample 10 | |
// Python Supervised Model | |
// Custom UDF to score based on pre-trained model | |
.create-or-alter function with (folder = "Python", skipvalidation = "true") classify_sf_OccupDetc(samples:(*), models_tbl:(name:string,timestamp:datetime,model:string), model_name:string, features_cols:dynamic, pred_col:string) { | |
let model_str = toscalar(ML_Models | where name == model_name | top 1 by timestamp desc | project model); | |
let kwargs = pack('smodel', model_str, 'features_cols', features_cols, 'pred_col', pred_col); | |
let code = | |
'\n' | |
'import pickle\n' | |
'import binascii\n' | |
'\n' | |
'smodel = kargs["smodel"]\n' | |
'features_cols = kargs["features_cols"]\n' | |
'pred_col = kargs["pred_col"]\n' | |
'bmodel = binascii.unhexlify(smodel)\n' | |
'clf1 = pickle.loads(bmodel)\n' | |
'df1 = df[features_cols]\n' | |
'predictions = clf1.predict(df1)\n' | |
'\n' | |
'result = df\n' | |
'result[pred_col] = pd.DataFrame(predictions, columns=[pred_col])' | |
'\n' | |
; | |
samples | evaluate python(typeof(*), code, kwargs) | |
} | |
.append ML_Models <| datatable (name: string, timestamp: datetime, model: string) [ | |
"Occupancy",datetime(2019-11-05T15:28:53.010134Z),"800363736b6c6561726e2e6c696e6561725f6d6f64656c2e6c6f6769737469630a4c6f67697374696352656772657373696f6e0a7100298171017d710228580700000070656e616c7479710358020000006c32710458040000006475616c7105895803000000746f6c7106473f1a36e2eb1c432d5801000000437107473ff0000000000000580d0000006669745f696e746572636570747108885811000000696e746572636570745f7363616c696e6771094b01580c000000636c6173735f776569676874710a4e580c00000072616e646f6d5f7374617465710b4e5806000000736f6c766572710c58090000006c69626c696e656172710d58080000006d61785f69746572710e4b64580b0000006d756c74695f636c617373710f58040000007761726e71105807000000766572626f736571114b00580a0000007761726d5f737461727471128958060000006e5f6a6f627371134e5808000000636c61737365735f7114636e756d70792e636f72652e6d756c746961727261790a5f7265636f6e7374727563740a7115636e756d70790a6e6461727261790a71164b00857117430162711887711952711a284b014b0285711b636e756d70790a64747970650a711c58020000006231711d4b004b0187711e52711f284b0358010000007c71204e4e4e4affffffff4affffffff4b007471216289430200017122747123625805000000636f65665f7124681568164b008571256818877126527127284b014b014b05867128681c5802000000663871294b004b0187712a52712b284b0358010000003c712c4e4e4e4affffffff4affffffff4b0074712d6289432883ebdfd50687e0bf2cdaca74fa93a63fd3abc0080e6e943f650656defdad713f18f6a86bd73202bf712e74712f62580a000000696e746572636570745f7130681568164b008571316818877132527133284b014b01857134682b89430808459f57711290bf71357471366258070000006e5f697465725f7137681568164b00857138681887713952713a284b014b0185713b681c58020000006934713c4b004b0187713d52713e284b03682c4e4e4e4affffffff4affffffff4b0074713f628943040c00000071407471416258100000005f736b6c6561726e5f76657273696f6e71425806000000302e32302e33714375622e" | |
] | |
ML_Models | |
| take 10 | |
//Based on the Temp and Humidity - Is the room occupied? | |
Thermostats | |
| where EnqueuedTimeUTC > ago(15m) | |
| extend pred_Occupancy=bool(0) | |
| extend CO2=0, HumidityRatio=0 | |
| invoke classify_sf_OccupDetc(ML_Models, 'Occupancy', pack_array('Temp', 'Humidity', 'BatteryLevel', 'CO2', 'HumidityRatio'), 'pred_Occupancy') | |
// Built-in forecasting & anomaly detection | |
//Lets forcast out 12 hours | |
let start = now()-1d; | |
let end = now(); | |
Thermostats | |
| where EnqueuedTimeUTC between (start .. end) | |
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6' | |
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from start to end+12h step 1m | |
| extend NoGapsTemp=series_fill_linear(AvgTemp) | |
| project EnqueuedTimeUTC, NoGapsTemp | |
| extend forecast = series_decompose_forecast(NoGapsTemp, 720) | |
| render timechart with(title='Forecasting the next 15min by Time Series Decmposition') | |
//Are there any anomalies for this device? | |
let start = now()-24h; | |
let end = now(); | |
Thermostats | |
| where EnqueuedTimeUTC between (start .. end) | |
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6' | |
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from start to end step 1m | |
| extend NoGapsTemp=series_fill_linear(AvgTemp) | |
| project EnqueuedTimeUTC, NoGapsTemp | |
| extend anomalies = series_decompose_anomalies(NoGapsTemp,1) | |
| render anomalychart with(anomalycolumns=anomalies) | |
//Lets make it less sensative | |
Thermostats | |
| where EnqueuedTimeUTC > ago(3d) | |
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6' | |
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from ago(3d) to now() step 1m | |
| extend NoGapsTemp=series_fill_linear(AvgTemp) | |
| project EnqueuedTimeUTC, NoGapsTemp | |
| extend anomalies = series_decompose_anomalies(NoGapsTemp,1.2) | |
| render anomalychart with(anomalycolumns=anomalies) | |
// split stats into panels | |
let start = now()-3d; | |
let end = now(); | |
Thermostats | |
| where EnqueuedTimeUTC between (start .. end) | |
| where DeviceId == '34a08293-348f-47b3-ad6d-2aa1ae7039d6' | |
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from ago(3d) to now() step 1m | |
| extend NoGapsTemp=series_fill_linear(AvgTemp) | |
| project EnqueuedTimeUTC, NoGapsTemp | |
| extend (anomaly, deviation, seasonal) = series_decompose_anomalies(NoGapsTemp,1.2) | |
| render timechart with(ysplit=panels) | |
//What the anomalies I should focus on across all devices? | |
let start = now()-3d; | |
let end = now(); | |
Thermostats | |
| where EnqueuedTimeUTC between (start .. end) | |
| make-series AvgTemp=avg(Temp) default=real(null) on EnqueuedTimeUTC from start to end step 1m by DeviceId | |
| extend NoGapsTemp=series_fill_linear(AvgTemp) | |
| project EnqueuedTimeUTC, DeviceId, NoGapsTemp | |
| extend anomalies = series_decompose_anomalies(NoGapsTemp, 1.5) | |
| mv-expand EnqueuedTimeUTC, anomalies, NoGapsTemp | |
| where anomalies == 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment