Created
February 11, 2020 15:00
-
-
Save omarsar/d3bc4dcb9ebb283f44af7613a2b3fa27 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################
# Helsinki Meetup
# Machine learning in the Elastic Stack
####################
####### Transforms ############
## 1. Check and explore the index you are working with:
# Runs a search with no query body against the sample e-commerce index so you
# can inspect the shape of the raw, per-order documents.
GET kibana_sample_data_ecommerce/_search
## 2. Preview transformation of the data: event-centric to entity-centric
# Pivots the order documents into one row per customer_full_name.keyword and,
# per customer, computes:
#   - products.quantity.sum      : sum of products.quantity
#   - products.taxful_price.sum  : sum of products.taxful_price
#   - order_id.value_count       : number of order_id values
# Uses the `_transform` endpoint; the older `_data_frame/transforms` prefix is
# deprecated as of 7.5.
POST _transform/_preview
{
  "source": {
    "index": [
      "kibana_sample_data_ecommerce"
    ]
  },
  "pivot": {
    "group_by": {
      "customer_full_name.keyword": {
        "terms": {
          "field": "customer_full_name.keyword"
        }
      }
    },
    "aggregations": {
      "products.quantity.sum": {
        "sum": {
          "field": "products.quantity"
        }
      },
      "products.taxful_price.sum": {
        "sum": {
          "field": "products.taxful_price"
        }
      },
      "order_id.value_count": {
        "value_count": {
          "field": "order_id"
        }
      }
    }
  }
}
## 3. Maybe show how this is done in the interface
## 4. Configure the transforms
# Create the transform `ecommerce-customer-sales`. It uses the same source index,
# group_by and aggregations as the preview request, and adds a description and
# a destination index (`ecommerce-customer-sales`) for the pivoted rows.
# Uses the `_transform` endpoint; the older `_data_frame/transforms` prefix is
# deprecated as of 7.5.
PUT _transform/ecommerce-customer-sales
{
  "source": {
    "index": [
      "kibana_sample_data_ecommerce"
    ]
  },
  "pivot": {
    "group_by": {
      "customer_full_name.keyword": {
        "terms": {
          "field": "customer_full_name.keyword"
        }
      }
    },
    "aggregations": {
      "products.quantity.sum": {
        "sum": {
          "field": "products.quantity"
        }
      },
      "products.taxful_price.sum": {
        "sum": {
          "field": "products.taxful_price"
        }
      },
      "order_id.value_count": {
        "value_count": {
          "field": "order_id"
        }
      }
    }
  },
  "description": "Ecommerce sales by customer",
  "dest": {
    "index": "ecommerce-customer-sales"
  }
}
## 5. Start the transformation
# Starts the `ecommerce-customer-sales` transform defined in step 4.
# Uses the `_transform` endpoint; the older `_data_frame/transforms` prefix is
# deprecated as of 7.5.
POST _transform/ecommerce-customer-sales/_start
## 6. Check the results of the transformation
# Searches the transform's destination index to inspect the per-customer rows.
GET ecommerce-customer-sales/_search
####### ML Task: Outlier Detection ############
## 1. Configure outlier detection job
# Creates the data frame analytics job `ecomm`. It reads the per-customer rows
# from `ecommerce-customer-sales` (the transform's destination index) and writes
# results to `ecommerce-outliers`. `outlier_detection` is left empty, so no
# analysis parameters are overridden. Only the three aggregated fields listed
# under `analyzed_fields.includes` are analyzed.
PUT _ml/data_frame/analytics/ecomm
{
  "source": {
    "index": "ecommerce-customer-sales"
  },
  "dest": {
    "index": "ecommerce-outliers"
  },
  "analysis": {
    "outlier_detection": {
    }
  },
  "analyzed_fields": {
    "includes": ["products.quantity.sum", "products.taxful_price.sum", "order_id.value_count"]
  }
}
## 2. Start the job
# Starts the `ecomm` outlier detection job configured in the previous step.
POST _ml/data_frame/analytics/ecomm/_start
## 3. Check status of the job
# Fetches the stats for the `ecomm` job; re-run it to follow the job's progress.
GET _ml/data_frame/analytics/ecomm/_stats
## 4. Check the results
# Searches the job's destination index to inspect the outlier detection output.
GET ecommerce-outliers/_search
####### Task: Classification ############
# Let’s try to predict whether a flight will be delayed or not by using the sample flight data.
# link: https://www.elastic.co/guide/en/machine-learning/7.5/flightdata-classification.html#flightdata-classification
## 0. Check the index
# Runs a search with no query body against the sample flights index to inspect
# the documents the classification job will read.
GET kibana_sample_data_flights/_search
## Details
# FlightDelay: true or false (dependent variable)
# Cancelled: true or false
# FlightDelayMin: minutes
# FlightDelayType: "No Delay", "Weather delay", etc...
## 1. Set up the data frame analytics job
# Creates the classification job `model-flight-delay-classification`:
#   - source: kibana_sample_data_flights
#   - dest:   df-flight-delayed, with results written under the `ml` field
#   - dependent_variable: FlightDelay (the field to predict)
#   - training_percent: 10 (percentage of eligible documents used for training)
#   - Cancelled, FlightDelayMin and FlightDelayType are excluded from analysis
#     (presumably because they reveal whether the flight was delayed)
#   - model_memory_limit: 100mb
PUT _ml/data_frame/analytics/model-flight-delay-classification
{
  "source": {
    "index": [
      "kibana_sample_data_flights"
    ]
  },
  "dest": {
    "index": "df-flight-delayed",
    "results_field": "ml"
  },
  "analysis": {
    "classification": {
      "dependent_variable": "FlightDelay",
      "training_percent": 10
    }
  },
  "analyzed_fields": {
    "includes": [],
    "excludes": [
      "Cancelled",
      "FlightDelayMin",
      "FlightDelayType"
    ]
  },
  "model_memory_limit": "100mb"
}
## 2. Start the job
# Starts the `model-flight-delay-classification` job configured in step 1.
POST _ml/data_frame/analytics/model-flight-delay-classification/_start
## 3. Check the status of the job
# Fetches the stats for the classification job; re-run it to follow progress.
GET _ml/data_frame/analytics/model-flight-delay-classification/_stats
## 4. View classification results
# Searches the job's destination index; the job's output is stored under the
# `ml` results field configured in step 1.
GET df-flight-delayed/_search
## 5. Evaluate results
# Evaluates the predictions on the documents that were used for training
# (ml.is_training: true). Compares the actual `FlightDelay` value with
# `ml.FlightDelay_prediction` and returns a multiclass confusion matrix.
POST _ml/data_frame/_evaluate
{
  "index": "df-flight-delayed",
  "query": {
    "term": {
      "ml.is_training": {
        "value": true
      }
    }
  },
  "evaluation": {
    "classification": {
      "actual_field": "FlightDelay",
      "predicted_field": "ml.FlightDelay_prediction",
      "metrics": {
        "multiclass_confusion_matrix": {}
      }
    }
  }
}
## 6. Calculate generalization error on data not considered during the modeling
## note ml.is_training: false
# Same evaluation as step 5, but restricted to the documents that were NOT used
# for training, so the confusion matrix reflects performance on unseen data.
POST _ml/data_frame/_evaluate
{
  "index": "df-flight-delayed",
  "query": {
    "term": {
      "ml.is_training": {
        "value": false
      }
    }
  },
  "evaluation": {
    "classification": {
      "actual_field": "FlightDelay",
      "predicted_field": "ml.FlightDelay_prediction",
      "metrics": {
        "multiclass_confusion_matrix": {}
      }
    }
  }
}
# end of Helsinki Meetup
### >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment