Skip to content

Instantly share code, notes, and snippets.

@rhoboro
Last active March 8, 2019 01:24
Show Gist options
  • Save rhoboro/181380bcd3c8319115eaf66be549985a to your computer and use it in GitHub Desktop.
Save rhoboro/181380bcd3c8319115eaf66be549985a to your computer and use it in GitHub Desktop.
Stackdriverを使ってGCEインスタンスの長時間起動を抑制する
"""
Cloud Function: DeleteInstance
google-api-python-client==1.7.8
Environment Variable
AUTH_TOKEN: the shared secret that the Stackdriver Notification WebHook passes as the auth_token query parameter
"""
import json
import os
from flask import Response
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
def handler(request):
    """Receive an incident notification and process it.

    Called from a Stackdriver Notification (WebHook). The caller must pass
    the shared secret as the ``auth_token`` query parameter; it is compared
    with the ``AUTH_TOKEN`` environment variable.

    Args:
        request: The Flask request object of the HTTP-triggered function.

    Returns:
        A Flask ``Response``: 403 when the token is missing or wrong (or
        ``AUTH_TOKEN`` is not configured), 400 when the body is not a JSON
        object containing ``incident``, otherwise ``OK``.
    """
    import hmac  # local import: only this function needs it

    expected_token = os.getenv('AUTH_TOKEN', '')
    auth_token = request.args.get('auth_token') or ''
    # Fail closed: with AUTH_TOKEN unset, an empty ``?auth_token=`` would
    # otherwise compare equal to the '' default and be accepted.
    # compare_digest keeps the comparison constant-time; bytes are used
    # because it rejects non-ASCII str arguments.
    token_ok = bool(expected_token) and hmac.compare_digest(
        auth_token.encode('utf-8'),
        expected_token.encode('utf-8'),
    )
    if not token_ok:
        error_msg = '403 Please pass the correct authentication token'
        print(error_msg)
        return Response(error_msg, 403)

    # Valid Request Only
    try:
        json_data = json.loads(request.data)
    except ValueError:
        # JSONDecodeError and UnicodeDecodeError are both ValueError.
        error_msg = '400 Request body is not valid JSON'
        print(error_msg)
        return Response(error_msg, 400)
    print(json.dumps(json_data, indent=2))

    if not isinstance(json_data, dict) or 'incident' not in json_data:
        error_msg = '400 Request body does not contain an incident'
        print(error_msg)
        return Response(error_msg, 400)

    # Run the action for the received incident.
    _process(incident=json_data['incident'])
    return Response('OK')
def _process(incident):
"""インシデントを処理する """
if incident['state'] == 'open':
project, name = incident['resource_name'].split(' ')
_cleanup_instance(
project,
incident['resource']['labels']['zone'],
name,
)
else:
pass
def _cleanup_instance(project, zone, instance_name):
    """Delete a Compute Engine instance.

    Issues ``instances().delete`` through the ``compute`` ``v1`` API client.
    An ``HttpError`` from the call is printed and not re-raised, so the
    deletion is best-effort.
    """
    compute = build('compute', 'v1')
    try:
        delete_request = compute.instances().delete(
            project=project,
            zone=zone,
            instance=instance_name,
        )
        delete_request.execute()
    except HttpError as err:
        print(err)
"""
Cloud Function: InstanceCreated
Trigger: PubSub Topic: instance-created
### Logs Exports Setting ###
PubSub Topic: instance-created
Filter:
resource.type="gce_instance"
jsonPayload.event_subtype="compute.instances.insert"
jsonPayload.event_type="GCE_OPERATION_DONE"
############################
The following alert policy monitors the instance where the stackdriver monitoring agent is installed.
You can install it with the following command.
$ curl -sSO https://dl.google.com/cloudagents/install-monitoring-agent.sh
$ sudo bash install-monitoring-agent.sh
$ curl -sSO https://dl.google.com/cloudagents/install-logging-agent.sh
$ sudo bash install-logging-agent.sh
"""
import base64
import json
import google.protobuf.json_format
from google.cloud import monitoring_v3
# JSON template for the AlertPolicy that handler() creates for an instance.
# handler() substitutes two placeholders with str.replace before parsing:
#   TARGET_NAME -> the instance name
#   TIME_LIMIT  -> the duration string returned by get_limit()
# PROJECT_ID and RESOURCE_ID in notificationChannels are not substituted by
# the code; edit them by hand to the notification channel's resource name.
# A raw string is used so the \" escapes reach the JSON parser unchanged.
alert_json = r"""
{
"combiner": "OR",
"conditions": [
{
"conditionThreshold": {
"aggregations": [
{
"alignmentPeriod": "60s",
"perSeriesAligner": "ALIGN_RATE"
}
],
"comparison": "COMPARISON_GT",
"duration": "TIME_LIMIT",
"filter": "metric.type=\"agent.googleapis.com/agent/uptime\" resource.type=\"gce_instance\" metadata.user_labels.\"name\"=\"TARGET_NAME\"",
"trigger": {
"count": 1
}
},
"displayName": "Monitoring Agent Uptime for TARGET_NAME"
}
],
"displayName": "GCE Instance Uptime Check for TARGET_NAME",
"documentation": {
"content": "起動時間が5分を超えました。",
"mimeType": "text/markdown"
},
"enabled": true,
"notificationChannels": [
"projects/PROJECT_ID/notificationChannels/RESOURCE_ID"
]
}
"""
def handler(event, context):
    """Create an alert policy for a newly created GCE instance.

    Receives the "instance created" notification, looks up the uptime limit
    for the instance and, when one applies, creates an alert policy from the
    ``alert_json`` template unless a policy for the instance already exists.

    Args:
        event: Pub/Sub event dict; ``event['data']`` is the base64-encoded
            JSON log entry.
        context: Event metadata (unused).
    """
    pubsub_message = json.loads(base64.b64decode(event['data']).decode('utf-8'))
    instance_name = pubsub_message['jsonPayload']['resource']['name']
    project_id = pubsub_message['resource']['labels']['project_id']

    limit = get_limit(instance_name)
    if not limit:
        print('This instance has no limit')
        return

    alert_client = monitoring_v3.AlertPolicyServiceClient()
    name = alert_client.project_path(project_id)
    # The template's displayName is "GCE Instance Uptime Check for <name>".
    # Anchoring the suffix on " for " keeps an instance named "web" from
    # matching the policy of "prod-web", which a bare ends_with('<name>')
    # would treat as "already exists".
    filter_ = f"display_name=ends_with(' for {instance_name}')"
    if list(alert_client.list_alert_policies(name, filter_=filter_)):
        # Simple idempotency guard.
        print('already exists')
        return

    # Build the AlertPolicy object from the JSON template.
    policy_record = google.protobuf.json_format.Parse(
        alert_json.replace('TARGET_NAME', instance_name).replace('TIME_LIMIT', str(limit)),
        monitoring_v3.types.alert_pb2.AlertPolicy())
    # Create the alert policy.
    policy = alert_client.create_alert_policy(name, policy_record)
    print(policy)
def get_limit(instance_name):
    """Return the uptime limit for an instance as a duration string.

    Instances whose name contains ``rhoboro`` get ``'300s'``; every other
    instance gets ``''``, which means no alert policy is created.
    """
    return '300s' if 'rhoboro' in instance_name else ''
"""
Cloud Function: InstanceDeleted
google-cloud-monitoring==0.31.1
Trigger: PubSub Topic: instance-deleted
### Logs Exports Setting ###
PubSub Topic: instance-deleted
Filter:
resource.type="gce_instance"
jsonPayload.event_subtype="compute.instances.delete"
jsonPayload.event_type="GCE_OPERATION_DONE"
############################
"""
import base64
import json
from google.cloud import monitoring_v3
def handler(event, context):
    """Delete the alert policies of a deleted GCE instance.

    Receives the "instance deleted" notification and removes every alert
    policy whose display name ends with `` for <instance name>``.

    Args:
        event: Pub/Sub event dict; ``event['data']`` is the base64-encoded
            JSON log entry.
        context: Event metadata (unused).
    """
    pubsub_message = json.loads(base64.b64decode(event['data']).decode('utf-8'))
    instance_name = pubsub_message['jsonPayload']['resource']['name']
    project_id = pubsub_message['resource']['labels']['project_id']

    # Delete the alert policies.
    alert_client = monitoring_v3.AlertPolicyServiceClient()
    name = alert_client.project_path(project_id)
    # Per-instance policies are named "GCE Instance Uptime Check for <name>".
    # Anchoring the suffix on " for " prevents deleting the policy of another
    # instance whose name merely ends with this one (e.g. "web" vs
    # "prod-web"), which a bare ends_with('<name>') would also match.
    filter_ = f"display_name=ends_with(' for {instance_name}')"
    for policy in alert_client.list_alert_policies(name, filter_=filter_):
        alert_client.delete_alert_policy(policy.name, timeout=300)
# $ pip install google-cloud-monitoring が必要
# python3
>>> from google.cloud import monitoring_v3
>>> PROJECT_ID = 'PROJECT_ID'
>>> alert_client = monitoring_v3.AlertPolicyServiceClient()
>>> name = alert_client.project_path(PROJECT_ID)
>>> channel_client = monitoring_v3.NotificationChannelServiceClient()
>>> print(list(channel_client.list_notification_channels(name)))
[type: "webhook_tokenauth"
...
name: "projects/PROJECT_ID/notificationChannels/RESOURCE_ID"
...]
# Set the name to alert_json.notificationChannels in instance_created.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment