Created
May 7, 2019 07:13
-
-
Save talebook/073467bd751210e6f4016e8de38d9ce6 to your computer and use it in GitHub Desktop.
cdn-dispatch-log-jobs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
#-*- coding: UTF-8 -*- | |
import re | |
import os | |
import sys | |
import json | |
import requests | |
import urlparse | |
import logging | |
import datetime | |
from tencentcloud.common import credential | |
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException | |
from tencentcloud.scf.v20180416 import scf_client,models | |
from QcloudApi.qcloudapi import QcloudApi | |
class Job:
    """Dispatch CDN log downloads to an SCF function that stores them in COS.

    For every CDN host (either the configured list or all hosts returned by
    the CDN API) the job looks up the log download links of a past time
    window and asynchronously invokes the configured SCF function once per
    link.

    The ``config`` dict must provide: ``secret_id``, ``secret_key``,
    ``scf_region``, ``scf_function``, ``cos_path`` and ``cdn_host``.
    """

    # The queried window ends CDN_LOG_STABLE_HOURS before "now" and spans
    # CDN_LOG_SAVE_HOURS hours.
    # NOTE(review): presumably the delay gives the CDN time to finalise its
    # hourly log files -- confirm against the CDN log documentation.
    CDN_LOG_STABLE_HOURS = 12 + 1
    CDN_LOG_SAVE_HOURS = 1

    # Path layout of a CDN log URL: /day/hour/dayhour-host.gz
    # The dot is escaped and the pattern is anchored at the end so that only
    # paths really ending in ".gz" are accepted.
    LOG_PATH_PATTERN = re.compile(
        r'/(?P<day>[^/]*)/(?P<hour>[^/]*)/(?P<filename>[^-]*-(?P<host>[^/]*)\.gz)$'
    )

    def __init__(self, config):
        """Create the SCF and CDN API clients from *config*."""
        self.config = config

        # scf api client
        cred = credential.Credential(config['secret_id'], config['secret_key'])
        self.scf_client = scf_client.ScfClient(cred, config['scf_region'])

        # cdn api client
        cdn_config = {
            'Region': 'ap-guangzhou',
            'method': 'GET',
            'secretId': config['secret_id'],
            'secretKey': config['secret_key'],
            'SignatureMethod': 'HmacSHA1',
        }
        self.cdn_client = QcloudApi("cdn", cdn_config)
        self.cos_path = config['cos_path']

    def get_cdn_log_urls(self, host):
        """Return the CDN log download links of *host* for the past window.

        Returns an empty list when the API reports a non-zero ``code``.
        """
        # NOTE(review): uses naive local time; confirm this matches the time
        # zone the CDN API expects.
        now = datetime.datetime.now()
        end = now - datetime.timedelta(hours=self.CDN_LOG_STABLE_HOURS)
        start = end - datetime.timedelta(hours=self.CDN_LOG_SAVE_HOURS)

        action = "GetCdnLogList"
        action_params = {
            'host': host,
            'startDate': start.strftime("%Y-%m-%d %H:%M:%S"),
            'endDate': end.strftime("%Y-%m-%d %H:%M:%S"),
        }
        rsp = self.cdn_client.call(action, action_params)
        data = json.loads(rsp)
        if data['code'] != 0:
            logging.error("API %s error: %s", action, data)
            return []

        # Entries with a falsy 'type' are skipped.
        urls = [v['link'] for v in data['data']['list'] if v['type']]
        if urls:
            logging.info("time(%s~%s) host[%s] log urls are:\n%s\n.",
                         start, end, host, "\n".join(urls))
        else:
            logging.info("time(%s~%s) host[%s] log urls are empty",
                         start, end, host)
        return urls

    def get_cdn_hosts(self):
        """Return all CDN host names of the account.

        Returns an empty list when the API reports a non-zero ``code``.
        """
        action = "DescribeCdnHosts"
        action_params = {
            'detail': 0,
        }
        rsp = self.cdn_client.call(action, action_params)
        data = json.loads(rsp)
        if data['code'] != 0:
            logging.error("API %s error: %s", action, data)
            return []

        hosts = [v['host'] for v in data['data']['hosts']]
        logging.info("cdn hosts = %s", hosts)
        return hosts

    def get_cos_key(self, url):
        """Build the COS storage key for a CDN log *url*.

        The URL path must have the form ``/day/hour/dayhour-host.gz``; the
        named parts (``day``, ``hour``, ``filename``, ``host``) are
        substituted into the ``cos_path`` template.

        Raises:
            RuntimeError: if the URL path does not have the expected form.
        """
        # Local import with fallback keeps the method usable on both
        # Python 3 (urllib.parse) and Python 2 (urlparse).
        try:
            from urllib.parse import urlparse as parse_url
        except ImportError:
            from urlparse import urlparse as parse_url

        parts = parse_url(url)
        m = self.LOG_PATH_PATTERN.match(parts.path)
        if not m:
            raise RuntimeError("cdn log url format is not support: %s" % url)
        return self.cos_path % m.groupdict()

    def invoke_cos_upload(self, url):
        """Asynchronously invoke the SCF function that uploads *url* to COS.

        The whole job configuration plus ``url`` and ``cos_key`` is passed to
        the function as its client context. The outcome is printed; SDK
        exceptions are printed instead of being propagated.

        Raises:
            RuntimeError: if *url* does not have the expected log URL form.
        """
        event = dict(self.config)
        event.update({"url": url, "cos_key": self.get_cos_key(url)})

        action = "Invoke"
        action_params = {
            'InvocationType': "Event",  # asynchronous
            'FunctionName': self.config['scf_function'],
            'ClientContext': json.dumps(event),
        }

        # Call the API and print the returned result.
        try:
            ret = self.scf_client.call(action, action_params)
        except TencentCloudSDKException as err:
            print(err)
            return

        response = json.loads(ret).get("Response", {})
        result = response.get("Result")
        if result is None:
            # A response without a "Result" section (presumably an API-level
            # error) is printed as-is instead of aborting the whole dispatch
            # loop with a KeyError.
            print(response.get("Error", response))
        else:
            print(result.get("RetMsg"))

    def run(self):
        """Dispatch one upload job per log URL of every host.

        Uses ``config['cdn_host']`` when it is non-empty, otherwise all hosts
        of the account. Returns a summary dict with the number of dispatched
        URLs and processed hosts.
        """
        hosts = self.config['cdn_host']
        if not hosts:
            hosts = self.get_cdn_hosts()

        cnt = 0
        for host in hosts:
            urls = self.get_cdn_log_urls(host)
            for url in urls:
                self.invoke_cos_upload(url)
            # Accumulate per host so the total covers every host.
            cnt += len(urls)
        return {"status": "jobs dispatched", "count_url": cnt, "count_host": len(hosts)}
def run_app():
    """Assemble the job configuration, run the dispatch job and return its summary."""
    settings = {}

    # API credentials
    settings.update({
        'secret_id': '****************',
        'secret_key': '***************',
    })

    # COS: region of the bucket, bucket name and key template
    settings.update({
        'cos_region': 'ap-chengdu',
        'cos_bucket': 'bucket-1251001234',
        'cos_path': '/cdnlog/%(host)s/%(day)s/%(filename)s',
    })

    # SCF: region and name of the function that is invoked per log file
    settings.update({
        'scf_region': 'ap-guangzhou',
        'scf_function': 'cdn-save-log-into-cos',
    })

    # CDN: an empty host list means the logs of every domain of the account
    # are synchronised. Example of an explicit list:
    #   ['tx-cdn.talebook.org', 'js.talebook.org']
    settings.update({
        'cdn_host': [],
    })

    return Job(settings).run()
def main_handler(event, context):
    """Function entry point: run the dispatch job and wrap its summary.

    ``event`` and ``context`` are accepted but not used.
    """
    summary = run_app()
    reply = {"statusCode": 200}
    reply["body"] = summary
    return reply
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment