Last active
June 25, 2024 06:50
-
-
Save freemandealer/e54252777d3333a9ace2f7594faa00b6 to your computer and use it in GitHub Desktop.
通过性能平台的 stepid 获取对应测试的 log 和指定 sql 的 profile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# @Time : 2024/06/01 | |
# @Author : zhengyu ([email protected]) | |
# @Description: 通过性能平台的 stepid 获取对应测试的 log 和指定 sql 的 profile | |
# 用法:python3 perf.py --stepid=stepid --sql=sql --idx=idx --output=output | |
# 必要参数: | |
# stepid: stepid | |
# sql: 如 q09、q19 (tpch格式)、query01、query68 (tpcds格式) | |
# 可选参数: | |
# idx: 如果sql被执行多次,可以指定idx。可以为:0(代表冷读)、1,2,3,默认 0 | |
# output: 输出文件名的前缀,可以自由指定,默认值空 | |
import paramiko | |
import re | |
from bs4 import BeautifulSoup | |
import requests | |
import argparse | |
import mysql.connector | |
from mysql.connector import Error | |
# MySQL 数据库配置信息 | |
dbhost = '172.20.48.32' # 例如 'localhost' 或数据库服务器的 IP 地址 | |
database = 'platform' # 你要连接的数据库名 | |
dbuser = 'root' # 数据库用户名 | |
dbpassword = 'xxx' # 数据库用户密码 | |
dbport = 3306 | |
# log server | |
host = '172.20.48.32' | |
username = "root" | |
password = "xxx" | |
def query_meta_base(query): | |
try: | |
connection = mysql.connector.connect(host=dbhost, | |
database=database, | |
user=dbuser, | |
password=dbpassword, | |
port=dbport) | |
if connection.is_connected(): | |
cursor = connection.cursor() | |
cursor.execute(query) | |
results = cursor.fetchall() | |
for row in results: | |
print(row) | |
cursor.close() | |
except Error as e: | |
print(f"Error: {e}") | |
finally: | |
if connection.is_connected(): | |
connection.close() | |
print("MySQL connection is closed") | |
return results | |
def get_workflowid_and_date_from_stepid(stepid): | |
query_template = ''' | |
SELECT | |
`steps`.`workflow_uid` AS `workflow_uid`, | |
`steps`.`start_at` AS `start_at` | |
FROM | |
`steps` | |
WHERE | |
`steps`.`uid` = '{stepid}' | |
LIMIT | |
100 | |
''' | |
query = query_meta_base(query_template.format(stepid=stepid)) | |
if (query == None) or (len(query) == 0) or (query[0] == None) or (query[0][0] == None) or (query[0][1] == None): | |
return None, None | |
return query[0][0], query[0][1].date() # workflowid, date (yy-mm-dd no hour and minute) | |
def get_workflow_log_location(date, workflowid, stepid): | |
location_template = "/mnt/hdd01/performance/logs/{date}/{workflowid}/{stepid}" | |
return location_template.format(date=date, workflowid=workflowid, stepid=stepid) | |
def download_file(server_ip, username, password, remote_file_path, local_file_path): | |
ssh = paramiko.SSHClient() | |
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) | |
ssh.connect(server_ip, username=username, password=password) | |
ftp = ssh.open_sftp() | |
ftp.get(remote_file_path, local_file_path) | |
ftp.close() | |
def download_log_file(remote_log_location, stepid): | |
save_location = "/tmp/{stepid}.log".format(stepid=stepid) | |
download_file(host, username, password, remote_log_location, save_location) | |
return save_location | |
def get_profile_from_log(save_location, sql, idx): | |
ret_url = "" | |
with open(save_location, "r") as f: | |
lines = f.readlines() | |
for line in lines: | |
if f"{sql}.sql_{idx}_profile.html" in line: | |
print(line) | |
ret_url = line | |
break | |
if ret_url == "": | |
return ret_url | |
else: | |
url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' | |
url = re.findall(url_pattern, ret_url) | |
return url[0] | |
def download_profile(url, output_path): | |
response = requests.get(url) | |
soup = BeautifulSoup(response.text, 'html.parser') | |
# 获取网页的可视化文本 | |
visible_text = soup.get_text('\n') | |
# 将文本保存到文件 | |
with open(output_path, "w") as f: | |
f.write(visible_text) | |
def main(stepid, sql, idx="0", output=""): | |
workflowid, date = get_workflowid_and_date_from_stepid(stepid) | |
if (workflowid == None) or (date == None): | |
print("Workflow ID not found, bad stepid?") | |
return | |
else: | |
print("workflowid:", workflowid, " date:", date) | |
remote_log_location = get_workflow_log_location(date, workflowid, stepid) | |
print("log location:", remote_log_location) | |
save_location = download_log_file(remote_log_location, stepid) | |
print("Downloaded log file to:", save_location) | |
profile_url = get_profile_from_log(save_location, sql, idx) | |
if profile_url == "": | |
print("Profile URL not found") | |
return | |
else: | |
print("Profile URL:", profile_url) | |
output_path = "/tmp/{output}_{stepid}_{sql}_{idx}.txt".format(output=output, | |
stepid=stepid, | |
sql=sql, | |
idx=idx) | |
download_profile(profile_url, output_path) | |
print("Downloaded profile to:", output_path) | |
return | |
if __name__ == '__main__': | |
parser = argparse.ArgumentParser(description='Process input parameters.') | |
parser.add_argument('--stepid', required=True, help='Step ID') | |
parser.add_argument('--sql', required=True, help='Query SQL') | |
parser.add_argument('--idx', default='0', help='Query Idx of the same SQL (optional)') | |
parser.add_argument('--output', default='', help='Output (optional)') | |
args = parser.parse_args() | |
main(args.stepid, args.sql, args.idx, args.output) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment