Created
September 12, 2016 11:15
-
-
Save diggzhang/93be727e66ab75c9e12572ccefbf3efd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# _*_ coding:utf-8 _*_ | |
""" | |
该脚本的gist地址 https://gist.github.com/diggzhang/480bf81101173525cc55926e990f8858 | |
关联脚本1: cronjob 备份 https://gist.github.com/diggzhang/cc0326d54bb592a0b2d7d7170fb94a17 | |
在跑这个脚本前最好建立索引 | |
db.httplogs.ensureIndex({ | |
"url": 1, | |
"method": 1, | |
"status": 1 | |
}) | |
""" | |
import re | |
from pymongo import MongoClient | |
from bson.objectid import ObjectId | |
import jwt | |
import time | |
import datetime | |
# TODO: switch the MongoDB host/port below to the online configuration.
# For a trial run, use the 'eventsV4_test' database instead of 'eventsV4'.
_mongo_client = MongoClient('localhost', 27017)
onion_v4_DB_instance = _mongo_client['eventsV4']

# Source collection: raw HTTP logs that get converted into events.
httplogs_collection = onion_v4_DB_instance['httplogs']

# Both names below are handles to the same 'eventV4' collection.
events = onion_v4_DB_instance['eventV4']
generater_events_collection = onion_v4_DB_instance['eventV4']
""" | |
解析token部分 | |
""" | |
def parse_token(token):
    """Decode the JWT found at the end of a logged token string.

    The input is split on single spaces and only the last piece is handed
    to ``jwt.decode`` (so any prefix before the final space is ignored).

    Args:
        token: the token string taken from an HTTP log document.

    Returns:
        Whatever ``jwt.decode`` returns for the token, or ``None`` when
        decoding raises any ``Exception``.
    """
    signing_key = "follow your heart&intuition"
    try:
        # verify=False is passed through to jwt.decode as in the original.
        # NOTE(review): presumably this skips signature verification; newer
        # PyJWT releases changed this keyword - confirm the installed version.
        return jwt.decode(token.split(' ')[-1], signing_key, verify=False)
    except Exception:
        # Best effort: an undecodable token simply yields no user info.
        return None
""" | |
转换埋点的主要逻辑 | |
""" | |
def get_meta_data(doc_list, res_info):
    """Convert raw HTTP log documents into backend event documents.

    Args:
        doc_list: iterable of HTTP log dicts. Every dict must contain the
            keys 'ua', 'url', 'apptag', 'ip', 'token' and 'serverTime'
            ('location' as well when 'ip' is not None). The keys
            'eventTime', 'device', 'response', 'request', 'status' and
            'method' are copied when present.
        res_info: config entry for the rule being processed; only its
            'eK' value is read and stored as the event's 'eventKey'.

    Returns:
        A list with one event dict per input document, in input order.

    Raises:
        KeyError: if a required key is missing from a document, or a
            decoded token has no 'id'.
    """
    result = []
    for doc in doc_list:
        event_obj = {
            "eventKey": res_info['eK'],
            "platform": "backend",
            "ua": doc['ua'],
            "url": doc['url'],
            "category": doc['apptag'],
        }

        # 'location' is copied together with 'ip'
        # (assumed pairing - confirm against the log schema).
        if doc['ip'] is not None:
            event_obj['ip'] = doc['ip']
            event_obj['location'] = doc['location']

        token = doc['token']
        if token is not None and token != "":
            token_obj = parse_token(token)
            if token_obj is not None:
                event_obj['user'] = ObjectId(token_obj['id'])
                if 'role' in token_obj:
                    event_obj['role'] = token_obj['role']

        if 'eventTime' in doc:
            event_obj['eventTime'] = doc['eventTime']
        if 'device' in doc:
            event_obj['device'] = doc['device']
        event_obj['serverTime'] = doc['serverTime']

        # Request/response bodies are stored as sub-documents; anything that
        # is not already a dict is wrapped as {"error": <value>}.
        for field in ('response', 'request'):
            payload = doc.get(field)
            if payload is None or payload == "":
                continue
            if isinstance(payload, dict):
                event_obj[field] = payload
            else:
                event_obj[field] = {"error": payload}

        if 'status' in doc:
            event_obj['httpStatus'] = doc['status']
        if 'method' in doc:
            event_obj['method'] = doc['method']

        # Never persist credentials from signup/login calls. Only mask when a
        # request body was actually recorded (the unguarded assignment used
        # to raise KeyError otherwise), and work on a copy so the caller's
        # log document is left untouched.
        url = doc['url']
        if ('signup' in url or 'login' in url) and 'request' in event_obj:
            event_obj['request'] = dict(
                event_obj['request'], password="password_missing"
            )

        result.append(event_obj)
    return result
""" | |
读取config.csv信息 | |
@ file_path config文件的路径 | |
""" | |
def generate_config(file_path):
    """Read the rule file that maps HTTP requests to event keys.

    Each non-blank line is split on ',,' into four sections:
    ``<url regex>,<method>,,<eventKey>,,<req>,,<res>``. The first section is
    split again on ',' to obtain the URL pattern and the HTTP method.

    Args:
        file_path: path of the config file.

    Returns:
        ``{"req": [...], "res": [...]}`` where the two lists are aligned by
        index. Each "req" entry is ``{'url': <compiled regex>, 'm': <method>}``
        and each "res" entry is ``{'eK': ..., 'req': ..., 'res': ...}``.

    Raises:
        IndexError: if a non-blank line does not have the expected sections.
    """
    result = {"req": [], "res": []}
    # The context manager closes the file even if a line fails to parse.
    with open(file_path) as config_file:
        for line in config_file:
            txt = line.strip()
            if not txt:
                # Blank lines (e.g. a trailing newline) carry no rule.
                continue
            txt_array = txt.split(',,')
            url_and_method = txt_array[0].split(',')
            result['req'].append({
                'url': re.compile(url_and_method[0]),
                'm': url_and_method[1],
            })
            result['res'].append({
                'eK': txt_array[1],
                'req': txt_array[2],
                'res': txt_array[3],
            })
    return result
""" | |
main func | |
""" | |
def convert_main(file_path, http_log, generater_events):
    """Turn matching HTTP logs into events, then remove the converted logs.

    For every rule in the config file, the logs whose URL matches the rule's
    regex and whose method equals the rule's method are selected. Rules whose
    event key contains 'Failure' select logs with a status other than
    "200"/"204"; all other rules select logs with status "200" or "204".
    Selected logs are converted with ``get_meta_data``, inserted into
    ``generater_events`` and then deleted from ``http_log``.

    Args:
        file_path: path of the config file read by ``generate_config``.
        http_log: collection holding the raw HTTP logs; must provide
            ``find`` and ``delete_many``.
        generater_events: collection receiving the events; must provide
            ``insert_many``.
    """
    config = generate_config(file_path)
    success_statuses = ["200", "204"]
    # Upper bound on ids per delete command, to keep each command small.
    delete_batch_size = 10000

    for unit_info, res_info in zip(config['req'], config['res']):
        event_key = res_info['eK']
        status_operator = "$nin" if 'Failure' in event_key else "$in"
        condition = {
            "url": {"$regex": unit_info['url']},
            "method": unit_info['m'],
            "status": {status_operator: success_statuses},
        }

        log_list = list(http_log.find(condition))
        if not log_list:
            # The formatting must happen inside print(): applying % to the
            # value returned by print() fails on Python 3.
            print("condition条件下没有符合项 %s" % (event_key,))
            continue

        result = get_meta_data(log_list, res_info)
        if result:
            generater_events.insert_many(result)

        # Delete exactly the documents that were fetched and converted.
        # Re-running the query here would also remove logs written after
        # the find() above, which were never turned into events.
        converted_ids = [doc['_id'] for doc in log_list]
        for start in range(0, len(converted_ids), delete_batch_size):
            batch = converted_ids[start:start + delete_batch_size]
            http_log.delete_many({"_id": {"$in": batch}})

        print("%s 后端生成埋点/清楚http log数: %d" % (event_key, len(result)))
""" | |
processing initial | |
相比于V3,取消了生成客服表的逻辑 | |
将req、res放到了eventKey同层 | |
""" | |
# Script entry: convert the HTTP logs using the rules in ./config.csv and
# write the generated events to the eventV4 collection.
convert_main(
    file_path="./config.csv",
    http_log=httplogs_collection,
    generater_events=generater_events_collection,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment