Last active
July 18, 2017 06:39
-
-
Save ronfe/420b9afb678c3aa4733a76353c1c7bae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pymongo import MongoClient | |
from bson.objectid import ObjectId | |
from datetime import datetime | |
db_path = "path/to/db:27017" | |
conn = MongoClient(db_path) | |
conquer_track = conn["conquerTrack"] | |
## 所有表每日全量更新 | |
## DB name: conquerTrack | |
## Collection name - Schema | |
### teams - teams | |
### schoolConquerors - school_conquerors | |
### roomConquerors - room_conquerors | |
### userDaily - user_daily | |
### Schema begin | |
teams = { | |
"_id": ObjectId(), # 地推服务商ID | |
"name": str() # 地推服务商名字,编辑更新 | |
} | |
### 只有关联的学校 | |
school_conquerors = { | |
"_id": ObjectId(), # onions - 学校ID | |
"capturedBy": ObjectId # 攻克的地推服务商ID,ObjectId, 如果服务商或关联被删除,该字段会重置,默认null | |
"capturedDate": datetime(1970, 1, 1) # 攻克时间,ISODate,小时及以下单位均为0。如果服务商或关联被删除,该字段会重置,默认值为1970-01-01 | |
} | |
### 只有关联的班级 | |
room_conquerors = { | |
"_id": ObjectId(), # room id | |
"roomRef": str(), # room ref | |
"teacherId": ObjectId(), # room owner的uid | |
"schoolId": ObjectId(), # room所在学校的ID | |
"conquererId": ObjectId() # 对应活跃的地推服务商ID | |
} | |
### 每日用户记录,只记录渠道域内的活跃用户 | |
user_daily = { | |
"_id": ObjectId(), | |
"userId": ObjectId(), # 记录对应的uid | |
"role": Str(), # 对应的用户类别,只有student或teacher | |
"channel": str(), # 对应的用户渠道号,预留给下期 | |
"logDate": datetime(), # 记录日,ISODate格式,小时及以下单位均为0 | |
"teamId": ObjectId(), # 对应的用户地推服务商ID,如果有多个地推服务商,则有多条记录 | |
"conflictType": 0, # 元字段,记录该日该用户指派conqueror是否存在及存在何种规则冲突 | |
# 算法:conflictType(d, n) = K(ROOM, d, n) + 100K(SCHOOL, d, n) - 1 | |
# 正常:无班级有学校 = 99(活跃攻克),单班级无学校 = 0 (活跃攻克) | |
# K(RULE, d, n) = 用户n在d日RULE中存在的不同服务商ID数量 | |
"conquerAssignRule": str(), # 元字段,该记录的服务商ID是通过何种规则指派 | |
# bySchool - 通过用户数据中school字段 | |
# byRoom - 通过班级地推方式 | |
# byRoom优先 | |
"paidAmount": int(), # 该用户在该日支付成功订单的实际金额总和 | |
"isUnique": False # 该用户当天是否只有一条记录,单记录用户及多记录用户的首条记录为True,用于计算总活跃和总付费筛选 | |
} | |
### Schema end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pymongo import MongoClient | |
import datetime | |
db_path = "10.8.8.111:27017" | |
def calc_yesterday(): | |
today = datetime.datetime.now() | |
y, m, d = today.year, today.month, today.day | |
# return datetime.datetime(y, m, d-1) | |
return datetime.datetime(y, m, d) - datetime.timedelta(days=1) | |
def find_team_by_room(room_id, col): | |
x = col.find_one({"_id": room_id}) | |
if not x: | |
return None | |
return x["conquererId"] | |
def find_school_by_room(col): | |
x = col.find() | |
result = dict() | |
for doc in x: | |
result[doc["_id"]] = doc["schoolId"] | |
return result | |
def find_payment_by_uid(uid, col, d): | |
x = col.find({ | |
"eventKey": "paymentSuccess", | |
"serverTime": {"$gte": d - datetime.timedelta(hours=8)}, | |
"user": uid | |
}) | |
amount = 0 | |
for doc in x: | |
if "actualOrderAmount" in doc: | |
amount += doc["actualOrderAmount"] | |
return amount | |
def main(): | |
conn = MongoClient(db_path) | |
db = conn["trackDev"] | |
capture_schools = db["schools"] | |
# get school-team map | |
school_team_map = dict() | |
for doc in capture_schools.find(): | |
if datetime.datetime.now() >= doc["capturedDate"]: | |
school_team_map[doc["_id"]] = doc["capturedBy"] | |
# populate rooms | |
# get rooms by school - teachers | |
users = conn["onions"]["users"] | |
school_ids = list(school_team_map.keys()) | |
teachers = users.find({ | |
"school": {"$in": school_ids}, | |
"role": "teacher", | |
}, {"school": 1}) | |
room_ids = list() | |
teacher_school_map = dict() | |
rooms = conn["onions"]["rooms"] | |
capture_rooms = db["rooms"] | |
capture_rooms.drop() | |
for t in teachers: | |
x = rooms.find({"owners": t["_id"]}, {"ref": 1, "owners": 1}) | |
if x.count() > 0: | |
teacher_school_map[t["_id"]] = t["school"] | |
for doc in x: | |
room_ids.append(doc["_id"]) | |
temp = { | |
"_id": doc["_id"], | |
"roomRef": doc["ref"], | |
"teacherId": doc["owners"][0], | |
"schoolId": teacher_school_map[doc["owners"][0]] | |
} | |
temp["conquererId"] = school_team_map[temp["schoolId"]] | |
capture_rooms.insert_one(temp) | |
del temp | |
# generate user daily | |
daily_users = db["userdailies"] | |
user_attr = conn["cache"]["userAttr"] | |
order_events = conn["eventsV4"]["orderEvents"] | |
yesterday = calc_yesterday() | |
yesterday_str = datetime.datetime.strftime(yesterday, "%Y%m%d") | |
x = users.find({"rooms": {"$in": room_ids}}, { | |
"rooms": 1, | |
"channel": 1, | |
"role": 1, | |
"school": 1 | |
}) | |
# byroom rule | |
recorded_users = set() | |
room_school_map = find_school_by_room(capture_rooms) | |
for t in x: | |
if "role" not in t or t["role"] not in ["student", "teacher"]: | |
continue | |
y = user_attr.find_one({"user": str(t["_id"])}, {"daily": 1}) | |
if not y: | |
continue | |
if "daily" not in y or yesterday_str not in y["daily"]: | |
continue | |
temp = list() | |
# generate doc model | |
model_rec = dict({ | |
"userId": t["_id"], | |
"role": t["role"], | |
"logDate": yesterday, | |
"conquerAssignRule": "byRoom", | |
"channel": None | |
}) | |
model_rec["paidAmount"] = find_payment_by_uid(t["_id"], order_events, yesterday) | |
if "channel" in t: | |
model_rec["channel"] = t["channel"] | |
conflict_factor = 1 | |
# room | |
# init_room, *other_rooms = t["rooms"] | |
init_room = t["rooms"][0] | |
other_rooms = t["rooms"][1:] | |
if len(other_rooms) > 0: | |
conflict_factor += len(other_rooms) | |
model_rec["conflictType"] = conflict_factor - 1 | |
init_doc = model_rec.copy() | |
init_doc["teamId"] = find_team_by_room(init_room, capture_rooms) | |
init_doc["isUnique"] = True | |
# if "school" in t and t["school"] in school_ids and t["school"] != room_school_map[init_room]: | |
# conflict_factor += 100 | |
temp.append(init_doc) | |
for r in other_rooms: | |
init_doc = model_rec.copy() | |
init_doc["teamId"] = find_team_by_room(r, capture_rooms) | |
init_doc["isUnique"] = False | |
# if "school" in t and t["school"] in school_ids and t["school"] != room_school_map[r]: | |
# conflict_factor += 100 | |
temp.append(init_doc) | |
# insert | |
daily_users.insert_many(temp) | |
recorded_users.add(t["_id"]) | |
del temp | |
# by school rule | |
x = users.find({"school": {"$in": school_ids}}, { | |
"channel": 1, | |
"role": 1, | |
"school": 1 | |
}) | |
for t in x: | |
if t["_id"] in recorded_users: | |
continue | |
if "role" not in t or t["role"] not in ["student", "teacher"]: | |
continue | |
y = user_attr.find_one({"user": str(t["_id"])}, {"daily": 1}) | |
if not y: | |
continue | |
if "daily" not in y or yesterday_str not in y["daily"]: | |
continue | |
model_rec = dict({ | |
"userId": t["_id"], | |
"role": t["role"], | |
"logDate": yesterday, | |
"teamId": school_team_map[t["school"]], | |
"conquerAssignRule": "bySchool", | |
"channel": None, | |
"conflictType": 99, | |
"isUnique": True | |
}) | |
model_rec["paidAmount"] = find_payment_by_uid(t["_id"], order_events, yesterday) | |
if "channel" in t: | |
model_rec["channel"] = t["channel"] | |
daily_users.insert_one(model_rec) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment