Skip to content

Instantly share code, notes, and snippets.

@ronfe
Last active May 10, 2016 10:35
Show Gist options
  • Save ronfe/6a575c21346678ae25556b3d9cb4e9d2 to your computer and use it in GitHub Desktop.
Save ronfe/6a575c21346678ae25556b3d9cb4e9d2 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Created on Fri May 6 11:01:25 2016
@author: xinruyue
"""
from pymongo import MongoClient
# Database and collection handles used by every aggregation in this script.
db = MongoClient("10.8.8.111:27017").get_database("eventsV35")
events = db.get_collection("eventV35")
def video_ids(eventKey, num):
    """Return the ``num`` most frequent video ids among events of one type.

    Args:
        eventKey: Value of the ``eventKey`` field used to select events.
        num: Maximum number of video ids to return.

    Returns:
        A list of at most ``num`` video ids taken from
        ``eventValue.videoId``, ordered from most to least frequent.
        An empty list when the aggregation yields no document.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    pipeline = [
        {"$match": {"eventKey": eventKey}},
        {"$group": {"_id": None, "videoId": {"$push": "$eventValue.videoId"}}},
    ]
    grouped = list(events.aggregate(pipeline))
    if not grouped:
        # Nothing matched, so there is no grouped document to index into.
        return []

    # Counter tallies every id in a single pass. The previous version called
    # list.count() once per element (quadratic) and depended on the
    # Python 2-only dict.iteritems().
    counts = Counter(grouped[0]["videoId"])
    return [video_id for video_id, _ in counts.most_common(num)]
# Funnel step definitions, consumed by caculate_data(). An entry is either a
# single event key, a list of event keys, or a dict mapping an event key to
# the event values of interest.
EA_lists = [
    "enterVideo",
    "startVideo",
    ["finishVideo", "clickVideoExit"],
    {"enterTAVideoExitPoll": ["refuse", "quit"]},
]
EP_lists = [
    "enterVideo",
    "startVideo",
    ["finishVideo", "clickVideoExit"],
    {"enterTPVideoExitPoll": ["refuse", "quit"]},
]

# Most frequent video ids for each exit-poll event type.
videoA_ids = video_ids(eventKey="enterTAVideoExitPoll", num=10)
videoP_ids = video_ids(eventKey="enterTPVideoExitPoll", num=5)
def group_users(eventKey, eventValue, input_var, step_users, device_mark):
    """Return the distinct ``user`` values of events matching one funnel step.

    Args:
        eventKey: Value of the ``eventKey`` field to match.
        eventValue: When truthy, an extra ``$match`` stage requires the field
            ``"eventValue." + eventKey`` to equal this value.
        input_var: Dict whose ``"video_ids"`` entry lists the video ids that
            ``eventValue.videoId`` must belong to.
        step_users: Identifiers from the previous step used to restrict the
            match, or ``None`` to apply no such restriction.
        device_mark: When truthy, ``step_users`` is matched against the
            ``device`` field instead of the ``user`` field.

    Returns:
        A list of distinct ``user`` values, or an empty list when no event
        matches.
    """
    match = {
        "eventKey": eventKey,
        "eventValue.videoId": {"$in": input_var['video_ids']},
    }
    # Only add the population filter when there is a population. The previous
    # version deleted the "user" key twice when step_users was None and
    # device_mark was set (KeyError), and would then have sent {"$in": None}.
    if step_users is not None:
        id_field = "device" if device_mark else "user"
        match[id_field] = {"$in": step_users}

    pipeline = [{"$match": match}]
    if eventValue:
        pipeline.append({"$match": {"eventValue." + eventKey: eventValue}})
    # NOTE(review): the set is always built from "$user", even when the
    # filter above runs on "device" -- confirm whether device-based funnels
    # should collect "$device" here instead.
    pipeline.append({"$group": {"_id": None, "users": {"$addToSet": "$user"}}})

    grouped = list(events.aggregate(pipeline))
    if not grouped:
        # No event matched, so there is no grouped document to read.
        return []
    # The $group stage names its output "users"; reading "user" raised
    # KeyError on every call.
    return grouped[0]["users"]
def caculate_data(input_var, device_mark):
    """Count how many users reach each step of a funnel.

    ``input_var["event_lists"]`` describes the funnel. The first entry must
    be a single event key. Each later entry is one of:

    * a string -- one event key; the users reaching it become the population
      for the following entries;
    * a list of event keys -- each key is counted against the same incoming
      population, and the users of the last key are carried forward;
    * a dict ``{event_key: [event_value, ...]}`` -- one count per event value,
      stored under ``"<event_key>.<event_value>"``; the population is left
      unchanged.

    Args:
        input_var: Dict with ``"event_lists"`` (the funnel description) and
            ``"video_ids"`` (forwarded to ``group_users``).
        device_mark: Forwarded unchanged to ``group_users``.

    Returns:
        A dict mapping each step label to the number of users returned by
        ``group_users`` for that step. Empty when ``event_lists`` is empty.
    """
    steps = input_var["event_lists"]
    if not steps:
        return {}

    # First funnel step: entering the video. There is no earlier population
    # to restrict by, hence step_users=None.
    first_step = steps[0]
    full_users = group_users(first_step, None, input_var, None, device_mark)
    result = {first_step: len(full_users)}

    # Remaining funnel steps.
    for each_step in steps[1:]:
        if isinstance(each_step, list):
            # Every branch is measured against the same incoming population.
            # The previous version overwrote the population after the first
            # branch, because its "is this the last branch" test indexed the
            # last character of a string and was always true.
            branch_users = full_users
            for event_key in each_step:
                branch_users = group_users(
                    event_key, None, input_var, full_users, device_mark)
                result[event_key] = len(branch_users)
            # Carry the users of the last branch forward.
            full_users = branch_users
        elif isinstance(each_step, dict):
            # Iterating items() works on Python 2 and 3 and yields the key as
            # a string. The previous version passed the keys() list itself
            # and then failed concatenating it with '.'.
            for event_key, event_values in each_step.items():
                for event_value in event_values:
                    value_users = group_users(
                        event_key, event_value, input_var, full_users,
                        device_mark)
                    result[event_key + '.' + event_value] = len(value_users)
        else:
            full_users = group_users(
                each_step, None, input_var, full_users, device_mark)
            result[each_step] = len(full_users)
    return result
def integrate_data(event_lists, videoX_ids, device_mark=None):
    """Run the funnel computation for one set of steps and video ids.

    Args:
        event_lists: Funnel description passed to ``caculate_data`` as
            ``input_var["event_lists"]``.
        videoX_ids: Video ids passed to ``caculate_data`` as
            ``input_var["video_ids"]``.
        device_mark: Optional flag forwarded to ``caculate_data``. When left
            as ``None``, the module-level ``device_mark`` is used if it is
            defined, otherwise ``False``.

    Returns:
        The dict produced by ``caculate_data``.
    """
    if device_mark is None:
        # The previous version read a module-level ``device_mark`` implicitly
        # and raised NameError when the script had not set it.
        device_mark = globals().get("device_mark", False)
    input_var = {"event_lists": event_lists, "video_ids": videoX_ids}
    # The result used to be computed and dropped, so callers received None.
    return caculate_data(input_var, device_mark)
# NOTE(review): this section originally read `if :` followed by
# `device_mark = True`. The condition was never written, which is a syntax
# error and left the flag undefined whenever the branch would not run.
# The flag is now always defined and defaults to False (match previous-step
# identifiers on the "user" field). TODO: set it to True under whatever
# condition was intended for device-based matching.
device_mark = False

result_A = integrate_data(EA_lists, videoA_ids)
result_P = integrate_data(EP_lists, videoP_ids)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment