Created
April 12, 2013 21:54
-
-
Save Zenuncl/5375461 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -* | |
######################################################################### | |
#假设类似CTRL + A这样的组合按键算一次 | |
#实现思路: | |
#首先开始直接输出A的个数必须为3,4,5中的一个,因为1和2对其做复制得不偿失,而6的时候已经相当于3个A作一次复制 | |
#(在测试时发现使M最大的情况中直接输出5个A的情况没有出现过,但是没有找到什么理由不去考虑5个A,也许N足够大会用到5?) | |
#其次,输出A后需要紧接着作一次“全选复制粘贴” | |
#第三,再往后需要“全选复制粘贴”和单独“粘贴”做任意顺序组合,找出实现最大化的情况 | |
#ps:这个程序在跑到N = 38的时候我的笔记本就很吃力了,所以只测试了38以下 | |
#ps:发现规律:按键基本形式为4A(or 3A) 3 2 3 2 3 2 。。。(4A代表直接输出4个A,3代表全选复制粘贴,2代表单独两次粘贴) | |
######################################################################### | |
#需用到itertools中的 组合 | |
from itertools import combinations | |
#定义全局变量 | |
N = 0 #按键次数 | |
remaining_num = N #剩余按键次数,随着按键增加递减 | |
key_list = [] #记录按键 | |
clipboard = 0 #剪贴板 | |
result = 0 #每种情况下输出A的数量 | |
result_lsit = [] #结果,记录所有情况和按键次数 | |
#将结果放到结果list中 | |
def push_to_result(): | |
my_dict = {} | |
my_dict["key_list"] = key_list | |
my_dict["length"] = result | |
result_lsit.append(my_dict) | |
#全局变量初始化 | |
def init(): | |
global N,clipboard,result,key_list,remaining_num | |
remaining_num = N | |
key_list = [] | |
clipboard = 0 | |
result = 0 | |
#以下函数模拟真实的CTRL+A CTRL+C CTRL+V 和 A | |
def press_ctrl_a(): | |
global remaining_num | |
remaining_num = remaining_num - 1 | |
key_list.append("CTRL+A") | |
def press_ctrl_c(): | |
global remaining_num,clipboard | |
remaining_num = remaining_num - 1 | |
key_list.append("CTRL+C") | |
clipboard = result | |
def press_ctrl_v(): | |
global remaining_num,result | |
remaining_num = remaining_num - 1 | |
key_list.append("CTRL+V") | |
result += clipboard | |
def press_a(): | |
global remaining_num,result | |
remaining_num = remaining_num - 1 | |
key_list.append("A") | |
result += 1 | |
#定义一个原子操作,完成全选复制粘贴功能 | |
def do_copy(): | |
press_ctrl_a() | |
press_ctrl_c() | |
press_ctrl_v() | |
def main(): | |
global N,remaining_num,key_list,result,clipboard | |
N = int(raw_input("Please type in N and press Enter \n")) | |
remaining_num = N | |
#1到6直接按A即可 | |
if remaining_num < 7: | |
for x in range(0,N): | |
press_a() | |
#N = 7单列出来,因为开头输入5个A的时候不能完成一次“全选复制粘贴” | |
elif remaining_num == 7: | |
for x in range(0,3): | |
press_a() | |
do_copy() | |
press_ctrl_v() | |
#N > 7的时候进入循环迭代,遍历各种情况 | |
else: | |
#遍历开始输出3个A、4个A、5个A的情况 | |
for x in range(3,5): | |
init()#每循环一次初始化一次全局变量 | |
for a_num in range(0,x): | |
press_a() | |
do_copy() | |
#如果剩余按键次数不能容纳一次“全选复制粘贴”,就只做粘贴 | |
if remaining_num < 3: | |
for ctrl_v_count in range(0,remaining_num): | |
press_ctrl_v() | |
push_to_result() | |
#否则 | |
else: | |
#进入分支,以下四句备份全局变量 | |
remaining_num_bak = remaining_num | |
key_list_bak = key_list[:] | |
clipboard_bak = clipboard | |
result_bak = result | |
#将do_copy()和press_ctrl_v()任意顺序组合 | |
for do_copy_num in range(0,remaining_num / 3 + 1): | |
ctrl_v_num = remaining_num - do_copy_num * 3 | |
key_list_temp = [index for index in range(0,do_copy_num + ctrl_v_num)] | |
for do_copy_position in combinations(key_list_temp,do_copy_num): | |
for index in range(0,do_copy_num + ctrl_v_num): | |
if index in do_copy_position: | |
do_copy() | |
else: | |
press_ctrl_v() | |
push_to_result() | |
#以下四句恢复全局变量 | |
remaining_num = remaining_num_bak | |
key_list = key_list_bak[:] | |
clipboard = clipboard_bak | |
result = result_bak | |
#打印出结果 | |
max_length = max([item["length"] for item in result_lsit]) | |
print "\nHere is the key list:" | |
for item in result_lsit: | |
if item["length"] == max_length: | |
print item["key_list"] | |
print "The result M is %d "%max_length | |
if __name__ == '__main__': | |
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -* | |
######################################################################### | |
#假设两次以上包含两次 | |
#假设第三条“所有以上用户指”满足1,2条件的用户 | |
#这种方法对所有日志文件遍历了两次 | |
#第一次遍历找出符合要求的用户,第二次遍历通过已得出的用户列表找这些人每天访问两次的路径 | |
#ps:这种方法可能会消耗较多时间而占用较小内存 | |
######################################################################### | |
log_path = "./" #日志文件路径 | |
result_user_list=[] #记录符合要求的用户 | |
#找出符合要求用户 | |
#第一天,初始化 result_user_list | |
day = 1 | |
user_dict = {} #形如{"1":"/topic/XXX"},用于找出当天访问不同topic的用户 | |
for hour in range(0,24): | |
file_name = log_path + "2013-01-%02d-%02d.log"%(day,hour) | |
log_file = open(file_name) | |
for line in log_file: | |
user_id = line.split()[3] | |
request_url = line.split()[6] | |
#如果用户访问了topic | |
if "/topic/" in request_url and user_id not in result_user_list: | |
#如果用户访问过topic | |
if user_id in user_dict: | |
#如果用户访问的topic存在不同 | |
if user_dict[user_id] != request_url: | |
result_user_list.append(user_id) | |
del(user_dict[user_id]) #符合要求用户入列后,删除dict中记录,释放内存 | |
else: | |
user_dict[user_id] = request_url | |
log_file.close() | |
#从第二天开始循环 | |
for day in range(1,31): | |
user_dict = {} #形如{"1":"/topic/XXX"},用于找出当天访问不同topic的用户 | |
for hour in range(0,24): | |
file_name = log_path + "2013-01-%02d-%02d.log"%(day,hour) | |
log_file = open(file_name) | |
temp_user_id_list = [] | |
for line in log_file: | |
user_id = line.split()[3] | |
request_url = line.split()[6] | |
#如果用户前一天满足要求,且今天又访问了topic | |
if user_id in result_user_list and user_id not in temp_user_id_list and "/topic/" in request_url: | |
#如果用户今天访问过topic | |
if user_id in user_dict: | |
#如果用户今天访问了不同的topic | |
if user_dict[user_id] != request_url: | |
temp_user_id_list.append(user_id) | |
del(user_dict[user_id]) #符合要求用户入列后,删除dict中记录,释放内存 | |
else: | |
user_dict[user_id] = request_url | |
log_file.close() | |
result_user_list = temp_user_id_list[:] | |
#找出符合要求的路径 | |
path_set = set() | |
for day in range(1,4): | |
path_set_each_day = set() | |
path_list_each_day = [] | |
for hour in range(18,19): | |
file_name = log_path + "2013-01-%02d-%02d.log"%(day,hour) | |
log_file = open(file_name) | |
for line in log_file: | |
user_id = line.split()[3] | |
request_url = line.split()[6] | |
if user_id in result_user_list: | |
path_list_each_day.append(request_url) | |
log_file.close() | |
#将每天符合要求路径放入集合中 | |
path_set_each_day = set([item for item in path_list_each_day if path_list_each_day.count(item) >= 2]) | |
#若为第一天就初始化set | |
if day == 1: | |
path_set = path_set_each_day | |
#否则,每天取交集的,得结果 | |
else: | |
path_set = path_set & path_set_each_day | |
print "UserList:" | |
print result_user_list | |
print "PathList" | |
print list(path_set) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -* | |
######################################################################### | |
#假设两次以上包含两次 | |
#假设第三条“所有以上用户”指满足1,2条件的用户 | |
#首先讲解下我的实现思路: | |
#这种方法通过只需对所有文件遍历一次即可得出结果,要实现这种效果需要在内存中维护一个dictionary,格式如下: | |
#{ | |
#"user_id1":[["/topic/XXX","/topic/XXX","/question/XXX"...],[...],[...]...共三十天共30个list], | |
#"user_id2":[["/topic/XXX","/topic/XXX","/question/XXX"...],[...],[...]...共三十天共30个list] | |
#} | |
#要求以每天为单位,所以按天(每24个文件)对这个dictionary的项进行增删 | |
#每遍历一天,以用户id为key的list就增加一个元素,记录他这一天访问的路径 | |
#同时,每遍历一天就依据所给前两个条件对这个dictionary进行check,删除不符合要求的记录 | |
#因此,这个dictionary是横向增长而纵向收缩的,遍历第一天并check后记录最多,以后只需记录前一天符合要求的user_id的信息, | |
#也就是这个dictionary现存的信息 | |
#文件遍历完成,这个dictionary的keys就是所有符合条件的用户id,然后按天遍历这个dictionary,把每一天访问两次以上的路径取交集,就是这些人每天都会访问两次以上的路径 | |
#ps:这种方法一次文件遍历可以解决问题,但是可能需要内存较大 | |
######################################################################### | |
log_path = "./" #日志文件路径 | |
user_info_dict = {} #需要在内存中维护的dictionary,记录符合前两个符合要求的用户的路径访问信息 | |
#用来对user_info_dict进行check,删除不合条件记录 | |
#index:记录当天序号的下标,第一天为0 | |
def check_user(user_info_dict,index): | |
for key in user_info_dict.keys(): | |
path_len = len(set([path for path in user_info_dict[key][index] if path.startswith("/topic")]))#一个人访问的不同的topic个数 | |
if len(user_info_dict[key]) < day or path_len < 2: | |
del(user_info_dict[key]) | |
#第一天,用当天数据初始化dict | |
day = 1 | |
index = day - 1 #list下标,从0开始,第一天的数据就是0,第二天为1,依此类推 | |
for hour in range(0,24): | |
file_name = log_path + "2013-01-%02d-%02d.log"%(day,hour) | |
log_file = open(file_name) | |
for line in log_file: | |
user_id = line.split()[3] #获取user_id | |
path = line.split()[6] #获取path | |
#如果还不存在就初始化一条记录 | |
if user_id not in user_info_dict: | |
user_info_dict[user_id] = [] | |
user_info_dict[user_id].append([]) | |
user_info_dict[user_id][index].append(path) #将这个人的路径信息存如dict | |
log_file.close() | |
check_user(user_info_dict,index) #对第一天数据进行检查,删除不合条件者 | |
#从第二天开始进入天数for循环 | |
for day in range(1,31): | |
index = day - 1 | |
for hour in range(0,24): | |
file_name = log_path + "2013-01-%02d-%02d.log"%(day,hour) | |
log_file = open(file_name) | |
for line in log_file: | |
user_id = line.split()[3] | |
#只需要关心前几天的符合要求的用户 | |
if user_id in user_info_dict: | |
path = line.split()[6] | |
if len(user_info_dict[user_id]) == index: | |
user_info_dict[user_id].append([]) | |
user_info_dict[user_id][index].append(path) | |
log_file.close() | |
check_user(user_info_dict,index) #一天循环结束,运行检查 | |
print "UserList:" | |
print user_info_dict.keys() | |
#获取这些用户每天都要访问的两次以上的path | |
path_set = set() #使用集合,以便进行交集操作 | |
for index in xrange(0,2): | |
path_set_each_day = set() | |
path_list = [] | |
#把一天内所有符合要求用户的path汇到一个list | |
for user_id in user_info_dict: | |
path_list.extend(user_info_dict[user_id][index]) | |
#找到list中访问两次以上的url放到当天的set中 | |
for path in path_list: | |
if path_list.count(path) >= 2 and path not in path_set_each_day: | |
path_set_each_day.add(path) | |
#如果为第一天,就初始化set | |
if index == 0: | |
path_set = path_set_each_day | |
#否则每天路径都做交集,得出结果 | |
else: | |
path_set = path_set_each_day & path_set | |
print "PathList:" | |
print list(path_set) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment