altbdoor · September 29, 2018 14:57
diff --git a/stat.py b/stat.py
 import datetime
 import os
 import re
 import urllib.error
 import urllib.request


 def download_log_files(date_obj):
    """Download the chat log for ``date_obj`` into ``data/<YYYY-MM-DD>.txt``.

    Nothing is done when the log file already exists locally, or when
    ``date_obj`` is not strictly earlier than today's local midnight.
    An HTTP error response is ignored and no file is written.

    Args:
        date_obj: ``datetime.datetime`` identifying the day to fetch.
    """
    today = datetime.datetime.now()
    today = today.replace(hour=0, minute=0, second=0, microsecond=0)

    first_half_url = date_obj.strftime('%B%%20%Y')
    second_half_url = date_obj.strftime('%Y-%m-%d')
    log_file = f'data/{second_half_url}.txt'

    if os.path.exists(log_file) or date_obj >= today:
        return

    url = f'https://overrustlelogs.net/Singsing%20chatlog/{first_half_url}/{second_half_url}.txt'
    req = urllib.request.Request(url, headers={
        # send an IE6 User-Agent instead of urllib's default one
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
    })

    try:
        # Read the whole response before touching the disk, so a failed
        # transfer cannot leave an empty file that blocks later retries.
        with urllib.request.urlopen(req) as r:
            payload = r.read()
    except urllib.error.HTTPError:
        return

    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    with open(log_file, 'wb') as f:
        f.write(payload)


 # One chat line looks like "[<timestamp>] <user>: <message>".
 chat_regex = re.compile(r'\[(.+?)\] (.+?): (.+)')


 def parse_log_files(date_obj):
    """Parse ``data/<YYYY-MM-DD>.txt`` for ``date_obj`` into chat records.

    Lines that do not match ``chat_regex``, or whose timestamp is not in
    the ``'%Y-%m-%d %H:%M:%S UTC'`` format, are skipped.

    Args:
        date_obj: ``datetime.datetime`` identifying the day to parse.

    Returns:
        A list of dicts with the keys ``'date'`` (``datetime.datetime``),
        ``'user'`` and ``'message'``, in file order. The list is empty
        when the log file does not exist.
    """
    chat_data = []
    log_file_path = f'data/{date_obj.strftime("%Y-%m-%d")}.txt'

    try:
        with open(log_file_path, 'r', encoding='utf-8') as f:
            for line in f:
                match = chat_regex.search(line)
                if not match:
                    continue

                timestamp, chat_user, chat_message = match.groups()
                try:
                    chat_date = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S UTC')
                except ValueError:
                    # Treat a bad timestamp like any other malformed
                    # line instead of aborting the whole run.
                    continue

                chat_data.append({
                    'date': chat_date,
                    'user': chat_user,
                    'message': chat_message,
                })

    except FileNotFoundError:
        pass

    return chat_data


 def create_date_iterator(from_date, to_date=None):
    """Yield one midnight ``datetime`` per day from ``from_date`` to ``to_date``.

    Both ends are inclusive. Nothing is yielded when ``to_date`` lies
    before ``from_date``.

    Args:
        from_date: first day, as a ``'YYYY-MM-DD'`` string.
        to_date: last day, as a ``'YYYY-MM-DD'`` string. When falsy, the
            range ends at yesterday (local time).
    """
    first_day = datetime.datetime.strptime(from_date, '%Y-%m-%d')

    if not to_date:
        midnight_today = datetime.datetime.now().replace(
            hour=0, minute=0, second=0, microsecond=0,
        )
        last_day = midnight_today - datetime.timedelta(days=1)
    else:
        last_day = datetime.datetime.strptime(to_date, '%Y-%m-%d')

    # Both ends sit on midnight, so the span is a whole number of days.
    total_days = (last_day - first_day).days
    for offset in range(total_days + 1):
        yield first_day + datetime.timedelta(days=offset)


 def get_top_100_for_dict(input_dict):
    """Return at most 100 ``(key, value)`` pairs, largest value first.

    Pairs with equal values keep the iteration order of ``input_dict``.
    """
    ranked = list(input_dict.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:100]


 date_list = create_date_iterator('2018-08-01', '2018-08-31')
 emote_list = ('PepeLaugh', 'Kekk', )
 # An emote only counts as a whole space-delimited word. Lookarounds keep
 # the delimiting spaces out of the match, so back-to-back emotes are all
 # found; names are escaped in case one contains a regex metacharacter.
 emote_alternation = '|'.join(re.escape(name) for name in emote_list)
 emote_regex = re.compile(f'(?<![^ ])({emote_alternation})(?![^ ])')

 for date_obj in date_list:
    # Fetch the day's log unless it is already cached on disk.
    download_log_files(date_obj)

    # Tally per-day statistics from the parsed chat lines.
    chat_data = parse_log_files(date_obj)
    user_dict = {}
    cmd_dict = {}
    emote_dict = {}

    for entry in chat_data:
        user = entry['user']
        message = entry['message']

        user_dict[user] = user_dict.get(user, 0) + 1

        if message.startswith('!'):
            # the command is the first space-delimited word, e.g. "!mmr"
            cmd = message.split(' ')[0]
            cmd_dict[cmd] = cmd_dict.get(cmd, 0) + 1

        # findall returns the captured emote name of every occurrence,
        # so repeated or mixed emotes in one message are all counted.
        for em in emote_regex.findall(message):
            emote_dict[em] = emote_dict.get(em, 0) + 1

    if chat_data:
        # NOTE: user_dict and cmd_dict are tallied but not reported yet.
        top_emotes = get_top_100_for_dict(emote_dict)
        print(top_emotes)
	import datetime
	import os
	import re
	import urllib.error
	import urllib.request


	def download_log_files(date_obj):
	    """Download the chat log for ``date_obj`` into ``data/<YYYY-MM-DD>.txt``.

	    Nothing is done when the log file already exists locally, or when
	    ``date_obj`` is not strictly earlier than today's local midnight.
	    An HTTP error response is ignored and no file is written.

	    Args:
	        date_obj: ``datetime.datetime`` identifying the day to fetch.
	    """
	    today = datetime.datetime.now()
	    today = today.replace(hour=0, minute=0, second=0, microsecond=0)

	    first_half_url = date_obj.strftime('%B%%20%Y')
	    second_half_url = date_obj.strftime('%Y-%m-%d')
	    log_file = f'data/{second_half_url}.txt'

	    if os.path.exists(log_file) or date_obj >= today:
	        return

	    url = f'https://overrustlelogs.net/Singsing%20chatlog/{first_half_url}/{second_half_url}.txt'
	    req = urllib.request.Request(url, headers={
	        # send an IE6 User-Agent instead of urllib's default one
	        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
	    })

	    try:
	        # Read the whole response before touching the disk, so a failed
	        # transfer cannot leave an empty file that blocks later retries.
	        with urllib.request.urlopen(req) as r:
	            payload = r.read()
	    except urllib.error.HTTPError:
	        return

	    os.makedirs(os.path.dirname(log_file), exist_ok=True)
	    with open(log_file, 'wb') as f:
	        f.write(payload)


	# One chat line looks like "[<timestamp>] <user>: <message>".
	chat_regex = re.compile(r'\[(.+?)\] (.+?): (.+)')


	def parse_log_files(date_obj):
	    """Parse ``data/<YYYY-MM-DD>.txt`` for ``date_obj`` into chat records.

	    Lines that do not match ``chat_regex``, or whose timestamp is not in
	    the ``'%Y-%m-%d %H:%M:%S UTC'`` format, are skipped.

	    Args:
	        date_obj: ``datetime.datetime`` identifying the day to parse.

	    Returns:
	        A list of dicts with the keys ``'date'`` (``datetime.datetime``),
	        ``'user'`` and ``'message'``, in file order. The list is empty
	        when the log file does not exist.
	    """
	    chat_data = []
	    log_file_path = f'data/{date_obj.strftime("%Y-%m-%d")}.txt'

	    try:
	        with open(log_file_path, 'r', encoding='utf-8') as f:
	            for line in f:
	                match = chat_regex.search(line)
	                if not match:
	                    continue

	                timestamp, chat_user, chat_message = match.groups()
	                try:
	                    chat_date = datetime.datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S UTC')
	                except ValueError:
	                    # Treat a bad timestamp like any other malformed
	                    # line instead of aborting the whole run.
	                    continue

	                chat_data.append({
	                    'date': chat_date,
	                    'user': chat_user,
	                    'message': chat_message,
	                })

	    except FileNotFoundError:
	        pass

	    return chat_data


	def create_date_iterator(from_date, to_date=None):
	    """Yield one midnight ``datetime`` per day from ``from_date`` to ``to_date``.

	    Both ends are inclusive. Nothing is yielded when ``to_date`` lies
	    before ``from_date``.

	    Args:
	        from_date: first day, as a ``'YYYY-MM-DD'`` string.
	        to_date: last day, as a ``'YYYY-MM-DD'`` string. When falsy, the
	            range ends at yesterday (local time).
	    """
	    first_day = datetime.datetime.strptime(from_date, '%Y-%m-%d')

	    if not to_date:
	        midnight_today = datetime.datetime.now().replace(
	            hour=0, minute=0, second=0, microsecond=0,
	        )
	        last_day = midnight_today - datetime.timedelta(days=1)
	    else:
	        last_day = datetime.datetime.strptime(to_date, '%Y-%m-%d')

	    # Both ends sit on midnight, so the span is a whole number of days.
	    total_days = (last_day - first_day).days
	    for offset in range(total_days + 1):
	        yield first_day + datetime.timedelta(days=offset)


	def get_top_100_for_dict(input_dict):
	    """Return at most 100 ``(key, value)`` pairs, largest value first.

	    Pairs with equal values keep the iteration order of ``input_dict``.
	    """
	    ranked = list(input_dict.items())
	    ranked.sort(key=lambda pair: pair[1], reverse=True)
	    return ranked[:100]


	date_list = create_date_iterator('2018-08-01', '2018-08-31')
	emote_list = ('PepeLaugh', 'Kekk', )
	# An emote only counts as a whole space-delimited word. Lookarounds keep
	# the delimiting spaces out of the match, so back-to-back emotes are all
	# found; names are escaped in case one contains a regex metacharacter.
	emote_alternation = '|'.join(re.escape(name) for name in emote_list)
	emote_regex = re.compile(f'(?<![^ ])({emote_alternation})(?![^ ])')

	for date_obj in date_list:
	    # Fetch the day's log unless it is already cached on disk.
	    download_log_files(date_obj)

	    # Tally per-day statistics from the parsed chat lines.
	    chat_data = parse_log_files(date_obj)
	    user_dict = {}
	    cmd_dict = {}
	    emote_dict = {}

	    for entry in chat_data:
	        user = entry['user']
	        message = entry['message']

	        user_dict[user] = user_dict.get(user, 0) + 1

	        if message.startswith('!'):
	            # the command is the first space-delimited word, e.g. "!mmr"
	            cmd = message.split(' ')[0]
	            cmd_dict[cmd] = cmd_dict.get(cmd, 0) + 1

	        # findall returns the captured emote name of every occurrence,
	        # so repeated or mixed emotes in one message are all counted.
	        for em in emote_regex.findall(message):
	            emote_dict[em] = emote_dict.get(em, 0) + 1

	    if chat_data:
	        # NOTE: user_dict and cmd_dict are tallied but not reported yet.
	        top_emotes = get_top_100_for_dict(emote_dict)
	        print(top_emotes)
No results found