Skip to content

Instantly share code, notes, and snippets.

@altbdoor
Created September 29, 2018 14:57
Show Gist options
  • Select an option

  • Save altbdoor/dd8be9551bda4ecb7337d6092fa09c2e to your computer and use it in GitHub Desktop.

Select an option

Save altbdoor/dd8be9551bda4ecb7337d6092fa09c2e to your computer and use it in GitHub Desktop.
Python script that downloads daily chat logs from overrustlelogs.net and prints per-day emote usage counts
import datetime
import os
import re
import urllib.error
import urllib.request
def download_log_files(date_obj):
    """Download the chat log for the day of ``date_obj`` into ``data/``.

    The log is stored as ``data/<YYYY-MM-DD>.txt``. Nothing is fetched when
    that file already exists, or when ``date_obj`` is not strictly earlier
    than today's local midnight.

    Args:
        date_obj: ``datetime.datetime`` identifying the day to fetch.

    Returns:
        None. HTTP error responses are ignored and leave no file behind, so
        a later run will try the same day again.
    """
    second_half_url = date_obj.strftime('%Y-%m-%d')
    log_file = f'data/{second_half_url}.txt'
    if os.path.exists(log_file):
        return

    today = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    if date_obj >= today:
        return

    # '%%20' yields a literal '%20' (URL-encoded space) between month and year.
    first_half_url = date_obj.strftime('%B%%20%Y')
    url = f'https://overrustlelogs.net/Singsing%20chatlog/{first_half_url}/{second_half_url}.txt'
    req = urllib.request.Request(url, headers={
        # masquerade as IE6 KEK
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
    })

    # Read the whole body before touching the filesystem: a failed request
    # must not leave an empty file that would be mistaken for a cached log.
    try:
        with urllib.request.urlopen(req) as r:
            payload = r.read()
    except urllib.error.HTTPError:
        return

    # The data directory may not exist yet on a first run.
    os.makedirs(os.path.dirname(log_file), exist_ok=True)

    # Write under a temporary name and rename, so an interrupted write never
    # produces a truncated file at the final path.
    tmp_file = f'{log_file}.part'
    with open(tmp_file, 'wb') as f:
        f.write(payload)
    os.replace(tmp_file, log_file)
# Matches "[<timestamp>] <user>: <message>" and captures the three parts.
chat_regex = re.compile(r'\[(.+?)\] (.+?): (.+)')


def parse_log_files(date_obj):
    """Parse the stored chat log for the day of ``date_obj``.

    Reads ``data/<YYYY-MM-DD>.txt`` as UTF-8 and extracts one entry per line
    that matches ``chat_regex``.

    Args:
        date_obj: ``datetime.datetime`` identifying the day to parse.

    Returns:
        A list of dicts with keys ``'date'`` (naive ``datetime.datetime``
        parsed from the bracketed timestamp), ``'user'`` and ``'message'``.
        The list is empty when the log file does not exist. Lines that do
        not match the pattern, or whose bracketed prefix is not a
        ``YYYY-MM-DD HH:MM:SS UTC`` timestamp, are skipped.
    """
    chat_data = []
    log_file_path = f'data/{date_obj.strftime("%Y-%m-%d")}.txt'

    try:
        with open(log_file_path, 'r', encoding='utf-8') as f:
            for line in f:
                match = chat_regex.search(line)
                if not match:
                    continue

                raw_date, chat_user, chat_message = match.groups()
                try:
                    chat_date = datetime.datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S UTC')
                except ValueError:
                    # Bracketed text that is not a timestamp: treat it like
                    # any other malformed line instead of aborting the parse.
                    continue

                chat_data.append({
                    'date': chat_date,
                    'user': chat_user,
                    'message': chat_message,
                })
    except FileNotFoundError:
        # No log was downloaded for this day.
        pass

    return chat_data
def create_date_iterator(from_date, to_date=None):
    """Yield one midnight ``datetime`` per day from ``from_date`` to the end date.

    Args:
        from_date: First day to yield, as a ``'YYYY-MM-DD'`` string.
        to_date: Last day to yield (inclusive), as a ``'YYYY-MM-DD'`` string.
            When omitted or empty, the range ends at yesterday, based on the
            local clock.

    Yields:
        ``datetime.datetime`` objects in ascending order, one per day.
        Nothing is yielded when the start lies after the end.
    """
    day_format = '%Y-%m-%d'
    step = datetime.timedelta(days=1)

    current = datetime.datetime.strptime(from_date, day_format)

    if to_date:
        last = datetime.datetime.strptime(to_date, day_format)
    else:
        midnight = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        last = midnight - step

    while current <= last:
        yield current
        current += step
def get_top_100_for_dict(input_dict, limit=100):
    """Return the highest-valued ``(key, value)`` pairs of ``input_dict``.

    Args:
        input_dict: Mapping whose values can be compared with each other
            (for example, occurrence counts).
        limit: Maximum number of pairs to return; defaults to 100. ``None``
            returns every pair.

    Returns:
        A list of ``(key, value)`` tuples sorted by value, largest first.
        Pairs with equal values keep the order in which they appear in
        ``input_dict``.

    Raises:
        ValueError: If ``limit`` is negative.
    """
    if limit is not None and limit < 0:
        raise ValueError('limit must be non-negative')

    ranked = sorted(input_dict.items(), key=lambda kv: kv[1], reverse=True)
    return ranked[:limit]
# Days to analyse, and the emotes whose usage is counted.
date_list = create_date_iterator('2018-08-01', '2018-08-31')
emote_list = ('PepeLaugh', 'Kekk', )
# An emote only counts when delimited by spaces or the start/end of the message.
emote_regex = re.compile(f'(?:^| )({"|".join(emote_list)})(?: |$)')

for date_obj in date_list:
    # Fetch the day's log if needed, then load its messages.
    download_log_files(date_obj)
    chat_data = parse_log_files(date_obj)

    # Per-day tallies: messages per user, uses per "!command", hits per emote.
    # Only the emote tally is printed below.
    user_dict, cmd_dict, emote_dict = {}, {}, {}

    for entry in chat_data:
        author = entry['user']
        text = entry['message']

        user_dict[author] = user_dict.get(author, 0) + 1

        if text.startswith('!'):
            # The command is the first space-separated token of the message.
            cmd = text.split(' ')[0]
            cmd_dict[cmd] = cmd_dict.get(cmd, 0) + 1

        # NOTE(review): search() returns only the first match and the pattern
        # has a single group, so at most one emote is counted per message.
        em_match = emote_regex.search(text)
        if em_match is None:
            continue
        for emote in em_match.groups():
            emote_dict[emote] = emote_dict.get(emote, 0) + 1

    # Days without any parsed messages produce no output.
    if not chat_data:
        continue
    emote_dict = get_top_100_for_dict(emote_dict)
    print(emote_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment