Created
September 8, 2015 21:08
-
-
Save lrvdijk/25c7e70d73b19af91b7c to your computer and use it in GitHub Desktop.
Hockey Team Stats generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Graph generator for wiebetaaltwat.nl
====================================

Written by Lucas van Dijk <[email protected]>
"""
from bs4 import BeautifulSoup | |
from pickle import dump, load | |
import argparse | |
import sys | |
import re | |
# Matches a "<digits>x" token inside a participant cell (presumably a share
# multiplier such as "2x"); it is removed so that only the name remains.
participant_multiplier_re = re.compile(r'([0-9]+)x')


def create_database(args):
    """
    Reads the given HTML files from wiebetaaltwat.nl, and puts all
    data in a picklable object.

    :param args: parsed command line arguments. ``args.files`` lists the
        input paths (anything not ending in ``.html`` is skipped) and
        ``args.output`` is the path the pickled list is written to.

    Every table row becomes a dict with the keys ``payment_by``,
    ``description``, ``amount`` (a float) and ``participants`` (a list of
    names with any ``<digits>x`` token removed).
    """
    data = []

    for path in args.files:
        if not path.endswith('.html'):
            continue

        # Close the HTML file as soon as it has been parsed instead of
        # leaking the handle until garbage collection.
        with open(path) as html_file:
            soup = BeautifulSoup(html_file, "lxml")

        payments_by = soup.find_all('td', 'payment-by')
        descriptions = soup.find_all('td', 'description')
        amounts = soup.find_all('td', 'amount')
        participants = soup.find_all('td', 'participants')

        # Index the sibling columns by position (rather than zip) so that a
        # row with a missing cell still fails loudly instead of being
        # dropped silently.
        for i, payer in enumerate(payments_by):
            data.append({
                'payment_by': str(payer.string).strip(),
                'description': str(descriptions[i].string).strip(),
                # Use a decimal point and drop the two leading characters
                # (presumably a currency prefix) before converting.
                'amount': float(amounts[i].string.replace(',', '.')[2:]),
                'participants': [
                    participant_multiplier_re.sub('', x.replace('\n', '')).strip()
                    for x in participants[i].string.split(',')
                ],
            })

    with open(args.output, 'wb') as f:
        dump(data, f)
def create_graphs(args):
    """
    Render the requested graphs from the pickled database to PNG files.

    :param args: parsed command line arguments. ``args.module`` names the
        module holding the graph functions, ``args.file`` is the pickled
        database and ``args.graphs`` selects the functions to run: a single
        name, a list of names, or nothing to run every public function of
        the module whose name ends in ``_graph``.

    Each selected function is called with the loaded data and the figure it
    returns is saved as ``<function name>.png`` in the working directory.
    Names the module does not define are reported on stderr and skipped.
    """
    # import_module returns the named module itself, also for dotted names,
    # whereas __import__ hands back the top-level package.
    from importlib import import_module

    graphs = import_module(args.module)

    # NOTE(review): unpickling can execute arbitrary code; only point this
    # at database files you created yourself.
    with open(args.file, "rb") as f:
        data = load(f)

    funcs = args.graphs
    if isinstance(funcs, str):
        # A positional declared with nargs='?' arrives as one plain string;
        # looping over it would walk its characters and match no function.
        funcs = [funcs]

    if not funcs:
        funcs = [func for func in dir(graphs)
                 if func.endswith('_graph') and not func.startswith('_')]

    for funcname in funcs:
        func = getattr(graphs, funcname, None)
        if func is None:
            print('Unknown graph function: {0}'.format(funcname), file=sys.stderr)
            continue

        fig = func(data)
        fig.savefig(funcname + '.png')
if __name__ == '__main__':
    # Command line entry point: "graph" renders graphs from the pickled
    # database, "updatedb" rebuilds that database from saved HTML pages.
    argparser = argparse.ArgumentParser(description="Generate graphs from wiebetaaltwat.nl data")
    subparsers = argparser.add_subparsers(title='Subcommands',
        description='This program can either create the graphs, or update the database')

    parser_graphs = subparsers.add_parser('graph')
    parser_graphs.set_defaults(func=create_graphs)
    parser_graphs.add_argument('-m', '--module', default="graphs", help="Python module where graph generation functions are located")
    parser_graphs.add_argument('-f', '--file', default="data.pickle", help="The pickled database file")
    # '*' collects any number of names into a list; '?' would yield a single
    # string, which the graph command would then iterate character by
    # character.
    parser_graphs.add_argument('graphs', nargs='*', help="Graph generation functions to execute")

    parser_db = subparsers.add_parser('updatedb')
    parser_db.set_defaults(func=create_database)
    parser_db.add_argument("files", nargs='+', help='Files to read from, when creating the database')
    parser_db.add_argument('-o', '--output', default='data.pickle', help='Output file for the database')

    args = argparser.parse_args(sys.argv[1:])

    # Subcommands are optional in Python 3, so without one there is no
    # ``func`` default; show a usage error instead of an AttributeError.
    if not hasattr(args, 'func'):
        argparser.error('a subcommand is required: graph or updatedb')

    args.func(args)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Graph generator for wiebetaaltwat.nl
====================================
Written by Lucas van Dijk <[email protected]>
""" | |
from pylab import * | |
import random | |
import string | |
def is_game(entry):
    """
    Tell whether the description of an entry marks it as a game expense.

    The description is lower-cased and searched for a fixed set of
    keywords; a single matching substring is enough.
    """
    lowered = entry['description'].lower()
    return any(keyword in lowered
               for keyword in ('wedstrijd', 'zondag', 'barendrecht', 'kratje'))
def is_training(entry):
    """
    Tell whether the description of an entry marks it as a training expense.

    The description is lower-cased and searched for a fixed set of
    keywords; a single matching substring is enough.
    """
    lowered = entry['description'].lower()
    return any(keyword in lowered for keyword in ('training', 'woensdag'))
def get_totals(data):
    """
    Sum all amounts and break the sum down per category.

    :param data: iterable of entries, each a dict with at least the keys
        ``description`` and ``amount``.
    :return: tuple ``(total, totals)`` where ``totals`` maps the categories
        ``wedstrijd``, ``training``, ``dixo``, ``toernooi`` and ``overig``
        to their summed amounts.

    An entry is assigned to the first category that matches, in the order
    listed above. Entries that match none of the named categories are
    printed and counted as ``overig``.
    """
    total = 0.0
    totals = {
        'wedstrijd': 0.0,
        'training': 0.0,
        'dixo': 0.0,
        'toernooi': 0.0,
        'overig': 0.0,
    }

    for entry in data:
        amount = entry['amount']
        total += amount

        if is_game(entry):
            category = 'wedstrijd'
        elif is_training(entry):
            category = 'training'
        elif 'dixo' in entry['description'].lower():
            category = 'dixo'
        elif 'toernooi' in entry['description'].lower():
            category = 'toernooi'
        else:
            print(entry['description'], entry['amount'])
            category = 'overig'

        # The remainder is summed directly rather than derived as
        # total minus the other categories: that subtraction can leave a
        # tiny negative float residue, which a pie chart rejects.
        totals[category] += amount

    return (total, totals)
def total_graph(data):
    """
    Draw a pie chart of the total amount of euros spent, split into the
    categories wedstrijd, training, dixo, toernooi and overig.

    Returns the figure holding the chart.
    """
    total, totals = get_totals(data)

    categories = ('wedstrijd', 'training', 'dixo', 'toernooi', 'overig')
    percentages = [(totals[category] / total) * 100 for category in categories]
    labels = ['Wedstrijd', 'Training', 'Dixo', 'Toernooi', 'Overig']

    fig = figure(figsize=(10, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    ax.pie(percentages, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)
    ax.set_title('Totaalbedrag van {0:.2f} euro onderverdeeld'.format(total))

    return fig
def averages_game_training_graph(data):
    """
    Draw a bar chart of the average amount per person per activity.

    For every entry the amount is divided by its number of participants.
    These per-person amounts are then averaged separately for games,
    trainings and dixo entries; entries matching none of the three are
    ignored. A category without any entries is drawn as a zero bar.

    :param data: iterable of entries with the keys ``description``,
        ``amount`` and ``participants``.
    :return: the figure holding the chart.
    """
    averages_training = []
    averages_game = []
    averages_dixo = []

    for entry in data:
        average = entry['amount'] / len(entry['participants'])

        if is_game(entry):
            averages_game.append(average)
        elif is_training(entry):
            averages_training.append(average)
        elif 'dixo' in entry['description'].lower():
            averages_dixo.append(average)

    # Guard the division: a data set lacking one of the activities used to
    # abort the whole graph with a ZeroDivisionError.
    averages = [
        sum(values) / len(values) if values else 0.0
        for values in (averages_training, averages_game, averages_dixo)
    ]

    fig = figure(figsize=(10, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    colors = ['r', 'g', 'b']
    ax.bar(range(3), averages, align='center', color=colors)

    # Print each value just above its bar.
    for position, value in enumerate(averages):
        ax.annotate('{0:.2f}'.format(value), xy=(position, value + 0.1))

    ax.set_ylabel('Euro')
    ax.set_title('Gemiddeld aantal euro per persoon per activiteit')
    ax.set_xticks(range(3))
    ax.set_xticklabels(['Training', 'Wedstrijd', 'Dixo'])
    fig.autofmt_xdate()

    return fig
def most_payed_graph(data):
    """
    Draw a bar chart of each person's total share of the expenses.

    The amount of every entry is divided evenly over its participants and
    these shares are summed per person.

    :param data: iterable of entries with the keys ``amount`` and
        ``participants``.
    :return: the figure holding the chart.
    """
    total_payed = {}
    for entry in data:
        average_per_person = entry['amount'] / len(entry['participants'])

        for person in entry['participants']:
            total_payed[person] = total_payed.get(person, 0.0) + average_per_person

    # Materialise names and values as lists in one shared order: older
    # matplotlib releases cannot plot a dict view directly.
    persons = list(total_payed)
    amounts = [total_payed[person] for person in persons]

    fig = figure(figsize=(17, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # One random "#rrggbb" colour per bar; randrange(256) covers the full
    # 00-ff range of every channel.
    colors = ['#' + ''.join('{0:02x}'.format(random.randrange(256)) for _ in range(3))
              for _ in persons]
    ax.bar(range(len(persons)), amounts, align='center', color=colors)

    # Print each value just above its bar.
    for position, value in enumerate(amounts):
        ax.annotate('{0:.2f}'.format(value), xy=(position - 0.5, value + 3))

    ax.set_ylabel('Euro')
    ax.set_title('Wie heeft in totaal het meest betaald')
    ax.set_xticks(range(len(persons)))
    ax.set_xticklabels(persons)
    fig.autofmt_xdate()

    return fig
def most_present_graph(data):
    """
    Draw a bar chart of how often each person took part in an entry.

    :param data: iterable of entries with the key ``participants``.
    :return: the figure holding the chart.
    """
    total_present = {}
    for entry in data:
        for person in entry['participants']:
            total_present[person] = total_present.get(person, 0) + 1

    # Materialise names and counts as lists in one shared order: older
    # matplotlib releases cannot plot a dict view directly.
    persons = list(total_present)
    counts = [total_present[person] for person in persons]

    fig = figure(figsize=(17, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # One random "#rrggbb" colour per bar; randrange(256) covers the full
    # 00-ff range of every channel.
    colors = ['#' + ''.join('{0:02x}'.format(random.randrange(256)) for _ in range(3))
              for _ in persons]
    ax.bar(range(len(persons)), counts, align='center', color=colors)

    # Print each count just above its bar.
    for position, value in enumerate(counts):
        ax.annotate('{0}'.format(value), xy=(position - 0.3, value + 3))

    ax.set_ylabel('Aantal keer')
    ax.set_title('Wie betaalt er het vaakst mee op wiebetaaltwat.nl')
    ax.set_xticks(range(len(persons)))
    ax.set_xticklabels(persons, size='small')
    fig.autofmt_xdate()

    return fig
def most_buyed_graph(data):
    """
    Draw a bar chart of the total amount each person advanced.

    The amount of every entry is added to the total of the person named in
    its ``payment_by`` field.

    :param data: iterable of entries with the keys ``payment_by`` and
        ``amount``.
    :return: the figure holding the chart.
    """
    total_buyed = {}
    for entry in data:
        payer = entry['payment_by']
        total_buyed[payer] = total_buyed.get(payer, 0) + entry['amount']

    # Materialise names and values as lists in one shared order: older
    # matplotlib releases cannot plot a dict view directly.
    persons = list(total_buyed)
    amounts = [total_buyed[person] for person in persons]

    fig = figure(figsize=(17, 10))
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    # One random "#rrggbb" colour per bar; randrange(256) covers the full
    # 00-ff range of every channel.
    colors = ['#' + ''.join('{0:02x}'.format(random.randrange(256)) for _ in range(3))
              for _ in persons]
    ax.bar(range(len(persons)), amounts, align='center', color=colors)

    # Print each value just above its bar.
    for position, value in enumerate(amounts):
        ax.annotate('{0:.2f}'.format(value), xy=(position - 0.5, value + 5))

    ax.set_ylabel('Euro')
    ax.set_title('Wie heeft het meeste geld ingelegd')
    ax.set_xticks(range(len(persons)))
    ax.set_xticklabels(persons)
    fig.autofmt_xdate()

    return fig
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
# Print the amount and description of every payment made by Lucas van Dijk.
with open("data.pickle", "rb") as db_file:
    data = pickle.load(db_file)

for row in data:
    if row['payment_by'] != 'Lucas van Dijk':
        continue
    print(row['amount'], row['description'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment