Last active
February 2, 2017 17:48
-
-
Save Sh4kE/99777bc5f40a4708e891447e0e9f5a06 to your computer and use it in GitHub Desktop.
python scripts for extracting player prices from ofm transfer xls'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Created by .ignore support plugin (hsz.mobi) | |
### VirtualEnv template | |
# Virtualenv | |
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ | |
.Python | |
[Bb]in | |
[Ii]nclude | |
[Ll]ib | |
[Ll]ib64 | |
[Ll]ocal | |
[Ss]cripts | |
pyvenv.cfg | |
.venv | |
pip-selfcheck.json | |
### Linux template | |
*~ | |
# temporary files which can be created if a process still has a handle open of a deleted file | |
.fuse_hidden* | |
# KDE directory preferences | |
.directory | |
# Linux trash folder which might appear on any partition or disk | |
.Trash-* | |
# .nfs files are created when an open file is removed but is still being accessed | |
.nfs* | |
### Example user template template | |
### Example user template | |
# IntelliJ project files | |
.idea | |
*.iml | |
out | |
gen### Python template | |
# Byte-compiled / optimized / DLL files | |
__pycache__/ | |
*.py[cod] | |
*$py.class | |
# C extensions | |
*.so | |
# Distribution / packaging | |
env/ | |
build/ | |
develop-eggs/ | |
dist/ | |
downloads/ | |
eggs/ | |
.eggs/ | |
lib/ | |
lib64/ | |
parts/ | |
sdist/ | |
var/ | |
*.egg-info/ | |
.installed.cfg | |
*.egg | |
# PyInstaller | |
# Usually these files are written by a python script from a template | |
# before PyInstaller builds the exe, so as to inject date/other infos into it. | |
*.manifest | |
*.spec | |
# Installer logs | |
pip-log.txt | |
pip-delete-this-directory.txt | |
# Unit test / coverage reports | |
htmlcov/ | |
.tox/ | |
.coverage | |
.coverage.* | |
.cache | |
nosetests.xml | |
coverage.xml | |
*,cover | |
.hypothesis/ | |
# Translations | |
*.mo | |
*.pot | |
# Django stuff: | |
*.log | |
local_settings.py | |
# Flask stuff: | |
instance/ | |
.webassets-cache | |
# Scrapy stuff: | |
.scrapy | |
# Sphinx documentation | |
docs/_build/ | |
# PyBuilder | |
target/ | |
# Jupyter Notebook | |
.ipynb_checkpoints | |
# pyenv | |
.python-version | |
# celery beat schedule file | |
celerybeat-schedule | |
# dotenv | |
.env | |
# virtualenv | |
.venv/ | |
venv/ | |
ENV/ | |
# Spyder project settings | |
.spyderproject | |
# Rope project settings | |
.ropeproject | |
### JetBrains template | |
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm | |
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | |
# User-specific stuff: | |
.idea/workspace.xml | |
.idea/tasks.xml | |
# Sensitive or high-churn files: | |
.idea/dataSources/ | |
.idea/dataSources.ids | |
.idea/dataSources.xml | |
.idea/dataSources.local.xml | |
.idea/sqlDataSources.xml | |
.idea/dynamic.xml | |
.idea/uiDesigner.xml | |
# Gradle: | |
.idea/gradle.xml | |
.idea/libraries | |
# Mongo Explorer plugin: | |
.idea/mongoSettings.xml | |
## File-based project format: | |
*.iws | |
## Plugin-specific files: | |
# IntelliJ | |
/out/ | |
# mpeltonen/sbt-idea plugin | |
.idea_modules/ | |
# JIRA plugin | |
atlassian-ide-plugin.xml | |
# Crashlytics plugin (for Android Studio and IntelliJ) | |
com_crashlytics_export_strings.xml | |
crashlytics.properties | |
crashlytics-build.properties | |
fabric.properties | |
# ofm transfers | |
transfers/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
for FILE in *.xls; do | |
FILENAME=$(echo $FILE | awk '{split($0,a,"."); print a[1]}') | |
if [ -e "$FILENAME.csv" ] | |
then | |
echo "File already exists: $FILENAME.csv" | |
else | |
# convert to csv | |
libreoffice --headless --convert-to csv $FILE | |
START=$(grep -n Nr. $FILENAME.csv | awk '{split($0,a,":"); print a[1]}') | |
# drop first comment line | |
tail -n +$START "$FILENAME.csv" > "$FILENAME.tmp" | |
# remove csv in order to append to it later | |
rm -f "$FILENAME.csv" | |
# convert from windows encoding to utf-8 | |
iconv -f cp1252 -t utf-8 "$FILENAME.tmp" > "$FILENAME.csv" | |
# delete temporary file | |
rm -f "$FILENAME.tmp" | |
fi | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import matplotlib.pyplot as plt | |
import matplotlib.ticker as mtick | |
import numpy as np | |
import pandas as pd | |
from matplotlib import style | |
style.use('ggplot') | |
TRANSFERS_DIR = 'transfers' | |
def load_data(): | |
main_df = pd.DataFrame() | |
for file in os.listdir(TRANSFERS_DIR): | |
if file.endswith('csv'): | |
df = pd.read_csv('{}/{}'.format(TRANSFERS_DIR, file), | |
index_col=0, | |
dtype={7: np.int32, 8: np.int32, 9: np.int32}, | |
skip_blank_lines=True, | |
) | |
df.drop(df.columns[[2, 3, 4]], axis=1, inplace=True) | |
df = df.rename(columns={df.columns[0]: "Matchday", | |
df.columns[1]: "Season", | |
df.columns[2]: "Position", | |
df.columns[3]: "Age", | |
df.columns[4]: "Strength", | |
df.columns[5]: "Price", | |
}) | |
if main_df.empty: | |
main_df = df | |
else: | |
main_df = main_df.append(df) | |
return main_df | |
def filter_transfers(df, positions=None, ages=None, strengths=None, seasons=None, matchdays=None): | |
filtered_df = df.copy() | |
if positions: | |
filtered_df = df[df.Position.isin(positions)] | |
if ages: | |
filtered_df = filtered_df[filtered_df.Age.isin(ages)] | |
if strengths: | |
filtered_df = filtered_df[filtered_df.Strength.isin(strengths)] | |
if seasons: | |
filtered_df = filtered_df[filtered_df.Season.isin(seasons)] | |
if matchdays: | |
filtered_df = filtered_df[filtered_df.Matchday.isin(matchdays)] | |
return filtered_df | |
def get_prices_grouped_by_strength(position='MS', age=33): | |
df = filter_transfers(load_data(), | |
positions=[position], | |
ages=[age], | |
) | |
return df.groupby('Strength').Price | |
def get_prices_grouped_by_age(position='MS', strength=16): | |
df = filter_transfers(load_data(), | |
positions=[position], | |
strengths=[strength], | |
) | |
return df.groupby('Age').Price | |
def draw_prices(prices, title, xlabel, ylabel='Preis'): | |
x = np.array(prices.mean().index) | |
y = prices.mean() | |
yerr = prices.std() | |
fig = plt.figure() | |
ax = fig.add_subplot(111) | |
plt.errorbar(x, y, yerr=yerr, fmt='o') | |
plt.xlabel(xlabel) | |
plt.ylabel(ylabel) | |
plt.title(title) | |
ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}')) | |
plt.show() | |
if __name__ == '__main__': | |
#draw_prices(get_prices_grouped_by_age(), | |
# title="Preise: MS/16er", | |
# xlabel='Alter') | |
draw_prices(get_prices_grouped_by_strength(), | |
title="Preise: MS, 33 Jahre", | |
xlabel='Stärke') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pandas | |
pandas-datareader | |
matplotlib | |
numpy | |
BeautifulSoup4 | |
sklearn | |
xlrd |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment