Sh4kE · February 2, 2017 17:48
diff --git a/.gitignore b/.gitignore
 # Created by .ignore support plugin (hsz.mobi)
 ### VirtualEnv template
 # Virtualenv
 # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
 .Python
 [Bb]in
 [Ii]nclude
 [Ll]ib
 [Ll]ib64
 [Ll]ocal
 [Ss]cripts
 pyvenv.cfg
 .venv
 pip-selfcheck.json
 ### Linux template
 *~

 # temporary files which can be created if a process still has a handle open of a deleted file
 .fuse_hidden*

 # KDE directory preferences
 .directory

 # Linux trash folder which might appear on any partition or disk
 .Trash-*

 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*
 ### Example user template template
 ### Example user template

 # IntelliJ project files
 .idea
 *.iml
 out
 gen
 ### Python template
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class

 # C extensions
 *.so

 # Distribution / packaging
 env/
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 *.egg-info/
 .installed.cfg
 *.egg

 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec

 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt

 # Unit test / coverage reports
 htmlcov/
 .tox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *,cover
 .hypothesis/

 # Translations
 *.mo
 *.pot

 # Django stuff:
 *.log
 local_settings.py

 # Flask stuff:
 instance/
 .webassets-cache

 # Scrapy stuff:
 .scrapy

 # Sphinx documentation
 docs/_build/

 # PyBuilder
 target/

 # Jupyter Notebook
 .ipynb_checkpoints

 # pyenv
 .python-version

 # celery beat schedule file
 celerybeat-schedule

 # dotenv
 .env

 # virtualenv
 .venv/
 venv/
 ENV/

 # Spyder project settings
 .spyderproject

 # Rope project settings
 .ropeproject
 ### JetBrains template
 # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
 # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

 # User-specific stuff:
 .idea/workspace.xml
 .idea/tasks.xml

 # Sensitive or high-churn files:
 .idea/dataSources/
 .idea/dataSources.ids
 .idea/dataSources.xml
 .idea/dataSources.local.xml
 .idea/sqlDataSources.xml
 .idea/dynamic.xml
 .idea/uiDesigner.xml

 # Gradle:
 .idea/gradle.xml
 .idea/libraries

 # Mongo Explorer plugin:
 .idea/mongoSettings.xml

 ## File-based project format:
 *.iws

 ## Plugin-specific files:

 # IntelliJ
 /out/

 # mpeltonen/sbt-idea plugin
 .idea_modules/

 # JIRA plugin
 atlassian-ide-plugin.xml

 # Crashlytics plugin (for Android Studio and IntelliJ)
 com_crashlytics_export_strings.xml
 crashlytics.properties
 crashlytics-build.properties
 fabric.properties

 # ofm transfers
 transfers/
diff --git a/convert_xls_to_csv.sh b/convert_xls_to_csv.sh
 #!/bin/bash
 # Convert every *.xls file in the current directory to a UTF-8 CSV that
 # starts at the first row containing the literal text "Nr.".
 for FILE in *.xls; do
    # Without any match the glob stays literal ("*.xls"); skip that case.
    [ -e "$FILE" ] || continue

    # Strip only the trailing ".xls" so names with extra dots stay intact.
    FILENAME="${FILE%.xls}"
    if [ -e "$FILENAME.csv" ]
    then
        echo "File already exists: $FILENAME.csv"
    else
        # convert to csv
        libreoffice --headless --convert-to csv "$FILE"

        # line number of the first row containing the fixed string "Nr."
        START=$(grep -n -m 1 -F 'Nr.' "$FILENAME.csv" | cut -d: -f1)
        if [ -z "$START" ]
        then
            echo "No row containing 'Nr.' found in $FILENAME.csv; skipping" >&2
            continue
        fi

        # drop everything in front of that row
        tail -n "+$START" "$FILENAME.csv" > "$FILENAME.tmp"

        # convert from windows encoding to utf-8; '>' overwrites the raw csv
        iconv -f cp1252 -t utf-8 "$FILENAME.tmp" > "$FILENAME.csv"

        # delete temporary file
        rm -f "$FILENAME.tmp"
    fi
 done
diff --git a/player_prices.py b/player_prices.py
 import os

 import matplotlib.pyplot as plt
 import matplotlib.ticker as mtick
 import numpy as np
 import pandas as pd
 from matplotlib import style


 # Apply matplotlib's 'ggplot' style to every figure created by this module.
 style.use('ggplot')

 # Directory that load_data() lists when looking for transfer CSV files.
 TRANSFERS_DIR = 'transfers'


 def load_data(transfers_dir=None):
    """Read every ``*.csv`` file of a directory into one DataFrame.

    Each file is parsed with its first column as the index. The columns at
    positions 2, 3 and 4 of the parsed frame are dropped and the first six
    remaining columns are renamed to Matchday, Season, Position, Age,
    Strength and Price.

    Args:
        transfers_dir: Directory to scan. Defaults to ``TRANSFERS_DIR``.

    Returns:
        The rows of all files, concatenated in file-name order with each
        file's own index values kept; an empty DataFrame when the directory
        holds no CSV file.
    """
    if transfers_dir is None:
        transfers_dir = TRANSFERS_DIR

    frames = []
    # os.listdir() order is arbitrary; sorting makes the row order repeatable.
    for name in sorted(os.listdir(transfers_dir)):
        # Require a real extension so names merely ending in "csv" are skipped.
        if not name.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(transfers_dir, name),
                         index_col=0,
                         dtype={7: np.int32, 8: np.int32, 9: np.int32},
                         skip_blank_lines=True,
                         )
        df = df.drop(df.columns[[2, 3, 4]], axis=1)

        df = df.rename(columns={df.columns[0]: "Matchday",
                                df.columns[1]: "Season",
                                df.columns[2]: "Position",
                                df.columns[3]: "Age",
                                df.columns[4]: "Strength",
                                df.columns[5]: "Price",
                                })
        frames.append(df)

    if not frames:
        return pd.DataFrame()

    # One concat replaces repeated DataFrame.append, which pandas 2.0 removed.
    return pd.concat(frames)


 def filter_transfers(df, positions=None, ages=None, strengths=None, seasons=None, matchdays=None):
    """Return the rows of ``df`` that pass every filter that was supplied.

    Each argument is a collection of accepted values for one column
    (Position, Age, Strength, Season, Matchday). A filter that is ``None``
    or empty is not applied. ``df`` itself is never modified.
    """
    criteria = (
        ('Position', positions),
        ('Age', ages),
        ('Strength', strengths),
        ('Season', seasons),
        ('Matchday', matchdays),
    )

    selected = df.copy()
    for column, allowed in criteria:
        if not allowed:
            continue
        keep = getattr(selected, column).isin(allowed)
        selected = selected[keep]
    return selected


 def get_prices_grouped_by_strength(position='MS', age=33):
    """Return the Price column grouped by Strength for one position and age.

    Loads all transfers, keeps the rows matching ``position`` and ``age``,
    and groups what is left by the Strength column.
    """
    transfers = load_data()
    matching = filter_transfers(transfers, positions=[position], ages=[age])
    by_strength = matching.groupby('Strength')
    return by_strength.Price


 def get_prices_grouped_by_age(position='MS', strength=16):
    """Return the Price column grouped by Age for one position and strength.

    Loads all transfers, keeps the rows matching ``position`` and
    ``strength``, and groups what is left by the Age column.
    """
    transfers = load_data()
    matching = filter_transfers(transfers, positions=[position], strengths=[strength])
    by_age = matching.groupby('Age')
    return by_age.Price


 def draw_prices(prices, title, xlabel, ylabel='Preis'):
    """Show the mean of each price group with its standard deviation.

    Args:
        prices: Grouped prices offering ``mean()`` and ``std()``, such as
            the objects returned by the ``get_prices_grouped_by_*`` helpers.
        title: Title of the plot.
        xlabel: Label of the x axis (the grouping key).
        ylabel: Label of the y axis.
    """
    mean_price = prices.mean()
    spread = prices.std()
    groups = np.array(mean_price.index)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # One marker per group, with the standard deviation as the error bar.
    ax.errorbar(groups, mean_price, yerr=spread, fmt='o')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # Print y ticks as whole numbers with thousands separators.
    ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

    plt.show()

 if __name__ == '__main__':
    # For the per-age view, pass get_prices_grouped_by_age() instead, with
    # title "Preise: MS/16er" and xlabel 'Alter'.
    prices_by_strength = get_prices_grouped_by_strength()
    draw_prices(prices_by_strength,
                title="Preise: MS, 33 Jahre",
                xlabel='Stärke')
diff --git a/requirements.txt b/requirements.txt
 pandas
 pandas-datareader
 matplotlib
 numpy
 BeautifulSoup4
 scikit-learn
 xlrd
	# Created by .ignore support plugin (hsz.mobi)
	### VirtualEnv template
	# Virtualenv
	# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
	.Python
	[Bb]in
	[Ii]nclude
	[Ll]ib
	[Ll]ib64
	[Ll]ocal
	[Ss]cripts
	pyvenv.cfg
	.venv
	pip-selfcheck.json
	### Linux template
	*~

	# temporary files which can be created if a process still has a handle open of a deleted file
	.fuse_hidden*

	# KDE directory preferences
	.directory

	# Linux trash folder which might appear on any partition or disk
	.Trash-*

	# .nfs files are created when an open file is removed but is still being accessed
	.nfs*
	### Example user template template
	### Example user template

	# IntelliJ project files
	.idea
	*.iml
	out
	gen
	### Python template
	# Byte-compiled / optimized / DLL files
	__pycache__/
	*.py[cod]
	*$py.class

	# C extensions
	*.so

	# Distribution / packaging
	env/
	build/
	develop-eggs/
	dist/
	downloads/
	eggs/
	.eggs/
	lib/
	lib64/
	parts/
	sdist/
	var/
	*.egg-info/
	.installed.cfg
	*.egg

	# PyInstaller
	# Usually these files are written by a python script from a template
	# before PyInstaller builds the exe, so as to inject date/other infos into it.
	*.manifest
	*.spec

	# Installer logs
	pip-log.txt
	pip-delete-this-directory.txt

	# Unit test / coverage reports
	htmlcov/
	.tox/
	.coverage
	.coverage.*
	.cache
	nosetests.xml
	coverage.xml
	*,cover
	.hypothesis/

	# Translations
	*.mo
	*.pot

	# Django stuff:
	*.log
	local_settings.py

	# Flask stuff:
	instance/
	.webassets-cache

	# Scrapy stuff:
	.scrapy

	# Sphinx documentation
	docs/_build/

	# PyBuilder
	target/

	# Jupyter Notebook
	.ipynb_checkpoints

	# pyenv
	.python-version

	# celery beat schedule file
	celerybeat-schedule

	# dotenv
	.env

	# virtualenv
	.venv/
	venv/
	ENV/

	# Spyder project settings
	.spyderproject

	# Rope project settings
	.ropeproject
	### JetBrains template
	# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
	# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

	# User-specific stuff:
	.idea/workspace.xml
	.idea/tasks.xml

	# Sensitive or high-churn files:
	.idea/dataSources/
	.idea/dataSources.ids
	.idea/dataSources.xml
	.idea/dataSources.local.xml
	.idea/sqlDataSources.xml
	.idea/dynamic.xml
	.idea/uiDesigner.xml

	# Gradle:
	.idea/gradle.xml
	.idea/libraries

	# Mongo Explorer plugin:
	.idea/mongoSettings.xml

	## File-based project format:
	*.iws

	## Plugin-specific files:

	# IntelliJ
	/out/

	# mpeltonen/sbt-idea plugin
	.idea_modules/

	# JIRA plugin
	atlassian-ide-plugin.xml

	# Crashlytics plugin (for Android Studio and IntelliJ)
	com_crashlytics_export_strings.xml
	crashlytics.properties
	crashlytics-build.properties
	fabric.properties

	# ofm transfers
	transfers/
	#!/bin/bash
	# Convert every *.xls file in the current directory to a UTF-8 CSV that
	# starts at the first row containing the literal text "Nr.".
	for FILE in *.xls; do
	    # Without any match the glob stays literal ("*.xls"); skip that case.
	    [ -e "$FILE" ] || continue

	    # Strip only the trailing ".xls" so names with extra dots stay intact.
	    FILENAME="${FILE%.xls}"
	    if [ -e "$FILENAME.csv" ]
	    then
	        echo "File already exists: $FILENAME.csv"
	    else
	        # convert to csv
	        libreoffice --headless --convert-to csv "$FILE"

	        # line number of the first row containing the fixed string "Nr."
	        # (a real pipe: the escaped "\|" passed a literal "|" as an argument)
	        START=$(grep -n -m 1 -F 'Nr.' "$FILENAME.csv" | cut -d: -f1)
	        if [ -z "$START" ]
	        then
	            echo "No row containing 'Nr.' found in $FILENAME.csv; skipping" >&2
	            continue
	        fi

	        # drop everything in front of that row
	        tail -n "+$START" "$FILENAME.csv" > "$FILENAME.tmp"

	        # convert from windows encoding to utf-8; '>' overwrites the raw csv
	        iconv -f cp1252 -t utf-8 "$FILENAME.tmp" > "$FILENAME.csv"

	        # delete temporary file
	        rm -f "$FILENAME.tmp"
	    fi
	done
	import os

	import matplotlib.pyplot as plt
	import matplotlib.ticker as mtick
	import numpy as np
	import pandas as pd
	from matplotlib import style


	# Apply matplotlib's 'ggplot' style to every figure created by this module.
	style.use('ggplot')

	# Directory that load_data() lists when looking for transfer CSV files.
	TRANSFERS_DIR = 'transfers'


	def load_data(transfers_dir=None):
	    """Read every ``*.csv`` file of a directory into one DataFrame.

	    Each file is parsed with its first column as the index. The columns at
	    positions 2, 3 and 4 of the parsed frame are dropped and the first six
	    remaining columns are renamed to Matchday, Season, Position, Age,
	    Strength and Price.

	    Args:
	        transfers_dir: Directory to scan. Defaults to ``TRANSFERS_DIR``.

	    Returns:
	        The rows of all files, concatenated in file-name order with each
	        file's own index values kept; an empty DataFrame when the directory
	        holds no CSV file.
	    """
	    if transfers_dir is None:
	        transfers_dir = TRANSFERS_DIR

	    frames = []
	    # os.listdir() order is arbitrary; sorting makes the row order repeatable.
	    for name in sorted(os.listdir(transfers_dir)):
	        # Require a real extension so names merely ending in "csv" are skipped.
	        if not name.endswith('.csv'):
	            continue

	        df = pd.read_csv(os.path.join(transfers_dir, name),
	                         index_col=0,
	                         dtype={7: np.int32, 8: np.int32, 9: np.int32},
	                         skip_blank_lines=True,
	                         )
	        df = df.drop(df.columns[[2, 3, 4]], axis=1)

	        df = df.rename(columns={df.columns[0]: "Matchday",
	                                df.columns[1]: "Season",
	                                df.columns[2]: "Position",
	                                df.columns[3]: "Age",
	                                df.columns[4]: "Strength",
	                                df.columns[5]: "Price",
	                                })
	        frames.append(df)

	    if not frames:
	        return pd.DataFrame()

	    # One concat replaces repeated DataFrame.append, which pandas 2.0 removed.
	    return pd.concat(frames)


	def filter_transfers(df, positions=None, ages=None, strengths=None, seasons=None, matchdays=None):
	    """Return the rows of ``df`` that pass every filter that was supplied.

	    Each argument is a collection of accepted values for one column
	    (Position, Age, Strength, Season, Matchday). A filter that is ``None``
	    or empty is not applied. ``df`` itself is never modified.
	    """
	    criteria = (
	        ('Position', positions),
	        ('Age', ages),
	        ('Strength', strengths),
	        ('Season', seasons),
	        ('Matchday', matchdays),
	    )

	    selected = df.copy()
	    for column, allowed in criteria:
	        if not allowed:
	            continue
	        keep = getattr(selected, column).isin(allowed)
	        selected = selected[keep]
	    return selected


	def get_prices_grouped_by_strength(position='MS', age=33):
	    """Return the Price column grouped by Strength for one position and age.

	    Loads all transfers, keeps the rows matching ``position`` and ``age``,
	    and groups what is left by the Strength column.
	    """
	    df = filter_transfers(load_data(),
	                          positions=[position],
	                          ages=[age],
	                          )

	    return df.groupby('Strength').Price


	def get_prices_grouped_by_age(position='MS', strength=16):
	    """Return the Price column grouped by Age for one position and strength.

	    Loads all transfers, keeps the rows matching ``position`` and
	    ``strength``, and groups what is left by the Age column.
	    """
	    df = filter_transfers(load_data(),
	                          positions=[position],
	                          strengths=[strength],
	                          )

	    return df.groupby('Age').Price


	def draw_prices(prices, title, xlabel, ylabel='Preis'):
	    """Show the mean of each price group with its standard deviation.

	    Args:
	        prices: Grouped prices offering ``mean()`` and ``std()``, such as
	            the objects returned by the ``get_prices_grouped_by_*`` helpers.
	        title: Title of the plot.
	        xlabel: Label of the x axis (the grouping key).
	        ylabel: Label of the y axis.
	    """
	    mean_price = prices.mean()
	    spread = prices.std()
	    groups = np.array(mean_price.index)

	    fig = plt.figure()
	    ax = fig.add_subplot(111)

	    # One marker per group, with the standard deviation as the error bar.
	    ax.errorbar(groups, mean_price, yerr=spread, fmt='o')
	    ax.set_xlabel(xlabel)
	    ax.set_ylabel(ylabel)
	    ax.set_title(title)

	    # Print y ticks as whole numbers with thousands separators.
	    ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

	    plt.show()

	if __name__ == '__main__':
	    # For the per-age view, pass get_prices_grouped_by_age() instead, with
	    # title "Preise: MS/16er" and xlabel 'Alter'.
	    draw_prices(get_prices_grouped_by_strength(),
	                title="Preise: MS, 33 Jahre",
	                xlabel='Stärke')
	pandas
	pandas-datareader
	matplotlib
	numpy
	BeautifulSoup4
	scikit-learn
	xlrd