😄

Luciano StrikingLoo

😄

Computer Science B.Sc+M.Sc University of Buenos Aires. Software Developer @ Microsoft

StrikingLoo / results.py

Created September 2, 2018 18:33

StrikingLoo / initializing.py

Created September 10, 2018 22:25

	import pandas as pd
	import seaborn as sns

	# Load athlete_events.csv from the working directory into the DataFrame `df`.
	df = pd.read_csv('athlete_events.csv')
	# (rows, columns) of the loaded table; as a trailing expression it is echoed
	# by an interactive session.
	df.shape
	# Output: (271116, 15)

StrikingLoo / list_columns.py

Created September 10, 2018 22:40

	# Column labels of df as a plain Python list.
	df.columns.tolist()
	# Output:
	# ['ID','Name','Sex','Age','Height','Weight','Team','NOC','Games','Year','Season','City',
	#  'Sport','Event','Medal']

StrikingLoo / incompleteness_percent.py

Created September 10, 2018 22:45

	def NaN_percent(df, column_name):
	    """Return the percentage of missing values in one column of *df*.

	    Args:
	        df: pandas DataFrame that holds the column.
	        column_name: Label of the column to inspect.

	    Returns:
	        The missing entries as a percentage (0-100) of the column's length.
	    """
	    row_count = df[column_name].shape[0]
	    # Series.count() skips missing entries, so the gap between it and the
	    # full length is the number of missing values.
	    empty_values = row_count - df[column_name].count()
	    return (100.0 * empty_values) / row_count
	# Print the share of missing values for every column of df.
	for column in df:
	    print('{}: {}%'.format(column, NaN_percent(df, column)))
	'''
	0% incomplete columns omitted for brevity.
	Age: 3.49444518214%
	Height: 22.193821095%
	'''

StrikingLoo / checking_athletes_names.py

Last active September 10, 2018 22:58

	# Rows that carry a medal: Medal is neither missing nor the literal 'None'.
	has_medal = df['Medal'].fillna('None') != 'None'

	total_rows = len(df)
	# dropna=False keeps a missing name counted as one distinct value, exactly
	# as len(Series.unique()) does.
	unique_athletes = df['Name'].nunique(dropna=False)
	medal_winners = df.loc[has_medal, 'Name'].nunique(dropna=False)

	"{0} {1} {2}".format(total_rows, unique_athletes, medal_winners)

	# Output: '271116 134732 28202'

StrikingLoo / how_many_medals.py

Created September 10, 2018 23:06

	# Rows that carry a medal; both figures below are taken from this subset.
	medalists = df[df.Medal.fillna('None') != 'None']
	# Distribution of medals by type.
	print(medalists.Medal.value_counts())
	# Total number of medals awarded.
	len(medalists)
	'''
	Gold 13372
	Bronze 13295
	Silver 13116
	Total: 39783
	'''

StrikingLoo / medals_by_country.py

Created September 10, 2018 23:18

	# Medals per (Team, Medal) pair, flattened into a table with a 'count'
	# column and ordered by quantity, largest first.
	team_medal_count = (
	    df.groupby(['Team', 'Medal'])['Medal']
	    .count()
	    .reset_index(name='count')
	    .sort_values('count', ascending=False)
	)
	# team_medal_count.head(40) shows the first rows

	def get_country_stats(country):
	    """Return the rows of ``team_medal_count`` that belong to one team.

	    Args:
	        country: Team name, compared for exact equality with the ``Team``
	            column.

	    Returns:
	        The matching subset of the module-level ``team_medal_count`` table.
	    """
	    return team_medal_count[team_medal_count.Team == country]
	# get_country_stats('some_country') to get that country's medals

StrikingLoo / female_participation.py

Created September 10, 2018 23:41

	# Keep only the rows for female athletes.
	female = df.loc[df['Sex'] == 'F']
	# Non-null entries per column for each Year.
	year_count = female.groupby('Year').count()
	years = list(year_count.index)
	# Any column can serve as the tally as long as it has no missing values;
	# Name is used here.
	counts = list(year_count['Name'])
	sns.scatterplot(x=years, y=counts)

StrikingLoo / female_counts.py

Created September 10, 2018 23:42

	# Split the table by sex once and reuse the two subsets below.
	women = df[df.Sex == 'F']
	men = df[df.Sex == 'M']

	# Distinct athlete names per sex (a missing name counts as one value).
	unique_women = women.Name.nunique(dropna=False)
	unique_men = men.Name.nunique(dropna=False)
	# Series.count() ignores missing entries, so only awarded medals are tallied.
	women_medals = women.Medal.count()
	men_medals = men.Medal.count()

	print("{} {} {} {} ".format(unique_women, unique_men, women_medals, men_medals))

	# Earliest Year that has a female row.
	women.Year.min()

	# Printed output: 33808 100979 11253 28530

StrikingLoo / sexes_over_time.py

Created September 10, 2018 23:56

	# Non-null Name entries per Year, computed separately for each sex.
	f_year_count = df[df.Sex == 'F'].groupby('Year')['Name'].count()
	m_year_count = df[df.Sex == 'M'].groupby('Year')['Name'].count()
	# Men are plotted first, then women; the tuple keeps both return values as
	# the final expression.
	men_plot = sns.scatterplot(data=m_year_count)
	women_plot = sns.scatterplot(data=f_year_count)
	(men_plot, women_plot)