Skip to content

Instantly share code, notes, and snippets.

View StrikingLoo's full-sized avatar
😄

Luciano StrikingLoo

😄
View GitHub Profile
#--- top defenders ---
Dragon: 0.014
Ice: 0.024
Fairy: 0.030
Psychic: 0.031
Water: 0.032
Fire: 0.034
Normal: 0.041
Steel: 0.048
Dark: 0.049
import pandas as pd
import seaborn as sns
# Read the athlete events CSV into a DataFrame.
df = pd.read_csv('athlete_events.csv')
# Dimensions of the frame as (rows, columns).
df.shape
# -> (271116, 15)
# Column labels, in file order.
df.columns.tolist()
# -> ['ID','Name','Sex','Age','Height','Weight','Team','NOC','Games','Year','Season','City',
#     'Sport','Event','Medal']
def NaN_percent(df, column_name):
    """Return the percentage (0-100) of missing values in ``df[column_name]``.

    Args:
        df: DataFrame holding the column.
        column_name: Label of the column to inspect.

    Returns:
        ``100 * missing / total`` as a float, or ``0.0`` when the column has
        no rows at all.
    """
    column = df[column_name]
    row_count = column.shape[0]
    if row_count == 0:
        # An empty column has nothing missing; also avoids a 0/0 division.
        return 0.0
    # count() only counts non-missing entries, so the difference is the
    # number of missing ones.
    empty_values = row_count - column.count()
    return (100.0 * empty_values) / row_count
# Report the share of missing values for every column, one line each.
for column in df.columns:
    missing_pct = NaN_percent(df, column)
    print(column + ': ' + str(missing_pct) + '%')
'''
0% incomplete columns omitted for brevity.
Age: 3.49444518214%
Height: 22.193821095%
'''
# Rows whose Medal entry is present (missing values are mapped to 'None' first).
has_medal = df.Medal.fillna('None') != 'None'

total_rows = df.shape[0]
# Distinct athlete names overall, and among medal-winning rows only.
unique_athletes = len(df.Name.unique())
medal_winners = len(df[has_medal].Name.unique())
"{0} {1} {2}".format(total_rows, unique_athletes, medal_winners)
# -> '271116 134732 28202'
# Keep only the rows that carry a medal; the filter is reused for both results.
medal_rows = df[df.Medal.fillna('None') != 'None']
# Medal distribution: number of rows per medal type.
print(medal_rows.Medal.value_counts())
# Total number of medal rows.
medal_rows.shape[0]
'''
Gold 13372
Bronze 13295
Silver 13116
Total: 39783
'''
# Count medal rows per (Team, Medal) pair, flatten the result into a regular
# frame with a 'count' column, and list the largest counts first.
team_medal_count = (
    df.groupby(['Team', 'Medal'])
    .Medal.count()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)
# team_medal_count.head(40) shows the first rows.
def get_country_stats(country):
    """Return the rows of ``team_medal_count`` whose Team equals ``country``."""
    is_country = team_medal_count.Team == country
    return team_medal_count.loc[is_country]
# get_country_stats('some_country') to get that country's medals
# Restrict to female entries and count the non-missing values per column,
# grouped by year.
female = df[df.Sex == 'F']
year_count = female.groupby('Year').count()
years = year_count.index.tolist()
# Any column works as the per-year total as long as it has no missing
# values; Name is used here.
counts = year_count.Name.tolist()
sns.scatterplot(x=years, y=counts)
# Split the rows by sex once so each subset is reused below.
women = df[df.Sex == 'F']
men = df[df.Sex == 'M']
# Distinct athlete names per sex.
unique_women = len(women.Name.unique())
unique_men = len(men.Name.unique())
# Series.count() skips missing values, so these are the medal-row totals.
women_medals = women.Medal.count()
men_medals = men.Medal.count()
print("{} {} {} {} ".format(unique_women, unique_men, women_medals, men_medals))
# -> 33808 100979 11253 28530
# Earliest year that has a female entry.
women.Year.min()
# Per-year counts for each sex, taken from the Name column.
f_year_count = df[df.Sex == 'F'].groupby('Year').Name.count()
m_year_count = df[df.Sex == 'M'].groupby('Year').Name.count()
# Draw both series; the tuple keeps the two plotting calls in one expression.
(
    sns.scatterplot(data=m_year_count),
    sns.scatterplot(data=f_year_count),
)