JonathanLoscalzo · April 25, 2019 14:47
diff --git a/00-readme.md b/00-readme.md
diff --git a/advantage_influence_host_country.py b/advantage_influence_host_country.py
 # Import pandas
 import pandas as pd

 # Left join editions and ioc_codes: hosts
 hosts = pd.merge(editions, ioc_codes, how='left')

 # Extract relevant columns and set index: hosts
 hosts = hosts[['Edition', 'NOC']].set_index('Edition')

 # Fix missing 'NOC' values of hosts
 print(hosts.loc[hosts.NOC.isnull()])
 hosts.loc[1972, 'NOC'] = 'FRG'
 hosts.loc[1980, 'NOC'] = 'URS'
 hosts.loc[1988, 'NOC'] = 'KOR'

 # Reset Index of hosts: hosts
 hosts.reset_index(inplace=True)

 # Print hosts
 print(hosts)
 # =============================================
 # Import pandas
 import pandas as pd

 # Reshape fractions_change: reshaped
 reshaped = fractions_change.melt(id_vars='Edition', value_name="Change")

 # Print reshaped.shape and fractions_change.shape
 print(reshaped.shape, fractions_change.shape)

 # Extract rows from reshaped where 'NOC' == 'CHN': chn
 chn = reshaped[reshaped['NOC'] == 'CHN']

 # Print last 5 rows of chn with .tail()
 print(chn.tail())

 #=============================================
 # Merge reshaped and hosts: merged
 merged = pd.merge(reshaped, hosts, how='inner')

 # Print first 5 rows of merged
 print(merged.head())

 # Set Index of merged and sort it: influence
 influence = merged.set_index('Edition').sort_index()

 # Print first 5 rows of influence
 print(influence.head())
 #==============================================
 # Import pyplot
 import matplotlib.pyplot as plt

 # Extract influence['Change']: change
 change = influence['Change']

 # Make bar plot of change: ax
 ax = change.plot(kind='bar')

 # Customize the plot to improve readability
 ax.set_ylabel("% Change of Host Country Medal Count")
 ax.set_title("Is there a Host Country Advantage?")
 ax.set_xticklabels(editions['City'])

 # Display the plot
 plt.show()
diff --git a/count_medals_per_country_per_edition b/count_medals_per_country_per_edition
 """
 Counting medals by country/edition in a pivot table
 Here, you'll start with the concatenated DataFrame medals from the previous exercise.

 You can construct a pivot table to see the number of medals each country won in each year. The result is a new DataFrame with the Olympic edition on the Index and with 138 country NOC codes as columns. 
 If you want a refresher on pivot tables, it may be useful to refer back to the relevant exercise

 """

 # Construct the pivot_table: medal_counts
 medal_counts = medals.pivot_table(index='Edition', values='Athlete', columns='NOC', aggfunc='count')
diff --git a/expanding.py b/expanding.py
 #https://stackoverflow.com/questions/45370666/what-are-pandas-expanding-window-functions
 """

 Computing percentage change in fraction of medals won
 Here, you'll start with the DataFrames editions, medals, medal_counts, & fractions from prior exercises.

 To see if there is a host country advantage, you first want to see how the fraction of medals won changes from edition to edition.

 The expanding mean provides a way to see this down each column. 
 It is the value of the mean with all the data available up to that point in time. 
 If you are interested in learning more about pandas' expanding transformations, 
 this section of the pandas documentation has additional information.
 """
 #Apply the expanding mean: mean_fractions
 mean_fractions = fractions.expanding().mean()

 # Compute the percentage change: fractions_change
 fractions_change = mean_fractions.pct_change() * 100

 # Reset the index of fractions_change: fractions_change
 fractions_change = fractions_change.reset_index()

 # Print first & last 5 rows of fractions_change
 print(fractions_change.head())
 print(fractions_change.tail())
diff --git a/fraction_per_country b/fraction_per_country
 """
 Computing fraction of medals per Olympic edition
 In this exercise, you'll start with the DataFrames editions, medals, & medal_counts from prior exercises.

 You can extract a Series with the total number of medals awarded in each Olympic edition.

 The DataFrame medal_counts can be divided row-wise by the total number of medals awarded each edition; the method .divide() performs the broadcast as you require.

 This gives you a normalized indication of each country's performance in each edition.
 """

 # Set Index of editions: totals
 totals = editions.set_index('Edition')

 # Reassign totals['Grand Total']: totals
 totals = totals['Grand Total']

 # Divide medal_counts by totals: fractions
 fractions = medal_counts.divide(totals, axis='rows')

 # Print first & last 5 rows of fractions
 print(fractions.head())
	# Import pandas
	import pandas as pd

	# Left join editions and ioc_codes: hosts
	hosts = pd.merge(editions, ioc_codes, how='left')

	# Extract relevant columns and set index: hosts
	hosts = hosts[['Edition', 'NOC']].set_index('Edition')

	# Fix missing 'NOC' values of hosts
	print(hosts.loc[hosts.NOC.isnull()])
	hosts.loc[1972, 'NOC'] = 'FRG'
	hosts.loc[1980, 'NOC'] = 'URS'
	hosts.loc[1988, 'NOC'] = 'KOR'

	# Reset Index of hosts: hosts
	hosts.reset_index(inplace=True)

	# Print hosts
	print(hosts)
	# =============================================
	# Import pandas
	import pandas as pd

	# Reshape fractions_change: reshaped
	reshaped = fractions_change.melt(id_vars='Edition', value_name="Change")

	# Print reshaped.shape and fractions_change.shape
	print(reshaped.shape, fractions_change.shape)

	# Extract rows from reshaped where 'NOC' == 'CHN': chn
	chn = reshaped[reshaped['NOC'] == 'CHN']

	# Print last 5 rows of chn with .tail()
	print(chn.tail())

	#=============================================
	# Merge reshaped and hosts: merged
	merged = pd.merge(reshaped, hosts, how='inner')

	# Print first 5 rows of merged
	print(merged.head())

	# Set Index of merged and sort it: influence
	influence = merged.set_index('Edition').sort_index()

	# Print first 5 rows of influence
	print(influence.head())
	#==============================================
	# Import pyplot
	import matplotlib.pyplot as plt

	# Extract influence['Change']: change
	change = influence['Change']

	# Make bar plot of change: ax
	ax = change.plot(kind='bar')

	# Customize the plot to improve readability
	ax.set_ylabel("% Change of Host Country Medal Count")
	ax.set_title("Is there a Host Country Advantage?")
	ax.set_xticklabels(editions['City'])

	# Display the plot
	plt.show()
	"""
	Counting medals by country/edition in a pivot table
	Here, you'll start with the concatenated DataFrame medals from the previous exercise.

	You can construct a pivot table to see the number of medals each country won in each year. The result is a new DataFrame with the Olympic edition on the Index and with 138 country NOC codes as columns.
	If you want a refresher on pivot tables, it may be useful to refer back to the relevant exercise

	"""

	# Construct the pivot_table: medal_counts
	medal_counts = medals.pivot_table(index='Edition', values='Athlete', columns='NOC', aggfunc='count')
	#https://stackoverflow.com/questions/45370666/what-are-pandas-expanding-window-functions
	"""

	Computing percentage change in fraction of medals won
	Here, you'll start with the DataFrames editions, medals, medal_counts, & fractions from prior exercises.

	To see if there is a host country advantage, you first want to see how the fraction of medals won changes from edition to edition.

	The expanding mean provides a way to see this down each column.
	It is the value of the mean with all the data available up to that point in time.
	If you are interested in learning more about pandas' expanding transformations,
	this section of the pandas documentation has additional information.
	"""
	#Apply the expanding mean: mean_fractions
	mean_fractions = fractions.expanding().mean()

	# Compute the percentage change: fractions_change
	fractions_change = mean_fractions.pct_change() * 100

	# Reset the index of fractions_change: fractions_change
	fractions_change = fractions_change.reset_index()

	# Print first & last 5 rows of fractions_change
	print(fractions_change.head())
	print(fractions_change.tail())
	"""
	Computing fraction of medals per Olympic edition
	In this exercise, you'll start with the DataFrames editions, medals, & medal_counts from prior exercises.

	You can extract a Series with the total number of medals awarded in each Olympic edition.

	The DataFrame medal_counts can be divided row-wise by the total number of medals awarded each edition; the method .divide() performs the broadcast as you require.

	This gives you a normalized indication of each country's performance in each edition.
	"""

	# Set Index of editions: totals
	totals = editions.set_index('Edition')

	# Reassign totals['Grand Total']: totals
	totals = totals['Grand Total']

	# Divide medal_counts by totals: fractions
	fractions = medal_counts.divide(totals, axis='rows')

	# Print first & last 5 rows of fractions
	print(fractions.head())