This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import requests, zipfile, io | |
# Request the data from the USDA url. The timeout keeps a stalled connection
# from hanging the session indefinitely.
response = requests.get(
    'https://apps.fas.usda.gov/psdonline/downloads/psd_coffee_csv.zip',
    timeout=60,
)
# Fail here on an HTTP error instead of later with a confusing BadZipFile
# when an error page is handed to zipfile.
response.raise_for_status()
# The response is a zipfile, which we unzip in memory rather than saving to
# the disk. The context managers close the archive and the member handle
# once the CSV has been parsed.
with zipfile.ZipFile(io.BytesIO(response.content)) as zf:
    with zf.open('psd_coffee.csv') as csv_file:
        usda = pd.read_csv(csv_file)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def country_query(country, year_type='Market_Year', attribute=''):
    """Return an attribute-by-year table of values for a single country.

    Args:
        country: Value of the ``Country_Name`` column to select from ``usda``.
        year_type: Name of the ``usda`` column whose distinct values become
            the columns of the result.
        attribute: Optional ``Attribute_Description`` value to restrict the
            result to. An empty string (the default) keeps every attribute.

    Returns:
        A DataFrame indexed by ``Attribute_Description`` with one column per
        distinct ``year_type`` value, filled from the ``Value`` column.
    """
    row_mask = usda['Country_Name'] == country
    if attribute:
        row_mask = row_mask & (usda['Attribute_Description'] == attribute)
    selected_rows = usda[row_mask]
    return selected_rows.pivot(
        index='Attribute_Description',
        columns=year_type,
        values='Value',
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def attribute_query(attribute, year_type='Market_Year', countries=None):
    """Return a country-by-year table of values for a single attribute.

    Args:
        attribute: Value of the ``Attribute_Description`` column to select
            from ``usda``.
        year_type: Name of the ``usda`` column whose distinct values become
            the columns of the result.
        countries: Optional country name, or iterable of country names, to
            restrict the result to. ``None`` or an empty iterable applies no
            restriction, so every country is returned.

    Returns:
        A DataFrame indexed by ``Country_Name`` with one column per distinct
        ``year_type`` value, filled from the ``Value`` column.
    """
    # Normalise to a plain list. A bare string would be rejected by `isin`,
    # and NumPy arrays / Series (e.g. the result of `.unique()`) raise on a
    # plain truthiness test.
    if countries is None:
        selected = []
    elif isinstance(countries, str):
        selected = [countries]
    else:
        selected = list(countries)

    df = usda[usda['Attribute_Description'] == attribute]
    if selected:
        df = df[df['Country_Name'].isin(selected)]
    return df.pivot(
        index='Country_Name',
        columns=year_type,
        values='Value',
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Render figures inline when running under IPython/Jupyter. The call form is
# what the `%matplotlib inline` magic expands to, and unlike the magic it is
# valid Python, so the file can also be run or imported outside a notebook.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # `get_ipython` only exists inside an IPython session; outside one,
    # matplotlib simply keeps its default backend.
    pass
| import matplotlib.pyplot as plt | |
# Constant assumptions used throughout the analysis.

# Number of most recent year columns kept in the combined production table.
YEARS_IN_SERIES = 25

# Production cut-offs, expressed in thousands of bags.
MAJOR_PRODUCER_THRESHOLD = 5_000
MIDSIZE_PRODUCER_THRESHOLD = 500
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def is_producing_above(country_data, threshold_value, num_years_back=5):
    """Tell whether average recent production exceeds a threshold.

    Only the first row of ``country_data`` is examined; its last
    ``num_years_back`` columns are averaged and compared to the threshold.

    Args:
        country_data: DataFrame whose first row holds the production series,
            one column per year, oldest first.
        threshold_value: Value the recent average must strictly exceed.
        num_years_back: How many trailing columns to average; must be >= 1.

    Returns:
        True if the mean of the trailing values is greater than
        ``threshold_value``; False otherwise, including when
        ``country_data`` has no rows or no columns.

    Raises:
        ValueError: If ``num_years_back`` is smaller than 1.
    """
    if num_years_back < 1:
        # A slice of `-0:` would silently select every column instead of none.
        raise ValueError('num_years_back must be at least 1')
    if country_data.empty:
        # No production records at all: cannot be above any threshold.
        return False
    recent_production = country_data.iloc[0, -num_years_back:]
    recent_average = recent_production.mean()
    return bool(recent_average > threshold_value)
# Every distinct country name present in the dataset.
list_of_all_countries = usda['Country_Name'].unique()

# Buckets of country names by production size; each starts out empty.
major_producers, midsize_producers, minor_producers = [], [], []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Yearly 'Production' totals for each group of smaller producers: the
# per-country table is summed down its rows, leaving one value per year.
# NOTE: attribute_query applies no country filter when given an empty list.
minor_producers_volumes = attribute_query(
    'Production', countries=minor_producers
).sum()
midsize_producers_volumes = attribute_query(
    'Production', countries=midsize_producers
).sum()

# One row per producer group, one column per year.
other_producers_volumes = pd.DataFrame(
    data=[minor_producers_volumes, midsize_producers_volumes],
    index=['Minor producers', 'Mid-size producers'],
)

# Transposed so that years run along the x-axis and each group is one band.
other_producers_volumes.transpose().plot(
    kind='area',
    stacked=True,
    figsize=(12, 8),
    alpha=.4,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-country 'Production' table for the major producers (one row per
# country, one column per year).
major_producers_volumes = attribute_query('Production', countries=major_producers)
# Order the countries by their volume in the most recent year available,
# rather than a hard-coded 2019 column that goes stale (or is missing
# entirely) as the dataset or the year type changes.
major_producers_volumes = major_producers_volumes.sort_values(
    major_producers_volumes.columns[-1]
)
# Transposed so that years run along the x-axis and each country is one band.
major_producers_volumes.T.plot(kind='area', stacked=True, figsize=(12, 8), alpha=.4)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stack the grouped small producers on top of the individual major producers,
# keep only the last YEARS_IN_SERIES year columns, then flip the table so
# that each row is a year and each column a producer.
production_table = (
    pd.concat([other_producers_volumes, major_producers_volumes], axis=0)
    .iloc[:, -YEARS_IN_SERIES:]
    .transpose()
)
production_table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Figure and axes for the final chart.
fig, ax = plt.subplots()
# Rescale in place: values become millions of bags instead of thousands.
production_table /= 1000
| production_table.plot( | |
| kind='area', | |
| stacked=True, | |
| figsize=(12,8), | |
| alpha=0.4, | |
| colormap='Blues', | |
| legend=False, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For every producer column, record its name and the vertical midpoint of
# its band in the last row of the table: the running total of the bands
# below it plus half of its own value.
labels, y_vals, cum_sum = [], [], 0
last_row_values = production_table.iloc[-1].to_numpy()
for country_name, prod_val in zip(production_table.columns, last_row_values):
    labels.append(country_name)
    y_vals.append((cum_sum * 2 + prod_val) / 2)
    cum_sum += prod_val
| for i, label in enumerate(labels): |
OlderNewer