ccerv1’s gists

ccerv1 / usda_coffee.py

Created January 27, 2019 16:58

Create a dataframe with the latest USDA coffee statistics

	import pandas as pd
	import requests, zipfile, io

	# requests the data from the USDA url
	response = requests.get('https://apps.fas.usda.gov/psdonline/downloads/psd_coffee_csv.zip')

	# the response is a zipfile, which we unzip in memory rather than saving to the disk
	zf = zipfile.ZipFile(io.BytesIO(response.content))
	usda = pd.read_csv(zf.open('psd_coffee.csv'))

ccerv1 / usda_country_query.py

Last active January 27, 2019 17:21

Queries the database for the data on a single country

	def country_query(country, year_type='Market_Year', attribute=''):
	df = usda[usda['Country_Name'] == country]
	if attribute:
	df = df[df['Attribute_Description'] == attribute]
	return df.pivot(
	index='Attribute_Description',
	columns=year_type,
	values='Value'
	)

ccerv1 / usda_attribute_query.py

Created January 27, 2019 17:30

Queries the database for the data on a single attribute

	def attribute_query(attribute, year_type='Market_Year', countries=[]):
	df = usda[usda['Attribute_Description'] == attribute]
	if countries:
	df = df[df['Country_Name'].isin(countries)]
	return df.pivot(
	index='Country_Name',
	columns=year_type,
	values='Value'
	)

ccerv1 / usda_chart_assums.py

Created February 16, 2019 11:57

Dependencies and assumptions for our analysis

	%matplotlib inline
	import matplotlib.pyplot as plt

	# Some constant assumptions we'll use in our analysis
	YEARS_IN_SERIES = 25
	MAJOR_PRODUCER_THRESHOLD = 5000 # thousands of bags
	MIDSIZE_PRODUCER_THRESHOLD = 500 # thousands of bags

ccerv1 / is_producing_above.py

Created February 16, 2019 12:00

Module to decide whether a country belongs in a list of 'major', 'midsize', or 'minor' producers

	def is_producing_above(country_data, threshold_value, num_years_back=5):
	recent_production = country_data.iloc[0,-num_years_back:]
	production_peak = recent_production.mean()
	return production_peak > threshold_value

	list_of_all_countries = usda['Country_Name'].unique()
	major_producers = []
	midsize_producers = []
	minor_producers = []

ccerv1 / other_producer_volumes.py

Created February 16, 2019 12:02

Put the volumes from smaller producing countries into a single table and plot it

	minor_producers_volumes = attribute_query('Production', countries=minor_producers).sum()
	midsize_producers_volumes = attribute_query('Production', countries=midsize_producers).sum()
	other_producers_volumes = pd.DataFrame(
	data=[minor_producers_volumes, midsize_producers_volumes],
	index=['Minor producers', 'Mid-size producers']
	)
	other_producers_volumes.T.plot(kind='area', stacked=True, figsize=(12,8), alpha=.4)

ccerv1 / major_producers.py

Created February 16, 2019 12:05

Plot the production of major coffee producing countries

	major_producers_volumes = attribute_query('Production', countries=major_producers)
	major_producers_volumes = major_producers_volumes.sort_values(2019)
	major_producers_volumes.T.plot(kind='area', stacked=True, figsize=(12,8), alpha=.4)

ccerv1 / production_table.py

Created February 16, 2019 12:11

Put our tables together, reduce the timeframe, and transpose

	production_table = pd.concat([other_producers_volumes, major_producers_volumes], axis=0)
	production_table = production_table.iloc[:,-YEARS_IN_SERIES:]
	production_table = production_table.T
	production_table

ccerv1 / pretty_production_chart_v1.py

Created February 16, 2019 12:16

Chart of coffee production by origin

	production_table /= 1000 # change to millions instead of thousands of bags

	fig, ax = plt.subplots()
	production_table.plot(
	kind='area',
	stacked=True,
	figsize=(12,8),
	alpha=0.4,
	colormap='Blues',
	legend=False,

ccerv1 / pretty_production_chart_v2.py

Created February 16, 2019 12:18

Updated chart with annotated labels

	labels = []
	y_vals = []
	cum_sum = 0
	for col_num, country_name in enumerate(production_table.columns):
	labels.append(country_name)
	prod_val = production_table.iloc[-1,col_num]
	y_vals.append((cum_sum * 2 + prod_val)/2)
	cum_sum += prod_val

	for i, label in enumerate(labels):

Carl Cervone ccerv1