(in kg)
http://en.wikipedia.org/wiki/List_of_countries_by_coffee_consumption_per_capita
Dependencies: matplotlib, wikitools
Data belongs to wikipedia, see their licence terms
This script is released under WTFPL licence
(in kg)
http://en.wikipedia.org/wiki/List_of_countries_by_coffee_consumption_per_capita
Dependencies: matplotlib, wikitools
Data belongs to wikipedia, see their licence terms
This script is released under WTFPL licence
#!/usr/bin/env python2
# -*- coding: utf8 -*-
"""Plot the biggest coffee-consuming countries listed on Wikipedia.

The script downloads the wikitext of one section of the English Wikipedia
page named by ``PAGE_TITLE``, extracts the per-capita consumption (in kg) of
every ``{{flagcountry|...}}`` list entry, saves a bar chart of the top
consumers to ``graph.png`` and prints the complete ranking.

``wikitools`` and ``matplotlib`` are imported lazily, inside the functions
that need them, so the pure parsing/layout helpers remain importable (and
testable) without those packages installed.
"""
import re

# url to wikipedia API (HTTPS endpoint: Wikimedia sites redirect plain HTTP)
API_URL = "https://en.wikipedia.org/w/api.php"

# page info (title, section number, number of leading lines to discard)
PAGE_TITLE = "List_of_countries_by_coffee_consumption_per_capita"
PAGE_SEC = 0
PAGE_USELESS = 3

# bars width
BAR_WIDTH = 0.8
# maximum number of countries to display
TOP_COUNT = 20
# file the chart is written to
OUTPUT_FILE = 'graph.png'

# One list entry: "# {{flagcountry|<name>}} <amount> kg".
# Group 1 is the country name, group 2 the amount token.
ENTRY_RE = re.compile(r"\#\s?\{\{flagcountry\|([^\}]+)\}\}\s+([^\s]+)\skg")


def fetch_wikitext_lines(api_url=API_URL, title=PAGE_TITLE, section=PAGE_SEC):
    """Return the wikitext of one page section as a list of lines.

    Performs a network request through ``wikitools``.
    """
    from wikitools.wiki import Wiki
    from wikitools.page import Page

    site = Wiki(api_url)
    page = Page(site, title=title, sectionnumber=section)
    return page.getWikiText().split('\n')


def parse_consumption(lines):
    """Map country name -> consumption (float) for every matching line.

    Lines that do not start with a ``{{flagcountry|...}}`` entry are ignored.
    A matching line whose amount token is not a valid number raises
    ``ValueError`` (from ``float``).
    """
    results = {}
    for line in lines:
        match = ENTRY_RE.match(line)
        if match:
            results[match.group(1)] = float(match.group(2))
    return results


def rank_countries(results):
    """Return the country names sorted by decreasing consumption."""
    return sorted(results, key=results.get, reverse=True)


def bar_layout(count, width=BAR_WIDTH):
    """Return ``(edges, centres)`` for ``count`` adjacent bars.

    ``edges`` holds the left edge of each bar and ``centres`` the x position
    of the middle of each bar, where its tick label belongs.
    """
    edges = [width * index for index in range(count)]
    centres = [edge + width / 2.0 for edge in edges]
    return edges, centres


def plot_top(results, ranking, count=TOP_COUNT, width=BAR_WIDTH,
             filename=OUTPUT_FILE):
    """Save a bar chart of the first ``count`` countries of ``ranking``."""
    from matplotlib import pyplot as plt

    # Slicing clamps to the number of countries actually found, so the bar
    # positions and heights always have the same length.
    top = ranking[:count]
    edges, centres = bar_layout(len(top), width)

    # The tick positions are computed from the left edges, so the bars must
    # be edge-aligned explicitly rather than relying on the library default.
    plt.bar(edges, [results[name] for name in top], width=width, align='edge')
    # countries name
    plt.xticks(centres, top, rotation=85)
    # other display-related stuff
    plt.grid(True)
    plt.title(PAGE_TITLE.replace('_', ' '))
    plt.tight_layout()
    # now SAVE !
    plt.savefig(filename)


def main():
    """Fetch the page, plot the top consumers and print the full ranking."""
    # get rid of useless data at the section's beginning
    lines = fetch_wikitext_lines()[PAGE_USELESS:]
    results = parse_consumption(lines)
    if not results:
        raise SystemExit('no {{flagcountry}} entries found in %s' % PAGE_TITLE)

    ranking = rank_countries(results)
    plot_top(results, ranking)

    # just to get a better idea (for numbers lovers)
    for name in ranking:
        print('{0} => {1}'.format(name, results[name]))


if __name__ == '__main__':
    main()