Created
February 10, 2022 14:33
-
-
Save larsyencken/42a464f6b744850ebed30afab35790e2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "18147d3b-e3bf-4624-919d-fd6fd498e0c5", | |
"metadata": {}, | |
"source": [ | |
"# Comparison of food explorer data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "57ebc2d8-1a6b-46d5-9877-75d8cda976d8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from owid import catalog" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0be912cc-6d0d-42f9-b38c-e7bc6ac9c65f", | |
"metadata": {}, | |
"source": [ | |
"## Currently published explorer data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "2b9876a4-845d-4697-b532-5d587a0bb6b5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>table</th>\n", | |
" <th>dataset</th>\n", | |
" <th>version</th>\n", | |
" <th>namespace</th>\n", | |
" <th>dimensions</th>\n", | |
" <th>path</th>\n", | |
" <th>format</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1181</th>\n", | |
" <td>almonds</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/almonds</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1307</th>\n", | |
" <td>animal_fats</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/animal_fats</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1452</th>\n", | |
" <td>apples</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/apples</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1530</th>\n", | |
" <td>apricots</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/apricots</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1681</th>\n", | |
" <td>areca_nuts</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/areca_nuts</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40731</th>\n", | |
" <td>wheat</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/wheat</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40819</th>\n", | |
" <td>whey</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/whey</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>41124</th>\n", | |
" <td>wine</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/wine</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>41267</th>\n", | |
" <td>wool</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/wool</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>41543</th>\n", | |
" <td>yams</td>\n", | |
" <td>food_explorer</td>\n", | |
" <td>None</td>\n", | |
" <td>explorers</td>\n", | |
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n", | |
" <td>garden/explorers/2021/food_explorer/yams</td>\n", | |
" <td>csv</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>191 rows × 7 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" table dataset version namespace \\\n", | |
"1181 almonds food_explorer None explorers \n", | |
"1307 animal_fats food_explorer None explorers \n", | |
"1452 apples food_explorer None explorers \n", | |
"1530 apricots food_explorer None explorers \n", | |
"1681 areca_nuts food_explorer None explorers \n", | |
"... ... ... ... ... \n", | |
"40731 wheat food_explorer None explorers \n", | |
"40819 whey food_explorer None explorers \n", | |
"41124 wine food_explorer None explorers \n", | |
"41267 wool food_explorer None explorers \n", | |
"41543 yams food_explorer None explorers \n", | |
"\n", | |
" dimensions \\\n", | |
"1181 [\"Product\", \"Country\", \"Year\"] \n", | |
"1307 [\"Product\", \"Country\", \"Year\"] \n", | |
"1452 [\"Product\", \"Country\", \"Year\"] \n", | |
"1530 [\"Product\", \"Country\", \"Year\"] \n", | |
"1681 [\"Product\", \"Country\", \"Year\"] \n", | |
"... ... \n", | |
"40731 [\"Product\", \"Country\", \"Year\"] \n", | |
"40819 [\"Product\", \"Country\", \"Year\"] \n", | |
"41124 [\"Product\", \"Country\", \"Year\"] \n", | |
"41267 [\"Product\", \"Country\", \"Year\"] \n", | |
"41543 [\"Product\", \"Country\", \"Year\"] \n", | |
"\n", | |
" path format \n", | |
"1181 garden/explorers/2021/food_explorer/almonds csv \n", | |
"1307 garden/explorers/2021/food_explorer/animal_fats csv \n", | |
"1452 garden/explorers/2021/food_explorer/apples csv \n", | |
"1530 garden/explorers/2021/food_explorer/apricots csv \n", | |
"1681 garden/explorers/2021/food_explorer/areca_nuts csv \n", | |
"... ... ... \n", | |
"40731 garden/explorers/2021/food_explorer/wheat csv \n", | |
"40819 garden/explorers/2021/food_explorer/whey csv \n", | |
"41124 garden/explorers/2021/food_explorer/wine csv \n", | |
"41267 garden/explorers/2021/food_explorer/wool csv \n", | |
"41543 garden/explorers/2021/food_explorer/yams csv \n", | |
"\n", | |
"[191 rows x 7 columns]" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"tables = catalog.find(dataset='food_explorer')\n", | |
"tables" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "34fcbcf3-9f29-4ea1-9d95-ea614b5c1e1a", | |
"metadata": {}, | |
"source": [ | |
"## Regenerated explorer data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "2b6e8b73-7b88-4b96-ab9c-a50169cfbd76", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"local_ds = catalog.Dataset(f'data/garden/explorers/2021/food_explorer')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 89, | |
"id": "f1d3acc1-6f56-41a3-bc84-82b7b0f7b314", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"✓ almonds\n", | |
"✓ animal_fats\n", | |
"✓ apples\n", | |
"✓ apricots\n", | |
"✓ areca_nuts\n", | |
"✓ artichokes\n", | |
"✓ asparagus\n", | |
"✓ avocados\n", | |
"✓ bananas\n", | |
"✓ barley\n", | |
"✓ beans_dry\n", | |
"✓ beans_green\n", | |
"✓ beeswax\n", | |
"✓ blueberries\n", | |
"✓ brazil_nuts_with_shell\n", | |
"✓ broad_beans\n", | |
"✓ buckwheat\n", | |
"✗ buffalo_hides <---\n", | |
"✗ bulk <---\n", | |
"✗ butter_and_ghee <---\n", | |
"✓ cabbages\n", | |
"✓ canary_seed\n", | |
"✓ carrots_and_turnips\n", | |
"✓ cashew_nuts\n", | |
"✓ cassava\n", | |
"✓ castor_oil_seed\n", | |
"✗ cattle_hides <---\n", | |
"✓ cauliflowers_and_broccoli\n", | |
"✗ cereals <---\n", | |
"✓ cheese\n", | |
"✓ cherries\n", | |
"✓ chestnut\n", | |
"✓ chickpeas\n", | |
"✓ chillies_and_peppers\n", | |
"✗ citrus_fruit <---\n", | |
"✓ cocoa_beans\n", | |
"✓ coconut_oil\n", | |
"✓ coconuts\n", | |
"✓ coffee_green\n", | |
"✓ cotton\n", | |
"✓ cottonseed\n", | |
"✓ cottonseed_oil\n", | |
"✓ cow_peas\n", | |
"✓ cranberries\n", | |
"✓ cucumbers_and_gherkins\n", | |
"✓ currants\n", | |
"✓ dates\n", | |
"✓ eggplants\n", | |
"✗ eggs <---\n", | |
"✗ eggs_from_hens <---\n", | |
"✓ eggs_from_other_birds_excl_hens\n", | |
"✓ fat_buffaloes\n", | |
"✓ fat_camels\n", | |
"✓ fat_cattle\n", | |
"✓ fat_goats\n", | |
"✓ fat_pigs\n", | |
"✓ fat_sheep\n", | |
"✗ fibre_crops <---\n", | |
"✓ fish_and_seafood\n", | |
"✓ flax_fibre\n", | |
"✗ fruit <---\n", | |
"✓ garlic\n", | |
"✓ grapefruit\n", | |
"✓ grapes\n", | |
"✓ green_maize\n", | |
"✓ groundnut_oil\n", | |
"✓ groundnuts\n", | |
"✓ hazelnuts\n", | |
"✓ hempseed\n", | |
"✓ herbs_eg_fennel\n", | |
"✓ honey\n", | |
"✓ jute\n", | |
"✓ karite_nuts\n", | |
"✓ kiwi\n", | |
"✓ kola_nuts\n", | |
"✓ leeks\n", | |
"✓ lemons_and_limes\n", | |
"✓ lentils\n", | |
"✓ lettuce\n", | |
"✓ linseed\n", | |
"✓ linseed_oil\n", | |
"✓ maize\n", | |
"✓ maize_oil\n", | |
"✓ mangoes\n", | |
"✓ margarine\n", | |
"✓ meat_ass\n", | |
"✓ meat_beef\n", | |
"✗ meat_beef_and_buffalo <---\n", | |
"✗ meat_buffalo <---\n", | |
"✓ meat_camel\n", | |
"✗ meat_chicken <---\n", | |
"✗ meat_duck <---\n", | |
"✓ meat_game\n", | |
"✗ meat_goat <---\n", | |
"✗ meat_goose_and_guinea_fowl <---\n", | |
"✓ meat_horse\n", | |
"✓ meat_lamb_and_mutton\n", | |
"✓ meat_mule\n", | |
"✗ meat_pig <---\n", | |
"✗ meat_poultry <---\n", | |
"✓ meat_rabbit\n", | |
"✗ meat_sheep_and_goat <---\n", | |
"✗ meat_total <---\n", | |
"✓ meat_turkey\n", | |
"✓ melon\n", | |
"✓ melonseed\n", | |
"✗ milk <---\n", | |
"✓ millet\n", | |
"✓ mixed_grains\n", | |
"✓ molasses\n", | |
"✓ mushrooms\n", | |
"✓ mustard_seed\n", | |
"✓ nuts\n", | |
"✓ oats\n", | |
"✓ offals\n", | |
"✓ offals_buffaloes\n", | |
"✓ offals_camels\n", | |
"✓ offals_cattle\n", | |
"✓ offals_goats\n", | |
"✓ offals_horses\n", | |
"✓ offals_pigs\n", | |
"✓ offals_sheep\n", | |
"✗ oilcrops <---\n", | |
"✗ oilcrops_cake_equivalent <---\n", | |
"✗ oilcrops_oil_equivalent <---\n", | |
"✓ okra\n", | |
"✓ olive_oil\n", | |
"✓ olives\n", | |
"✓ onions\n", | |
"✓ oranges\n", | |
"✓ palm_fruit_oil\n", | |
"✓ palm_kernel_oil\n", | |
"✓ palm_kernels\n", | |
"✓ palm_oil\n", | |
"✓ papayas\n", | |
"✓ peaches_and_nectarines\n", | |
"✓ pears\n", | |
"✓ peas_dry\n", | |
"✓ peas_green\n", | |
"✓ pepper\n", | |
"✓ pigeon_peas\n", | |
"✓ pineapples\n", | |
"✓ pistachios\n", | |
"✓ plantains\n", | |
"✓ plums\n", | |
"✓ poppy_seeds\n", | |
"✓ pork\n", | |
"✓ potatoes\n", | |
"✗ pulses <---\n", | |
"✓ quinoa\n", | |
"✓ rapeseed\n", | |
"✓ rapeseed_oil\n", | |
"✓ raspberries\n", | |
"✓ rice\n", | |
"✗ roots_and_tubers <---\n", | |
"✓ rye\n", | |
"✓ safflower_oil\n", | |
"✓ safflower_seed\n", | |
"✓ seed_cotton\n", | |
"✓ sesame_oil\n", | |
"✓ sesame_seed\n", | |
"✓ silk\n", | |
"✓ skins_goat\n", | |
"✓ skins_sheep\n", | |
"✓ sorghum\n", | |
"✓ soybean_oil\n", | |
"✓ soybeans\n", | |
"✓ spinach\n", | |
"✓ strawberries\n", | |
"✓ string_beans\n", | |
"✓ sugar_beet\n", | |
"✓ sugar_cane\n", | |
"✗ sugar_crops <---\n", | |
"✓ sugar_raw\n", | |
"✓ sunflower_oil\n", | |
"✓ sunflower_seed\n", | |
"✓ sweet_potatoes\n", | |
"✓ tangerines\n", | |
"✓ tea\n", | |
"✓ tobacco\n", | |
"✓ tomatoes\n", | |
"✓ total\n", | |
"✓ treenuts\n", | |
"✗ vegetables <---\n", | |
"✓ walnuts\n", | |
"✓ watermelons\n", | |
"✓ wheat\n", | |
"✓ whey\n", | |
"✓ wine\n", | |
"✓ wool\n", | |
"✓ yams\n" | |
] | |
} | |
], | |
"source": [ | |
"for i in range(len(tables)):\n", | |
" name = tables.table.iloc[i]\n", | |
" live_table = tables.iloc[i].load()\n", | |
" local_table = local_ds[name]\n", | |
" \n", | |
" live_table.drop('yield__tonnes_per_ha', axis=1, inplace=True)\n", | |
" local_table.drop('yield__tonnes_per_ha', axis=1, inplace=True)\n", | |
" if live_table.equals(local_table):\n", | |
" print(f'✓ {name}')\n", | |
" else:\n", | |
" print(f'✗ {name} <---')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3daafe3b-36a9-4e00-ad8f-454146ef38a1", | |
"metadata": {}, | |
"source": [ | |
"## Wheat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"id": "b8880eb4-1009-4f0e-b078-048c24ac4e7e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lhs = tables[tables.table == 'wheat'].load()\n", | |
"rhs = local_ds['wheat']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"id": "b2cb3073-e9bc-46b5-acbf-11f1a06050b6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lhs = lhs.reset_index()\n", | |
"rhs = rhs.reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 58, | |
"id": "16065cce-6e65-42be-8aab-6f7a61d5a153", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(lhs.Country.unique() == rhs.Country.unique()).all()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"id": "37055e1e-8870-48d0-9db3-52680950590c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"✗ China\n" | |
] | |
} | |
], | |
"source": [ | |
"for country in lhs.Country.unique():\n", | |
" lhs_c = lhs[lhs.Country == country].set_index('Year')\n", | |
" rhs_c = rhs[rhs.Country == country].set_index('Year')\n", | |
" if not lhs_c.equals(rhs_c):\n", | |
" print(f'✗ {country}')\n", | |
" break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"id": "cfd57fc4-fcf4-4807-923e-0dc111debc98", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"assert len(lhs_c) == len(rhs_c)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 79, | |
"id": "bcd98f92-945a-4f7a-9f09-9f59e9510abd", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>current</th>\n", | |
" <th>update</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Product</th>\n", | |
" <td>Wheat</td>\n", | |
" <td>Wheat</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Country</th>\n", | |
" <td>China</td>\n", | |
" <td>China</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>area_harvested__ha</th>\n", | |
" <td>23730000.0</td>\n", | |
" <td>23730000.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>domestic_supply__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>exports__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>feed__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__fat_g_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__kcal_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__kg_per_year</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__protein_g_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>imports__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>other_uses__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>population</th>\n", | |
" <td>1433783692</td>\n", | |
" <td>1433783692</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>producing_or_slaughtered_animals__animals</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>production__tonnes</th>\n", | |
" <td>133596300.0</td>\n", | |
" <td>133596300.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>waste_in_supply_chain__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yield__kg_per_animal</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yield__tonnes_per_ha</th>\n", | |
" <td>5.6294</td>\n", | |
" <td>5.6298</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" current update\n", | |
"Product Wheat Wheat\n", | |
"Country China China\n", | |
"area_harvested__ha 23730000.0 23730000.0\n", | |
"domestic_supply__tonnes NaN NaN\n", | |
"exports__tonnes NaN NaN\n", | |
"feed__tonnes NaN NaN\n", | |
"food__tonnes NaN NaN\n", | |
"food_available_for_consumption__fat_g_per_day NaN NaN\n", | |
"food_available_for_consumption__kcal_per_day NaN NaN\n", | |
"food_available_for_consumption__kg_per_year NaN NaN\n", | |
"food_available_for_consumption__protein_g_per_day NaN NaN\n", | |
"imports__tonnes NaN NaN\n", | |
"other_uses__tonnes NaN NaN\n", | |
"population 1433783692 1433783692\n", | |
"producing_or_slaughtered_animals__animals NaN NaN\n", | |
"production__tonnes 133596300.0 133596300.0\n", | |
"waste_in_supply_chain__tonnes NaN NaN\n", | |
"yield__kg_per_animal NaN NaN\n", | |
"yield__tonnes_per_ha 5.6294 5.6298" | |
] | |
}, | |
"execution_count": 79, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"year = 2019\n", | |
"delta = pd.DataFrame()\n", | |
"delta['current'] = lhs_c.loc[year]\n", | |
"delta['update'] = rhs_c.loc[year]\n", | |
"delta" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "c340f79b-da42-44a1-ad4c-ef014dc5f600", | |
"metadata": {}, | |
"source": [ | |
"**Conclusion**: only `yield__tonnes_per_ha` has changed, and only in the third decimal place" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "11a4a146-607f-45ef-9ebc-7f06d0b698f7", | |
"metadata": {}, | |
"source": [ | |
"## Maize" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"id": "82b8a163-c9c7-4c5b-8bec-77fc1bb0d1cc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lhs = tables[tables.table == 'maize'].load()\n", | |
"rhs = local_ds['maize']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 84, | |
"id": "3741ca0d-0c40-493d-a993-60d10dcd399a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lhs = lhs.reset_index()\n", | |
"rhs = rhs.reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"id": "5f34e471-c85c-4e25-9322-0e4e4a3d7785", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 85, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(lhs.Country.unique() == rhs.Country.unique()).all()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"id": "cff5b106-f2b1-41a2-a89d-bfcfa92753df", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"✗ China\n" | |
] | |
} | |
], | |
"source": [ | |
"for country in lhs.Country.unique():\n", | |
" lhs_c = lhs[lhs.Country == country].set_index('Year')\n", | |
" rhs_c = rhs[rhs.Country == country].set_index('Year')\n", | |
" if not lhs_c.equals(rhs_c):\n", | |
" print(f'✗ {country}')\n", | |
" break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"id": "c2df664e-e766-40ad-8728-9056a215b727", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"assert len(lhs_c) == len(rhs_c)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"id": "3bfe9059-da4c-4bf5-b73e-96f5fe0dedf6", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>current</th>\n", | |
" <th>update</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Product</th>\n", | |
" <td>Maize</td>\n", | |
" <td>Maize</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Country</th>\n", | |
" <td>China</td>\n", | |
" <td>China</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>area_harvested__ha</th>\n", | |
" <td>41280000.0</td>\n", | |
" <td>41280000.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>domestic_supply__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>exports__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>feed__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__fat_g_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__kcal_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__kg_per_year</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__protein_g_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>imports__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>other_uses__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>population</th>\n", | |
" <td>1433783692</td>\n", | |
" <td>1433783692</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>producing_or_slaughtered_animals__animals</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>production__tonnes</th>\n", | |
" <td>260778900.0</td>\n", | |
" <td>260778900.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>waste_in_supply_chain__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yield__kg_per_animal</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yield__tonnes_per_ha</th>\n", | |
" <td>6.3171</td>\n", | |
" <td>6.3173</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" current update\n", | |
"Product Maize Maize\n", | |
"Country China China\n", | |
"area_harvested__ha 41280000.0 41280000.0\n", | |
"domestic_supply__tonnes NaN NaN\n", | |
"exports__tonnes NaN NaN\n", | |
"feed__tonnes NaN NaN\n", | |
"food__tonnes NaN NaN\n", | |
"food_available_for_consumption__fat_g_per_day NaN NaN\n", | |
"food_available_for_consumption__kcal_per_day NaN NaN\n", | |
"food_available_for_consumption__kg_per_year NaN NaN\n", | |
"food_available_for_consumption__protein_g_per_day NaN NaN\n", | |
"imports__tonnes NaN NaN\n", | |
"other_uses__tonnes NaN NaN\n", | |
"population 1433783692 1433783692\n", | |
"producing_or_slaughtered_animals__animals NaN NaN\n", | |
"production__tonnes 260778900.0 260778900.0\n", | |
"waste_in_supply_chain__tonnes NaN NaN\n", | |
"yield__kg_per_animal NaN NaN\n", | |
"yield__tonnes_per_ha 6.3171 6.3173" | |
] | |
}, | |
"execution_count": 88, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"year = 2019\n", | |
"delta = pd.DataFrame()\n", | |
"delta['current'] = lhs_c.loc[year]\n", | |
"delta['update'] = rhs_c.loc[year]\n", | |
"delta" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "60cf419d-60eb-44a4-b8ab-7ae9ec4c63ec", | |
"metadata": {}, | |
"source": [ | |
"**Conclusion**: only `yield__tonnes_per_ha` has changed, and only in the fourth decimal place" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6e758330-6ae7-4ebb-9097-28ff0f23f1f3", | |
"metadata": {}, | |
"source": [ | |
"## Cereals" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"id": "2656ac8f-92e4-46d7-8810-7ee22747e4d5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lhs = tables[tables.table == 'cereals'].load()\n", | |
"rhs = local_ds['cereals']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"id": "fdb71639-10fa-4645-8e5d-7e0bbe93b910", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lhs = lhs.reset_index()\n", | |
"rhs = rhs.reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"id": "26458594-238a-4fab-8eef-9c70d5f1649f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 93, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(lhs.Country.unique() == rhs.Country.unique()).all()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"id": "f103efd9-9215-485e-bb15-d96daafac0e3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"✗ Asia\n" | |
] | |
} | |
], | |
"source": [ | |
"for country in lhs.Country.unique():\n", | |
" lhs_c = lhs[lhs.Country == country].set_index('Year')\n", | |
" rhs_c = rhs[rhs.Country == country].set_index('Year')\n", | |
" if not lhs_c.equals(rhs_c):\n", | |
" print(f'✗ {country}')\n", | |
" break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"id": "da2b2ed3-71df-46b8-bb0d-c237982bb697", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"assert len(lhs_c) == len(rhs_c)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"id": "da0b6e68-7c02-440a-b248-f2581602fd8d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>current</th>\n", | |
" <th>update</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Product</th>\n", | |
" <td>Cereals</td>\n", | |
" <td>Cereals</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Country</th>\n", | |
" <td>Asia</td>\n", | |
" <td>Asia</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>area_harvested__ha</th>\n", | |
" <td>324011026.0</td>\n", | |
" <td>324315639.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>domestic_supply__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>exports__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>feed__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__fat_g_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__kcal_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__kg_per_year</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>food_available_for_consumption__protein_g_per_day</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>imports__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>other_uses__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>population</th>\n", | |
" <td>4517301213</td>\n", | |
" <td>4517301213</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>producing_or_slaughtered_animals__animals</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>production__tonnes</th>\n", | |
" <td>1400161610.0</td>\n", | |
" <td>1402139466.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>waste_in_supply_chain__tonnes</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yield__kg_per_animal</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yield__tonnes_per_ha</th>\n", | |
" <td>4.321339</td>\n", | |
" <td>4.323379</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" current update\n", | |
"Product Cereals Cereals\n", | |
"Country Asia Asia\n", | |
"area_harvested__ha 324011026.0 324315639.0\n", | |
"domestic_supply__tonnes NaN NaN\n", | |
"exports__tonnes NaN NaN\n", | |
"feed__tonnes NaN NaN\n", | |
"food__tonnes NaN NaN\n", | |
"food_available_for_consumption__fat_g_per_day NaN NaN\n", | |
"food_available_for_consumption__kcal_per_day NaN NaN\n", | |
"food_available_for_consumption__kg_per_year NaN NaN\n", | |
"food_available_for_consumption__protein_g_per_day NaN NaN\n", | |
"imports__tonnes NaN NaN\n", | |
"other_uses__tonnes NaN NaN\n", | |
"population 4517301213 4517301213\n", | |
"producing_or_slaughtered_animals__animals NaN NaN\n", | |
"production__tonnes 1400161610.0 1402139466.0\n", | |
"waste_in_supply_chain__tonnes NaN NaN\n", | |
"yield__kg_per_animal NaN NaN\n", | |
"yield__tonnes_per_ha 4.321339 4.323379" | |
] | |
}, | |
"execution_count": 104, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"year = 2019\n", | |
"delta = pd.DataFrame()\n", | |
"delta['current'] = lhs_c.loc[year]\n", | |
"delta['update'] = rhs_c.loc[year]\n", | |
"delta" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment