Skip to content

Instantly share code, notes, and snippets.

@larsyencken
Created February 10, 2022 14:33
Show Gist options
  • Save larsyencken/42a464f6b744850ebed30afab35790e2 to your computer and use it in GitHub Desktop.
Save larsyencken/42a464f6b744850ebed30afab35790e2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "18147d3b-e3bf-4624-919d-fd6fd498e0c5",
"metadata": {},
"source": [
"# Comparison of food explorer data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "57ebc2d8-1a6b-46d5-9877-75d8cda976d8",
"metadata": {},
"outputs": [],
"source": [
"from owid import catalog"
]
},
{
"cell_type": "markdown",
"id": "0be912cc-6d0d-42f9-b38c-e7bc6ac9c65f",
"metadata": {},
"source": [
"## Currently published explorer data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2b9876a4-845d-4697-b532-5d587a0bb6b5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>table</th>\n",
" <th>dataset</th>\n",
" <th>version</th>\n",
" <th>namespace</th>\n",
" <th>dimensions</th>\n",
" <th>path</th>\n",
" <th>format</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1181</th>\n",
" <td>almonds</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/almonds</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1307</th>\n",
" <td>animal_fats</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/animal_fats</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1452</th>\n",
" <td>apples</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/apples</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1530</th>\n",
" <td>apricots</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/apricots</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1681</th>\n",
" <td>areca_nuts</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/areca_nuts</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40731</th>\n",
" <td>wheat</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/wheat</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40819</th>\n",
" <td>whey</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/whey</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41124</th>\n",
" <td>wine</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/wine</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41267</th>\n",
" <td>wool</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/wool</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41543</th>\n",
" <td>yams</td>\n",
" <td>food_explorer</td>\n",
" <td>None</td>\n",
" <td>explorers</td>\n",
" <td>[\"Product\", \"Country\", \"Year\"]</td>\n",
" <td>garden/explorers/2021/food_explorer/yams</td>\n",
" <td>csv</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>191 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" table dataset version namespace \\\n",
"1181 almonds food_explorer None explorers \n",
"1307 animal_fats food_explorer None explorers \n",
"1452 apples food_explorer None explorers \n",
"1530 apricots food_explorer None explorers \n",
"1681 areca_nuts food_explorer None explorers \n",
"... ... ... ... ... \n",
"40731 wheat food_explorer None explorers \n",
"40819 whey food_explorer None explorers \n",
"41124 wine food_explorer None explorers \n",
"41267 wool food_explorer None explorers \n",
"41543 yams food_explorer None explorers \n",
"\n",
" dimensions \\\n",
"1181 [\"Product\", \"Country\", \"Year\"] \n",
"1307 [\"Product\", \"Country\", \"Year\"] \n",
"1452 [\"Product\", \"Country\", \"Year\"] \n",
"1530 [\"Product\", \"Country\", \"Year\"] \n",
"1681 [\"Product\", \"Country\", \"Year\"] \n",
"... ... \n",
"40731 [\"Product\", \"Country\", \"Year\"] \n",
"40819 [\"Product\", \"Country\", \"Year\"] \n",
"41124 [\"Product\", \"Country\", \"Year\"] \n",
"41267 [\"Product\", \"Country\", \"Year\"] \n",
"41543 [\"Product\", \"Country\", \"Year\"] \n",
"\n",
" path format \n",
"1181 garden/explorers/2021/food_explorer/almonds csv \n",
"1307 garden/explorers/2021/food_explorer/animal_fats csv \n",
"1452 garden/explorers/2021/food_explorer/apples csv \n",
"1530 garden/explorers/2021/food_explorer/apricots csv \n",
"1681 garden/explorers/2021/food_explorer/areca_nuts csv \n",
"... ... ... \n",
"40731 garden/explorers/2021/food_explorer/wheat csv \n",
"40819 garden/explorers/2021/food_explorer/whey csv \n",
"41124 garden/explorers/2021/food_explorer/wine csv \n",
"41267 garden/explorers/2021/food_explorer/wool csv \n",
"41543 garden/explorers/2021/food_explorer/yams csv \n",
"\n",
"[191 rows x 7 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tables = catalog.find(dataset='food_explorer')\n",
"tables"
]
},
{
"cell_type": "markdown",
"id": "34fcbcf3-9f29-4ea1-9d95-ea614b5c1e1a",
"metadata": {},
"source": [
"## Regenerated explorer data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2b6e8b73-7b88-4b96-ab9c-a50169cfbd76",
"metadata": {},
"outputs": [],
"source": [
"local_ds = catalog.Dataset(f'data/garden/explorers/2021/food_explorer')"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "f1d3acc1-6f56-41a3-bc84-82b7b0f7b314",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ almonds\n",
"✓ animal_fats\n",
"✓ apples\n",
"✓ apricots\n",
"✓ areca_nuts\n",
"✓ artichokes\n",
"✓ asparagus\n",
"✓ avocados\n",
"✓ bananas\n",
"✓ barley\n",
"✓ beans_dry\n",
"✓ beans_green\n",
"✓ beeswax\n",
"✓ blueberries\n",
"✓ brazil_nuts_with_shell\n",
"✓ broad_beans\n",
"✓ buckwheat\n",
"✗ buffalo_hides <---\n",
"✗ bulk <---\n",
"✗ butter_and_ghee <---\n",
"✓ cabbages\n",
"✓ canary_seed\n",
"✓ carrots_and_turnips\n",
"✓ cashew_nuts\n",
"✓ cassava\n",
"✓ castor_oil_seed\n",
"✗ cattle_hides <---\n",
"✓ cauliflowers_and_broccoli\n",
"✗ cereals <---\n",
"✓ cheese\n",
"✓ cherries\n",
"✓ chestnut\n",
"✓ chickpeas\n",
"✓ chillies_and_peppers\n",
"✗ citrus_fruit <---\n",
"✓ cocoa_beans\n",
"✓ coconut_oil\n",
"✓ coconuts\n",
"✓ coffee_green\n",
"✓ cotton\n",
"✓ cottonseed\n",
"✓ cottonseed_oil\n",
"✓ cow_peas\n",
"✓ cranberries\n",
"✓ cucumbers_and_gherkins\n",
"✓ currants\n",
"✓ dates\n",
"✓ eggplants\n",
"✗ eggs <---\n",
"✗ eggs_from_hens <---\n",
"✓ eggs_from_other_birds_excl_hens\n",
"✓ fat_buffaloes\n",
"✓ fat_camels\n",
"✓ fat_cattle\n",
"✓ fat_goats\n",
"✓ fat_pigs\n",
"✓ fat_sheep\n",
"✗ fibre_crops <---\n",
"✓ fish_and_seafood\n",
"✓ flax_fibre\n",
"✗ fruit <---\n",
"✓ garlic\n",
"✓ grapefruit\n",
"✓ grapes\n",
"✓ green_maize\n",
"✓ groundnut_oil\n",
"✓ groundnuts\n",
"✓ hazelnuts\n",
"✓ hempseed\n",
"✓ herbs_eg_fennel\n",
"✓ honey\n",
"✓ jute\n",
"✓ karite_nuts\n",
"✓ kiwi\n",
"✓ kola_nuts\n",
"✓ leeks\n",
"✓ lemons_and_limes\n",
"✓ lentils\n",
"✓ lettuce\n",
"✓ linseed\n",
"✓ linseed_oil\n",
"✓ maize\n",
"✓ maize_oil\n",
"✓ mangoes\n",
"✓ margarine\n",
"✓ meat_ass\n",
"✓ meat_beef\n",
"✗ meat_beef_and_buffalo <---\n",
"✗ meat_buffalo <---\n",
"✓ meat_camel\n",
"✗ meat_chicken <---\n",
"✗ meat_duck <---\n",
"✓ meat_game\n",
"✗ meat_goat <---\n",
"✗ meat_goose_and_guinea_fowl <---\n",
"✓ meat_horse\n",
"✓ meat_lamb_and_mutton\n",
"✓ meat_mule\n",
"✗ meat_pig <---\n",
"✗ meat_poultry <---\n",
"✓ meat_rabbit\n",
"✗ meat_sheep_and_goat <---\n",
"✗ meat_total <---\n",
"✓ meat_turkey\n",
"✓ melon\n",
"✓ melonseed\n",
"✗ milk <---\n",
"✓ millet\n",
"✓ mixed_grains\n",
"✓ molasses\n",
"✓ mushrooms\n",
"✓ mustard_seed\n",
"✓ nuts\n",
"✓ oats\n",
"✓ offals\n",
"✓ offals_buffaloes\n",
"✓ offals_camels\n",
"✓ offals_cattle\n",
"✓ offals_goats\n",
"✓ offals_horses\n",
"✓ offals_pigs\n",
"✓ offals_sheep\n",
"✗ oilcrops <---\n",
"✗ oilcrops_cake_equivalent <---\n",
"✗ oilcrops_oil_equivalent <---\n",
"✓ okra\n",
"✓ olive_oil\n",
"✓ olives\n",
"✓ onions\n",
"✓ oranges\n",
"✓ palm_fruit_oil\n",
"✓ palm_kernel_oil\n",
"✓ palm_kernels\n",
"✓ palm_oil\n",
"✓ papayas\n",
"✓ peaches_and_nectarines\n",
"✓ pears\n",
"✓ peas_dry\n",
"✓ peas_green\n",
"✓ pepper\n",
"✓ pigeon_peas\n",
"✓ pineapples\n",
"✓ pistachios\n",
"✓ plantains\n",
"✓ plums\n",
"✓ poppy_seeds\n",
"✓ pork\n",
"✓ potatoes\n",
"✗ pulses <---\n",
"✓ quinoa\n",
"✓ rapeseed\n",
"✓ rapeseed_oil\n",
"✓ raspberries\n",
"✓ rice\n",
"✗ roots_and_tubers <---\n",
"✓ rye\n",
"✓ safflower_oil\n",
"✓ safflower_seed\n",
"✓ seed_cotton\n",
"✓ sesame_oil\n",
"✓ sesame_seed\n",
"✓ silk\n",
"✓ skins_goat\n",
"✓ skins_sheep\n",
"✓ sorghum\n",
"✓ soybean_oil\n",
"✓ soybeans\n",
"✓ spinach\n",
"✓ strawberries\n",
"✓ string_beans\n",
"✓ sugar_beet\n",
"✓ sugar_cane\n",
"✗ sugar_crops <---\n",
"✓ sugar_raw\n",
"✓ sunflower_oil\n",
"✓ sunflower_seed\n",
"✓ sweet_potatoes\n",
"✓ tangerines\n",
"✓ tea\n",
"✓ tobacco\n",
"✓ tomatoes\n",
"✓ total\n",
"✓ treenuts\n",
"✗ vegetables <---\n",
"✓ walnuts\n",
"✓ watermelons\n",
"✓ wheat\n",
"✓ whey\n",
"✓ wine\n",
"✓ wool\n",
"✓ yams\n"
]
}
],
"source": [
"for i in range(len(tables)):\n",
" name = tables.table.iloc[i]\n",
" live_table = tables.iloc[i].load()\n",
" local_table = local_ds[name]\n",
" \n",
" live_table.drop('yield__tonnes_per_ha', axis=1, inplace=True)\n",
" local_table.drop('yield__tonnes_per_ha', axis=1, inplace=True)\n",
" if live_table.equals(local_table):\n",
" print(f'✓ {name}')\n",
" else:\n",
" print(f'✗ {name} <---')"
]
},
{
"cell_type": "markdown",
"id": "3daafe3b-36a9-4e00-ad8f-454146ef38a1",
"metadata": {},
"source": [
"## Wheat"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "b8880eb4-1009-4f0e-b078-048c24ac4e7e",
"metadata": {},
"outputs": [],
"source": [
"lhs = tables[tables.table == 'wheat'].load()\n",
"rhs = local_ds['wheat']"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "b2cb3073-e9bc-46b5-acbf-11f1a06050b6",
"metadata": {},
"outputs": [],
"source": [
"lhs = lhs.reset_index()\n",
"rhs = rhs.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "16065cce-6e65-42be-8aab-6f7a61d5a153",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(lhs.Country.unique() == rhs.Country.unique()).all()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "37055e1e-8870-48d0-9db3-52680950590c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✗ China\n"
]
}
],
"source": [
"for country in lhs.Country.unique():\n",
" lhs_c = lhs[lhs.Country == country].set_index('Year')\n",
" rhs_c = rhs[rhs.Country == country].set_index('Year')\n",
" if not lhs_c.equals(rhs_c):\n",
" print(f'✗ {country}')\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "cfd57fc4-fcf4-4807-923e-0dc111debc98",
"metadata": {},
"outputs": [],
"source": [
"assert len(lhs_c) == len(rhs_c)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "bcd98f92-945a-4f7a-9f09-9f59e9510abd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>current</th>\n",
" <th>update</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Product</th>\n",
" <td>Wheat</td>\n",
" <td>Wheat</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <td>China</td>\n",
" <td>China</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area_harvested__ha</th>\n",
" <td>23730000.0</td>\n",
" <td>23730000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>domestic_supply__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>exports__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>feed__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__fat_g_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__kcal_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__kg_per_year</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__protein_g_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imports__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other_uses__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>population</th>\n",
" <td>1433783692</td>\n",
" <td>1433783692</td>\n",
" </tr>\n",
" <tr>\n",
" <th>producing_or_slaughtered_animals__animals</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>production__tonnes</th>\n",
" <td>133596300.0</td>\n",
" <td>133596300.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>waste_in_supply_chain__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yield__kg_per_animal</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yield__tonnes_per_ha</th>\n",
" <td>5.6294</td>\n",
" <td>5.6298</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" current update\n",
"Product Wheat Wheat\n",
"Country China China\n",
"area_harvested__ha 23730000.0 23730000.0\n",
"domestic_supply__tonnes NaN NaN\n",
"exports__tonnes NaN NaN\n",
"feed__tonnes NaN NaN\n",
"food__tonnes NaN NaN\n",
"food_available_for_consumption__fat_g_per_day NaN NaN\n",
"food_available_for_consumption__kcal_per_day NaN NaN\n",
"food_available_for_consumption__kg_per_year NaN NaN\n",
"food_available_for_consumption__protein_g_per_day NaN NaN\n",
"imports__tonnes NaN NaN\n",
"other_uses__tonnes NaN NaN\n",
"population 1433783692 1433783692\n",
"producing_or_slaughtered_animals__animals NaN NaN\n",
"production__tonnes 133596300.0 133596300.0\n",
"waste_in_supply_chain__tonnes NaN NaN\n",
"yield__kg_per_animal NaN NaN\n",
"yield__tonnes_per_ha 5.6294 5.6298"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"year = 2019\n",
"delta = pd.DataFrame()\n",
"delta['current'] = lhs_c.loc[year]\n",
"delta['update'] = rhs_c.loc[year]\n",
"delta"
]
},
{
"cell_type": "markdown",
"id": "c340f79b-da42-44a1-ad4c-ef014dc5f600",
"metadata": {},
"source": [
"**Conclusion**: only `yield__tonnes_per_ha` has changed, and only in the third decimal place"
]
},
{
"cell_type": "markdown",
"id": "11a4a146-607f-45ef-9ebc-7f06d0b698f7",
"metadata": {},
"source": [
"## Maize"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "82b8a163-c9c7-4c5b-8bec-77fc1bb0d1cc",
"metadata": {},
"outputs": [],
"source": [
"lhs = tables[tables.table == 'maize'].load()\n",
"rhs = local_ds['maize']"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "3741ca0d-0c40-493d-a993-60d10dcd399a",
"metadata": {},
"outputs": [],
"source": [
"lhs = lhs.reset_index()\n",
"rhs = rhs.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "5f34e471-c85c-4e25-9322-0e4e4a3d7785",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(lhs.Country.unique() == rhs.Country.unique()).all()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "cff5b106-f2b1-41a2-a89d-bfcfa92753df",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✗ China\n"
]
}
],
"source": [
"for country in lhs.Country.unique():\n",
" lhs_c = lhs[lhs.Country == country].set_index('Year')\n",
" rhs_c = rhs[rhs.Country == country].set_index('Year')\n",
" if not lhs_c.equals(rhs_c):\n",
" print(f'✗ {country}')\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "c2df664e-e766-40ad-8728-9056a215b727",
"metadata": {},
"outputs": [],
"source": [
"assert len(lhs_c) == len(rhs_c)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "3bfe9059-da4c-4bf5-b73e-96f5fe0dedf6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>current</th>\n",
" <th>update</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Product</th>\n",
" <td>Maize</td>\n",
" <td>Maize</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <td>China</td>\n",
" <td>China</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area_harvested__ha</th>\n",
" <td>41280000.0</td>\n",
" <td>41280000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>domestic_supply__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>exports__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>feed__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__fat_g_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__kcal_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__kg_per_year</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__protein_g_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imports__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other_uses__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>population</th>\n",
" <td>1433783692</td>\n",
" <td>1433783692</td>\n",
" </tr>\n",
" <tr>\n",
" <th>producing_or_slaughtered_animals__animals</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>production__tonnes</th>\n",
" <td>260778900.0</td>\n",
" <td>260778900.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>waste_in_supply_chain__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yield__kg_per_animal</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yield__tonnes_per_ha</th>\n",
" <td>6.3171</td>\n",
" <td>6.3173</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" current update\n",
"Product Maize Maize\n",
"Country China China\n",
"area_harvested__ha 41280000.0 41280000.0\n",
"domestic_supply__tonnes NaN NaN\n",
"exports__tonnes NaN NaN\n",
"feed__tonnes NaN NaN\n",
"food__tonnes NaN NaN\n",
"food_available_for_consumption__fat_g_per_day NaN NaN\n",
"food_available_for_consumption__kcal_per_day NaN NaN\n",
"food_available_for_consumption__kg_per_year NaN NaN\n",
"food_available_for_consumption__protein_g_per_day NaN NaN\n",
"imports__tonnes NaN NaN\n",
"other_uses__tonnes NaN NaN\n",
"population 1433783692 1433783692\n",
"producing_or_slaughtered_animals__animals NaN NaN\n",
"production__tonnes 260778900.0 260778900.0\n",
"waste_in_supply_chain__tonnes NaN NaN\n",
"yield__kg_per_animal NaN NaN\n",
"yield__tonnes_per_ha 6.3171 6.3173"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"year = 2019\n",
"delta = pd.DataFrame()\n",
"delta['current'] = lhs_c.loc[year]\n",
"delta['update'] = rhs_c.loc[year]\n",
"delta"
]
},
{
"cell_type": "markdown",
"id": "60cf419d-60eb-44a4-b8ab-7ae9ec4c63ec",
"metadata": {},
"source": [
"**Conclusion**: only `yield__tonnes_per_ha` has changed, and only in the fourth decimal place"
]
},
{
"cell_type": "markdown",
"id": "6e758330-6ae7-4ebb-9097-28ff0f23f1f3",
"metadata": {},
"source": [
"## Cereals"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "2656ac8f-92e4-46d7-8810-7ee22747e4d5",
"metadata": {},
"outputs": [],
"source": [
"lhs = tables[tables.table == 'cereals'].load()\n",
"rhs = local_ds['cereals']"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "fdb71639-10fa-4645-8e5d-7e0bbe93b910",
"metadata": {},
"outputs": [],
"source": [
"lhs = lhs.reset_index()\n",
"rhs = rhs.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "26458594-238a-4fab-8eef-9c70d5f1649f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(lhs.Country.unique() == rhs.Country.unique()).all()"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "f103efd9-9215-485e-bb15-d96daafac0e3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✗ Asia\n"
]
}
],
"source": [
"for country in lhs.Country.unique():\n",
" lhs_c = lhs[lhs.Country == country].set_index('Year')\n",
" rhs_c = rhs[rhs.Country == country].set_index('Year')\n",
" if not lhs_c.equals(rhs_c):\n",
" print(f'✗ {country}')\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "da2b2ed3-71df-46b8-bb0d-c237982bb697",
"metadata": {},
"outputs": [],
"source": [
"assert len(lhs_c) == len(rhs_c)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "da0b6e68-7c02-440a-b248-f2581602fd8d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>current</th>\n",
" <th>update</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Product</th>\n",
" <td>Cereals</td>\n",
" <td>Cereals</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <td>Asia</td>\n",
" <td>Asia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area_harvested__ha</th>\n",
" <td>324011026.0</td>\n",
" <td>324315639.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>domestic_supply__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>exports__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>feed__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__fat_g_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__kcal_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__kg_per_year</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>food_available_for_consumption__protein_g_per_day</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imports__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other_uses__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>population</th>\n",
" <td>4517301213</td>\n",
" <td>4517301213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>producing_or_slaughtered_animals__animals</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>production__tonnes</th>\n",
" <td>1400161610.0</td>\n",
" <td>1402139466.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>waste_in_supply_chain__tonnes</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yield__kg_per_animal</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yield__tonnes_per_ha</th>\n",
" <td>4.321339</td>\n",
" <td>4.323379</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" current update\n",
"Product Cereals Cereals\n",
"Country Asia Asia\n",
"area_harvested__ha 324011026.0 324315639.0\n",
"domestic_supply__tonnes NaN NaN\n",
"exports__tonnes NaN NaN\n",
"feed__tonnes NaN NaN\n",
"food__tonnes NaN NaN\n",
"food_available_for_consumption__fat_g_per_day NaN NaN\n",
"food_available_for_consumption__kcal_per_day NaN NaN\n",
"food_available_for_consumption__kg_per_year NaN NaN\n",
"food_available_for_consumption__protein_g_per_day NaN NaN\n",
"imports__tonnes NaN NaN\n",
"other_uses__tonnes NaN NaN\n",
"population 4517301213 4517301213\n",
"producing_or_slaughtered_animals__animals NaN NaN\n",
"production__tonnes 1400161610.0 1402139466.0\n",
"waste_in_supply_chain__tonnes NaN NaN\n",
"yield__kg_per_animal NaN NaN\n",
"yield__tonnes_per_ha 4.321339 4.323379"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"year = 2019\n",
"delta = pd.DataFrame()\n",
"delta['current'] = lhs_c.loc[year]\n",
"delta['update'] = rhs_c.loc[year]\n",
"delta"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment