calebrob6 · February 10, 2025 16:50
diff --git a/itu_global_unconnected.ipynb b/itu_global_unconnected.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input CSVs, given as paths relative to the notebook's working directory.\n",
    "CONNECTIVITY_CSV = 'data/population-coverage-by-mobile-network-technology_1739205819280.csv'\n",
    "POPULATION_CSV = 'data/total-population_1739205865536.csv'\n",
    "\n",
    "df_connectivity = pd.read_csv(CONNECTIVITY_CSV)\n",
    "df_population = pd.read_csv(POPULATION_CSV)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>seriesID</th>\n",
       "      <th>seriesCode</th>\n",
       "      <th>seriesName</th>\n",
       "      <th>seriesParent</th>\n",
       "      <th>seriesUnits</th>\n",
       "      <th>entityID</th>\n",
       "      <th>entityIso</th>\n",
       "      <th>entityName</th>\n",
       "      <th>dataValue</th>\n",
       "      <th>dataYear</th>\n",
       "      <th>dataNote</th>\n",
       "      <th>dataSource</th>\n",
       "      <th>seriesDescription</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>194</td>\n",
       "      <td>i271pop</td>\n",
       "      <td>At least 2G</td>\n",
       "      <td>NaN</td>\n",
       "      <td>%</td>\n",
       "      <td>4</td>\n",
       "      <td>ABW</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>90.0</td>\n",
       "      <td>2005</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Refers to the percentage of inhabitants within...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>194</td>\n",
       "      <td>i271pop</td>\n",
       "      <td>At least 2G</td>\n",
       "      <td>NaN</td>\n",
       "      <td>%</td>\n",
       "      <td>4</td>\n",
       "      <td>ABW</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>99.0</td>\n",
       "      <td>2006</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Refers to the percentage of inhabitants within...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   seriesID seriesCode   seriesName  seriesParent seriesUnits  entityID  \\\n",
       "0       194    i271pop  At least 2G           NaN           %         4   \n",
       "1       194    i271pop  At least 2G           NaN           %         4   \n",
       "\n",
       "  entityIso entityName  dataValue  dataYear dataNote dataSource  \\\n",
       "0       ABW      Aruba       90.0      2005      NaN        NaN   \n",
       "1       ABW      Aruba       99.0      2006      NaN        NaN   \n",
       "\n",
       "                                   seriesDescription  \n",
       "0  Refers to the percentage of inhabitants within...  \n",
       "1  Refers to the percentage of inhabitants within...  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_connectivity.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>seriesID</th>\n",
       "      <th>seriesCode</th>\n",
       "      <th>seriesName</th>\n",
       "      <th>seriesParent</th>\n",
       "      <th>seriesUnits</th>\n",
       "      <th>entityID</th>\n",
       "      <th>entityIso</th>\n",
       "      <th>entityName</th>\n",
       "      <th>dataValue</th>\n",
       "      <th>dataYear</th>\n",
       "      <th>dataNote</th>\n",
       "      <th>dataSource</th>\n",
       "      <th>seriesDescription</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>260</td>\n",
       "      <td>i61</td>\n",
       "      <td>Total population</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>ABW</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>88761.0</td>\n",
       "      <td>2000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UN Population Division World Population Prospe...</td>\n",
       "      <td>Total population</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>260</td>\n",
       "      <td>i61</td>\n",
       "      <td>Total population</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>ABW</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>90305.0</td>\n",
       "      <td>2001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>UN Population Division World Population Prospe...</td>\n",
       "      <td>Total population</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   seriesID seriesCode        seriesName  seriesParent  seriesUnits  entityID  \\\n",
       "0       260        i61  Total population           NaN          NaN         4   \n",
       "1       260        i61  Total population           NaN          NaN         4   \n",
       "\n",
       "  entityIso entityName  dataValue  dataYear dataNote  \\\n",
       "0       ABW      Aruba    88761.0      2000      NaN   \n",
       "1       ABW      Aruba    90305.0      2001      NaN   \n",
       "\n",
       "                                          dataSource seriesDescription  \n",
       "0  UN Population Division World Population Prospe...  Total population  \n",
       "1  UN Population Division World Population Prospe...  Total population  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_population.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the 2020 rows; coverage is further restricted to the \"At least 2G\" series.\n",
    "is_2020 = df_connectivity[\"dataYear\"] == 2020\n",
    "is_at_least_2g = df_connectivity[\"seriesName\"] == \"At least 2G\"\n",
    "df_connectivity_2020 = df_connectivity[is_2020 & is_at_least_2g]\n",
    "\n",
    "df_population_2020 = df_population[df_population[\"dataYear\"] == 2020]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((232, 13), (210, 13))"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_population_2020.shape, df_connectivity_2020.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join population (left, `_x` suffix) with coverage (right, `_y` suffix) on the entity ISO code.\n",
    "# `validate` makes pandas raise a MergeError if either side has duplicate `entityIso` keys,\n",
    "# which would otherwise duplicate rows and inflate the population-weighted sum computed later.\n",
    "# The inner join keeps only entities present in both frames.\n",
    "df_merged = pd.merge(\n",
    "    df_population_2020,\n",
    "    df_connectivity_2020,\n",
    "    on=\"entityIso\",\n",
    "    how=\"inner\",\n",
    "    validate=\"one_to_one\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(210, 25)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_merged.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: despite the `_pct` suffix this column holds a fraction (percentage / 100).\n",
    "# `dataValue_y` is the coverage value contributed by the right-hand frame of the merge.\n",
    "df_merged[\"unconnected_pct\"] = df_merged[\"dataValue_y\"].rsub(100).div(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(234573691.57819983)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Population (left-hand `dataValue_x`) weighted by the uncovered fraction, summed over all merged rows.\n",
    "df_merged[\"dataValue_x\"].mul(df_merged[\"unconnected_pct\"]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "234.57369157819983"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same total expressed in millions, recomputed from `df_merged` instead of a literal pasted\n",
    "# from an earlier output, so it cannot go stale when the data or filters change.\n",
    "# float() converts the NumPy scalar to a plain Python float before dividing.\n",
    "float((df_merged[\"dataValue_x\"] * df_merged[\"unconnected_pct\"]).sum()) / 1e6"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ihme",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Input CSVs, given as paths relative to the notebook's working directory.\n",
	"CONNECTIVITY_CSV = 'data/population-coverage-by-mobile-network-technology_1739205819280.csv'\n",
	"POPULATION_CSV = 'data/total-population_1739205865536.csv'\n",
	"\n",
	"df_connectivity = pd.read_csv(CONNECTIVITY_CSV)\n",
	"df_population = pd.read_csv(POPULATION_CSV)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>seriesID</th>\n",
	" <th>seriesCode</th>\n",
	" <th>seriesName</th>\n",
	" <th>seriesParent</th>\n",
	" <th>seriesUnits</th>\n",
	" <th>entityID</th>\n",
	" <th>entityIso</th>\n",
	" <th>entityName</th>\n",
	" <th>dataValue</th>\n",
	" <th>dataYear</th>\n",
	" <th>dataNote</th>\n",
	" <th>dataSource</th>\n",
	" <th>seriesDescription</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>194</td>\n",
	" <td>i271pop</td>\n",
	" <td>At least 2G</td>\n",
	" <td>NaN</td>\n",
	" <td>%</td>\n",
	" <td>4</td>\n",
	" <td>ABW</td>\n",
	" <td>Aruba</td>\n",
	" <td>90.0</td>\n",
	" <td>2005</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>Refers to the percentage of inhabitants within...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>194</td>\n",
	" <td>i271pop</td>\n",
	" <td>At least 2G</td>\n",
	" <td>NaN</td>\n",
	" <td>%</td>\n",
	" <td>4</td>\n",
	" <td>ABW</td>\n",
	" <td>Aruba</td>\n",
	" <td>99.0</td>\n",
	" <td>2006</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>Refers to the percentage of inhabitants within...</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" seriesID seriesCode seriesName seriesParent seriesUnits entityID \\\n",
	"0 194 i271pop At least 2G NaN % 4 \n",
	"1 194 i271pop At least 2G NaN % 4 \n",
	"\n",
	" entityIso entityName dataValue dataYear dataNote dataSource \\\n",
	"0 ABW Aruba 90.0 2005 NaN NaN \n",
	"1 ABW Aruba 99.0 2006 NaN NaN \n",
	"\n",
	" seriesDescription \n",
	"0 Refers to the percentage of inhabitants within... \n",
	"1 Refers to the percentage of inhabitants within... "
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_connectivity.head(2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>seriesID</th>\n",
	" <th>seriesCode</th>\n",
	" <th>seriesName</th>\n",
	" <th>seriesParent</th>\n",
	" <th>seriesUnits</th>\n",
	" <th>entityID</th>\n",
	" <th>entityIso</th>\n",
	" <th>entityName</th>\n",
	" <th>dataValue</th>\n",
	" <th>dataYear</th>\n",
	" <th>dataNote</th>\n",
	" <th>dataSource</th>\n",
	" <th>seriesDescription</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>260</td>\n",
	" <td>i61</td>\n",
	" <td>Total population</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>4</td>\n",
	" <td>ABW</td>\n",
	" <td>Aruba</td>\n",
	" <td>88761.0</td>\n",
	" <td>2000</td>\n",
	" <td>NaN</td>\n",
	" <td>UN Population Division World Population Prospe...</td>\n",
	" <td>Total population</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>260</td>\n",
	" <td>i61</td>\n",
	" <td>Total population</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>4</td>\n",
	" <td>ABW</td>\n",
	" <td>Aruba</td>\n",
	" <td>90305.0</td>\n",
	" <td>2001</td>\n",
	" <td>NaN</td>\n",
	" <td>UN Population Division World Population Prospe...</td>\n",
	" <td>Total population</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" seriesID seriesCode seriesName seriesParent seriesUnits entityID \\\n",
	"0 260 i61 Total population NaN NaN 4 \n",
	"1 260 i61 Total population NaN NaN 4 \n",
	"\n",
	" entityIso entityName dataValue dataYear dataNote \\\n",
	"0 ABW Aruba 88761.0 2000 NaN \n",
	"1 ABW Aruba 90305.0 2001 NaN \n",
	"\n",
	" dataSource seriesDescription \n",
	"0 UN Population Division World Population Prospe... Total population \n",
	"1 UN Population Division World Population Prospe... Total population "
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_population.head(2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Keep only the 2020 rows; coverage is further restricted to the \"At least 2G\" series.\n",
	"is_2020 = df_connectivity[\"dataYear\"] == 2020\n",
	"is_at_least_2g = df_connectivity[\"seriesName\"] == \"At least 2G\"\n",
	"df_connectivity_2020 = df_connectivity[is_2020 & is_at_least_2g]\n",
	"\n",
	"df_population_2020 = df_population[df_population[\"dataYear\"] == 2020]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"((232, 13), (210, 13))"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_population_2020.shape, df_connectivity_2020.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Join population (left, `_x` suffix) with coverage (right, `_y` suffix) on the entity ISO code.\n",
	"# `validate` makes pandas raise a MergeError if either side has duplicate `entityIso` keys,\n",
	"# which would otherwise duplicate rows and inflate the population-weighted sum computed later.\n",
	"# The inner join keeps only entities present in both frames.\n",
	"df_merged = pd.merge(\n",
	"    df_population_2020,\n",
	"    df_connectivity_2020,\n",
	"    on=\"entityIso\",\n",
	"    how=\"inner\",\n",
	"    validate=\"one_to_one\",\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(210, 25)"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_merged.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [],
	"source": [
	"# NOTE: despite the `_pct` suffix this column holds a fraction (percentage / 100).\n",
	"# `dataValue_y` is the coverage value contributed by the right-hand frame of the merge.\n",
	"df_merged[\"unconnected_pct\"] = df_merged[\"dataValue_y\"].rsub(100).div(100)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"np.float64(234573691.57819983)"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Population (left-hand `dataValue_x`) weighted by the uncovered fraction, summed over all merged rows.\n",
	"df_merged[\"dataValue_x\"].mul(df_merged[\"unconnected_pct\"]).sum()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"234.57369157819983"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Same total expressed in millions, recomputed from `df_merged` instead of a literal pasted\n",
	"# from an earlier output, so it cannot go stale when the data or filters change.\n",
	"# float() converts the NumPy scalar to a plain Python float before dividing.\n",
	"float((df_merged[\"dataValue_x\"] * df_merged[\"unconnected_pct\"]).sum()) / 1e6"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "ihme",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.11.11"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}