Skip to content

Instantly share code, notes, and snippets.

@calebrob6
Created February 10, 2025 16:50
Show Gist options
  • Save calebrob6/59aff6a92ee39010237e9e16379f4297 to your computer and use it in GitHub Desktop.
Save calebrob6/59aff6a92ee39010237e9e16379f4297 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read both CSV exports in one pass; paths are relative to the working directory.\n",
"df_connectivity, df_population = map(\n",
"    pd.read_csv,\n",
"    (\n",
"        'data/population-coverage-by-mobile-network-technology_1739205819280.csv',\n",
"        'data/total-population_1739205865536.csv',\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>seriesID</th>\n",
" <th>seriesCode</th>\n",
" <th>seriesName</th>\n",
" <th>seriesParent</th>\n",
" <th>seriesUnits</th>\n",
" <th>entityID</th>\n",
" <th>entityIso</th>\n",
" <th>entityName</th>\n",
" <th>dataValue</th>\n",
" <th>dataYear</th>\n",
" <th>dataNote</th>\n",
" <th>dataSource</th>\n",
" <th>seriesDescription</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>194</td>\n",
" <td>i271pop</td>\n",
" <td>At least 2G</td>\n",
" <td>NaN</td>\n",
" <td>%</td>\n",
" <td>4</td>\n",
" <td>ABW</td>\n",
" <td>Aruba</td>\n",
" <td>90.0</td>\n",
" <td>2005</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Refers to the percentage of inhabitants within...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>194</td>\n",
" <td>i271pop</td>\n",
" <td>At least 2G</td>\n",
" <td>NaN</td>\n",
" <td>%</td>\n",
" <td>4</td>\n",
" <td>ABW</td>\n",
" <td>Aruba</td>\n",
" <td>99.0</td>\n",
" <td>2006</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Refers to the percentage of inhabitants within...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" seriesID seriesCode seriesName seriesParent seriesUnits entityID \\\n",
"0 194 i271pop At least 2G NaN % 4 \n",
"1 194 i271pop At least 2G NaN % 4 \n",
"\n",
" entityIso entityName dataValue dataYear dataNote dataSource \\\n",
"0 ABW Aruba 90.0 2005 NaN NaN \n",
"1 ABW Aruba 99.0 2006 NaN NaN \n",
"\n",
" seriesDescription \n",
"0 Refers to the percentage of inhabitants within... \n",
"1 Refers to the percentage of inhabitants within... "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first two rows of the coverage table to check its column layout.\n",
"df_connectivity.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>seriesID</th>\n",
" <th>seriesCode</th>\n",
" <th>seriesName</th>\n",
" <th>seriesParent</th>\n",
" <th>seriesUnits</th>\n",
" <th>entityID</th>\n",
" <th>entityIso</th>\n",
" <th>entityName</th>\n",
" <th>dataValue</th>\n",
" <th>dataYear</th>\n",
" <th>dataNote</th>\n",
" <th>dataSource</th>\n",
" <th>seriesDescription</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>260</td>\n",
" <td>i61</td>\n",
" <td>Total population</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>ABW</td>\n",
" <td>Aruba</td>\n",
" <td>88761.0</td>\n",
" <td>2000</td>\n",
" <td>NaN</td>\n",
" <td>UN Population Division World Population Prospe...</td>\n",
" <td>Total population</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>260</td>\n",
" <td>i61</td>\n",
" <td>Total population</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>ABW</td>\n",
" <td>Aruba</td>\n",
" <td>90305.0</td>\n",
" <td>2001</td>\n",
" <td>NaN</td>\n",
" <td>UN Population Division World Population Prospe...</td>\n",
" <td>Total population</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" seriesID seriesCode seriesName seriesParent seriesUnits entityID \\\n",
"0 260 i61 Total population NaN NaN 4 \n",
"1 260 i61 Total population NaN NaN 4 \n",
"\n",
" entityIso entityName dataValue dataYear dataNote \\\n",
"0 ABW Aruba 88761.0 2000 NaN \n",
"1 ABW Aruba 90305.0 2001 NaN \n",
"\n",
" dataSource seriesDescription \n",
"0 UN Population Division World Population Prospe... Total population \n",
"1 UN Population Division World Population Prospe... Total population "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first two rows of the population table to check its column layout.\n",
"df_population.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Keep only 2020 rows in both tables; the coverage table is additionally\n",
"# narrowed to the \"At least 2G\" series.\n",
"df_connectivity_2020 = df_connectivity.loc[\n",
"    df_connectivity[\"dataYear\"].eq(2020) & df_connectivity[\"seriesName\"].eq(\"At least 2G\")\n",
"]\n",
"df_population_2020 = df_population.loc[df_population[\"dataYear\"].eq(2020)]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((232, 13), (210, 13))"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of each 2020 subset, population first and connectivity second.\n",
"df_population_2020.shape, df_connectivity_2020.shape"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Inner join on ISO code keeps only entities present in both 2020 subsets.\n",
"# Column names shared by the two tables get pandas' default suffixes:\n",
"# `_x` for the left (population) frame and `_y` for the right (connectivity) frame.\n",
"# validate=\"one_to_one\" makes pandas raise a MergeError if an ISO code repeats on\n",
"# either side, which would otherwise duplicate rows and inflate any sum computed\n",
"# from the merged frame.\n",
"df_merged = pd.merge(\n",
"    df_population_2020,\n",
"    df_connectivity_2020,\n",
"    on=\"entityIso\",\n",
"    how=\"inner\",\n",
"    validate=\"one_to_one\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(210, 25)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (rows, columns) of the merged frame, to see how many entities survived the join.\n",
"df_merged.shape"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Uncovered share per entity: (100 - coverage value) / 100. Despite the `_pct`\n",
"# suffix, the stored number is a fraction rather than a percentage.\n",
"df_merged[\"unconnected_pct\"] = df_merged[\"dataValue_y\"].rsub(100).div(100)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(234573691.57819983)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Population multiplied by the uncovered fraction, summed over all merged rows.\n",
"df_merged[\"dataValue_x\"].mul(df_merged[\"unconnected_pct\"]).sum()"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"234.57369157819983"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same total expressed in millions. Recomputed from df_merged instead of being\n",
"# pasted in as a literal, so it stays correct if the upstream data or filters change\n",
"# and the cell works under Restart & Run All.\n",
"(df_merged[\"dataValue_x\"] * df_merged[\"unconnected_pct\"]).sum() / 1e6"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ihme",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment