Skip to content

Instantly share code, notes, and snippets.

@ricalanis
Created February 21, 2018 17:40
Show Gist options
  • Save ricalanis/05804e58a3d7aefc819f02ed47d7f4f4 to your computer and use it in GitHub Desktop.
Save ricalanis/05804e58a3d7aefc819f02ed47d7f4f4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"page_content = requests.get(\"http://www.imss.gob.mx/directorio/?page=0\").content"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NOMBRE DE LA UNIDAD</th>\n",
" <th>TIPO DE UNIDAD</th>\n",
" <th>DIRECCION</th>\n",
" <th>ESTADO</th>\n",
" <th>TELEFONO</th>\n",
" <th>HORARIO</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>DELEGACIÓN - GUANAJUATODELEGACIÓN</td>\n",
" <td>ADMINISTRATIVAS DELEGACIÓN</td>\n",
" <td>BOULEVARD ADOLFO LÓPEZ MATEOS S/N LEÓN, COL. O...</td>\n",
" <td>GUANAJUATO</td>\n",
" <td>477 7174800 EXT.S/N</td>\n",
" <td>N/D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SUBDELEGACIÓN CIUDAD VALLESSUBDELEGACIÓN</td>\n",
" <td>ADMINISTRATIVAS SUBDELEGACIÓN</td>\n",
" <td>BOULEVARD MÉXICO LAREDO NORTE 934 CIUDAD VALLE...</td>\n",
" <td>SAN LUIS POTOSÍ</td>\n",
" <td>481 3823255 EXT.S/N</td>\n",
" <td>N/D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>HOSPITAL GENERAL DE ZONA (HGZ) 1MÓDULOS DE ATE...</td>\n",
" <td>MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN</td>\n",
" <td>JOSÉ MA. CHÁVEZ NO. 1202 , COL. LINDAVISTA</td>\n",
" <td>AGUASCALIENTES</td>\n",
" <td>NaN</td>\n",
" <td>7:00 - 21:30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>UNIDAD DE MEDICINA FAMILIAR (UMF) 11MÓDULOS DE...</td>\n",
" <td>MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN</td>\n",
" <td>CARR. COSTERA Y ANILLO PERIFÉRICO S/N , COL. C...</td>\n",
" <td>CHIAPAS</td>\n",
" <td>NaN</td>\n",
" <td>8:00 - 16:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>SUBDELEGACIÓN LA PAZSUBDELEGACIÓN</td>\n",
" <td>ADMINISTRATIVAS SUBDELEGACIÓN</td>\n",
" <td>BOULEVARD 5 DE FEBRERO S/N LA PAZ, COL. ZONA C...</td>\n",
" <td>BAJA CALIFORNIA SUR</td>\n",
" <td>612 1220715 EXT.100</td>\n",
" <td>N/D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>HOSPITAL GENERAL DE ZONA CON MEDICINA FAMILIAR...</td>\n",
" <td>MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN</td>\n",
" <td>BLVD. HAROLD R. PAPE S/N , COL. GUADALUPE , MO...</td>\n",
" <td>COAHUILA</td>\n",
" <td>NaN</td>\n",
" <td>8:00-20:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>SUBDELEGACIÓN TORREÓNSUBDELEGACIÓN</td>\n",
" <td>ADMINISTRATIVAS SUBDELEGACIÓN</td>\n",
" <td>BOULEVARD INDEPENDENCIA 450 PONIENTE TORREÓN, ...</td>\n",
" <td>COAHUILA</td>\n",
" <td>871 7127485 EXT.S/N</td>\n",
" <td>N/D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>UNIDAD DE MEDICINA FAMILIAR (UMF) 39MÓDULOS DE...</td>\n",
" <td>MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN</td>\n",
" <td>ESTORNINO Y PROL. RUIZ CORTINES , COL. VALLE V...</td>\n",
" <td>NUEVO LEÓN</td>\n",
" <td>NaN</td>\n",
" <td>8:00 - 16:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>SUB. PONIENTEMÓDULOS DE ATENCIÓN</td>\n",
" <td>MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN</td>\n",
" <td>CARRANZA NO. 1080</td>\n",
" <td>SAN LUIS POTOSÍ</td>\n",
" <td>NaN</td>\n",
" <td>7:30 - 15:30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>DELEGACIÓNMÓDULOS DE ATENCIÓN</td>\n",
" <td>MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN</td>\n",
" <td>CUAUHTÉMOC NO. 255 , COL. MODERNA</td>\n",
" <td>SAN LUIS POTOSÍ</td>\n",
" <td>NaN</td>\n",
" <td>8:00 - 16:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NOMBRE DE LA UNIDAD \\\n",
"0 DELEGACIÓN - GUANAJUATODELEGACIÓN \n",
"1 SUBDELEGACIÓN CIUDAD VALLESSUBDELEGACIÓN \n",
"2 HOSPITAL GENERAL DE ZONA (HGZ) 1MÓDULOS DE ATE... \n",
"3 UNIDAD DE MEDICINA FAMILIAR (UMF) 11MÓDULOS DE... \n",
"4 SUBDELEGACIÓN LA PAZSUBDELEGACIÓN \n",
"5 HOSPITAL GENERAL DE ZONA CON MEDICINA FAMILIAR... \n",
"6 SUBDELEGACIÓN TORREÓNSUBDELEGACIÓN \n",
"7 UNIDAD DE MEDICINA FAMILIAR (UMF) 39MÓDULOS DE... \n",
"8 SUB. PONIENTEMÓDULOS DE ATENCIÓN \n",
"9 DELEGACIÓNMÓDULOS DE ATENCIÓN \n",
"\n",
" TIPO DE UNIDAD \\\n",
"0 ADMINISTRATIVAS DELEGACIÓN \n",
"1 ADMINISTRATIVAS SUBDELEGACIÓN \n",
"2 MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN \n",
"3 MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN \n",
"4 ADMINISTRATIVAS SUBDELEGACIÓN \n",
"5 MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN \n",
"6 ADMINISTRATIVAS SUBDELEGACIÓN \n",
"7 MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN \n",
"8 MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN \n",
"9 MÓDULOS DE ATENCIÓN MÓDULOS DE ATENCIÓN \n",
"\n",
" DIRECCION ESTADO \\\n",
"0 BOULEVARD ADOLFO LÓPEZ MATEOS S/N LEÓN, COL. O... GUANAJUATO \n",
"1 BOULEVARD MÉXICO LAREDO NORTE 934 CIUDAD VALLE... SAN LUIS POTOSÍ \n",
"2 JOSÉ MA. CHÁVEZ NO. 1202 , COL. LINDAVISTA AGUASCALIENTES \n",
"3 CARR. COSTERA Y ANILLO PERIFÉRICO S/N , COL. C... CHIAPAS \n",
"4 BOULEVARD 5 DE FEBRERO S/N LA PAZ, COL. ZONA C... BAJA CALIFORNIA SUR \n",
"5 BLVD. HAROLD R. PAPE S/N , COL. GUADALUPE , MO... COAHUILA \n",
"6 BOULEVARD INDEPENDENCIA 450 PONIENTE TORREÓN, ... COAHUILA \n",
"7 ESTORNINO Y PROL. RUIZ CORTINES , COL. VALLE V... NUEVO LEÓN \n",
"8 CARRANZA NO. 1080 SAN LUIS POTOSÍ \n",
"9 CUAUHTÉMOC NO. 255 , COL. MODERNA SAN LUIS POTOSÍ \n",
"\n",
" TELEFONO HORARIO \n",
"0 477 7174800 EXT.S/N N/D \n",
"1 481 3823255 EXT.S/N N/D \n",
"2 NaN 7:00 - 21:30 \n",
"3 NaN 8:00 - 16:00 \n",
"4 612 1220715 EXT.100 N/D \n",
"5 NaN 8:00-20:00 \n",
"6 871 7127485 EXT.S/N N/D \n",
"7 NaN 8:00 - 16:00 \n",
"8 NaN 7:30 - 15:30 \n",
"9 NaN 8:00 - 16:00 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_html(page_content)[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def extract_lat_lon(datum):\n",
" split1 = datum.split(\"q=\")\n",
" split2 = split1[1].split(\",\")\n",
" lat = split2[0]\n",
" print(split2)\n",
" lon = split2[1].split(\"&\")[0]\n",
" return [lat, lon]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['21.138381', '-101.68694&z=15&iwloc=near&addr']\n",
"['21.996093', '-99.01131&z=15&iwloc=near&addr']\n",
"['21.81093256', '-102.4032585&z=15&iwloc=near&addr']\n",
"['16.32244169', '-91.7910619&z=15&iwloc=near&addr']\n",
"['24.146829', '-110.319533&z=15&iwloc=near&addr']\n",
"['26.9138465', '-101.4283989&z=15&iwloc=near&addr']\n",
"['25.544559', '-103.454207&z=15&iwloc=near&addr']\n",
"['25.7396017', '-100.3819828&z=15&iwloc=near&addr']\n",
"['22.150551', '100.9896561&z=15&iwloc=near&addr']\n",
"['22.1485445', '-100.9863461&z=15&iwloc=near&addr']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system (\"lxml\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n",
"\n",
"The code that caused this warning is on line 193 of the file /usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py. To get rid of this warning, change code that looks like this:\n",
"\n",
" BeautifulSoup([your markup])\n",
"\n",
"to this:\n",
"\n",
" BeautifulSoup([your markup], \"lxml\")\n",
"\n",
" markup_type=markup_type))\n"
]
},
{
"data": {
"text/plain": [
"[['21.138381', '-101.68694'],\n",
" ['21.996093', '-99.01131'],\n",
" ['21.81093256', '-102.4032585'],\n",
" ['16.32244169', '-91.7910619'],\n",
" ['24.146829', '-110.319533'],\n",
" ['26.9138465', '-101.4283989'],\n",
" ['25.544559', '-103.454207'],\n",
" ['25.7396017', '-100.3819828'],\n",
" ['22.150551', '100.9896561'],\n",
" ['22.1485445', '-100.9863461']]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[extract_lat_lon(a[\"href\"]) for a in BeautifulSoup(page_content).find_all(\"table\")[0].find_all(\"a\")]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment