Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ricalanis/65c21d15069afb80fc50479f98d374d2 to your computer and use it in GitHub Desktop.
Save ricalanis/65c21d15069afb80fc50479f98d374d2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from bs4 import BeautifulSoup\n",
"import requests\n",
"import pandas\n",
"import simplejson"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def clean_string(input_string):\n",
" input_string =str(input_string)\n",
" input_string =input_string.replace(\"\\xa0\",\"\")\n",
" input_string =input_string.replace(\"\\r\",\"\")\n",
" input_string =input_string.replace(\"\\n\",\"\")\n",
" input_string =input_string.replace(\"<label>\",\"\")\n",
" input_string =input_string.replace(\"</label>\",\"\")\n",
" input_string =input_string.replace(\"</td>\",\"\")\n",
" input_string =input_string.replace(\"</span>\",\"\")\n",
" input_string =input_string.strip()\n",
" lista_string = input_string.split(':') \n",
" return lista_string[1]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def extract_page(url):\n",
" request_pagina = requests.get(url)\n",
" soup_pagina = BeautifulSoup(request_pagina.text)\n",
" data =extract_data(soup_pagina)\n",
" return data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def extract_data(soup_pagina):\n",
" page_table = soup_pagina.find_all('table')\n",
" td_table = page_table[5].find_all('td')\n",
" i = 4\n",
" lista_respuestas = []\n",
" while i < 24:\n",
" lista_respuestas.append(clean_string(td_table[i]))\n",
" i = i + 1\n",
" return lista_respuestas"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_links(soup):\n",
" links_pagina = []\n",
" for link in soup.find_all('a'):\n",
" direccion_link = link.get('href',None)\n",
" try: \n",
" if \"Extra_FlowController_1id\" in direccion_link: \n",
" if direccion_link not in links_pagina:\n",
" links_pagina.append(direccion_link)\n",
" except:\n",
" print(\"\")\n",
" return links_pagina"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system (\"lxml\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n",
"\n",
"The code that caused this warning is on line 193 of the file /usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py. To get rid of this warning, change code that looks like this:\n",
"\n",
" BeautifulSoup([your markup])\n",
"\n",
"to this:\n",
"\n",
" BeautifulSoup([your markup], \"lxml\")\n",
"\n",
" markup_type=markup_type))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-34-1d27adb1a63c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_page\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mlista_registros\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"http://www.cns.gob.mx:80/extraviadosWeb/portals/extraviados.portal?_nfpb=true&_st=&_windowLabel=Extra_FlowController_1&Extra_FlowController_1_actionOverride=%2FConsulta%2FExtra_Flow%2Fsiguientes\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0msoup\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBeautifulSoup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpage_source\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"lxml\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mlinks_pagina\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_links\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msoup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, url)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0mLoads\u001b[0m \u001b[0ma\u001b[0m \u001b[0mweb\u001b[0m \u001b[0mpage\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcurrent\u001b[0m \u001b[0mbrowser\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 263\u001b[0m \"\"\"\n\u001b[0;32m--> 264\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGET\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'url'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrap_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/remote_connection.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, command, params)\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTemplate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubstitute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[0murl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'%s%s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 464\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 465\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 466\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/remote_connection.py\u001b[0m in \u001b[0;36m_request\u001b[0;34m(self, method, url, body)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_conn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparsed_url\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 488\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_conn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 489\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mhttplib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mHTTPException\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 490\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_conn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1332\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1333\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 298\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"driver = webdriver.Chrome()\n",
"driver.get(\"http://www.cns.gob.mx/extraviadosWeb/portals/extraviados.portal\")\n",
"age_element = driver.find_element_by_name(\"Extra_FlowController_1wlw-select_key:{actionForm.edad}\")\n",
"age_element.send_keys(\"Ma\")\n",
"search_element =driver.find_element_by_name(\"Submit\")\n",
"search_element.click()\n",
"soup=BeautifulSoup(driver.page_source,\"lxml\")\n",
"links_pagina = get_links(soup)\n",
"lista_registros = []\n",
"while len(links_pagina)>0:\n",
" for link in links_pagina:\n",
" data = extract_page(link)\n",
" lista_registros.append(data)\n",
" driver.get(\"http://www.cns.gob.mx:80/extraviadosWeb/portals/extraviados.portal?_nfpb=true&_st=&_windowLabel=Extra_FlowController_1&Extra_FlowController_1_actionOverride=%2FConsulta%2FExtra_Flow%2Fsiguientes\")\n",
" soup=BeautifulSoup(driver.page_source,\"lxml\")\n",
" links_pagina = get_links(soup)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['JUAREZGONZALEZ CAROLINA ANDREA',\n",
" '04/11/1973',\n",
" ' 43 años',\n",
" ' 173 cms.',\n",
" ' 70',\n",
" ' ROBUSTA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA CHICA',\n",
" ' MEDIANOS REDONDOS CAFÉ OBSCURO',\n",
" ' NORMAL CASTAÑO OBSCURO LACIO LARGO',\n",
" ' CHICA',\n",
" ' RESPINGADA DELGADA PEQUEÑA',\n",
" ' REGULAR',\n",
" ' TATUADAS RECTAS NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' TRIANGULAR MEDIANO',\n",
" '05/10/2017',\n",
" ' EN SU DOMICILIO',\n",
" 'SALIO DE SU DOMICILIO ALREDEDOR DE LAS 10 DE LA MAÑANA',\n",
" '',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=KshchHvQJT5GGRBPfdB9TKLYX7ZhGGvF64RxTkSGx9pbFJGpQFPP!169856990?photoid=15804&campophoto=PE_BIMG2'],\n",
" ['MONTEFORTHERNANDEZ MARIA DE LA LUZ',\n",
" '17/02/1928',\n",
" ' 89 años',\n",
" ' 162 cms.',\n",
" ' 50',\n",
" ' DELGADA',\n",
" ' BLANCA',\n",
" ' OVALADA CHICA',\n",
" ' PEQUEÑOS OVALADOS CAFÉ CLARO',\n",
" ' ESCASO CANO QUEBRADO CORTO',\n",
" ' MEDIANA',\n",
" ' AFILADA DELGADA MEDIANA',\n",
" ' REGULAR',\n",
" ' ESCASAS RECTAS CASTAÑO',\n",
" ' ROSAS DELGADOS',\n",
" ' CUADRADO MEDIANO',\n",
" '15/09/2017',\n",
" ' SU DOMICILIO',\n",
" 'LA SRA. SALIO DE SU DOMICILIO EN LA CD DE COLIMA',\n",
" 'PELO CANO, VESTIA PANTALON ROSA, BLUSA VERDE, CHAMARRA LILA, BOLSA ROJA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=zZfthHlW1pypFjCfM6nXkbF0762GlJp3p1fPGJLQBC2vyh0TD0rp!1620356072?photoid=15799&campophoto=PE_BIMG2'],\n",
" ['LOPEZROQUE MARIA DE LOS ANGELES',\n",
" '02/08/1996',\n",
" ' 22 años',\n",
" ' 140 cms.',\n",
" ' 60',\n",
" ' ROBUSTA',\n",
" ' MORENA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS OVALADOS NEGROS',\n",
" ' ABUNDANTE NEGRO QUEBRADO MEDIANO',\n",
" ' MEDIANA',\n",
" ' CHATA ANCHA PEQUEÑA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' MORADOS GRUESOS',\n",
" ' TRIANGULAR GRANDE',\n",
" '11/09/2017',\n",
" ' CENTRO DE OAXACA',\n",
" 'SALIO A SU TRABAJO Y NO VOLVIÓ.',\n",
" 'TIENE UN LUNAR GRANDE DE COLOR NEGRO EN LA PIERNA IZQUIERDA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=MJY5hHvBLSh4bCdDr3W4g1CrMLjTGx9D3cGVx1Gn6JDLDVlQKhrl!169856990?photoid=15797&campophoto=PE_BIMG2'],\n",
" ['VALDOVINOSPADILLA SUSANA',\n",
" '26/09/1994',\n",
" ' 24 años',\n",
" ' 157 cms.',\n",
" ' 45',\n",
" ' DELGADA',\n",
" ' BLANCA',\n",
" ' REDONDA MEDIANA',\n",
" ' GRANDES OVALADOS CAFÉ CLARO',\n",
" ' ABUNDANTE CASTAÑO CLARO QUEBRADO LARGO',\n",
" ' GRANDE',\n",
" ' RECTILINEA AGUILEÑA MEDIANA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS LINEAL CASTAÑO',\n",
" ' ROSAS DELGADOS',\n",
" ' REDONDO MEDIANO',\n",
" '30/08/2017',\n",
" ' JIQUILPAN',\n",
" 'LLEVABA LOS NIÑOS ALA ESCUELA Y YA NO VOLVIERON',\n",
" 'LLEVA A 3 MENORES CON ELLA Y TIENE UNA MORDIDA DE UN PERRO EN LABIO SUPERIOR IZQUIERDO ES GUERA SE PINTA EL CABELLO',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=HLJNhHlXlmmYQQr3QQ3rVHmFykt2mSTQGzxtMzlnWW8nVG2Mx5NS!1620356072?photoid=15792&campophoto=PE_BIMG2'],\n",
" ['GONZALEZLINARES JOCABETH',\n",
" '20/08/1987',\n",
" ' 30 años',\n",
" ' 156 cms.',\n",
" ' 60',\n",
" ' MEDIANA',\n",
" ' BLANCA',\n",
" ' REDONDA MEDIANA',\n",
" ' MIEL GRANDES RASGADOS MIEL',\n",
" ' NORMAL NEGRO LACIO LARGO',\n",
" ' MEDIANA',\n",
" ' RECTILINEA ANCHA MEDIANA',\n",
" ' ANGOSTA',\n",
" ' PINTADAS SE DESCONOCE NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' CUADRADO MEDIANO',\n",
" '25/08/2017',\n",
" ' AVENIDA PATRIOTISMO',\n",
" 'SALIO DE TRABAJARA LAS 6 DE LA TARDE Y YA NO LLEGO A CASA.',\n",
" 'LUNAR EN MENTON DE LADO DERECHO',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=lJmwhHvCjGJMdYXLLpFktsldr14qszHSVthZBC2n5mdT3rcTryHB!169856990?photoid=15790&campophoto=PE_BIMG2'],\n",
" ['VILLALOBOS CORDERO MERCEDES',\n",
" '05/08/1997',\n",
" ' 20 años',\n",
" ' 164 cms.',\n",
" ' 65',\n",
" ' DELGADA',\n",
" ' MORENA OSCURA',\n",
" ' REDONDA MEDIANA',\n",
" ' GRANDES REDONDOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE NEGRO CHINO LARGO',\n",
" ' GRANDE',\n",
" ' CHATA ANCHA PEQUEÑA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS SEPARADAS NEGRO',\n",
" ' ROSAS GRUESOS',\n",
" ' REDONDO MEDIANO',\n",
" '09/08/2017',\n",
" ' CALLE ALTAMIRANO',\n",
" 'DISCUSIÓN CON SU CONCUBINO',\n",
" 'OPERACIÓN PARTO POR CESÁREA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=b1v0hHlYlGQR9nVPgYQQBn1kCql7JYQ2WV6v38pz6sbBKLk8KGTP!1620356072?photoid=15782&campophoto=PE_BIMG2'],\n",
" ['MEDINADE LA ROSA IMELDA',\n",
" '02/10/1998',\n",
" ' 18 años',\n",
" ' 170 cms.',\n",
" ' 78',\n",
" ' ROBUSTA',\n",
" ' MORENA CLARA',\n",
" ' REDONDA MEDIANA',\n",
" ' PEQUEÑOS RASGADOS NEGROS',\n",
" ' ABUNDANTE NEGRO CHINO MEDIANO',\n",
" ' GRANDE',\n",
" ' CHATA ANCHA PEQUEÑA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' CUADRANGULAR CHICO',\n",
" '01/08/2017',\n",
" ' EN SU DOMICILIO',\n",
" 'LA NOCHE DEL 31 DE JULIO DEL 2017 TODOS LOS QUE VIVIMOS EN LA CASA NOS RETIRAMOS ADORMIR COMO DE COSTUMBRE Y A LA MAÑANA SIGUIENTE DEL DÍA 01 DE AGOSTO DEL 2017 AL HABLARLE ELLA NO CONTESTO POR LO CUAL DECIDIMOS ENTRAR A SU RECAMARA Y ELLA YA NO ESTABA. ESPERAMOS HABER SI SE COMUNICABA O REGRESABA Y HASTA EL MOMENTO NO SABEMOS NADA DE ELLA.',\n",
" 'RASPONES EN BRAZO IZQUIERDO, HOMBRO IZQUIERDO, PIERNA IZQUIERDA Y TOBILLO IZQUIERDO.',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=v1wGhHvD31c4nxf9dLK21nQRjQhr1Bkd2rhdK1pL5k0QpgD4Rvyx!169856990?photoid=15779&campophoto=PE_BIMG2'],\n",
" ['AGUILARRODRIGUEZ MARCELA ADRIANA',\n",
" '10/01/1981',\n",
" ' 36 años',\n",
" ' 170 cms.',\n",
" ' 65',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS RASGADOS CAFÉ CLARO',\n",
" ' ABUNDANTE CASTAÑO CLARO LACIO LARGO',\n",
" ' MEDIANA',\n",
" ' CHATA DELGADA MEDIANA',\n",
" ' REGULAR',\n",
" ' POBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '28/07/2017',\n",
" ' PUERTO VALLARTA JALISCO',\n",
" 'VIAJO A LA CIUDAD DE MEXICO EL JUEVES 27 PARA IR EL VIERNES 28 AL MUNICIPIO DE JALCOMULCO VERACRUZ CON MIGUEL ANGEL RIZO VARGAS, IBAN A UNA BODA OTRAS CUATRO PERSONAS, DE AHI NO SABEMOS NADA',\n",
" '',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=GVjPhHlZ5gHsygYk0H0zjrVVG58RHbXwhPnNR41LMlQV5kxyNZ2f!1620356072?photoid=15772&campophoto=PE_BIMG2'],\n",
" ['MEZAHUERTA BRENDA VIRIDIANA',\n",
" '26/01/1999',\n",
" ' 18 años',\n",
" ' 160 cms.',\n",
" ' 60',\n",
" ' DELGADA',\n",
" ' MORENA',\n",
" ' REDONDA CHICA',\n",
" ' PEQUEÑOS RASGADOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE NEGRO LACIO LARGO',\n",
" ' CHICA',\n",
" ' CHATA DELGADA PEQUEÑA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS LINEAL NEGRO',\n",
" ' ROSAS DELGADOS',\n",
" ' REDONDO CHICO',\n",
" '24/07/2017',\n",
" ' CASA DE SU ABUELITA',\n",
" 'SALIO DE CASA DE SU ABUELITA EN UN COCHE JETTA AZUL CON N° DE PLACAS XWZ2920',\n",
" 'UN TATUAJE EN UN COSTADO DE LA CINTURA DEL LADO DERECHO EN FORMA DE DIAMANTE CON UN INFINITO',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=Lys8hHvGnKnlC2Mh17dyRG7f3V1SKdy9Cgpy1XyGK920wMhWH5qG!169856990?photoid=15764&campophoto=PE_BIMG2'],\n",
" ['MARTINEZCAMARGO ARACELI',\n",
" '18/04/1976',\n",
" ' 41 años',\n",
" ' 150 cms.',\n",
" ' 50',\n",
" ' MEDIANA',\n",
" ' MORENA',\n",
" ' CUADRADA GRANDE',\n",
" ' MEDIANOS RASGADOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE NEGRO QUEBRADO LARGO',\n",
" ' MEDIANA',\n",
" ' AFILADA AGUILEÑA MEDIANA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS DELGADOS',\n",
" ' REDONDO MEDIANO',\n",
" '09/07/2017',\n",
" ' CIUDAD DE MEXICO',\n",
" 'EL DOMINGO ME FUI A LA IGLESIA TEMPRANO Y CUANDO REGRESE ALREDEDOR DE LAS 12',\n",
" 'NINGUNA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=1ZQphHlc2hDFnQzDjq0YhT7ZYCQZH1Zfhxf64MPFQqbQnR7gynRf!1620356072?photoid=15749&campophoto=PE_BIMG2'],\n",
" ['MENDOZACAMPOS LUISA MARIA',\n",
" '03/12/1993',\n",
" ' 23 años',\n",
" ' 154 cms.',\n",
" ' 75',\n",
" ' ROBUSTA',\n",
" ' MORENA CLARA',\n",
" ' REDONDA MEDIANA',\n",
" ' MEDIANOS RASGADOS CAFÉ OBSCURO',\n",
" ' NORMAL CASTAÑO OBSCURO QUEBRADO MEDIANO',\n",
" ' MEDIANA',\n",
" ' CHATA BOLA MEDIANA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS RECTAS NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '26/06/2017',\n",
" ' EN SU CASA ANTES DE QUE SALIERA A LA ESCUELA',\n",
" 'SUBIÓ A LA COMBI EN LA PARADA DE LOS ACEITES CON RUMBO A LA UNIVERSIDAD, TECNOLOGICO UNIVERSITARIO NAUCALPAN, NO LLEGO A LA ESCUELA',\n",
" 'TATUAJE EN MUÑECA IZQUIERDA DE UNA CRUZ Y EL NOMBRE \"DAVID\"',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=5sTYhHvH7qw5t1y7f5Xx6xcZCTv8YR1PNyJhGb0Qg8FPzfl6Bxwd!169856990?photoid=15730&campophoto=PE_BIMG2'],\n",
" ['GUERREROROSALES LUZ DANIELA',\n",
" '16/02/1999',\n",
" ' 18 años',\n",
" ' 158 cms.',\n",
" ' 65',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' REDONDA CHICA',\n",
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE CASTAÑO CLARO LACIO LARGO',\n",
" ' MEDIANA',\n",
" ' AFILADA ANCHA MEDIANA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS DELGADOS',\n",
" ' TRIANGULAR MEDIANO',\n",
" '16/06/2017',\n",
" ' SE LE VIO AFUERA DE SU DOMICILIO',\n",
" 'ESE DIA SALIO A LA PLAZA DE LA COLONIA EL DORADO DE GOMEZ PALACIO DURANGO PARA VER A UN NOVIO QUE TRAIA DE NOMBRE SALVADOR BALTIERRE HERNANDEZ DE 20 AÑOS DE EDAD OJOS BORRADOS Y QUE YA ESE DIA NO REGRESO A LA CASA Y SE DESCONOCE EL PARADERO DE AMBOS',\n",
" '',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=plyMhHld1mtzpbYvCQkGbmcrZYLKTPQ1pkX3y6ByhsvKnLyDx061!1620356072?photoid=15768&campophoto=PE_BIMG2'],\n",
" ['GABRIELGONZALEZ MARIA ELENA',\n",
" '01/11/1985',\n",
" ' 31 años',\n",
" ' 157 cms.',\n",
" ' 70',\n",
" ' ROBUSTA',\n",
" ' MORENA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS RASGADOS CAFE OBSCURO',\n",
" ' NORMAL NEGRO QUEBRADO MEDIANO',\n",
" ' MEDIANA',\n",
" ' AFILADA DELGADA MEDIANA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS HACIA ABAJO NEGRO',\n",
" ' ROJOS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '06/06/2017',\n",
" ' CONSTITUCION DE LA REPUBLICA',\n",
" 'SE QUEDO EN LA ESQUINA DE AV. PINOS ESPERANDO TRANSPORTE PUBLICO PARA IR A REALIZAR TRAMITE DE PAPELES ESCOLARES, QUEDO EN COMUNICARSE MÁS TARDE Y NO LO HIZO',\n",
" 'CICATRIZ LABIO SUPERIOR DERECHO,CICATRIZ EN PECHO CASI A LA ALTURA DEL CUELLO,TATUAJE EN OMBLIGO DE FLORES, TATUAJE EN TOBILLO DERECHO',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=SGCVhHvLnJPyN1qHlgJ7yDsnMrnSGphSvkZX2GG0MZsTvh9RVC1N!169856990?photoid=15714&campophoto=PE_BIMG2'],\n",
" ['ALVAREZ DIAZ MIREYA JAZMIN',\n",
" '16/03/1992',\n",
" ' 25 años',\n",
" ' 155 cms.',\n",
" ' 50',\n",
" ' ROBUSTA',\n",
" ' BLANCA',\n",
" ' REDONDA MEDIANA',\n",
" ' MEDIANOS REDONDOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE CASTAÑO CLARO LACIO MEDIANO',\n",
" ' MEDIANA',\n",
" ' CHATA ANCHA MEDIANA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS LINEAL CASTAÑO',\n",
" ' ROSAS DELGADOS',\n",
" ' REDONDO MEDIANO',\n",
" '01/06/2017',\n",
" ' SAN MATEO ATARASQUILLO',\n",
" 'SE DESPARECIO DE MI DOMICILIO CON MI MENOR HIJO.',\n",
" 'LUNAR POMULO DERECHO.',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=0vrjhHlpmnDSChZqyy61QV3mMLWyZC8RPXxsr3YLh61x1QhnNV5X!1620356072?photoid=15710&campophoto=PE_BIMG2'],\n",
" ['MOTAMARTINEZ MAYRA MICHELL',\n",
" '29/12/1983',\n",
" ' 33 años',\n",
" ' 150 cms.',\n",
" ' 60',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA CHICA',\n",
" ' MEDIANOS RASGADOS CAFÉ CLARO',\n",
" ' NORMAL CASTAÑO CLARO LACIO MEDIANO',\n",
" ' MEDIANA',\n",
" ' RECTILINEA BOLA MEDIANA',\n",
" ' REGULAR',\n",
" ' POBLADAS HACIA ARRIBA NEGRO',\n",
" ' SE DESCONOCE GRUESOS',\n",
" ' TRIANGULAR CHICO',\n",
" '14/05/2017',\n",
" ' TIJUANA BAJA CALIFORNIA',\n",
" 'EL DÍA DOMINGO 14 DE MAYO DEL PRESENTE AÑO, SALIÓ DE SU DOMICILIO UBICADO EN LA COLONIA BUENOS AIRES NORTE DE LA CIUDAD DE TIJUANA BAJA CALIFORNIA, DESCONOCIÉNDOSE CON QUIEN Y A DONDE SE DIRIGIÓ, PERO TRANSMITIÓ UN VÍDEO EN VIVO POR FACEBOOK DESDE UN ANTRO LLAMADO COPEO A LAS 01',\n",
" 'CUENTA CON DOS LUNARES COLOR NEGRO, UNO DEBAJO DEL PECHO A LA ALTURA DE LA CLAVÍCULA Y EL OTRO EN EL LADO DERECHO DEL CUELLO.',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=QhfnhHvJngHypTHvvSn0JdwJdqvxD8zqBXcN7pJh0hbdNsJpS1Q1!169856990?photoid=15758&campophoto=PE_BIMG2'],\n",
" ['ALEJOBERNAL GLORIA',\n",
" '22/10/1968',\n",
" ' 48 años',\n",
" ' 150 cms.',\n",
" ' 48',\n",
" ' DELGADA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n",
" ' NORMAL NEGRO OTRO LARGO',\n",
" ' GRANDE',\n",
" ' DELGADA DELGADA MEDIANA',\n",
" ' REGULAR',\n",
" ' POBLADAS SEPARADAS CASTAÑO',\n",
" ' ROSAS GRUESOS',\n",
" ' REDONDO MEDIANO',\n",
" '04/05/2017',\n",
" ' EN SU TRABAJO UBICADO EN CALLE LAGUNA LUNA',\n",
" 'SALIO DE SU TRABAJO Y NO LLEGO A SU CASA AL PARECER RECIBIO UNA LLAMADA PERO NO SE SABE DE QUIEN',\n",
" 'CICATRIZ QUIRURGICA EN EMPEINE PIE DERECHO ,LUNAR ROJO TIPO MANCHA EN BRAZO IZQUIERDO Y MANCHA BLANCA EN VIENTRE HACIA EL AREA INGUINAL',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/Consulta/Extra_Flow/./../../Consulta/resources/images/silueta2.jpg;jsessionid=r3kyhHlfpJ7Mwjv1m3vmGRTGD92gLJ1P2b2279XC77n10G1QHTV7!1620356072'],\n",
" ['LOPEZSILVA ANA ROSA',\n",
" '30/08/1989',\n",
" ' 27 años',\n",
" ' 160 cms.',\n",
" ' 58',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS OVALADOS NEGROS',\n",
" ' ABUNDANTE NEGRO LACIO MEDIANO',\n",
" ' CHICA',\n",
" ' CHATA DELGADA MEDIANA',\n",
" ' AMPLIA',\n",
" ' PINTADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS DELGADOS',\n",
" ' REDONDO MEDIANO',\n",
" '02/05/2017',\n",
" ' GUERRERO',\n",
" 'EL 02 DE MAYO DE 2017, ANA SALIÓ DE CASA DE SU TÍA UBICADA EN ACAPULCO, GUERRERO, PARA VER A SUS MEDIAS HERMANAS QUE VIVEN EN EL MISMO MUNICIPIO, EN COMPAÑÍA DE SU MENOR HIJO, DESDE ENTONCES SE DESCONOCE SU PARADERO',\n",
" 'CLAVO QUIRÚRGICO EN EL PIE DERECHO, CICATRIZ DE CESARÍA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=j1CrhHvKvfcDnqdtk1HnPttWZpJQQpBhXx6QhsM5G0H9tmXzcJxQ!169856990?photoid=15692&campophoto=PE_BIMG2'],\n",
" ['TOLENTINOGARCIA MARGARITA',\n",
" '12/08/1930',\n",
" ' 87 años',\n",
" ' 120 cms.',\n",
" ' 45',\n",
" ' DELGADA',\n",
" ' MORENA CLARA',\n",
" ' CUADRADA MEDIANA',\n",
" ' PEQUEÑOS RASGADOS NEGROS',\n",
" ' NORMAL NEGRO LACIO MEDIANO',\n",
" ' MEDIANA',\n",
" ' REDONDA ANCHA MEDIANA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' TRIANGULAR CHICO',\n",
" '07/04/2017',\n",
" ' INTERIOR DEL METRO MERCED EN DIRECCION A PANTITLAN',\n",
" 'AL SUBIR AL METRO EN MERCED EN DIRECCION A PANTITLAN ELLA SE SUBIO AL METRO Y YO NO PUDE SUBIRME POR CUIDAR A OTROS FAMILIARES CERRANDOSE LAS PUERTAS Y RETIRARSE EL METRO',\n",
" 'LUNAR ABAJO DEL OJO LADO DERECHO, PELO NEGRO UN POCO CANOSO VESTIA CON VESTIDO COLOR GRIS CON DETALLES FLOREADOS COLOR NEGRO, ZAPATOS D PLASTI NEGROS',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=qGYXhHlQTj1Q5X1JgqdGLNBRBLyg2RNpqhMppZGS1SBD10DqBHq3!1620356072?photoid=15661&campophoto=PE_BIMG2'],\n",
" ['PEREZVELAZQUEZ GLORIA',\n",
" '01/05/1952',\n",
" ' 64 años',\n",
" ' 150 cms.',\n",
" ' 50',\n",
" ' DELGADA',\n",
" ' MORENA CLARA',\n",
" ' CUADRADA GRANDE',\n",
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n",
" ' NORMAL NEGRO LACIO CORTO',\n",
" ' MEDIANA',\n",
" ' CHATA ANCHA MEDIANA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS RECTAS NEGRO',\n",
" ' SE DESCONOCE MEDIANOS',\n",
" ' TRIANGULAR MEDIANO',\n",
" '21/03/2017',\n",
" ' CIUDAD DE MEXICO',\n",
" 'SE PIDE EL APOYO PARA LOCALIZAR A LA C. GLORIA PÉREZ VELAZQUEZ QUIEN SE ENCUENTRA DESAPARECIDA DESDE EL MARTES 21 DE MARZO DE 2017 POR LA TARDE.',\n",
" '',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=LTS2hHvMGKvnly6Mg26t88H4zSf2QBBxKxrhnDvMphtGpqyGdfyc!169856990?photoid=15646&campophoto=PE_BIMG2'],\n",
" ['JIMENEZ SOSA MINU HAYDEE',\n",
" '13/05/1976',\n",
" ' 41 años',\n",
" ' 169 cms.',\n",
" ' 58',\n",
" ' DELGADA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA MEDIANA',\n",
" ' PEQUEÑOS RASGADOS CAFE OBSCURO',\n",
" ' ABUNDANTE CASTAÑO OBSCURO QUEBRADO MEDIANO',\n",
" ' MEDIANA',\n",
" ' RECTILINEA BOLA AMPLIA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' MORADOS MEDIANOS',\n",
" ' CUADRANGULAR MEDIANO',\n",
" '16/03/2017',\n",
" ' CALLE TOLTECAS',\n",
" 'SE DIRIGIA A LA CENTRAL CAMIONERA SAN LAZARO Y YA NO SE VOLVIO A SABER DE ELLA',\n",
" '',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=sjlKhHlCQlXwbqRLlwGZcyvHGLk1yQ96f2hDZtY2Q5GqQk8Lcyw1!1620356072?photoid=15706&campophoto=PE_BIMG2'],\n",
" ['PARRAOLVERA LIZBETH YADIRA',\n",
" '29/06/1974',\n",
" ' 42 años',\n",
" ' 157 cms.',\n",
" ' 60',\n",
" ' MEDIANA',\n",
" ' APIÑONADA',\n",
" ' REDONDA MEDIANA',\n",
" ' GRANDES RASGADOS CAFÉ OBSCURO',\n",
" ' NORMAL CASTAÑO OBSCURO LACIO MEDIANO',\n",
" ' MEDIANA',\n",
" ' CHATA ANCHA MEDIANA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '14/03/2017',\n",
" ' OLIVAR DEL CONDE',\n",
" 'SALIO DE SU CASA PARA RECOGER UNAS COSAS EN EL TOREO',\n",
" 'NINGUNA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=fMRshHvN9nCnYcg5yxtJpPCdvDNGM5dgS3Jp6TGKJk2f11vJX3Kb!169856990?photoid=15639&campophoto=PE_BIMG2'],\n",
" ['SANCHEZPEREZ CARMEN IVETTE',\n",
" '11/06/1992',\n",
" ' 23 años',\n",
" ' 163 cms.',\n",
" ' 62',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS REDONDOS CAFÉ CLARO',\n",
" ' NORMAL NEGRO QUEBRADO MEDIANO',\n",
" ' MEDIANA',\n",
" ' RECTILINEA AGUILEÑA MEDIANA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS GRUESOS',\n",
" ' REDONDO MEDIANO',\n",
" '07/03/2017',\n",
" ' SAN LUIS POTOSI',\n",
" 'EL DÍA 07 DE MARZO DE 2017, CARMEN RECIBIÓ UNA LLAMADA A SU CELULAR, LA CUAL CONTESTÓ EN SU RECAMARA, TERMINANDO DE ALMORZAR SALIÓ DE SU CASA DICIENDO QUE IBA AL GIMNASIO. COMENTAN QUE NO LA VIERON LLEGAR A ESE LUGAR. DESDE ENTONCES SE DESCONOCE SU PARADERO <br/>',\n",
" 'TIENE BRAQUETES Y PECAS EN LOS PÓMULOS',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=CTXyhHlD1QcGjVtWzBLdgdmrvLsjLfNTs2NftCFXQwDYk3BJ1yfp!1620356072?photoid=15617&campophoto=PE_BIMG2'],\n",
" ['CHAVARRIA CARRILLO ITZEL ANAHI',\n",
" '10/08/1993',\n",
" ' 23 años',\n",
" ' 145 cms.',\n",
" ' 70',\n",
" ' MEDIANA',\n",
" ' BLANCA',\n",
" ' REDONDA MEDIANA',\n",
" ' MEDIANOS RASGADOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE CASTAÑO OBSCURO LACIO MEDIANO',\n",
" ' MEDIANA',\n",
" ' CHATA AGUILEÑA PEQUEÑA',\n",
" ' AMPLIA',\n",
" ' ESCASAS HACIA ABAJO CASTAÑO',\n",
" ' ROJOS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '06/03/2017',\n",
" ' DISTRITO FEDERAL',\n",
" 'EL DÍA 06 DE MARZO SALIÓ DE SU TRABAJO CON RUMBO A SU CASA LA CUAL QUEDA A POCAS CUADRAS, PERO NUNCA LLEGÓ. DESDE ENTONCES SE DESCONOCE SU PARADERO',\n",
" 'TIENE EL CABELLO PINTADO CON RAYITOS RUBIOS',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=071ThHvNM0sBhmh6jDF9SGhqV865bjHM9GydjhppRylTFh1mMDZQ!169856990?photoid=15614&campophoto=PE_BIMG2'],\n",
" ['PORTILLOMEJIA JUANA EDITH',\n",
" '24/06/1989',\n",
" ' 27 años',\n",
" ' 160 cms.',\n",
" ' 70',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA CHICA',\n",
" ' PEQUEÑOS RASGADOS CAFÉ OBSCURO',\n",
" ' ABUNDANTE NEGRO QUEBRADO LARGO',\n",
" ' CHICA',\n",
" ' RECTILINEA DELGADA PEQUEÑA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS SEPARADAS NEGRO',\n",
" ' ROJOS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '06/03/2017',\n",
" ' AL SALIR DE SU DOMICILIO ANTES MENCIONADO',\n",
" 'EL PASADO 6 DE MARZO SALIÓ DE SU DOMICILIO RUMBO AL TRABAJO A LAS 7',\n",
" 'TATUAJE EN MEDIO DE LA ESPALDA EN LA PARTE ALTA CON LA FIGURA DE UNA MARIPOSA CON LOS NOMBRES DE MELANIE Y MIGUEL',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/Consulta/Extra_Flow/./../../Consulta/resources/images/silueta2.jpg;jsessionid=nnp1hHlGKjgvvT50n2pytCTHJL12BJ4GRTbMGzMFl1zLvVpmc6RQ!1620356072'],\n",
" ['MANDUJANOMEDINA LOURDES',\n",
" '28/02/1962',\n",
" ' 54 años',\n",
" ' 154 cms.',\n",
" ' 74',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' OVALADA MEDIANA',\n",
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n",
" ' NORMAL CASTAÑO OBSCURO LACIO CORTO',\n",
" ' MEDIANA',\n",
" ' CHATA AGUILEÑA MEDIANA',\n",
" ' REGULAR',\n",
" ' SEMIPOBLADAS HACIA ARRIBA CASTAÑO',\n",
" ' ROSAS MEDIANOS',\n",
" ' REDONDO MEDIANO',\n",
" '24/02/2017',\n",
" ' CALLE TAURO EN LA PARADA DEL MICRO',\n",
" 'SALIA CON DIRECCIÓN A HGO',\n",
" 'PAÑO EN LA CARA',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/Consulta/Extra_Flow/./../../Consulta/resources/images/silueta2.jpg;jsessionid=XpGghHvPVqQL5fZG532M69yrysVvVPncnZ1B9j3tj3K1JFlLPx84!169856990'],\n",
" ['LARALOPEZ MARIA DE LOS ANGELES',\n",
" '02/09/1986',\n",
" ' 30 años',\n",
" ' 170 cms.',\n",
" ' 100',\n",
" ' ROBUSTA',\n",
" ' BLANCA',\n",
" ' REDONDA GRANDE',\n",
" ' GRANDES OVALADOS CAFÉ OBSCURO',\n",
" ' NORMAL RUBIO LACIO CORTO',\n",
" ' MEDIANA',\n",
" ' CHATA ANCHA MEDIANA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS HACIA ARRIBA CASTAÑO',\n",
" ' ROJOS DELGADOS',\n",
" ' REDONDO CHICO',\n",
" '23/02/2017',\n",
" ' SALIENDO DE SU DOMICILIO',\n",
" 'LLEGO DE TRABAJAR COMO A LAS 17',\n",
" 'CICATRIZ EN LABIO SUPERIOR PEQUENA, CICATRIZ DE CESAREA, MANCHAS EN LA PIEL',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=P2XvhHlFyJ5JrJ8pTGJGbPzn4bkPQvHjJfNVDsZVhhfpqDlZGN4C!1620356072?photoid=15603&campophoto=PE_BIMG2'],\n",
" ['VIZCARRA LEON ALEJANDRA',\n",
" '26/11/1993',\n",
" ' 22 años',\n",
" ' 148 cms.',\n",
" ' 52',\n",
" ' MEDIANA',\n",
" ' MORENA CLARA',\n",
" ' REDONDA CHICA',\n",
" ' PEQUEÑOS RASGADOS CAFÉ OBSCURO',\n",
" ' NORMAL CASTAÑO OBSCURO LACIO CORTO',\n",
" ' CHICA',\n",
" ' RECTILINEA ANCHA AMPLIA',\n",
" ' AMPLIA',\n",
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n",
" ' ROSAS MEDIANOS',\n",
" ' TRIANGULAR CHICO',\n",
" '16/02/2017',\n",
" ' TOWN CENTER ZUMPANGO',\n",
" 'SE TUVO CONTACTO CON ELLA POR MENSAJE HASTA LAS 3',\n",
" 'TATUAJE EN BRAZO IZQUIERDO, (\"AYE\"), PERFORACIÓN EN LA NARIZ Y EN LA CADERA TATUAJE DE UNA FLOR SIN TERMINAR.',\n",
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=DFVthHvQYB40WP2NdJGj9y22hM1QyTybQMtQHr2vR3tTQyvBffqx!169856990?photoid=15607&campophoto=PE_BIMG2']]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lista_registros"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def clean_string(input_string):\n",
"    \"\"\"Return the value part of a scraped 'label: value' table cell.\n",
"\n",
"    The argument is converted with str(); non-breaking spaces, line breaks\n",
"    and the label/td/span tags listed below are removed; the text after the\n",
"    FIRST colon is returned. Splitting only once keeps values that contain\n",
"    a colon themselves (for example a time such as 17:30) intact. When the\n",
"    cell has no colon the cleaned text is returned unchanged instead of\n",
"    raising IndexError.\n",
"\n",
"    The value is not stripped again, so whitespace that follows the colon\n",
"    is preserved.\n",
"    \"\"\"\n",
"    cleaned = str(input_string)\n",
"    for token in (\"\\xa0\", \"\\r\", \"\\n\", \"<label>\", \"</label>\", \"</td>\", \"</span>\"):\n",
"        cleaned = cleaned.replace(token, \"\")\n",
"    cleaned = cleaned.strip()\n",
"    # partition() cuts at the first colon only, so a second colon inside the\n",
"    # value no longer truncates it.\n",
"    _label, separator, value = cleaned.partition(\":\")\n",
"    return value if separator else cleaned"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def extract_page(url):\n",
"    \"\"\"Download the page at url and return what extract_data() finds in it.\n",
"\n",
"    Raises a requests exception when the server does not answer within the\n",
"    timeout or answers with an HTTP error status, rather than handing an\n",
"    error page to the parser.\n",
"    \"\"\"\n",
"    # requests waits indefinitely unless a timeout is given.\n",
"    respuesta = requests.get(url, timeout=30)\n",
"    respuesta.raise_for_status()\n",
"    # The parser is left unspecified, as before: extract_data() addresses\n",
"    # tables and images by position, and the parse tree may differ between\n",
"    # parsers.\n",
"    return extract_data(BeautifulSoup(respuesta.text))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def extract_data(soup_pagina):\n",
"    \"\"\"Collect the record fields and the photo URL from a parsed page.\n",
"\n",
"    Cells 4 to 23 of the sixth table are cleaned with clean_string(); the\n",
"    src of the 38th img tag, prefixed with the site host, is appended as\n",
"    the last element of the returned list. An IndexError is raised when the\n",
"    page has fewer tables, cells or images than those positions require.\n",
"    \"\"\"\n",
"    celdas = soup_pagina.find_all('table')[5].find_all('td')\n",
"    # Index explicitly (rather than slice) so a short table still fails loudly.\n",
"    lista_respuestas = [clean_string(celdas[i]) for i in range(4, 24)]\n",
"    # Search the images once and reuse the result.\n",
"    imagenes = soup_pagina.find_all('img')\n",
"    lista_respuestas.append(\"http://www.cns.gob.mx\" + imagenes[37][\"src\"])\n",
"    return lista_respuestas"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_links(soup):\n",
"    \"\"\"Return the distinct matching hrefs found in soup, in page order.\n",
"\n",
"    Only anchors whose href contains 'Extra_FlowController_1id' are kept,\n",
"    and each href is listed once. Anchors without an href are skipped\n",
"    explicitly instead of relying on a bare except to hide the TypeError\n",
"    raised by testing membership in None.\n",
"    \"\"\"\n",
"    links_pagina = []\n",
"    for link in soup.find_all('a'):\n",
"        direccion_link = link.get('href')\n",
"        if not direccion_link:\n",
"            continue\n",
"        if \"Extra_FlowController_1id\" in direccion_link and direccion_link not in links_pagina:\n",
"            links_pagina.append(direccion_link)\n",
"    return links_pagina"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# One row per scraped record, one column per extracted field.\n",
"df_lista_registros = pandas.DataFrame(data=lista_registros)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Imported here so the cell does not depend on another cell having\n",
"# brought the csv module into scope.\n",
"import csv\n",
"\n",
"# QUOTE_ALL wraps every field in quotes; the free-text fields contain commas.\n",
"df_lista_registros.to_csv(\"mujeres_ninas_mayores.csv\", quoting=csv.QUOTE_ALL)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-9-7be2c36cba81>, line 1)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-9-7be2c36cba81>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m lista = [<td class=\"bea-portal-layout-placeholder-container\">\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment