Created
November 11, 2017 20:49
-
-
Save ricalanis/65c21d15069afb80fc50479f98d374d2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from selenium import webdriver\n", | |
"from selenium.webdriver.common.keys import Keys\n", | |
"from bs4 import BeautifulSoup\n", | |
"import requests\n", | |
"import pandas\n", | |
"import simplejson" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def clean_string(input_string):\n", | |
"    input_string =str(input_string)\n", | |
"    input_string =input_string.replace(\"\\xa0\",\"\")\n", | |
"    input_string =input_string.replace(\"\\r\",\"\")\n", | |
"    input_string =input_string.replace(\"\\n\",\"\")\n", | |
"    input_string =input_string.replace(\"<label>\",\"\")\n", | |
"    input_string =input_string.replace(\"</label>\",\"\")\n", | |
"    input_string =input_string.replace(\"</td>\",\"\")\n", | |
"    input_string =input_string.replace(\"</span>\",\"\")\n", | |
"    input_string =input_string.strip()\n", | |
"    lista_string = input_string.split(':') \n", | |
"    return lista_string[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def extract_page(url):\n", | |
"    request_pagina = requests.get(url)\n", | |
"    soup_pagina = BeautifulSoup(request_pagina.text, \"lxml\")\n", | |
"    data =extract_data(soup_pagina)\n", | |
"    return data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def extract_data(soup_pagina):\n", | |
"    page_table = soup_pagina.find_all('table')\n", | |
"    td_table = page_table[5].find_all('td')\n", | |
"    i = 4\n", | |
"    lista_respuestas = []\n", | |
"    while i < 24:\n", | |
"        lista_respuestas.append(clean_string(td_table[i]))\n", | |
"        i = i + 1\n", | |
"    return lista_respuestas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_links(soup):\n", | |
"    links_pagina = []\n", | |
"    for link in soup.find_all('a'):\n", | |
"        direccion_link = link.get('href',None)\n", | |
"        try: \n", | |
"            if \"Extra_FlowController_1id\" in direccion_link: \n", | |
"                if direccion_link not in links_pagina:\n", | |
"                    links_pagina.append(direccion_link)\n", | |
"        except:\n", | |
"            print(\"\")\n", | |
"    return links_pagina" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system (\"lxml\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n", | |
"\n", | |
"The code that caused this warning is on line 193 of the file /usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py. To get rid of this warning, change code that looks like this:\n", | |
"\n", | |
" BeautifulSoup([your markup])\n", | |
"\n", | |
"to this:\n", | |
"\n", | |
" BeautifulSoup([your markup], \"lxml\")\n", | |
"\n", | |
" markup_type=markup_type))\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-34-1d27adb1a63c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_page\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mlista_registros\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"http://www.cns.gob.mx:80/extraviadosWeb/portals/extraviados.portal?_nfpb=true&_st=&_windowLabel=Extra_FlowController_1&Extra_FlowController_1_actionOverride=%2FConsulta%2FExtra_Flow%2Fsiguientes\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0msoup\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBeautifulSoup\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpage_source\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"lxml\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mlinks_pagina\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_links\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msoup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, url)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0mLoads\u001b[0m \u001b[0ma\u001b[0m \u001b[0mweb\u001b[0m \u001b[0mpage\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcurrent\u001b[0m \u001b[0mbrowser\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 263\u001b[0m \"\"\"\n\u001b[0;32m--> 264\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGET\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'url'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wrap_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 251\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/remote_connection.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, command, params)\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTemplate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msubstitute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 463\u001b[0m \u001b[0murl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'%s%s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 464\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 465\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 466\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/remote_connection.py\u001b[0m in \u001b[0;36m_request\u001b[0;34m(self, method, url, body)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_conn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparsed_url\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 488\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_conn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 489\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mhttplib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mHTTPException\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 490\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_conn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1332\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1333\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 298\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"driver = webdriver.Chrome()\n", | |
"driver.get(\"http://www.cns.gob.mx/extraviadosWeb/portals/extraviados.portal\")\n", | |
"age_element = driver.find_element_by_name(\"Extra_FlowController_1wlw-select_key:{actionForm.edad}\")\n", | |
"age_element.send_keys(\"Ma\")\n", | |
"search_element =driver.find_element_by_name(\"Submit\")\n", | |
"search_element.click()\n", | |
"soup=BeautifulSoup(driver.page_source,\"lxml\")\n", | |
"links_pagina = get_links(soup)\n", | |
"lista_registros = []\n", | |
"while len(links_pagina)>0:\n", | |
"    for link in links_pagina:\n", | |
"        data = extract_page(link)\n", | |
"        lista_registros.append(data)\n", | |
"    driver.get(\"http://www.cns.gob.mx:80/extraviadosWeb/portals/extraviados.portal?_nfpb=true&_st=&_windowLabel=Extra_FlowController_1&Extra_FlowController_1_actionOverride=%2FConsulta%2FExtra_Flow%2Fsiguientes\")\n", | |
"    soup=BeautifulSoup(driver.page_source,\"lxml\")\n", | |
"    links_pagina = get_links(soup)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[['JUAREZGONZALEZ CAROLINA ANDREA',\n", | |
" '04/11/1973',\n", | |
" ' 43 años',\n", | |
" ' 173 cms.',\n", | |
" ' 70',\n", | |
" ' ROBUSTA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA CHICA',\n", | |
" ' MEDIANOS REDONDOS CAFÉ OBSCURO',\n", | |
" ' NORMAL CASTAÑO OBSCURO LACIO LARGO',\n", | |
" ' CHICA',\n", | |
" ' RESPINGADA DELGADA PEQUEÑA',\n", | |
" ' REGULAR',\n", | |
" ' TATUADAS RECTAS NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' TRIANGULAR MEDIANO',\n", | |
" '05/10/2017',\n", | |
" ' EN SU DOMICILIO',\n", | |
" 'SALIO DE SU DOMICILIO ALREDEDOR DE LAS 10 DE LA MAÑANA',\n", | |
" '',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=KshchHvQJT5GGRBPfdB9TKLYX7ZhGGvF64RxTkSGx9pbFJGpQFPP!169856990?photoid=15804&campophoto=PE_BIMG2'],\n", | |
" ['MONTEFORTHERNANDEZ MARIA DE LA LUZ',\n", | |
" '17/02/1928',\n", | |
" ' 89 años',\n", | |
" ' 162 cms.',\n", | |
" ' 50',\n", | |
" ' DELGADA',\n", | |
" ' BLANCA',\n", | |
" ' OVALADA CHICA',\n", | |
" ' PEQUEÑOS OVALADOS CAFÉ CLARO',\n", | |
" ' ESCASO CANO QUEBRADO CORTO',\n", | |
" ' MEDIANA',\n", | |
" ' AFILADA DELGADA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' ESCASAS RECTAS CASTAÑO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' CUADRADO MEDIANO',\n", | |
" '15/09/2017',\n", | |
" ' SU DOMICILIO',\n", | |
" 'LA SRA. SALIO DE SU DOMICILIO EN LA CD DE COLIMA',\n", | |
" 'PELO CANO, VESTIA PANTALON ROSA, BLUSA VERDE, CHAMARRA LILA, BOLSA ROJA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=zZfthHlW1pypFjCfM6nXkbF0762GlJp3p1fPGJLQBC2vyh0TD0rp!1620356072?photoid=15799&campophoto=PE_BIMG2'],\n", | |
" ['LOPEZROQUE MARIA DE LOS ANGELES',\n", | |
" '02/08/1996',\n", | |
" ' 22 años',\n", | |
" ' 140 cms.',\n", | |
" ' 60',\n", | |
" ' ROBUSTA',\n", | |
" ' MORENA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS OVALADOS NEGROS',\n", | |
" ' ABUNDANTE NEGRO QUEBRADO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA ANCHA PEQUEÑA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' MORADOS GRUESOS',\n", | |
" ' TRIANGULAR GRANDE',\n", | |
" '11/09/2017',\n", | |
" ' CENTRO DE OAXACA',\n", | |
" 'SALIO A SU TRABAJO Y NO VOLVIÓ.',\n", | |
" 'TIENE UN LUNAR GRANDE DE COLOR NEGRO EN LA PIERNA IZQUIERDA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=MJY5hHvBLSh4bCdDr3W4g1CrMLjTGx9D3cGVx1Gn6JDLDVlQKhrl!169856990?photoid=15797&campophoto=PE_BIMG2'],\n", | |
" ['VALDOVINOSPADILLA SUSANA',\n", | |
" '26/09/1994',\n", | |
" ' 24 años',\n", | |
" ' 157 cms.',\n", | |
" ' 45',\n", | |
" ' DELGADA',\n", | |
" ' BLANCA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' GRANDES OVALADOS CAFÉ CLARO',\n", | |
" ' ABUNDANTE CASTAÑO CLARO QUEBRADO LARGO',\n", | |
" ' GRANDE',\n", | |
" ' RECTILINEA AGUILEÑA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS LINEAL CASTAÑO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '30/08/2017',\n", | |
" ' JIQUILPAN',\n", | |
" 'LLEVABA LOS NIÑOS ALA ESCUELA Y YA NO VOLVIERON',\n", | |
" 'LLEVA A 3 MENORES CON ELLA Y TIENE UNA MORDIDA DE UN PERRO EN LABIO SUPERIOR IZQUIERDO ES GUERA SE PINTA EL CABELLO',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=HLJNhHlXlmmYQQr3QQ3rVHmFykt2mSTQGzxtMzlnWW8nVG2Mx5NS!1620356072?photoid=15792&campophoto=PE_BIMG2'],\n", | |
" ['GONZALEZLINARES JOCABETH',\n", | |
" '20/08/1987',\n", | |
" ' 30 años',\n", | |
" ' 156 cms.',\n", | |
" ' 60',\n", | |
" ' MEDIANA',\n", | |
" ' BLANCA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' MIEL GRANDES RASGADOS MIEL',\n", | |
" ' NORMAL NEGRO LACIO LARGO',\n", | |
" ' MEDIANA',\n", | |
" ' RECTILINEA ANCHA MEDIANA',\n", | |
" ' ANGOSTA',\n", | |
" ' PINTADAS SE DESCONOCE NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' CUADRADO MEDIANO',\n", | |
" '25/08/2017',\n", | |
" ' AVENIDA PATRIOTISMO',\n", | |
" 'SALIO DE TRABAJARA LAS 6 DE LA TARDE Y YA NO LLEGO A CASA.',\n", | |
" 'LUNAR EN MENTON DE LADO DERECHO',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=lJmwhHvCjGJMdYXLLpFktsldr14qszHSVthZBC2n5mdT3rcTryHB!169856990?photoid=15790&campophoto=PE_BIMG2'],\n", | |
" ['VILLALOBOS CORDERO MERCEDES',\n", | |
" '05/08/1997',\n", | |
" ' 20 años',\n", | |
" ' 164 cms.',\n", | |
" ' 65',\n", | |
" ' DELGADA',\n", | |
" ' MORENA OSCURA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' GRANDES REDONDOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE NEGRO CHINO LARGO',\n", | |
" ' GRANDE',\n", | |
" ' CHATA ANCHA PEQUEÑA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS SEPARADAS NEGRO',\n", | |
" ' ROSAS GRUESOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '09/08/2017',\n", | |
" ' CALLE ALTAMIRANO',\n", | |
" 'DISCUSIÓN CON SU CONCUBINO',\n", | |
" 'OPERACIÓN PARTO POR CESÁREA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=b1v0hHlYlGQR9nVPgYQQBn1kCql7JYQ2WV6v38pz6sbBKLk8KGTP!1620356072?photoid=15782&campophoto=PE_BIMG2'],\n", | |
" ['MEDINADE LA ROSA IMELDA',\n", | |
" '02/10/1998',\n", | |
" ' 18 años',\n", | |
" ' 170 cms.',\n", | |
" ' 78',\n", | |
" ' ROBUSTA',\n", | |
" ' MORENA CLARA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' PEQUEÑOS RASGADOS NEGROS',\n", | |
" ' ABUNDANTE NEGRO CHINO MEDIANO',\n", | |
" ' GRANDE',\n", | |
" ' CHATA ANCHA PEQUEÑA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' CUADRANGULAR CHICO',\n", | |
" '01/08/2017',\n", | |
" ' EN SU DOMICILIO',\n", | |
" 'LA NOCHE DEL 31 DE JULIO DEL 2017 TODOS LOS QUE VIVIMOS EN LA CASA NOS RETIRAMOS ADORMIR COMO DE COSTUMBRE Y A LA MAÑANA SIGUIENTE DEL DÍA 01 DE AGOSTO DEL 2017 AL HABLARLE ELLA NO CONTESTO POR LO CUAL DECIDIMOS ENTRAR A SU RECAMARA Y ELLA YA NO ESTABA. ESPERAMOS HABER SI SE COMUNICABA O REGRESABA Y HASTA EL MOMENTO NO SABEMOS NADA DE ELLA.',\n", | |
" 'RASPONES EN BRAZO IZQUIERDO, HOMBRO IZQUIERDO, PIERNA IZQUIERDA Y TOBILLO IZQUIERDO.',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=v1wGhHvD31c4nxf9dLK21nQRjQhr1Bkd2rhdK1pL5k0QpgD4Rvyx!169856990?photoid=15779&campophoto=PE_BIMG2'],\n", | |
" ['AGUILARRODRIGUEZ MARCELA ADRIANA',\n", | |
" '10/01/1981',\n", | |
" ' 36 años',\n", | |
" ' 170 cms.',\n", | |
" ' 65',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS RASGADOS CAFÉ CLARO',\n", | |
" ' ABUNDANTE CASTAÑO CLARO LACIO LARGO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA DELGADA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' POBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '28/07/2017',\n", | |
" ' PUERTO VALLARTA JALISCO',\n", | |
" 'VIAJO A LA CIUDAD DE MEXICO EL JUEVES 27 PARA IR EL VIERNES 28 AL MUNICIPIO DE JALCOMULCO VERACRUZ CON MIGUEL ANGEL RIZO VARGAS, IBAN A UNA BODA OTRAS CUATRO PERSONAS, DE AHI NO SABEMOS NADA',\n", | |
" '',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=GVjPhHlZ5gHsygYk0H0zjrVVG58RHbXwhPnNR41LMlQV5kxyNZ2f!1620356072?photoid=15772&campophoto=PE_BIMG2'],\n", | |
" ['MEZAHUERTA BRENDA VIRIDIANA',\n", | |
" '26/01/1999',\n", | |
" ' 18 años',\n", | |
" ' 160 cms.',\n", | |
" ' 60',\n", | |
" ' DELGADA',\n", | |
" ' MORENA',\n", | |
" ' REDONDA CHICA',\n", | |
" ' PEQUEÑOS RASGADOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE NEGRO LACIO LARGO',\n", | |
" ' CHICA',\n", | |
" ' CHATA DELGADA PEQUEÑA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS LINEAL NEGRO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' REDONDO CHICO',\n", | |
" '24/07/2017',\n", | |
" ' CASA DE SU ABUELITA',\n", | |
" 'SALIO DE CASA DE SU ABUELITA EN UN COCHE JETTA AZUL CON N° DE PLACAS XWZ2920',\n", | |
" 'UN TATUAJE EN UN COSTADO DE LA CINTURA DEL LADO DERECHO EN FORMA DE DIAMANTE CON UN INFINITO',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=Lys8hHvGnKnlC2Mh17dyRG7f3V1SKdy9Cgpy1XyGK920wMhWH5qG!169856990?photoid=15764&campophoto=PE_BIMG2'],\n", | |
" ['MARTINEZCAMARGO ARACELI',\n", | |
" '18/04/1976',\n", | |
" ' 41 años',\n", | |
" ' 150 cms.',\n", | |
" ' 50',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA',\n", | |
" ' CUADRADA GRANDE',\n", | |
" ' MEDIANOS RASGADOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE NEGRO QUEBRADO LARGO',\n", | |
" ' MEDIANA',\n", | |
" ' AFILADA AGUILEÑA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '09/07/2017',\n", | |
" ' CIUDAD DE MEXICO',\n", | |
" 'EL DOMINGO ME FUI A LA IGLESIA TEMPRANO Y CUANDO REGRESE ALREDEDOR DE LAS 12',\n", | |
" 'NINGUNA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=1ZQphHlc2hDFnQzDjq0YhT7ZYCQZH1Zfhxf64MPFQqbQnR7gynRf!1620356072?photoid=15749&campophoto=PE_BIMG2'],\n", | |
" ['MENDOZACAMPOS LUISA MARIA',\n", | |
" '03/12/1993',\n", | |
" ' 23 años',\n", | |
" ' 154 cms.',\n", | |
" ' 75',\n", | |
" ' ROBUSTA',\n", | |
" ' MORENA CLARA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' MEDIANOS RASGADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL CASTAÑO OBSCURO QUEBRADO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA BOLA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS RECTAS NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '26/06/2017',\n", | |
" ' EN SU CASA ANTES DE QUE SALIERA A LA ESCUELA',\n", | |
" 'SUBIÓ A LA COMBI EN LA PARADA DE LOS ACEITES CON RUMBO A LA UNIVERSIDAD, TECNOLOGICO UNIVERSITARIO NAUCALPAN, NO LLEGO A LA ESCUELA',\n", | |
" 'TATUAJE EN MUÑECA IZQUIERDA DE UNA CRUZ Y EL NOMBRE \"DAVID\"',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=5sTYhHvH7qw5t1y7f5Xx6xcZCTv8YR1PNyJhGb0Qg8FPzfl6Bxwd!169856990?photoid=15730&campophoto=PE_BIMG2'],\n", | |
" ['GUERREROROSALES LUZ DANIELA',\n", | |
" '16/02/1999',\n", | |
" ' 18 años',\n", | |
" ' 158 cms.',\n", | |
" ' 65',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' REDONDA CHICA',\n", | |
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE CASTAÑO CLARO LACIO LARGO',\n", | |
" ' MEDIANA',\n", | |
" ' AFILADA ANCHA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' TRIANGULAR MEDIANO',\n", | |
" '16/06/2017',\n", | |
" ' SE LE VIO AFUERA DE SU DOMICILIO',\n", | |
" 'ESE DIA SALIO A LA PLAZA DE LA COLONIA EL DORADO DE GOMEZ PALACIO DURANGO PARA VER A UN NOVIO QUE TRAIA DE NOMBRE SALVADOR BALTIERRE HERNANDEZ DE 20 AÑOS DE EDAD OJOS BORRADOS Y QUE YA ESE DIA NO REGRESO A LA CASA Y SE DESCONOCE EL PARADERO DE AMBOS',\n", | |
" '',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=plyMhHld1mtzpbYvCQkGbmcrZYLKTPQ1pkX3y6ByhsvKnLyDx061!1620356072?photoid=15768&campophoto=PE_BIMG2'],\n", | |
" ['GABRIELGONZALEZ MARIA ELENA',\n", | |
" '01/11/1985',\n", | |
" ' 31 años',\n", | |
" ' 157 cms.',\n", | |
" ' 70',\n", | |
" ' ROBUSTA',\n", | |
" ' MORENA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS RASGADOS CAFE OBSCURO',\n", | |
" ' NORMAL NEGRO QUEBRADO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' AFILADA DELGADA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS HACIA ABAJO NEGRO',\n", | |
" ' ROJOS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '06/06/2017',\n", | |
" ' CONSTITUCION DE LA REPUBLICA',\n", | |
" 'SE QUEDO EN LA ESQUINA DE AV. PINOS ESPERANDO TRANSPORTE PUBLICO PARA IR A REALIZAR TRAMITE DE PAPELES ESCOLARES, QUEDO EN COMUNICARSE MÁS TARDE Y NO LO HIZO',\n", | |
" 'CICATRIZ LABIO SUPERIOR DERECHO,CICATRIZ EN PECHO CASI A LA ALTURA DEL CUELLO,TATUAJE EN OMBLIGO DE FLORES, TATUAJE EN TOBILLO DERECHO',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=SGCVhHvLnJPyN1qHlgJ7yDsnMrnSGphSvkZX2GG0MZsTvh9RVC1N!169856990?photoid=15714&campophoto=PE_BIMG2'],\n", | |
" ['ALVAREZ DIAZ MIREYA JAZMIN',\n", | |
" '16/03/1992',\n", | |
" ' 25 años',\n", | |
" ' 155 cms.',\n", | |
" ' 50',\n", | |
" ' ROBUSTA',\n", | |
" ' BLANCA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' MEDIANOS REDONDOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE CASTAÑO CLARO LACIO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA ANCHA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS LINEAL CASTAÑO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '01/06/2017',\n", | |
" ' SAN MATEO ATARASQUILLO',\n", | |
" 'SE DESPARECIO DE MI DOMICILIO CON MI MENOR HIJO.',\n", | |
" 'LUNAR POMULO DERECHO.',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=0vrjhHlpmnDSChZqyy61QV3mMLWyZC8RPXxsr3YLh61x1QhnNV5X!1620356072?photoid=15710&campophoto=PE_BIMG2'],\n", | |
" ['MOTAMARTINEZ MAYRA MICHELL',\n", | |
" '29/12/1983',\n", | |
" ' 33 años',\n", | |
" ' 150 cms.',\n", | |
" ' 60',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA CHICA',\n", | |
" ' MEDIANOS RASGADOS CAFÉ CLARO',\n", | |
" ' NORMAL CASTAÑO CLARO LACIO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' RECTILINEA BOLA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' POBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' SE DESCONOCE GRUESOS',\n", | |
" ' TRIANGULAR CHICO',\n", | |
" '14/05/2017',\n", | |
" ' TIJUANA BAJA CALIFORNIA',\n", | |
" 'EL DÍA DOMINGO 14 DE MAYO DEL PRESENTE AÑO, SALIÓ DE SU DOMICILIO UBICADO EN LA COLONIA BUENOS AIRES NORTE DE LA CIUDAD DE TIJUANA BAJA CALIFORNIA, DESCONOCIÉNDOSE CON QUIEN Y A DONDE SE DIRIGIÓ, PERO TRANSMITIÓ UN VÍDEO EN VIVO POR FACEBOOK DESDE UN ANTRO LLAMADO COPEO A LAS 01',\n", | |
" 'CUENTA CON DOS LUNARES COLOR NEGRO, UNO DEBAJO DEL PECHO A LA ALTURA DE LA CLAVÍCULA Y EL OTRO EN EL LADO DERECHO DEL CUELLO.',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=QhfnhHvJngHypTHvvSn0JdwJdqvxD8zqBXcN7pJh0hbdNsJpS1Q1!169856990?photoid=15758&campophoto=PE_BIMG2'],\n", | |
" ['ALEJOBERNAL GLORIA',\n", | |
" '22/10/1968',\n", | |
" ' 48 años',\n", | |
" ' 150 cms.',\n", | |
" ' 48',\n", | |
" ' DELGADA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL NEGRO OTRO LARGO',\n", | |
" ' GRANDE',\n", | |
" ' DELGADA DELGADA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' POBLADAS SEPARADAS CASTAÑO',\n", | |
" ' ROSAS GRUESOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '04/05/2017',\n", | |
" ' EN SU TRABAJO UBICADO EN CALLE LAGUNA LUNA',\n", | |
" 'SALIO DE SU TRABAJO Y NO LLEGO A SU CASA AL PARECER RECIBIO UNA LLAMADA PERO NO SE SABE DE QUIEN',\n", | |
" 'CICATRIZ QUIRURGICA EN EMPEINE PIE DERECHO ,LUNAR ROJO TIPO MANCHA EN BRAZO IZQUIERDO Y MANCHA BLANCA EN VIENTRE HACIA EL AREA INGUINAL',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/Consulta/Extra_Flow/./../../Consulta/resources/images/silueta2.jpg;jsessionid=r3kyhHlfpJ7Mwjv1m3vmGRTGD92gLJ1P2b2279XC77n10G1QHTV7!1620356072'],\n", | |
" ['LOPEZSILVA ANA ROSA',\n", | |
" '30/08/1989',\n", | |
" ' 27 años',\n", | |
" ' 160 cms.',\n", | |
" ' 58',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS OVALADOS NEGROS',\n", | |
" ' ABUNDANTE NEGRO LACIO MEDIANO',\n", | |
" ' CHICA',\n", | |
" ' CHATA DELGADA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' PINTADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS DELGADOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '02/05/2017',\n", | |
" ' GUERRERO',\n", | |
" 'EL 02 DE MAYO DE 2017, ANA SALIÓ DE CASA DE SU TÍA UBICADA EN ACAPULCO, GUERRERO, PARA VER A SUS MEDIAS HERMANAS QUE VIVEN EN EL MISMO MUNICIPIO, EN COMPAÑÍA DE SU MENOR HIJO, DESDE ENTONCES SE DESCONOCE SU PARADERO',\n", | |
" 'CLAVO QUIRÚRGICO EN EL PIE DERECHO, CICATRIZ DE CESARÍA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=j1CrhHvKvfcDnqdtk1HnPttWZpJQQpBhXx6QhsM5G0H9tmXzcJxQ!169856990?photoid=15692&campophoto=PE_BIMG2'],\n", | |
" ['TOLENTINOGARCIA MARGARITA',\n", | |
" '12/08/1930',\n", | |
" ' 87 años',\n", | |
" ' 120 cms.',\n", | |
" ' 45',\n", | |
" ' DELGADA',\n", | |
" ' MORENA CLARA',\n", | |
" ' CUADRADA MEDIANA',\n", | |
" ' PEQUEÑOS RASGADOS NEGROS',\n", | |
" ' NORMAL NEGRO LACIO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' REDONDA ANCHA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' TRIANGULAR CHICO',\n", | |
" '07/04/2017',\n", | |
" ' INTERIOR DEL METRO MERCED EN DIRECCION A PANTITLAN',\n", | |
" 'AL SUBIR AL METRO EN MERCED EN DIRECCION A PANTITLAN ELLA SE SUBIO AL METRO Y YO NO PUDE SUBIRME POR CUIDAR A OTROS FAMILIARES CERRANDOSE LAS PUERTAS Y RETIRARSE EL METRO',\n", | |
" 'LUNAR ABAJO DEL OJO LADO DERECHO, PELO NEGRO UN POCO CANOSO VESTIA CON VESTIDO COLOR GRIS CON DETALLES FLOREADOS COLOR NEGRO, ZAPATOS D PLASTI NEGROS',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=qGYXhHlQTj1Q5X1JgqdGLNBRBLyg2RNpqhMppZGS1SBD10DqBHq3!1620356072?photoid=15661&campophoto=PE_BIMG2'],\n", | |
" ['PEREZVELAZQUEZ GLORIA',\n", | |
" '01/05/1952',\n", | |
" ' 64 años',\n", | |
" ' 150 cms.',\n", | |
" ' 50',\n", | |
" ' DELGADA',\n", | |
" ' MORENA CLARA',\n", | |
" ' CUADRADA GRANDE',\n", | |
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL NEGRO LACIO CORTO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA ANCHA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS RECTAS NEGRO',\n", | |
" ' SE DESCONOCE MEDIANOS',\n", | |
" ' TRIANGULAR MEDIANO',\n", | |
" '21/03/2017',\n", | |
" ' CIUDAD DE MEXICO',\n", | |
" 'SE PIDE EL APOYO PARA LOCALIZAR A LA C. GLORIA PÉREZ VELAZQUEZ QUIEN SE ENCUENTRA DESAPARECIDA DESDE EL MARTES 21 DE MARZO DE 2017 POR LA TARDE.',\n", | |
" '',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=LTS2hHvMGKvnly6Mg26t88H4zSf2QBBxKxrhnDvMphtGpqyGdfyc!169856990?photoid=15646&campophoto=PE_BIMG2'],\n", | |
" ['JIMENEZ SOSA MINU HAYDEE',\n", | |
" '13/05/1976',\n", | |
" ' 41 años',\n", | |
" ' 169 cms.',\n", | |
" ' 58',\n", | |
" ' DELGADA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' PEQUEÑOS RASGADOS CAFE OBSCURO',\n", | |
" ' ABUNDANTE CASTAÑO OBSCURO QUEBRADO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' RECTILINEA BOLA AMPLIA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' MORADOS MEDIANOS',\n", | |
" ' CUADRANGULAR MEDIANO',\n", | |
" '16/03/2017',\n", | |
" ' CALLE TOLTECAS',\n", | |
" 'SE DIRIGIA A LA CENTRAL CAMIONERA SAN LAZARO Y YA NO SE VOLVIO A SABER DE ELLA',\n", | |
" '',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=sjlKhHlCQlXwbqRLlwGZcyvHGLk1yQ96f2hDZtY2Q5GqQk8Lcyw1!1620356072?photoid=15706&campophoto=PE_BIMG2'],\n", | |
" ['PARRAOLVERA LIZBETH YADIRA',\n", | |
" '29/06/1974',\n", | |
" ' 42 años',\n", | |
" ' 157 cms.',\n", | |
" ' 60',\n", | |
" ' MEDIANA',\n", | |
" ' APIÑONADA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' GRANDES RASGADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL CASTAÑO OBSCURO LACIO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA ANCHA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '14/03/2017',\n", | |
" ' OLIVAR DEL CONDE',\n", | |
" 'SALIO DE SU CASA PARA RECOGER UNAS COSAS EN EL TOREO',\n", | |
" 'NINGUNA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=fMRshHvN9nCnYcg5yxtJpPCdvDNGM5dgS3Jp6TGKJk2f11vJX3Kb!169856990?photoid=15639&campophoto=PE_BIMG2'],\n", | |
" ['SANCHEZPEREZ CARMEN IVETTE',\n", | |
" '11/06/1992',\n", | |
" ' 23 años',\n", | |
" ' 163 cms.',\n", | |
" ' 62',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS REDONDOS CAFÉ CLARO',\n", | |
" ' NORMAL NEGRO QUEBRADO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' RECTILINEA AGUILEÑA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS GRUESOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '07/03/2017',\n", | |
" ' SAN LUIS POTOSI',\n", | |
" 'EL DÍA 07 DE MARZO DE 2017, CARMEN RECIBIÓ UNA LLAMADA A SU CELULAR, LA CUAL CONTESTÓ EN SU RECAMARA, TERMINANDO DE ALMORZAR SALIÓ DE SU CASA DICIENDO QUE IBA AL GIMNASIO. COMENTAN QUE NO LA VIERON LLEGAR A ESE LUGAR. DESDE ENTONCES SE DESCONOCE SU PARADERO <br/>',\n", | |
" 'TIENE BRAQUETES Y PECAS EN LOS PÓMULOS',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=CTXyhHlD1QcGjVtWzBLdgdmrvLsjLfNTs2NftCFXQwDYk3BJ1yfp!1620356072?photoid=15617&campophoto=PE_BIMG2'],\n", | |
" ['CHAVARRIA CARRILLO ITZEL ANAHI',\n", | |
" '10/08/1993',\n", | |
" ' 23 años',\n", | |
" ' 145 cms.',\n", | |
" ' 70',\n", | |
" ' MEDIANA',\n", | |
" ' BLANCA',\n", | |
" ' REDONDA MEDIANA',\n", | |
" ' MEDIANOS RASGADOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE CASTAÑO OBSCURO LACIO MEDIANO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA AGUILEÑA PEQUEÑA',\n", | |
" ' AMPLIA',\n", | |
" ' ESCASAS HACIA ABAJO CASTAÑO',\n", | |
" ' ROJOS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '06/03/2017',\n", | |
" ' DISTRITO FEDERAL',\n", | |
" 'EL DÍA 06 DE MARZO SALIÓ DE SU TRABAJO CON RUMBO A SU CASA LA CUAL QUEDA A POCAS CUADRAS, PERO NUNCA LLEGÓ. DESDE ENTONCES SE DESCONOCE SU PARADERO',\n", | |
" 'TIENE EL CABELLO PINTADO CON RAYITOS RUBIOS',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=071ThHvNM0sBhmh6jDF9SGhqV865bjHM9GydjhppRylTFh1mMDZQ!169856990?photoid=15614&campophoto=PE_BIMG2'],\n", | |
" ['PORTILLOMEJIA JUANA EDITH',\n", | |
" '24/06/1989',\n", | |
" ' 27 años',\n", | |
" ' 160 cms.',\n", | |
" ' 70',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA CHICA',\n", | |
" ' PEQUEÑOS RASGADOS CAFÉ OBSCURO',\n", | |
" ' ABUNDANTE NEGRO QUEBRADO LARGO',\n", | |
" ' CHICA',\n", | |
" ' RECTILINEA DELGADA PEQUEÑA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS SEPARADAS NEGRO',\n", | |
" ' ROJOS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '06/03/2017',\n", | |
" ' AL SALIR DE SU DOMICILIO ANTES MENCIONADO',\n", | |
" 'EL PASADO 6 DE MARZO SALIÓ DE SU DOMICILIO RUMBO AL TRABAJO A LAS 7',\n", | |
" 'TATUAJE EN MEDIO DE LA ESPALDA EN LA PARTE ALTA CON LA FIGURA DE UNA MARIPOSA CON LOS NOMBRES DE MELANIE Y MIGUEL',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/Consulta/Extra_Flow/./../../Consulta/resources/images/silueta2.jpg;jsessionid=nnp1hHlGKjgvvT50n2pytCTHJL12BJ4GRTbMGzMFl1zLvVpmc6RQ!1620356072'],\n", | |
" ['MANDUJANOMEDINA LOURDES',\n", | |
" '28/02/1962',\n", | |
" ' 54 años',\n", | |
" ' 154 cms.',\n", | |
" ' 74',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' OVALADA MEDIANA',\n", | |
" ' MEDIANOS OVALADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL CASTAÑO OBSCURO LACIO CORTO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA AGUILEÑA MEDIANA',\n", | |
" ' REGULAR',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA CASTAÑO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' REDONDO MEDIANO',\n", | |
" '24/02/2017',\n", | |
" ' CALLE TAURO EN LA PARADA DEL MICRO',\n", | |
" 'SALIA CON DIRECCIÓN A HGO',\n", | |
" 'PAÑO EN LA CARA',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/Consulta/Extra_Flow/./../../Consulta/resources/images/silueta2.jpg;jsessionid=XpGghHvPVqQL5fZG532M69yrysVvVPncnZ1B9j3tj3K1JFlLPx84!169856990'],\n", | |
" ['LARALOPEZ MARIA DE LOS ANGELES',\n", | |
" '02/09/1986',\n", | |
" ' 30 años',\n", | |
" ' 170 cms.',\n", | |
" ' 100',\n", | |
" ' ROBUSTA',\n", | |
" ' BLANCA',\n", | |
" ' REDONDA GRANDE',\n", | |
" ' GRANDES OVALADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL RUBIO LACIO CORTO',\n", | |
" ' MEDIANA',\n", | |
" ' CHATA ANCHA MEDIANA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA CASTAÑO',\n", | |
" ' ROJOS DELGADOS',\n", | |
" ' REDONDO CHICO',\n", | |
" '23/02/2017',\n", | |
" ' SALIENDO DE SU DOMICILIO',\n", | |
" 'LLEGO DE TRABAJAR COMO A LAS 17',\n", | |
" 'CICATRIZ EN LABIO SUPERIOR PEQUENA, CICATRIZ DE CESAREA, MANCHAS EN LA PIEL',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=P2XvhHlFyJ5JrJ8pTGJGbPzn4bkPQvHjJfNVDsZVhhfpqDlZGN4C!1620356072?photoid=15603&campophoto=PE_BIMG2'],\n", | |
" ['VIZCARRA LEON ALEJANDRA',\n", | |
" '26/11/1993',\n", | |
" ' 22 años',\n", | |
" ' 148 cms.',\n", | |
" ' 52',\n", | |
" ' MEDIANA',\n", | |
" ' MORENA CLARA',\n", | |
" ' REDONDA CHICA',\n", | |
" ' PEQUEÑOS RASGADOS CAFÉ OBSCURO',\n", | |
" ' NORMAL CASTAÑO OBSCURO LACIO CORTO',\n", | |
" ' CHICA',\n", | |
" ' RECTILINEA ANCHA AMPLIA',\n", | |
" ' AMPLIA',\n", | |
" ' SEMIPOBLADAS HACIA ARRIBA NEGRO',\n", | |
" ' ROSAS MEDIANOS',\n", | |
" ' TRIANGULAR CHICO',\n", | |
" '16/02/2017',\n", | |
" ' TOWN CENTER ZUMPANGO',\n", | |
" 'SE TUVO CONTACTO CON ELLA POR MENSAJE HASTA LAS 3',\n", | |
" 'TATUAJE EN BRAZO IZQUIERDO, (\"AYE\"), PERFORACIÓN EN LA NARIZ Y EN LA CADERA TATUAJE DE UNA FLOR SIN TERMINAR.',\n", | |
" 'http://www.cns.gob.mx/extraviadosWeb/showImage;jsessionid=DFVthHvQYB40WP2NdJGj9y22hM1QyTybQMtQHr2vR3tTQyvBffqx!169856990?photoid=15607&campophoto=PE_BIMG2']]" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lista_registros" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def clean_string(input_string):\n", | |
" input_string =str(input_string)\n", | |
" input_string =input_string.replace(\"\\xa0\",\"\")\n", | |
" input_string =input_string.replace(\"\\r\",\"\")\n", | |
" input_string =input_string.replace(\"\\n\",\"\")\n", | |
" input_string =input_string.replace(\"<label>\",\"\")\n", | |
" input_string =input_string.replace(\"</label>\",\"\")\n", | |
" input_string =input_string.replace(\"</td>\",\"\")\n", | |
" input_string =input_string.replace(\"</span>\",\"\")\n", | |
" input_string =input_string.strip()\n", | |
" lista_string = input_string.split(':') \n", | |
" return lista_string[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def extract_page(url):\n", | |
" request_pagina = requests.get(url)\n", | |
" soup_pagina = BeautifulSoup(request_pagina.text)\n", | |
" data =extract_data(soup_pagina)\n", | |
" return data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def extract_data(soup_pagina):\n", | |
" page_table = soup_pagina.find_all('table')\n", | |
" td_table = page_table[5].find_all('td')\n", | |
" i = 4\n", | |
" lista_respuestas = []\n", | |
" while i < 24:\n", | |
" lista_respuestas.append(clean_string(td_table[i]))\n", | |
" i = i + 1\n", | |
" imgs= soup_pagina.find_all('img')\n", | |
" girl_photo = \"http://www.cns.gob.mx\"+soup_pagina.find_all('img')[37][\"src\"]\n", | |
" lista_respuestas.append(girl_photo)\n", | |
" return lista_respuestas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_links(soup):\n", | |
" links_pagina = []\n", | |
" for link in soup.find_all('a'):\n", | |
" direccion_link = link.get('href',None)\n", | |
" try: \n", | |
" if \"Extra_FlowController_1id\" in direccion_link: \n", | |
" if direccion_link not in links_pagina:\n", | |
" links_pagina.append(direccion_link)\n", | |
" except:\n", | |
" print(\"\")\n", | |
" return links_pagina" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df_lista_registros = pandas.DataFrame(lista_registros)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df_lista_registros.to_csv(\"mujeres_ninas_mayores.csv\", quoting=csv.QUOTE_ALL)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "SyntaxError", | |
"evalue": "invalid syntax (<ipython-input-9-7be2c36cba81>, line 1)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-9-7be2c36cba81>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m lista = [<td class=\"bea-portal-layout-placeholder-container\">\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" | |
] | |
} | |
], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment