Last active
October 28, 2023 20:09
-
-
Save user0able/56e3e897afebf5bad83d04544263498a to your computer and use it in GitHub Desktop.
extrae rutificador
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
import sqlite3
from bs4 import BeautifulSoup
from requests_toolbelt.multipart.encoder import MultipartEncoder

# Flat script: for each candidate RUT in a fixed numeric range, POST it to the
# lookup endpoint, parse the first row of the returned HTML table and store the
# result in the local SQLite database ``personas.db``.
#
# NOTE(review): this issues one request per candidate with no delay and stores
# names and addresses; confirm that this is permitted by the target site's
# terms and by applicable data-protection rules before running it.

CONN = sqlite3.connect('personas.db')
c = CONN.cursor()

# Successful lookups. ``run`` is UNIQUE, so each RUT is stored at most once.
c.execute('''
    CREATE TABLE IF NOT EXISTS personas (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        run TEXT NOT NULL UNIQUE,
        nombre_completo TEXT,
        direccion TEXT,
        localidad_direccion TEXT
    );
''')

# RUTs whose lookup returned no usable row or raised an exception.
c.execute('''
    CREATE TABLE IF NOT EXISTS errors (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        run TEXT NOT NULL UNIQUE
    );
''')

# Lazily generate the candidates ("<number>-0") instead of materialising ten
# million strings in memory before the first request is sent.
ruts = (str(i) + "-0" for i in range(20500000, 30500000))
batch_size = 100  # commit to disk every ``batch_size`` processed RUTs
counter = 0       # number of RUTs processed so far (found or not)
request_timeout = 30  # seconds; without it a stalled connection hangs forever

try:
    for rut in ruts:
        try:
            multipart_data = MultipartEncoder(
                fields={
                    'term': rut
                }
            )
            response = requests.post(
                "https://www.nombrerutyfirma.com/rut",
                data=multipart_data,
                headers={
                    'Content-Type': multipart_data.content_type
                },
                timeout=request_timeout,
            )

            # HTTP 429 means the server is rate limiting us: every further
            # request would fail the same way, so stop instead of logging
            # millions of bogus "not found" rows.
            if response.status_code == 429:
                print(f"Error: demasiadas solicitudes (HTTP 429) en {rut}; se detiene la ejecución.")
                break
            # Any other HTTP error is routed to the ``except`` branch rather
            # than being parsed as if it were a result page.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            tbody = soup.find('tbody')
            tr = tbody.find('tr') if tbody else None
            tds = tr.find_all('td') if tr else []

            # A result row is only used when it has at least five cells;
            # cells 0, 1, 3 and 4 are the ones read below.
            if len(tds) >= 5:
                nombre_completo = tds[0].text
                run = tds[1].text
                direccion = tds[3].text
                localidad_direccion = tds[4].text
            else:
                nombre_completo = run = direccion = localidad_direccion = None

            print(f"Rut: {rut} - Nombre: {nombre_completo} - Direccion: {direccion} - Localidad: {localidad_direccion}")

            # OR IGNORE: both tables declare ``run`` UNIQUE, so re-running the
            # script (or a repeated value) must not raise IntegrityError.
            if nombre_completo and run:
                c.execute(
                    "INSERT OR IGNORE INTO personas (run, nombre_completo, direccion, localidad_direccion) VALUES (?, ?, ?, ?)",
                    (run, nombre_completo, direccion, localidad_direccion),
                )
                print(f"Insertado: {rut}")
            else:
                c.execute("INSERT OR IGNORE INTO errors (run) VALUES (?)", (rut,))
                print(f"Error: {rut}")
        except Exception as e:
            # Best effort: log the failure, remember the RUT and keep going.
            # OR IGNORE keeps this handler itself from raising on a duplicate.
            print(f"Error: {e}")
            c.execute("INSERT OR IGNORE INTO errors (run) VALUES (?)", (rut,))

        # Count every processed RUT so failed lookups are also flushed to disk
        # on the regular batch boundary.
        counter += 1
        if counter % batch_size == 0:
            CONN.commit()
            print(f"{counter} registros insertados y guardados.")
finally:
    # Runs on normal completion, on the 429 break and on Ctrl-C / crashes, so
    # the last partial batch is persisted and the connection is always closed.
    CONN.commit()
    CONN.close()
Script en Python que, si el sitio no tiene un limitador de ancho de banda, extrae todos los RUT dentro de un rango.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
El usuario ha enviado demasiadas solicitudes en un periodo de tiempo determinado. Diseñado para ser utilizado con esquemas de limitación de tasa. Propuesto en un borrador de Internet.
Funcionaría si no me banean xd (bien ahí el rutificador (y) )