Last active
November 19, 2022 09:08
-
-
Save s-nt-s/c24969176e91f11b127f5637ef2e9613 to your computer and use it in GitHub Desktop.
Obtener correos de gente que puede hacer el Certificado de Eficiencia Energética
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from web import Web | |
import re | |
import urllib3 | |
import bs4 | |
from urllib.parse import urlparse | |
from urllib.parse import parse_qs | |
import base64 | |
urllib3.disable_warnings() | |
class GetMails(Web):
    """Template for a paginated e-mail scraper built on top of ``Web``.

    Subclasses customise the ``start``, ``page`` and ``next`` hooks;
    ``mails`` drives them and accumulates every address found.
    """

    def __init__(self, *args, **kvargs):
        """Forward all arguments to ``Web``, forcing ``verify=False``."""
        # NOTE(review): verify=False presumably disables TLS certificate
        # checks in the underlying session -- confirm in Web.
        super().__init__(*args, verify=False, **kvargs)
        # Lower-cased addresses collected so far.
        self._mails = set()

    def start(self):
        """Hook: return the first URL to load as a ``str``, or ``None``.

        A subclass may instead load the first page itself and return ``None``.
        """
        return None

    def next(self):
        """Hook: return the next page as a URL, a ``bs4.Tag`` or ``None``.

        ``None`` ends the crawl; this default therefore stops after one page.
        """
        return None

    def page(self):
        """Hook: return an iterable of candidate addresses for this page."""
        return None

    def mails(self):
        """Crawl every page and return the sorted list of addresses.

        Candidates that are empty or lack an ``@`` are dropped and the rest
        are lower-cased. The result is also printed as one ``;``-joined line.
        """
        first_url = self.start()
        if isinstance(first_url, str):
            self.get(first_url)

        while True:
            self._mails.update(
                candidate.lower()
                for candidate in self.page()
                if candidate and "@" in candidate
            )
            following = self.next()
            if following is None:
                break
            # A tag returned by next() stands for the link it carries.
            target = (
                following.attrs["href"]
                if isinstance(following, bs4.Tag)
                else following
            )
            if isinstance(target, str):
                self.get(target)

        collected = sorted(self._mails)
        print(";".join(collected))
        return collected
class IG(GetMails):
    """Scraper for the COIIM (industrial engineers) certifier listing."""

    def start(self):
        """Print a header and return the URL of the first listing page."""
        url = "https://portal.coiim.es/servicios/certificacion-energetica/listado-de-certificadores/madrid"
        print("Ingenieros industriales")
        print(url)
        return url

    def page(self):
        """Yield the target of every link found in ``ul.social-networks2``.

        For ``mailto:`` links this is the bare address. Other links yield
        whatever follows their last ``:``, which may not be an address.
        """
        for link in self.soup.select("ul.social-networks2 a"):
            href = link.attrs.get("href")
            if not href:
                # Anchors without a target carry no address.
                continue
            # Keep what follows the scheme ("mailto:foo@bar" -> "foo@bar").
            address = href.strip().split(":")[-1]
            # Drop any mailto header fields ("?subject=...").
            yield address.split("?")[0]

    def next(self):
        """Return the "next page" anchor, or ``None`` on the last page."""
        return self.soup.select_one("li.next a")
class AP(GetMails):
    """Scraper for the aparejadoresmadrid.es expert listing."""

    def start(self):
        """Load the search form and submit it with the town set to Madrid.

        Returns ``None``: the first results page is loaded here directly.
        """
        url = "https://www.aparejadoresmadrid.es/zona-ciudadanos/listado-expertos-certificacion"
        print("Aparejadores")
        print(url)
        self.get(url)
        form_selector = "section form"
        form = self.soup.select_one(form_selector)
        town_input = form.find("input", id=re.compile(r".*valor-POBLACION.*"))
        # NOTE(review): prepare_submit presumably returns the form action and
        # a mapping of field name -> list of values -- confirm in Web.
        action, data = self.prepare_submit(form_selector)
        data[town_input.attrs["name"]][-2] = "Madrid"
        self.get(action, **data)

    def page(self):
        """Yield the last whitespace-separated word of each ``p.CA_nombre``.

        Paragraphs with no text are skipped instead of raising ``IndexError``.
        """
        for entry in self.soup.select("p.CA_nombre"):
            words = entry.get_text().split()
            if words:
                yield words[-1]

    def next(self):
        """Return the anchor wrapping the "next page" marker, if present."""
        marker = self.soup.select_one("div.ir-siguiente-pag")
        if marker:
            return marker.find_parent("a")
        return None
class AR(GetMails):
    """Scraper for the COAM (architects) directory."""

    def start(self):
        """Print a header and load the search results for speciality 20.

        Returns ``None``: the results page is loaded here directly.
        """
        print("Arquitectos")
        print("https://www.coam.org/es/red-arquitectos")
        self.get("https://www.coam.org/es/red-arquitectos/resultado-busqueda", **{
            "especialidad": [20]
        })

    def page(self):
        """Visit every profile linked from the results and yield its e-mails.

        Each address is read from the base64 ``email`` query parameter of
        the ``g_Image.php`` image on the profile page.
        """
        profile_urls = set()
        for anchor in self.soup.select("td.nombre a"):
            href = anchor.attrs["href"]
            if "/ficha/" in href:
                profile_urls.add(href)

        mail_image = re.compile(r".*/g_Image\.php\?email.*")
        for profile_url in sorted(profile_urls):
            self.get(profile_url)
            for img in self.soup.find_all("img", src=mail_image):
                encoded = parse_qs(urlparse(img.attrs["src"]).query)['email'][0]
                # parse_qs turns a literal "+" into a space; base64 never
                # contains spaces, so turn them back before decoding.
                encoded = encoded.replace(" ", "+")
                yield base64.b64decode(encoded.encode('ascii')).decode('ascii')
# Run the three scrapers in order (engineers, surveyors, architects) and
# merge their results into one sorted, de-duplicated list.
mails = sorted(set().union(IG().mails(), AP().mails(), AR().mails()))
print("", "Todos", sep="\n")
print(" ".join(mails))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment