Skip to content

Instantly share code, notes, and snippets.

@s-nt-s
Last active November 19, 2022 09:08
Show Gist options
  • Save s-nt-s/c24969176e91f11b127f5637ef2e9613 to your computer and use it in GitHub Desktop.
Save s-nt-s/c24969176e91f11b127f5637ef2e9613 to your computer and use it in GitHub Desktop.
Obtener correos de gente que puede hacer el Certificado de Eficiencia Energética
from web import Web
import re
import urllib3
import bs4
from urllib.parse import urlparse
from urllib.parse import parse_qs
import base64
urllib3.disable_warnings()
class GetMails(Web):
    """Base scraper that walks a paginated listing and collects e-mail addresses.

    Subclasses customise the crawl through three hooks: ``start()``,
    ``page()`` and ``next()``.  ``mails()`` drives them and accumulates the
    results.  Page fetching and parsing (``self.get``, ``self.soup``) are
    inherited from ``Web``.
    """

    def __init__(self, *args, **kvargs):
        # verify=False is forwarded to Web.__init__ -- presumably it disables
        # TLS certificate verification; confirm against Web.
        super().__init__(*args, verify=False, **kvargs)
        # Addresses collected so far, lower-cased and de-duplicated.
        self._mails = set()

    def start(self):
        """Hook: prepare the first page.

        Return a URL string to have ``mails()`` fetch it, or None when the
        hook has already loaded the first page itself.
        """
        return None

    def next(self):
        """Hook: locate the following page.

        Return None to stop, a URL string or a ``bs4.Tag`` carrying an
        ``href`` to have ``mails()`` fetch it, or any other value when the
        hook has already loaded the following page itself.
        """
        return None

    def page(self):
        """Hook: return an iterable of candidate e-mail strings for the current page."""
        # An empty iterable (instead of None) keeps mails() from failing on
        # subclasses that do not override this hook.
        return ()

    def mails(self):
        """Crawl every page, print the addresses joined by ';' and return them.

        Returns:
            A sorted list of the unique, lower-cased strings containing "@"
            that the ``page()`` hook produced.
        """
        first = self.start()
        if isinstance(first, str):
            self.get(first)
        while True:
            # `or ()` tolerates a page() hook that returns None.
            for candidate in self.page() or ():
                if candidate and "@" in candidate:
                    self._mails.add(candidate.lower())
            target = self.next()
            if isinstance(target, bs4.Tag):
                # A "next" tag without href cannot be followed: treat it as
                # the end of the pagination instead of raising KeyError.
                target = target.attrs.get("href")
            if target is None:
                break
            if isinstance(target, str):
                self.get(target)
        mails = sorted(self._mails)
        print(";".join(mails))
        return mails
class IG(GetMails):
    """Scraper for the COIIM (industrial engineers) certifier listing for Madrid."""

    def start(self):
        """Announce the source and return the URL of the first listing page."""
        url = "https://portal.coiim.es/servicios/certificacion-energetica/listado-de-certificadores/madrid"
        print("Ingenieros industriales")
        print(url)
        return url

    def page(self):
        """Yield the target of every link in the ``ul.social-networks2`` lists.

        For each link, the text after the last ":" of its href is yielded
        (e.g. the address part of a ``mailto:`` link).  Links that are not
        e-mail addresses are yielded too; the caller is expected to filter
        them.
        """
        for link in self.soup.select("ul.social-networks2 a"):
            href = link.attrs.get("href")
            if not href:
                # Anchor without a target: nothing to extract.
                continue
            yield href.strip().split(":")[-1]

    def next(self):
        """Return the ``li.next a`` link tag of the current page, or None if absent."""
        return self.soup.select_one("li.next a")
class AP(GetMails):
    """Scraper for the Madrid aparejadores (building engineers) expert listing."""

    def start(self):
        """Load the search form and submit it with the locality set to "Madrid".

        Returns None: the first result page is loaded here, so the caller
        has nothing further to fetch.
        """
        url = "https://www.aparejadoresmadrid.es/zona-ciudadanos/listado-expertos-certificacion"
        print("Aparejadores")
        print(url)
        self.get(url)
        f_select = "section form"
        form = self.soup.select_one(f_select)
        # The locality input is located by id pattern because its exact
        # id/name is not fixed in this code.
        field = form.find("input", id=re.compile(r".*valor-POBLACION.*"))
        action, data = self.prepare_submit(f_select)
        # NOTE(review): prepare_submit presumably maps each field name to a
        # list of values; the second-to-last entry is overwritten here --
        # confirm against Web.prepare_submit.
        data[field.attrs["name"]][-2] = "Madrid"
        self.get(action, **data)

    def page(self):
        """Yield the last whitespace-separated token of every ``p.CA_nombre``.

        Tokens that are not e-mail addresses are yielded as well; the caller
        is expected to filter them.
        """
        for node in self.soup.select("p.CA_nombre"):
            words = node.get_text().split()
            # An empty paragraph has no token to offer; skip it instead of
            # failing on words[-1].
            if words:
                yield words[-1]

    def next(self):
        """Return the ``<a>`` wrapping ``div.ir-siguiente-pag``, or None if absent."""
        marker = self.soup.select_one("div.ir-siguiente-pag")
        if marker is None:
            return None
        return marker.find_parent("a")
class AR(GetMails):
    """Scraper for the COAM (architects) network search results."""

    def start(self):
        """Announce the source and load the search results for ``especialidad`` 20.

        Returns None: the result page is loaded here, so the caller has
        nothing further to fetch.
        """
        print("Arquitectos")
        print("https://www.coam.org/es/red-arquitectos")
        # NOTE(review): 20 is presumably the energy-certification speciality
        # id on the COAM search form -- confirm.
        self.get("https://www.coam.org/es/red-arquitectos/resultado-busqueda", **{
            "especialidad": [20]
        })

    def page(self):
        """Visit every profile linked from the result table and yield its e-mails.

        Each profile page shows the address as an image whose ``src`` carries
        the address base64-encoded in the ``email`` query parameter; that
        parameter is decoded and yielded.
        """
        # Collect the profile links first: every self.get() below is relied
        # on to replace self.soup with the profile page.
        profile_urls = set()
        for anchor in self.soup.select("td.nombre a"):
            href = anchor.attrs.get("href")
            if href and "/ficha/" in href:
                profile_urls.add(href)
        img_src = re.compile(r".*/g_Image\.php\?email.*")
        for profile_url in sorted(profile_urls):
            self.get(profile_url)
            for img in self.soup.find_all("img", src=img_src):
                values = parse_qs(urlparse(img.attrs["src"]).query).get('email')
                if not values:
                    # parse_qs drops blank parameters; nothing to decode.
                    continue
                # parse_qs turns a literal "+" into a space, but "+" is a
                # valid base64 character while a space never is: restore it.
                encoded = values[0].replace(" ", "+")
                yield base64.b64decode(encoded.encode('ascii')).decode('ascii')
# Run the three scrapers in order (engineers, building engineers, architects),
# merge their results without duplicates and print the combined sorted list.
mails = sorted(set().union(IG().mails(), AP().mails(), AR().mails()))
print("")
print("Todos")
print(*mails)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment