Skip to content

Instantly share code, notes, and snippets.

@Visgean
Created August 9, 2010 21:01
Show Gist options
  • Select an option

  • Save Visgean/516108 to your computer and use it in GitHub Desktop.

Select an option

Save Visgean/516108 to your computer and use it in GitHub Desktop.
#! /usr/bin/python
# -*- coding: UTF-8 -*-
# Poslanci by Visgean Skeloru
# Tento script slouží k zjištění mail adres všech poslanců české republiky.
import urllib
import re
# Crawl of psp.cz in three levels: the start page is scanned for region
# ("kraj") links, every region page for person ("osoba") detail links, and
# every detail page for a mailto address.  The collected addresses are then
# written, comma separated, to a file chosen by the user.
baseUrl = "http://www.psp.cz/sqw/organy2.sqw?kr=1"
snemovna = "http://www.psp.cz/sqw/"  # root link to the Chamber of Deputies

# Raw strings keep the regex escapes literal.  The quantifiers are non-greedy
# (and the e-mail group is limited to the href attribute) so that several
# links on one HTML line are matched separately instead of being swallowed
# into a single over-long match.
krajPattern = r'<A HREF=(snem\.sqw\?id=\w*)>(.*?)</A>'
osobaPattern = r'<a href="(detail\.sqw\?id=\w*)">.*?</a>'
emailPattern = r'<a href="mailto:([^"@]+@[^"]+)">'


def _fetch(url):
    """Return the body of *url*, closing the connection afterwards."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Python 2's urlopen result is not a context manager, so close it
        # explicitly; the crawl opens one connection per page.
        response.close()


adresy = []
dataCelek = _fetch(baseUrl)
for kraj in re.findall(krajPattern, dataCelek):
    # Each match is a (relative link, link text) tuple; only the link is used.
    dataKraj = _fetch(snemovna + kraj[0])
    for osoba in re.findall(osobaPattern, dataKraj):
        dataOsoba = _fetch(snemovna + osoba)
        emailMatch = re.search(emailPattern, dataOsoba)
        if emailMatch is None:
            # A detail page without a mailto link used to raise IndexError
            # and lose every address collected so far; skip it instead.
            continue
        adresa = emailMatch.group(1)
        adresy.append(adresa)
        print(adresa)

data = ", ".join(adresy)
path = raw_input(u"Soubor: ")
if not path:
    # Default output name, spelling included, is kept as in the original.
    path = "adresses"
with open(path, "w") as outFile:
    outFile.write(data)
print(u"Úspěšně zapsáno. ")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment