Created
August 12, 2012 03:15
-
-
Save dansku/3329398 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
import urllib2 | |
from bs4 import BeautifulSoup | |
import re | |
import time | |
#---[ Functions ]---#
def isTimeFormat(input):
    """Return True when *input* is a clock time written as ``HH:MM``.

    The value is validated with ``time.strptime(input, '%H:%M')``.

    Args:
        input: Candidate text, e.g. ``"05:30"``.

    Returns:
        True if the text parses as hour:minute, False otherwise.  Values
        that are not strings (for example ``None``) also yield False
        instead of propagating ``TypeError``.
    """
    try:
        time.strptime(input, '%H:%M')
    except (TypeError, ValueError):
        # ValueError: text is not HH:MM; TypeError: not a string at all.
        return False
    return True
#expressao regular pra remover a sujeira do campo da linha de onibus | |
linha_r = re.compile('[\t\n\r\-\|]+') | |
url = "http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibus" | |
f = urllib2.urlopen(url) | |
doc = BeautifulSoup(f.read()) | |
# print doc | |
EMPRESAS = [] | |
for e in doc.select('select[name="empresa"] > option'): | |
if e.has_key("title"): | |
EMPRESAS.append((e["title"].strip().encode("utf-8"), int(e["value"]))) | |
empresaId = 0 | |
horarioIdIda = 0 | |
horarioIdVolta = 0 | |
itinaratioId = 0 | |
for empresa,id in EMPRESAS: | |
print 'EMPRESA :' + empresa | |
empresaId = empresaId +1 | |
#Cria Empresa no banco | |
empresadb = { | |
'empresaNome': empresa, | |
'empresaId': empresaId | |
} | |
#Salvando no DB | |
print 'Empresa salva' | |
db.empresas.save(empresadb) | |
data = { | |
"empresa":id, | |
"opcao":1, | |
"passoGeral":1, | |
"passoEmpresa":1, | |
} | |
req = urllib2.Request(url, urllib.urlencode(data)) | |
doc = BeautifulSoup(urllib2.urlopen(req).read()) | |
for linha in doc.select('ul[class="listagem"] > li'): | |
linha_data = linha_r.split(linha.contents[0].strip().encode("utf-8")) | |
linha_numero = linha_data[0].strip() | |
linha_nome = linha_data[1].strip() | |
print '---> ONIBUS: ',linha_numero,"-",linha_nome.title() | |
#Passo geral 1 = IDA, 2 = VOLTA, 3 = ITINERARIO | |
aba = {'passoGeral' : '3'} | |
url2 = 'http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibuslinha&idLinha='+linha_numero | |
data = urllib.urlencode(aba) # Use urllib to encode the parameters | |
request = urllib2.Request(url2, data) | |
f = urllib2.urlopen(request) # This request is sent in HTTP POST | |
doc = BeautifulSoup(f.read()) | |
x = doc.find_all('ul','listagem')[0] | |
for k in x.find_all('li'): | |
print str(k)[16:65].title(), | |
# Passo geral 1 = IDA, 2 = VOLTA, 3 = ITINERARIO | |
aba = {'passoGeral' : '1'} | |
url2 = 'http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibuslinha&idLinha='+linha_numero | |
data = urllib.urlencode(aba) # Use urllib to encode the parameters | |
request = urllib2.Request(url2, data) | |
f = urllib2.urlopen(request) # This request is sent in HTTP POST | |
doc = BeautifulSoup(f.read()) | |
x = str(doc.find_all(valign="top")[1]) | |
y=0 | |
while True: | |
y = x.find('<br/>',y+1) | |
if y == -1: | |
break | |
else: | |
h = x[y-5:y] | |
if isTimeFormat(h) is True: | |
print h | |
#Passo geral 1 = IDA, 2 = VOLTA, 3 = ITINERARIO | |
aba = {'passoGeral' : '2'} | |
url2 = 'http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibuslinha&idLinha='+linha_numero | |
data = urllib.urlencode(aba) # Use urllib to encode the parameters | |
request = urllib2.Request(url2, data) | |
f = urllib2.urlopen(request) # This request is sent in HTTP POST | |
doc = BeautifulSoup(f.read()) | |
x = str(doc.find_all(valign="top")[1]) | |
print '------> Horario VOLTA:' | |
y=0 | |
while True: | |
y = x.find('<br/>',y+1) | |
if y == -1: | |
break | |
else: | |
h = x[y-5:y] | |
if isTimeFormat(h) is True: | |
print h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
import urllib2 | |
from bs4 import BeautifulSoup | |
import re | |
import time | |
#---[ Functions ]----------------------------------------
def isTimeFormat(input):
    """Return True when *input* is a clock time written as ``HH:MM``.

    The value is validated with ``time.strptime(input, '%H:%M')``.

    Args:
        input: Candidate text, e.g. ``"05:30"``.

    Returns:
        True if the text parses as hour:minute, False otherwise.  Values
        that are not strings (for example ``None``) also yield False
        instead of propagating ``TypeError``.
    """
    try:
        time.strptime(input, '%H:%M')
    except (TypeError, ValueError):
        # ValueError: text is not HH:MM; TypeError: not a string at all.
        return False
    return True
def busca_linhas(id, LINHAS):
    """Fetch every bus line of one company and record each one in LINHAS.

    POSTs the company id to the module-level ``url``, then, for each
    ``<li>`` of the ``ul.listagem`` list, splits the text with the
    module-level ``linha_r`` into line number and line name and downloads
    the itinerary and both schedules of that line.

    Args:
        id: Company id sent as the ``empresa`` form field.
        LINHAS: Dict updated in place; one entry per line, keyed by line name.

    Returns:
        The record stored for the last line processed, or None when the
        company lists no usable line.

    Note:
        The ``'empresa'`` field of each record is read from the module-level
        name ``empresa`` (set by the caller's loop), not from a parameter.
    """
    from contextlib import closing

    data = {
        "empresa": id,
        "opcao": 1,
        "passoGeral": 1,
        "passoEmpresa": 1,
    }
    req = urllib2.Request(url, urllib.urlencode(data))
    # Close the HTTP response as soon as the body has been read.
    with closing(urllib2.urlopen(req)) as resposta:
        doc = BeautifulSoup(resposta.read())

    # NOTE(review): indentation was lost in the source this was recovered
    # from.  The return is placed after the loop so that every line is
    # fetched; returning inside the loop would stop after the first line.
    ultima = None
    for linha in doc.select('ul[class="listagem"] > li'):
        linha_data = linha_r.split(linha.contents[0].strip().encode("utf-8"))
        if len(linha_data) < 2:
            # No "number <separator> name" pair: skip instead of IndexError.
            continue
        linha_numero = linha_data[0].strip()
        linha_nome = linha_data[1].strip()
        linha_url = (
            "http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibuslinha&idLinha="
            + linha_numero + "&menu=2"
        )
        itinerario = busca_dados_linha_itinerario(linha_url)
        horaida = busca_dados_linha_horaida(linha_url)
        horavolta = busca_dados_linha_horavolta(linha_url)
        ultima = {
            "nome": linha_nome,
            'empresa': empresa,
            'numero': linha_numero,
            "itinerario": itinerario,
            "horarios_ida": horaida,
            "horarios_volta": horavolta,
        }
        LINHAS[linha_nome] = ultima
    return ultima
# passoGeral selects the tab: 3 = itinerary, 2 = return trip, 1 = outbound trip
def busca_dados_linha_itinerario(url):
    """Return the itinerary of a bus line as a list of UTF-8 byte strings.

    POSTs ``passoGeral=3`` to *url* and reads the first child of every
    ``<li>`` under ``ul.listagem``.

    Args:
        url: Address of the line page.

    Returns:
        One entry per list item, in page order; empty when the page has no
        such list.
    """
    from contextlib import closing

    req = urllib2.Request(url, urllib.urlencode({"passoGeral": 3}))
    # Close the HTTP response as soon as the body has been read.
    with closing(urllib2.urlopen(req)) as resposta:
        doc = BeautifulSoup(resposta.read())

    # [3:] drops the first three bytes of each encoded entry -- presumably a
    # fixed prefix such as a bullet or numbering; confirm against the page.
    return [
        caminho.contents[0].strip().encode('utf-8')[3:]
        for caminho in doc.select('ul[class="listagem"] > li')
    ]
def _busca_horarios(url, passo_geral):
    """Fetch one schedule tab of a bus line and group its times by day type.

    Args:
        url: Address of the line page.
        passo_geral: Value POSTed as ``passoGeral`` (1 = outbound, 2 = return).

    Returns:
        Dict with the keys ``"dias_uteis"``, ``"sabado"`` and ``"domingo"``,
        filled from the 2nd, 3rd and 4th ``td[valign="top"]`` cell of the
        page respectively.  A cell that is missing leaves its list empty.
    """
    from contextlib import closing

    req = urllib2.Request(url, urllib.urlencode({"passoGeral": passo_geral}))
    # Close the HTTP response as soon as the body has been read.
    with closing(urllib2.urlopen(req)) as resposta:
        doc = BeautifulSoup(resposta.read())

    # Run the selector once instead of once per column.
    celulas = doc.select('td[valign="top"]')
    horarios = {"dias_uteis": [], "sabado": [], "domingo": []}
    for indice, chave in ((1, "dias_uteis"), (2, "sabado"), (3, "domingo")):
        if indice >= len(celulas):
            continue
        # Every other child is taken -- presumably text nodes alternating
        # with <br/> tags; confirm against the page markup.
        for no in celulas[indice].contents[::2]:
            if not isinstance(no, basestring):
                # A tag in a text position has no usable strip(); skip it.
                continue
            texto = no.strip()
            # Only the first four characters are validated, as before.
            if isTimeFormat(texto[:4]):
                horarios[chave].append(texto)
    return horarios


def busca_dados_linha_horaida(url):
    """Return the outbound (ida) schedule of the line at *url*.

    See ``_busca_horarios`` for the shape of the returned dict.
    """
    return _busca_horarios(url, 1)


def busca_dados_linha_horavolta(url):
    """Return the return-trip (volta) schedule of the line at *url*.

    See ``_busca_horarios`` for the shape of the returned dict.
    """
    return _busca_horarios(url, 2)
#------------[ Program ]------------
from contextlib import closing

# Regular expression that strips the junk (tabs, line breaks, '-' and '|')
# from the bus-line field.
linha_r = re.compile(r'[\t\n\r\-\|]+')

# Address of the city hall site.
url = "http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibus"

# Download and parse the page, closing the HTTP response once it is read.
with closing(urllib2.urlopen(url)) as f:
    doc = BeautifulSoup(f.read())

# Read the companies: every <option> of the "empresa" <select> that carries
# a title.  The keys are (name, option value) tuples.
EMPRESAS = {}
for e in doc.select('select[name="empresa"] > option'):
    if e.has_attr("title"):
        nome = (e["title"].strip().encode("utf-8"), int(e["value"]))
        EMPRESAS[nome] = {}

# Load every line and its details.  The loop variable must stay named
# `empresa`: busca_linhas reads it as a module-level name.
LINHAS = {}
for empresa, empresa_id in EMPRESAS:
    print(busca_linhas(empresa_id, LINHAS))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib | |
import urllib2 | |
from bs4 import BeautifulSoup | |
import re | |
import time | |
from contextlib import closing

# Regular expression that strips the junk (tabs, line breaks, '-' and '|')
# from the bus-line field.
linha_r = re.compile(r'[\t\n\r\-\|]+')
url = "http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibus"

# Download and parse the page, closing the HTTP response once it is read.
with closing(urllib2.urlopen(url)) as f:
    doc = BeautifulSoup(f.read())

# Every <option> of the "empresa" <select> that carries a title is a
# company.  The keys are (name, option value) tuples.
EMPRESAS = {}
for e in doc.select('select[name="empresa"] > option'):
    if e.has_attr("title"):
        nome = (e["title"].strip().encode("utf-8"), int(e["value"]))
        EMPRESAS[nome] = {}
def isTimeFormat(input):
    """Return True when *input* is a clock time written as ``HH:MM``.

    The value is validated with ``time.strptime(input, '%H:%M')``.

    Args:
        input: Candidate text, e.g. ``"05:30"``.

    Returns:
        True if the text parses as hour:minute, False otherwise.  Values
        that are not strings (for example ``None``) also yield False
        instead of propagating ``TypeError``.
    """
    try:
        time.strptime(input, '%H:%M')
    except (TypeError, ValueError):
        # ValueError: text is not HH:MM; TypeError: not a string at all.
        return False
    return True
def busca_linhas(id, LINHAS):
    """Fetch every bus line of one company, record it in LINHAS and print it.

    POSTs the company id to the module-level ``url``, then, for each
    ``<li>`` of the ``ul.listagem`` list, splits the text with the
    module-level ``linha_r`` into line number and line name and downloads
    the itinerary and both schedules of that line.

    Args:
        id: Company id sent as the ``empresa`` form field.
        LINHAS: Dict updated in place; one entry per line, keyed by line name.

    Note:
        The ``'empresa'`` field of each record is read from the module-level
        name ``empresa`` (set by the caller's loop), not from a parameter.
    """
    from contextlib import closing

    data = {
        "empresa": id,
        "opcao": 1,
        "passoGeral": 1,
        "passoEmpresa": 1,
    }
    req = urllib2.Request(url, urllib.urlencode(data))
    # Close the HTTP response as soon as the body has been read.
    with closing(urllib2.urlopen(req)) as resposta:
        doc = BeautifulSoup(resposta.read())

    for linha in doc.select('ul[class="listagem"] > li'):
        linha_data = linha_r.split(linha.contents[0].strip().encode("utf-8"))
        if len(linha_data) < 2:
            # No "number <separator> name" pair: skip instead of IndexError.
            continue
        linha_numero = linha_data[0].strip()
        linha_nome = linha_data[1].strip()
        linha_url = (
            "http://www.pmf.sc.gov.br/servicos/index.php?pagina=onibuslinha&idLinha="
            + linha_numero + "&menu=2"
        )
        itinerario = busca_dados_linha_itinerario(linha_url)
        horaida = busca_dados_linha_horaida(linha_url)
        horavolta = busca_dados_linha_horavolta(linha_url)
        LINHAS[linha_nome] = {
            "nome": linha_nome,
            'empresa': empresa,
            'numero': linha_numero,
            "itinerario": itinerario,
            "horarios_ida": horaida,
            "horarios_volta": horavolta,
        }
        # Progress output: a separator followed by the record just stored.
        print('---------------------------------//-------//---->')
        print(LINHAS[linha_nome])
# passoGeral selects the tab: 3 = itinerary, 2 = return trip, 1 = outbound trip
def busca_dados_linha_itinerario(url):
    """Return the itinerary of a bus line as a list of UTF-8 byte strings.

    POSTs ``passoGeral=3`` to *url* and reads the first child of every
    ``<li>`` under ``ul.listagem``.

    Args:
        url: Address of the line page.

    Returns:
        One entry per list item, in page order; empty when the page has no
        such list.
    """
    from contextlib import closing

    req = urllib2.Request(url, urllib.urlencode({"passoGeral": 3}))
    # Close the HTTP response as soon as the body has been read.
    with closing(urllib2.urlopen(req)) as resposta:
        doc = BeautifulSoup(resposta.read())

    # [3:] drops the first three bytes of each encoded entry -- presumably a
    # fixed prefix such as a bullet or numbering; confirm against the page.
    return [
        caminho.contents[0].strip().encode('utf-8')[3:]
        for caminho in doc.select('ul[class="listagem"] > li')
    ]
def _busca_horarios(url, passo_geral):
    """Fetch one schedule tab of a bus line and group its times by day type.

    Args:
        url: Address of the line page.
        passo_geral: Value POSTed as ``passoGeral`` (1 = outbound, 2 = return).

    Returns:
        Dict with the keys ``"dias_uteis"``, ``"sabado"`` and ``"domingo"``,
        filled from the 2nd, 3rd and 4th ``td[valign="top"]`` cell of the
        page respectively.  A cell that is missing leaves its list empty.
    """
    from contextlib import closing

    req = urllib2.Request(url, urllib.urlencode({"passoGeral": passo_geral}))
    # Close the HTTP response as soon as the body has been read.
    with closing(urllib2.urlopen(req)) as resposta:
        doc = BeautifulSoup(resposta.read())

    # Run the selector once instead of once per column.
    celulas = doc.select('td[valign="top"]')
    horarios = {"dias_uteis": [], "sabado": [], "domingo": []}
    for indice, chave in ((1, "dias_uteis"), (2, "sabado"), (3, "domingo")):
        if indice >= len(celulas):
            continue
        # Every other child is taken -- presumably text nodes alternating
        # with <br/> tags; confirm against the page markup.
        for no in celulas[indice].contents[::2]:
            if not isinstance(no, basestring):
                # A tag in a text position has no usable strip(); skip it.
                continue
            texto = no.strip()
            # Only the first four characters are validated, as before.
            if isTimeFormat(texto[:4]):
                horarios[chave].append(texto)
    return horarios


def busca_dados_linha_horaida(url):
    """Return the outbound (ida) schedule of the line at *url*.

    See ``_busca_horarios`` for the shape of the returned dict.
    """
    return _busca_horarios(url, 1)


def busca_dados_linha_horavolta(url):
    """Return the return-trip (volta) schedule of the line at *url*.

    See ``_busca_horarios`` for the shape of the returned dict.
    """
    return _busca_horarios(url, 2)
# Collect the lines of every company into LINHAS, then dump both tables.
# The loop variable must stay named `empresa`: busca_linhas reads it as a
# module-level name.  The company id gets its own name so the builtin `id`
# is no longer shadowed.
LINHAS = {}
for empresa, empresa_id in EMPRESAS:
    busca_linhas(empresa_id, LINHAS)

print(EMPRESAS)
print(LINHAS)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment