Created
January 23, 2015 17:49
-
-
Save danilovazb/f319f4f634e6d1551e48 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import threading | |
import time | |
import urllib2,sys,json,requests,re | |
import pycurl | |
from bs4 import BeautifulSoup | |
from itertools import islice | |
# Maximum number of simultaneous connections/threads
MAX_CONEXOES = 50
# Lock that lets the threads print one message at a time
print_lock = threading.Lock()
# Path of the input file, taken from the first command-line argument.
# Exit with a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("Uso: %s <arquivo>" % sys.argv[0])
arquivoArgv = sys.argv[1]
def mostrar_msg(msg):
    """Print ``msg`` while holding ``print_lock``.

    The ``with`` statement releases the lock even when printing raises
    (for example an encoding error on the console), so one failing thread
    cannot leave the lock held and block every other thread's output.

    Args:
        msg: the object to print.
    """
    with print_lock:
        # Single-argument call form: behaves the same as the ``print msg``
        # statement on Python 2 and is also valid on Python 3.
        print(msg)
# Worker function executed by each thread
def consultar_cpf(cpf, timeout=30):
    """POST one code to the Correios ``QueryList`` endpoint and print the result.

    The raw response body is printed first, followed by a dict whose
    ``'dados'`` entry holds the text of every selected table cell, each
    one followed by ``'|'``. All output goes through ``mostrar_msg`` so
    that lines coming from different threads do not interleave.

    Args:
        cpf: the code to look up; surrounding whitespace is stripped and
            the value is sent as the ``P_COD_UNI`` form field.
        timeout: seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the thread forever.

    Returns:
        None. A network failure is reported through ``mostrar_msg``
        instead of being raised.
    """
    cpfConsulta = cpf.strip()
    url = "http://websro.correios.com.br/sro_bin/txect01$.QueryList"
    refer = "http://websro.correios.com.br/sro_bin/txect01$.startup?P_LINGUA=001&P_TIPO=001"
    cookie = {'ASPSESSIONIDSCTADABA': 'asdasdasdasdasd'}
    payload = {'P_ITEMCODE': '', 'P_LINGUA': '001', 'P_TESTE': '', 'P_TIPO': '001', 'P_COD_UNI': cpfConsulta, 'Z_ACTION': 'Search'}
    headers = {'Referer': refer}
    try:
        response = requests.post(url, cookies=cookie, data=payload,
                                 headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as erro:
        # Say which code failed instead of letting the thread die with a
        # traceback that does not identify it.
        mostrar_msg("Erro ao consultar %s: %s" % (cpfConsulta, erro))
        return
    data = response.text
    mostrar_msg(data)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(data, "html.parser")
    # NOTE(review): "td.rowspan" matches <td class="rowspan">; if the intent
    # was cells carrying a rowspan attribute the selector would be
    # "td[rowspan]" - confirm against the real page markup.
    celulas = soup.select("tr td.rowspan")
    contribuinte = {
        'dados': ''.join(
            celula.get_text().encode('utf8').strip() + '|'
            for celula in celulas
        ),
    }
    mostrar_msg(contribuinte)
# Main thread: start one worker thread per non-empty line of the input file.
# A semaphore counts only the workers started here. threading.active_count()
# also counts the main thread and any unrelated threads, and polling it with
# a sleep delays every new worker by up to one second.
vagas = threading.BoundedSemaphore(MAX_CONEXOES)


def _consultar_e_liberar(cpf):
    """Run ``consultar_cpf`` and free the worker slot, even if it raises."""
    try:
        consultar_cpf(cpf)
    finally:
        vagas.release()


lista_threads = []
with open(arquivoArgv, 'rb') as arquivo:
    for linha in arquivo:
        cpf = linha.strip()
        if not cpf:
            # A blank line would otherwise send a request with an empty code
            continue
        # Blocks until one of the MAX_CONEXOES slots is free
        vagas.acquire()
        thread = threading.Thread(target=_consultar_e_liberar, args=(cpf,))
        lista_threads.append(thread)
        thread.start()
# Wait for the threads that are still running to finish
mostrar_msg("Esperando threads abertas terminarem...")
for thread in lista_threads:
    thread.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment