Skip to content

Instantly share code, notes, and snippets.

@rfahham
Last active August 4, 2018 22:37
Show Gist options
  • Save rfahham/2871c6720e4152139dc667b3da1da4cb to your computer and use it in GitHub Desktop.
Save rfahham/2871c6720e4152139dc667b3da1da4cb to your computer and use it in GitHub Desktop.
Validação de URLs
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# dependências
import requests
import urllib3
import certifi
# URL validation script.
#
# Reads one URL per line from 'lista_get_com_host.csv', issues a GET request
# for each, and copies the input line to '200_ok.csv' or '404_not_found.csv'
# according to the response status code. Responses with any other status
# code are printed (the URL only) but not recorded. A summary with both
# counters is printed at the end.
#
# All three paths are relative to the current working directory.

# Seconds to wait for a server before giving up. Without a timeout a single
# unresponsive host would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

cont_200 = 0
cont_404 = 0

# Certificate verification is disabled on every request below (verify=False),
# so silence the warning urllib3 would otherwise print for each call.
urllib3.disable_warnings()

# The with-statement guarantees the three files are flushed and closed even
# if an unexpected exception interrupts the loop.
with open('lista_get_com_host.csv', 'r') as arq, \
        open('200_ok.csv', 'w') as arq_200, \
        open('404_not_found.csv', 'w') as arq_404:
    for linha in arq:
        url = linha.strip()
        if not url:
            # Blank line: there is nothing to request, so skip it instead of
            # handing an empty URL to requests.
            continue
        print(url)
        try:
            # NOTE(security): verify=False turns off TLS certificate
            # validation. Kept because the script does it on purpose, but
            # the responses cannot be trusted to come from the real host.
            r = requests.get(url, verify=False, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as erro:
            # One bad or unreachable URL must not abort the whole batch;
            # report it and move on to the next line.
            print('Erro ao acessar a URL: ' + repr(erro))
            continue
        if r.status_code == 200:
            print('Status Code 200 ')
            arq_200.write(linha)
            cont_200 += 1
        elif r.status_code == 404:
            print('Status Code 404 ')
            arq_404.write(linha)
            cont_404 += 1

# Summary. Single-argument print(...) calls behave the same on Python 2 and
# Python 3; print('') emits the blank separator lines.
print('')
print('---------------------------')
print('')
print('Páginas com Sucesso: ')
print(cont_200)
print('')
print('Páginas não encontradas: ')
print(cont_404)
print('')
print('---------------------------')
print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment