Last active
January 22, 2020 06:59
-
-
Save premrajnarkhede/e6ea38dcdb0381c7979f0d6c42d62c16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def getHTML(url, timeout=30):
    """Fetch *url* and return its body together with the final URL.

    The outcome of the request (URL, HTTP status code, error) is passed to
    ``add_to_page_status_table`` whether or not the request succeeded.
    NOTE(review): ``add_to_page_status_table`` is defined outside this
    block; it is assumed to accept a dict with the keys ``url``, ``code``
    and ``error`` -- confirm against its definition.

    Args:
        url: Address to request.
        timeout: Value forwarded to ``requests.get(timeout=...)``. Without a
            timeout the call can block indefinitely and the ``Timeout``
            handler below would never fire. Pass ``None`` to disable it.

    Returns:
        A ``(html, final_url)`` tuple. ``html`` is ``response.content`` when
        the request finished without error and with status 200, otherwise
        ``None``. ``final_url`` is ``response.url`` on success, otherwise
        the ``url`` argument unchanged.
    """
    error = ""
    # Pre-bind so the bookkeeping below still works when requests.get()
    # itself raises (connection failure, timeout) and never returns.
    response = None
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        error = errh
        print("Http Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        error = errc
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        error = errt
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        error = err
        print("Something Else", err)

    # Compare against None explicitly: a Response object is falsy for
    # 4xx/5xx statuses, so a plain truthiness test would discard the code.
    status_code = response.status_code if response is not None else None
    add_to_page_status_table(dict(url=url, code=status_code, error=error))

    html = None
    final_url = url
    if error == "" and status_code == 200:
        html = response.content
        # response.url may differ from the requested url (e.g. redirects).
        final_url = response.url
    return html, final_url
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment