Created
November 17, 2017 14:59
-
-
Save aflansburg/4fcf35c56f5b158fc96345be48efcd1c to your computer and use it in GitHub Desktop.
Validating image links in Python with urllib
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
import urllib.error | |
# This function checks the Content-Type header returned for each image URL
def check_imagelinks(imglist, allowed_types=("image/jpeg",)):
    """Remove bad or broken image links from ``imglist`` in place.

    Each entry is opened with ``urllib.request.urlopen``. A link is kept
    only when the request succeeds and the media type reported in the
    response's ``Content-Type`` header is one of ``allowed_types``.
    Every removed link is reported on stdout.

    Args:
        imglist: Mutable list of image URLs. It is modified in place so
            that callers holding a reference see the filtered result.
        allowed_types: Media type(s) accepted as a valid image. A single
            string or an iterable of strings; compared case-insensitively.
            Defaults to JPEG only.

    Returns:
        The same ``imglist`` object, containing only the valid links.
    """
    if isinstance(allowed_types, str):
        # A bare string would otherwise be iterated character by character.
        allowed_types = (allowed_types,)
    accepted = {media_type.lower() for media_type in allowed_types}

    # Collect the survivors in a separate list: removing entries from
    # ``imglist`` while iterating over it skips the element that follows
    # every removal.
    valid = []
    for img in imglist:
        try:
            with urllib.request.urlopen(img) as response:
                # ``headers`` is an email.message.Message on every response
                # type urlopen returns; get_content_type() lower-cases the
                # value and strips parameters such as "; charset=...".
                content_type = response.headers.get_content_type()
        except (OSError, ValueError):
            # OSError covers HTTPError (e.g. 403/404), URLError (DNS
            # failure, refused connection, missing file) and socket
            # timeouts; ValueError is raised for malformed URLs.
            content_type = None

        if content_type in accepted:
            valid.append(img)
        else:
            print(f'*** Bad/broken image link found, removing url:\n{img}')

    # Slice assignment keeps the caller's list object and replaces its contents.
    imglist[:] = valid
    return imglist
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment