rishipr · May 11, 2020 21:00
diff --git a/download_images.py b/download_images.py
 import os
 from PIL import Image
 import requests
 import shutil
 import urllib.request
 from scraper import BREEDS


 # Number of images saved so far for the breed currently being processed;
 # main() resets it for each breed and download_image() increments it.
 num_downloaded = 0


 def main(max_per_breed=300):
    """Download up to ``max_per_breed`` images for every breed in BREEDS.

    For each breed, URLs are read one per line from
    ``data/<breed with spaces replaced by underscores>.txt`` and handed to
    ``download_image`` until the module-level ``num_downloaded`` counter
    reaches the limit. Lines that do not end in ``.jpg`` are skipped, and a
    URL whose download raises is skipped as well.

    Args:
        max_per_breed: Maximum number of images to save per breed.
    """
    global num_downloaded

    for b in BREEDS:
        num_downloaded = 0

        print(f"Downloading images for {b} breed...")

        file_path = f"data/{b.replace(' ', '_')}.txt"
        # Output file names use the breed name with spaces removed; compute
        # it once per breed instead of once per line.
        breed_tag = b.replace(' ', '')

        # The context manager closes the URL list even when a download is
        # interrupted part-way through.
        with open(file_path, 'r') as in_file:
            for line in in_file:
                if num_downloaded >= max_per_breed:
                    break

                url = line.strip()

                # Skip if not an image file. Only the extension test is
                # case-insensitive; the URL is passed on unchanged because
                # URL paths are case-sensitive.
                if not url.lower().endswith('.jpg'):
                    continue

                try:
                    download_image(breed_tag, url)
                except Exception:
                    # Best effort: one bad URL must not stop the batch, but
                    # KeyboardInterrupt and SystemExit still propagate.
                    continue

        print("\n")


 def download_image(breed, url):
    """Fetch ``url`` and save it as ``images/<breed>-<num_downloaded>.jpg``.

    The response body is written to disk and then checked with
    ``check_validity``. A file that fails the check is deleted and the
    function returns without counting it; otherwise the module-level
    ``num_downloaded`` counter is incremented.

    Args:
        breed: Breed label used as the output file-name prefix.
        url: Address of the image to download.

    Raises:
        Exception: With the message ``'Bad url...'`` when the request fails
            or the server does not answer with status 200.
    """
    global num_downloaded

    # Check url
    try:
        response = requests.get(url, timeout=1)
        response.raise_for_status()
    except requests.RequestException as err:
        # Timeouts, connection errors and HTTP error statuses are all
        # reported the same way, so the code below never runs without a
        # response object.
        raise Exception('Bad url...') from err

    if response.status_code != 200:
        raise Exception('Bad url...')

    # Save image. The body fetched above is reused, so the image is not
    # requested a second time over a connection that has no timeout.
    output_name = f"images/{breed}-{num_downloaded}.jpg"
    with open(output_name, 'wb') as out_file:
        out_file.write(response.content)

    # Check to see if image is valid
    if not check_validity(output_name):
        # If corrupted image, delete image and exit function
        try:
            os.remove(output_name)
        except FileNotFoundError:
            pass
        return

    print(f"Saved {output_name}")
    num_downloaded += 1


 def check_validity(img_name):
    """Report whether the file at ``img_name`` opens and verifies as an image.

    Args:
        img_name: Path of the image file to check.

    Returns:
        True when ``Image.open`` and ``verify`` both succeed; False when
        either raises IOError or SyntaxError.
    """
    try:
        # The context manager closes the file handle that Image.open keeps
        # open, so the caller can delete a rejected file right away.
        with Image.open(img_name) as img:
            img.verify()
    except (IOError, SyntaxError):
        return False

    return True


 if __name__ == '__main__':
    # Start every run from an empty output directory: drop any previous
    # download folder, then create it afresh before downloading.
    if os.path.exists("images/"):
        shutil.rmtree('./images', ignore_errors=True)
    os.mkdir("./images")

    main()
	import os
	from PIL import Image
	import requests
	import shutil
	import urllib.request
	from scraper import BREEDS


	# Number of images saved so far for the breed currently being processed;
	# main() resets it for each breed and download_image() increments it.
	num_downloaded = 0


	def main(max_per_breed=300):
	    """Download up to ``max_per_breed`` images for every breed in BREEDS.

	    For each breed, URLs are read one per line from
	    ``data/<breed with spaces replaced by underscores>.txt`` and handed to
	    ``download_image`` until the module-level ``num_downloaded`` counter
	    reaches the limit. Lines that do not end in ``.jpg`` are skipped, and a
	    URL whose download raises is skipped as well.

	    Args:
	        max_per_breed: Maximum number of images to save per breed.
	    """
	    global num_downloaded

	    for b in BREEDS:
	        num_downloaded = 0

	        print(f"Downloading images for {b} breed...")

	        file_path = f"data/{b.replace(' ', '_')}.txt"
	        # Output file names use the breed name with spaces removed; compute
	        # it once per breed instead of once per line.
	        breed_tag = b.replace(' ', '')

	        # The context manager closes the URL list even when a download is
	        # interrupted part-way through.
	        with open(file_path, 'r') as in_file:
	            for line in in_file:
	                if num_downloaded >= max_per_breed:
	                    break

	                url = line.strip()

	                # Skip if not an image file. Only the extension test is
	                # case-insensitive; the URL is passed on unchanged because
	                # URL paths are case-sensitive.
	                if not url.lower().endswith('.jpg'):
	                    continue

	                try:
	                    download_image(breed_tag, url)
	                except Exception:
	                    # Best effort: one bad URL must not stop the batch, but
	                    # KeyboardInterrupt and SystemExit still propagate.
	                    continue

	        print("\n")


	def download_image(breed, url):
	    """Fetch ``url`` and save it as ``images/<breed>-<num_downloaded>.jpg``.

	    The response body is written to disk and then checked with
	    ``check_validity``. A file that fails the check is deleted and the
	    function returns without counting it; otherwise the module-level
	    ``num_downloaded`` counter is incremented.

	    Args:
	        breed: Breed label used as the output file-name prefix.
	        url: Address of the image to download.

	    Raises:
	        Exception: With the message ``'Bad url...'`` when the request fails
	            or the server does not answer with status 200.
	    """
	    global num_downloaded

	    # Check url
	    try:
	        response = requests.get(url, timeout=1)
	        response.raise_for_status()
	    except requests.RequestException as err:
	        # Timeouts, connection errors and HTTP error statuses are all
	        # reported the same way, so the code below never runs without a
	        # response object.
	        raise Exception('Bad url...') from err

	    if response.status_code != 200:
	        raise Exception('Bad url...')

	    # Save image. The body fetched above is reused, so the image is not
	    # requested a second time over a connection that has no timeout.
	    output_name = f"images/{breed}-{num_downloaded}.jpg"
	    with open(output_name, 'wb') as out_file:
	        out_file.write(response.content)

	    # Check to see if image is valid
	    if not check_validity(output_name):
	        # If corrupted image, delete image and exit function
	        try:
	            os.remove(output_name)
	        except FileNotFoundError:
	            pass
	        return

	    print(f"Saved {output_name}")
	    num_downloaded += 1


	def check_validity(img_name):
	    """Report whether the file at ``img_name`` opens and verifies as an image.

	    Args:
	        img_name: Path of the image file to check.

	    Returns:
	        True when ``Image.open`` and ``verify`` both succeed; False when
	        either raises IOError or SyntaxError.
	    """
	    try:
	        # The context manager closes the file handle that Image.open keeps
	        # open, so the caller can delete a rejected file right away.
	        with Image.open(img_name) as img:
	            img.verify()
	    except (IOError, SyntaxError):
	        return False

	    return True


	if __name__ == '__main__':
	    # Start every run from an empty output directory: drop any previous
	    # download folder, then create it afresh before downloading.
	    if os.path.exists("images/"):
	        shutil.rmtree('./images', ignore_errors=True)
	    os.mkdir("./images")

	    main()