rawnly · October 6, 2018 10:58
diff --git a/download_collections.py b/download_collections.py
 import urllib2
 import requests
 import os 
 import json
 import sys

 # Local Lib
 from utils import progress, dirExists, unsplashURL, getCollectionPhotos

 # Container: one dict (id, url, collection) per photo queued for download
 photos = []

 # Counters
 count = 0  # photos listed by the API across every collection
 i = 0  # photos downloaded so far

 # Collections
 collections = ["827807"]
 collection_folder = True

 # Select picture quality
 q = ["raw", "full", "regular", "small", "thumb"]
 quality = q[0]


 if not dirExists("photos"):
    os.mkdir("photos")

 if not collections:
    print("No collections available")
    # sys.exit raises SystemExit, so stdio buffers are flushed on the way
    # out; os._exit would end the process without any of that.
    sys.exit(1)

 # Phase 1: walk every page of every collection and queue the photo URLs.
 for collection in collections:
    current_page = 1
    collection_counter = 0
    # IDs of one to three characters are routed to the curated endpoint.
    is_curated = 0 < len(collection) <= 3

    if is_curated:
        collection_url = unsplashURL("collections/curated/" + collection)
    else:
        collection_url = unsplashURL("collections/" + collection)

    res = requests.get(collection_url)
    # Stop on an HTTP error here rather than on a missing key further down.
    res.raise_for_status()
    col = res.json()

    total_photos = col["total_photos"]

    if collection_folder and not dirExists("photos/" + col["title"]):
        os.mkdir("photos/" + col["title"])

    while total_photos > collection_counter:
        response = requests.get( getCollectionPhotos(is_curated, collection, current_page) )
        response.raise_for_status()
        print("Got: ", collection_counter, " items")

        data = response.json()

        if not data:
            # An empty page adds nothing to collection_counter, so without
            # this guard the loop would keep requesting pages forever.
            break

        count += len(data)
        collection_counter += len(data)
        current_page += 1

        for photo in data:
            photo_id = photo["id"]
            # Honour the quality selected above instead of always using "raw".
            download_url = photo["urls"][quality]

            photos.append({
                "id": photo_id,
                "url": download_url,
                "collection": {
                    "id": collection,
                    "title": col["title"]
                }
            })


 # Phase 2: download every queued photo and report progress.
 print("Preparing to download", len(photos), "photos")
 for photo in photos:
    r = requests.get(photo["url"], allow_redirects=True)
    i += 1

    if collection_folder:
        path = "photos/" + photo["collection"]["title"] + "/" + photo["id"] + ".jpg"
    else:
        path = "photos/" + photo["id"] + ".jpg"

    # The context manager closes the file even when the write fails.
    with open(path, "wb") as image_file:
        image_file.write(r.content)

    progress(i, count, str(i) + " of " + str(count))
diff --git a/utils.py b/utils.py
 import sys
 import os
 import threading
 import requests
 from Queue import Queue

 def progress(count, total, status=''):
    """Draw a one-line text progress bar on stdout.

    The line ends with a carriage return instead of a newline, so the next
    call overwrites it in place.

    Args:
        count: Number of items completed so far.
        total: Number of items expected overall.
        status: Text appended after the percentage.

    A non-positive ``total`` draws an empty bar at 0.0% instead of raising
    ZeroDivisionError, and ``count`` is clamped to ``0..total`` so the bar
    never grows past its fixed width.
    """
    bar_len = 60

    if total > 0:
        # Clamp the count itself so in-range calls keep the exact arithmetic.
        count = min(max(count, 0), total)
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        filled_len = 0
        percents = 0.0

    bar = '=' * filled_len + '-' * (bar_len - filled_len)

    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()  # As suggested by Rom Ruben (see: http://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113#comment50529068_27871113)


 def dirExists(path):
    """Report whether `path` names an existing directory."""
    is_directory = os.path.isdir(path)
    return is_directory

 def unsplashURL(pathname, query=False):
    """Build an api.unsplash.com URL for `pathname`.

    The client id and client secret are always added as query parameters;
    when `query` is truthy it is appended after them, joined with "&".
    """
    # NOTE(review): both credentials are hard-coded here and the secret is
    # sent in the URL - confirm that this is intended.
    c_id = "4c29b3461e0cbb98c8f8cbb00fd13cffcfd7746f209b57097d1ffefa06fd352a"
    c_sec = "2b269ee625b6b60f35b1b7a01123bfcd7cc9201e2d8f5b568ced854a048fc569"

    endpoint = "https://api.unsplash.com/" + pathname
    url = endpoint + "?client_id=" + c_id + "&client_secret=" + c_sec

    if not query:
        return url

    return url + "&" + query

  
 def getCollectionPhotos (curated, collection, page = 1):
    """Return the URL for one page of a collection's photo listing.

    A truthy `curated` selects the "collections/curated/" path prefix,
    otherwise "collections/" is used; `page` is appended as "&page=<page>".
    """
    prefix = "collections/curated/" if curated else "collections/"
    listing_url = unsplashURL(prefix + collection + "/photos")
    return listing_url + "&page=" + str(page)
	import urllib2
	import requests
	import os
	import json
	import sys

	# Local Lib
	from utils import progress, dirExists, unsplashURL, getCollectionPhotos

	# Container: one dict (id, url, collection) per photo queued for download
	photos = []

	# Counters
	count = 0  # photos listed by the API across every collection
	i = 0  # photos downloaded so far

	# Collections
	collections = ["827807"]
	collection_folder = True

	# Select picture quality
	q = ["raw", "full", "regular", "small", "thumb"]
	quality = q[0]


	if not dirExists("photos"):
	    os.mkdir("photos")

	if not collections:
	    print("No collections available")
	    # sys.exit raises SystemExit, so stdio buffers are flushed on the way
	    # out; os._exit would end the process without any of that.
	    sys.exit(1)

	# Phase 1: walk every page of every collection and queue the photo URLs.
	for collection in collections:
	    current_page = 1
	    collection_counter = 0
	    # IDs of one to three characters are routed to the curated endpoint.
	    is_curated = 0 < len(collection) <= 3

	    if is_curated:
	        collection_url = unsplashURL("collections/curated/" + collection)
	    else:
	        collection_url = unsplashURL("collections/" + collection)

	    res = requests.get(collection_url)
	    # Stop on an HTTP error here rather than on a missing key further down.
	    res.raise_for_status()
	    col = res.json()

	    total_photos = col["total_photos"]

	    if collection_folder and not dirExists("photos/" + col["title"]):
	        os.mkdir("photos/" + col["title"])

	    while total_photos > collection_counter:
	        response = requests.get( getCollectionPhotos(is_curated, collection, current_page) )
	        response.raise_for_status()
	        print("Got: ", collection_counter, " items")

	        data = response.json()

	        if not data:
	            # An empty page adds nothing to collection_counter, so without
	            # this guard the loop would keep requesting pages forever.
	            break

	        count += len(data)
	        collection_counter += len(data)
	        current_page += 1

	        for photo in data:
	            photo_id = photo["id"]
	            # Honour the quality selected above instead of always using "raw".
	            download_url = photo["urls"][quality]

	            photos.append({
	                "id": photo_id,
	                "url": download_url,
	                "collection": {
	                    "id": collection,
	                    "title": col["title"]
	                }
	            })


	# Phase 2: download every queued photo and report progress.
	print("Preparing to download", len(photos), "photos")
	for photo in photos:
	    r = requests.get(photo["url"], allow_redirects=True)
	    i += 1

	    if collection_folder:
	        path = "photos/" + photo["collection"]["title"] + "/" + photo["id"] + ".jpg"
	    else:
	        path = "photos/" + photo["id"] + ".jpg"

	    # The context manager closes the file even when the write fails.
	    with open(path, "wb") as image_file:
	        image_file.write(r.content)

	    progress(i, count, str(i) + " of " + str(count))
	import sys
	import os
	import threading
	import requests
	from Queue import Queue

	def progress(count, total, status=''):
	    """Draw a one-line text progress bar on stdout.

	    The line ends with a carriage return instead of a newline, so the next
	    call overwrites it in place.

	    Args:
	        count: Number of items completed so far.
	        total: Number of items expected overall.
	        status: Text appended after the percentage.

	    A non-positive ``total`` draws an empty bar at 0.0% instead of raising
	    ZeroDivisionError, and ``count`` is clamped to ``0..total`` so the bar
	    never grows past its fixed width.
	    """
	    bar_len = 60

	    if total > 0:
	        # Clamp the count itself so in-range calls keep the exact arithmetic.
	        count = min(max(count, 0), total)
	        filled_len = int(round(bar_len * count / float(total)))
	        percents = round(100.0 * count / float(total), 1)
	    else:
	        filled_len = 0
	        percents = 0.0

	    bar = '=' * filled_len + '-' * (bar_len - filled_len)

	    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
	    sys.stdout.flush() # As suggested by Rom Ruben (see: http://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console/27871113#comment50529068_27871113)


	def dirExists(path):
	    """Report whether `path` names an existing directory."""
	    return os.path.isdir(path)

	def unsplashURL(pathname, query=False):
	    """Build an api.unsplash.com URL for `pathname`.

	    The client id and client secret are always added as query parameters;
	    when `query` is truthy it is appended after them, joined with "&".
	    """
	    # NOTE(review): both credentials are hard-coded here and the secret is
	    # sent in the URL - confirm that this is intended.
	    c_id = "4c29b3461e0cbb98c8f8cbb00fd13cffcfd7746f209b57097d1ffefa06fd352a"
	    c_sec = "2b269ee625b6b60f35b1b7a01123bfcd7cc9201e2d8f5b568ced854a048fc569"

	    # Build the shared part once instead of repeating it in both returns.
	    url = "https://api.unsplash.com/" + pathname + "?client_id=" + c_id + "&client_secret=" + c_sec

	    if query:
	        return url + "&" + query

	    return url


	def getCollectionPhotos (curated, collection, page = 1):
	    """Return the URL for one page of a collection's photo listing.

	    A truthy `curated` selects the "collections/curated/" path prefix,
	    otherwise "collections/" is used; `page` is appended as "&page=<page>".
	    """
	    if curated:
	        return unsplashURL("collections/curated/" + collection + "/photos") + "&page=" + str(page)

	    return unsplashURL("collections/" + collection + "/photos") + "&page=" + str(page)