@jacobh
Created January 18, 2014 14:25
scraping my own flickr photos
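
The script below pages through the Flickr API's flickr.people.getPublicPhotos method, follows the page count reported in the first response, and downloads each photo's original-size file (the url_o extra) into photo_directory, using a gevent pool to run up to 20 downloads at once. It targets Python 2 (it uses urllib.urlretrieve) and depends on gevent, requests, and tqdm.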
from gevent import monkey
monkey.patch_all()  # patch the stdlib so urlretrieve yields to other greenlets

import json
import os
from urllib import urlretrieve  # Python 2; urllib.request.urlretrieve on Python 3

import requests
from gevent.pool import Pool
from tqdm import tqdm

ENDPOINT = 'http://api.flickr.com/services/rest/'
API_KEY = 'nope'  # placeholder; substitute a real Flickr API key
USER_ID = '48735087@N06'
photo_directory = '/Users/jacob/Pictures/flickr/'


def get_page(page_number=1):
    """Fetch one page of the user's public photos from the Flickr API."""
    resp = requests.get(ENDPOINT, params={
        'method': 'flickr.people.getPublicPhotos',
        'api_key': API_KEY,
        'user_id': USER_ID,
        'format': 'json',
        'page': page_number,
        'extras': ','.join([
            'date_taken',
            'url_o',  # URL of the original-size image
        ]),
    })
    # By default Flickr wraps JSON responses as jsonFlickrApi(...);
    # strip the 14-character prefix and trailing parenthesis before parsing.
    return json.loads(resp.content[14:-1])


def get_photos():
    """Yield every photo record, paging through the full result set."""
    first_page = get_page()
    page_count = first_page['photos']['pages']
    for page_num in range(1, page_count + 1):
        page = first_page if page_num == 1 else get_page(page_num)
        for photo in page['photos']['photo']:
            yield photo


def download_url(url):
    """Download one photo into photo_directory, skipping files already present."""
    path = photo_directory + url.split('/')[-1]
    if not os.path.exists(path):
        urlretrieve(url, path)


pool = Pool(size=20)  # run at most 20 downloads concurrently
for photo in tqdm(get_photos()):
    pool.wait_available()  # block until a greenlet slot is free
    pool.apply_async(download_url, args=[photo['url_o']])
pool.join()
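
One brittle spot is the resp.content[14:-1] slice, which depends on the exact length of the jsonFlickrApi(...) wrapper Flickr adds to JSON responses by default. The API's nojsoncallback parameter turns that wrapper off, so the body parses directly. A minimal sketch of get_page rewritten that way (same endpoint and parameters as above):

def get_page(page_number=1):
    # nojsoncallback=1 disables the jsonFlickrApi(...) JSONP wrapper,
    # so resp.json() can parse the body without slicing bytes.
    resp = requests.get(ENDPOINT, params={
        'method': 'flickr.people.getPublicPhotos',
        'api_key': API_KEY,
        'user_id': USER_ID,
        'format': 'json',
        'nojsoncallback': 1,
        'page': page_number,
        'extras': 'date_taken,url_o',
    })
    return resp.json()

Either form returns the same parsed dictionary; the parameter just removes the dependence on the wrapper's exact length.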