@graphaelli
Last active April 25, 2018 14:45
Flickr Archive Utilities - MIT License
"""
Configuration for Flickr Backup & Download and Google Upload.
"""
from collections import namedtuple

# Fill in after the first run; each value will be logged to the console.
flickr_oauth_token = None
flickr_oauth_secret = None
google_refresh_token = None  # only issued on the first authorization
google_access_token = None
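# For example, after the first run flickr_backup.py prints lines like
# oauth_token="..." and oauth_secret="...", and google_upload.py prints
# access_token="..." and refresh_token="..."; paste those values into the
# variables above.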
#
# Fill in keys and secrets before first run
#
# API Keys from https://www.flickr.com/services/apps/by/me.
Flickr = namedtuple('Flickr', ('key', 'secret', 'oauth_token', 'oauth_secret'))
flickr = Flickr(
    key='API_KEY',
    secret='API_SECRET',
    oauth_token=flickr_oauth_token,
    oauth_secret=flickr_oauth_secret,
)
# id and secret from https://console.cloud.google.com/apis/credentials
Google = namedtuple('Google', ('client_id', 'secret', 'access_token', 'refresh_token'))
google = Google(
    client_id='CLIENT_ID',
    secret='SECRET',
    access_token=google_access_token,
    refresh_token=google_refresh_token,
)
flickr_backup.py
#!/usr/bin/env python3
"""
Dump information about all of the authenticated user's photos to backup-$date.json:
[
  {
    "id": "12345",
    "owner": "12345@N00",
    "secret": "9876",
    "server": "119",
    "farm": 1,
    "title": "",
    "ispublic": 1,
    "isfriend": 0,
    "isfamily": 0,
    "description": {
      "_content": ""
    },
    "dateupload": "1160371553",
    "datetaken": "2006-10-08 18:34:44",
    "datetakengranularity": "0",
    "datetakenunknown": 0,
    "media": "photo",
    "media_status": "ready",
    "url_o": "https://farm1.staticflickr.com/119/12345_9876_o.jpg",
    "height_o": "2112",
    "width_o": "2816"
  }
]
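
The first run walks through the Flickr OAuth 1.0a flow interactively and
prints the oauth_token/oauth_secret values to paste into config.py:

    $ ./flickr_backup.py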
"""
import datetime
import json

from requests_oauthlib import OAuth1Session

from config import flickr
authorization_base_url = 'https://www.flickr.com/services/oauth/authorize'
request_token_url = 'https://www.flickr.com/services/oauth/request_token'
access_token_url = 'https://www.flickr.com/services/oauth/access_token'
def time_from(ts):
    """Convert a unix timestamp to a 'YYYY-MM-DD HH:MM:SS' string."""
    return datetime.datetime.fromtimestamp(int(ts)).strftime('%Y-%m-%d %H:%M:%S')


def auth1():
    """Interactive OAuth 1.0a flow: print the authorization URL, then read back the redirect."""
    session = OAuth1Session(flickr.key, client_secret=flickr.secret, callback_uri='http://127.0.0.1:8000')
    session.fetch_request_token(request_token_url, params={'perms': 'read'})
    print('authorize:', session.authorization_url(authorization_base_url, perms='read'))
    redirect_response = input('Paste the full redirect URL here:')
    session.parse_authorization_response(redirect_response)
    print(session.fetch_access_token(access_token_url))
    return session
def get_session():
    if not flickr.oauth_token:
        # first run: do the interactive flow, then print the tokens to paste into config.py
        session = auth1()
        print(f'oauth_token="{session.auth.client.resource_owner_key}"')
        print(f'oauth_secret="{session.auth.client.resource_owner_secret}"')
    else:
        session = OAuth1Session(
            flickr.key,
            client_secret=flickr.secret,
            resource_owner_key=flickr.oauth_token,
            resource_owner_secret=flickr.oauth_secret,
        )
    return session
def search(session, page):
    """ https://www.flickr.com/services/api/flickr.photos.search.html """
    r = session.get(
        'https://api.flickr.com/services/rest',
        params={
            'nojsoncallback': '1',
            'format': 'json',
            'method': 'flickr.photos.search',
            'sort': 'date-posted-asc',
            'per_page': 500,
            'page': page,
            'extras': 'description,url_o,date_upload,date_taken,media',
            'user_id': 'me',
        }
    )
    return r.json()
def main():
    session = get_session()
    photos = []
    # walk all pages; each response reports the current page and the total page count
    page_cnt = page_num = 1
    while page_num <= page_cnt:
        print("page", page_num, "of", page_cnt)
        page = search(session, page_num)
        photos.extend(page['photos']['photo'])
        page_num = page['photos']['page'] + 1
        page_cnt = page['photos']['pages']
    with open("backup-{:%Y%m%d_%H%M}.json".format(datetime.datetime.now()), mode="w") as f:
        json.dump(photos, f)
    photo_ids = {p['id'] for p in photos}
    print("{:d} photos found".format(len(photo_ids)))


if __name__ == '__main__':
    main()
flickr_download.py
#!/usr/bin/env python3
"""
Download all of the photos and videos previously queried with flickr_backup.py.
Photos/videos are stored by date taken in photos/YYYY/MM/, three files each:
    photos/2006/04/${photo_id}.info.json
    photos/2006/04/${photo_id}.meta.json
    photos/2006/04/${photo_url_o}.jpg
This script is resumable: it minimizes the number of API calls when a photo
has already been downloaded.
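
Usage, given a backup file produced by flickr_backup.py:

    $ ./flickr_download.py backup-YYYYMMDD_HHMM.json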
"""
import json
import os
import queue
import sys
import threading
import time

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from flickr_backup import get_session
def requests_retry_session(retries=10, backoff_factor=0.3, status_forcelist=(500, 502, 504), session=None):
    """ https://www.peterbe.com/plog/best-practice-with-retries-with-requests """
    session = session or requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
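
# For example, requests_retry_session().get(url, timeout=10) retries up to 10
# times on connection/read errors and on 500/502/504 responses, sleeping
# roughly backoff_factor * 2**n between attempts.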
def info(session, photo_id, secret):
    """ https://www.flickr.com/services/api/flickr.photos.getInfo.html """
    r = session.get(
        'https://api.flickr.com/services/rest',
        params={
            'nojsoncallback': '1',
            'format': 'json',
            'method': 'flickr.photos.getInfo',
            'photo_id': photo_id,
            'secret': secret,
        }
    )
    return r.json()
def sizes(session, photo_id):
    """ https://www.flickr.com/services/api/flickr.photos.getSizes.html """
    r = session.get(
        'https://api.flickr.com/services/rest',
        params={
            'nojsoncallback': '1',
            'format': 'json',
            'method': 'flickr.photos.getSizes',
            'photo_id': photo_id,
        }
    )
    return r.json()
def video(session, photo_id):
    """ return the source url of the best downloadable video """
    sz = sizes(session, photo_id)
    # debug: dump the raw sizes response to stderr
    json.dump(sz, sys.stderr)

    def dims(x):
        if not x:
            return 0
        return int(x['width']) * int(x['height'])

    url = None
    for s in sz['sizes']['size']:
        # consider only downloadable videos
        if s['media'] == 'video' and s['label'] != 'Video Player':
            # source sometimes doesn't exist ?!
            if requests.head(s['source']).status_code < 400:
                # take the biggest video, or the one labeled original
                if dims(s) > dims(url) or s['label'] == "Video Original":
                    url = s
    if url is None:
        raise ValueError('no downloadable video found for {}'.format(photo_id))
    return url['source']
def download(session, item):
    # create photo working directory
    if 'datetaken' in item:
        taken = time.strptime(item['datetaken'], '%Y-%m-%d %H:%M:%S')
    else:
        taken = time.gmtime(0)
    photo_dir = os.path.join('photos', "{:04d}".format(taken.tm_year), "{:02d}".format(taken.tm_mon))
    os.makedirs(photo_dir, exist_ok=True)

    pid = item['id']
    original = os.path.basename(item['url_o'])
    url_o = item['url_o']
    if item.get('media', '') == 'video':
        original = original + '.mp4'
        # defer url_o lookup until after checking if original is already downloaded

    # skip already downloaded
    photo_path = os.path.join(photo_dir, original)
    info_path = os.path.join(photo_dir, '{}.info.json'.format(pid))
    photo_downloaded = os.path.isfile(photo_path) and os.path.getsize(photo_path) > 0
    info_downloaded = os.path.isfile(info_path) and os.path.getsize(info_path) > 0
    if all((photo_downloaded, info_downloaded)):
        print('already downloaded', pid, photo_path)
        return

    # save metadata per photo
    meta_path = os.path.join(photo_dir, '{}.meta.json'.format(pid))
    with open(meta_path, mode='w') as m:
        json.dump(item, m)

    # download media
    if not photo_downloaded:
        # delayed until here to save API calls
        if item.get('media', '') == 'video':
            print('looking up video info for', pid)
            url_o = video(session, pid)
        print('downloading', pid)
        r = requests_retry_session().get(url_o, stream=True, timeout=10)
        r.raise_for_status()
        with open(photo_path, mode='wb') as p:
            for chunk in r.iter_content(chunk_size=4096):
                p.write(chunk)

    # save info per photo
    if not info_downloaded:
        print('getting info for', pid)
        i = info(session, pid, item['secret'])
        with open(info_path, mode='w') as m:
            json.dump(i, m)
def worker(q, session):
    while True:
        item = q.get()
        if item is None:
            # sentinel value: shut this worker down
            break
        try:
            download(session, item)
        except Exception as e:
            print("error with {}:".format(item), e)
        q.task_done()
def main(source, num_worker_threads=4):
    q = queue.Queue()
    threads = []
    for i in range(num_worker_threads):
        session = get_session()
        t = threading.Thread(target=worker, args=(q, session))
        t.start()
        threads.append(t)

    for item in source:
        q.put(item)

    # block until all tasks are done
    q.join()

    # stop workers
    for i in range(num_worker_threads):
        q.put(None)
    for t in threads:
        t.join()


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        main(json.load(f), num_worker_threads=10)
google_upload.py
#!/usr/bin/env python3
"""
Upload photos [but not videos] previously downloaded with flickr_download.py.
Titles set in Flickr will be preserved; otherwise they will be set to 'Untitled' plus the date taken.
Descriptions set in Flickr will be preserved. Tags and other metadata will be included as the last line of the description.
Photos will be added to the 'default' album, aka Drop Box. There is a 2000-item limit per album.
Creates a ${photo_id}.google_upload.json for each item uploaded.
To upload all jpgs in photos/:
    $ find photos -name '*jpg' | xargs -n 500 ./google_upload.py
"""
import io
import json
import os
import sys
import time
import xml.etree.ElementTree as ET

from requests_oauthlib import OAuth2Session
from urllib3.filepost import choose_boundary

from config import google
authorization_base_url = "https://accounts.google.com/o/oauth2/v2/auth"
token_url = "https://www.googleapis.com/oauth2/v4/token"
scope = [
    "https://www.googleapis.com/auth/userinfo.email",
    "https://www.googleapis.com/auth/userinfo.profile",
    "https://picasaweb.google.com/data/",
]
def auth2():
    """Interactive OAuth 2.0 flow; requests offline access so a refresh token is issued."""
    session = OAuth2Session(client_id=google.client_id, scope=scope, redirect_uri="https://localhost:8000/")
    authorization_url, state = session.authorization_url(authorization_base_url, access_type="offline", approval_prompt='force')
    print('authorize:', authorization_url)
    redirect_response = input('Paste the full redirect URL here:')
    token = session.fetch_token(token_url, client_secret=google.secret, authorization_response=redirect_response)
    with open('google-token', 'w') as g:
        g.write(str(token))
    return session
def get_session():
    if google.access_token:
        def token_updater(token):
            print("token updated", token, file=sys.stderr)

        session = OAuth2Session(
            client_id=google.client_id,
            token={
                'access_token': google.access_token,
                'expires_at': time.time() - 10,  # mark as expired to force a refresh on first use
                'refresh_token': google.refresh_token,
            },
            auto_refresh_kwargs={
                'client_id': google.client_id,
                'client_secret': google.secret,
            },
            auto_refresh_url=token_url,
            token_updater=token_updater,
        )
    else:
        session = auth2()
        print(f'access_token="{session.access_token}"', file=sys.stderr)
        print(f'refresh_token="{session.refresh_token}"', file=sys.stderr)
    return session
def upload_with_metadata(session, media, content_type, title, description=None, datetaken=None, album='default'):
    """ https://developers.google.com/picasa-web/docs/3.0/developers_guide_protocol#PostPhotos """
    term = 'photo'  # if content_type.startswith('image') else 'video'
    entry = ET.Element('entry', xmlns='http://www.w3.org/2005/Atom')
    te = ET.SubElement(entry, 'title')
    te.text = title
    if description:
        summ = ET.SubElement(entry, 'summary')
        summ.text = description
    ET.SubElement(entry, 'category',
                  scheme="http://schemas.google.com/g/2005#kind",
                  term=f"http://schemas.google.com/photos/2007#{term}")
    # if content_type.startswith('image/'):
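    # The body below follows the multipart/related layout from the protocol
    # guide linked above, roughly:
    #
    #   Media multipart posting
    #   --<boundary>
    #   Content-Type: application/atom+xml
    #
    #   <entry>...photo metadata...</entry>
    #   --<boundary>
    #   Content-Type: image/jpeg
    #
    #   <binary image data>
    #   --<boundary>--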
    boundary = choose_boundary()
    body = io.BytesIO()
    body.write(f'Media multipart posting\n--{boundary}\n'.encode('utf-8'))
    body.write('Content-Type: application/atom+xml\n\n'.encode('utf-8'))
    body.write(ET.tostring(entry))
    body.write(f'\n--{boundary}\n'.encode('utf-8'))
    body.write(f'Content-Type: {content_type}\n\n'.encode('utf-8'))
    body.write(media.read())
    body.write(f'\n--{boundary}--\n'.encode('utf-8'))
    body.seek(0)
    headers = {
        'Content-Type': f'multipart/related; boundary="{boundary}"',
        'GData-Version': '3',
        'MIME-version': '1.0',
    }
    return session.post(
        f'https://picasaweb.google.com/data/feed/api/user/default/albumid/{album}?alt=json',
        headers=headers,
        data=body,
    )
def upload(session, photo):
    photo_dir = os.path.dirname(photo)
    flickr_pid = os.path.basename(photo).split("_", 1)[0]
    goog_path = os.path.join(photo_dir, f'{flickr_pid}.google_upload.json')
    if os.path.exists(goog_path):
        print(f"already uploaded {photo}")
        return

    with open(os.path.join(photo_dir, f'{flickr_pid}.info.json')) as i:
        info = json.load(i)
    if info["stat"] != "ok":
        raise Exception(f"bad stat for {photo}")
    info = info['photo']

    # https://developers.google.com/picasa-web/docs/3.0/developers_guide_protocol#PostVideo
    if info['media'] == 'video':
        content_type = 'video/mp4'
        ### temporarily skip videos ###
        print(f"skipped video {photo}")
        return
        ### ###
    elif info['originalformat'] == 'jpg':
        content_type = 'image/jpeg'
    elif info['originalformat'] == 'png':
        content_type = 'image/png'
    else:
        raise Exception(f"failed to detect content type for {photo}")

    title = info['title']['_content'] or 'Untitled ' + info["dates"]["taken"]
    description = info['description']['_content']
    if description != "":
        description += "\n\n"
    # append flickr metadata as the last line of the description
    description += f'flickr:id={flickr_pid}'
    for k, v in info["dates"].items():
        if k == 'lastupdate':
            continue
        elif k in ('takengranularity', 'takenunknown') and v in ('', '0', 0):
            continue
        description += f'|date_{k}={v}'
    if info['tags']['tag']:
        description += '|tags=' + ",".join([t['_content'] for t in info['tags']['tag']])
    if 'location' in info:
        lat = info['location'].get('latitude', '')
        lon = info['location'].get('longitude', '')
        description += f'|location={lat},{lon}'
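    # The appended metadata line ends up looking like, e.g. (hypothetical
    # tag and location values):
    #   flickr:id=12345|date_posted=1160371553|date_taken=2006-10-08 18:34:44|tags=beach,sunset|location=40.689,-74.044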
print(f"uploading {photo} title: {title} description: {description}")
with open(photo, 'rb') as p:
rsp = upload_with_metadata(session, p, content_type, title, description=description)
if rsp.status_code != 201:
print(rsp.status_code, rsp.content.decode('utf-8'))
rsp.raise_for_status()
with open(goog_path, mode='wb') as g:
g.write(rsp.content)
def main():
    session = get_session()
    for photo in sys.argv[1:]:
        upload(session, photo)


if __name__ == '__main__':
    main()
LICENSE
Copyright 2018 Gil Raphaelli
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
requirements.txt
appnope==0.1.0
bleach==2.1.3
certifi==2018.1.18
chardet==3.0.4
decorator==4.2.1
entrypoints==0.2.3
html5lib==1.0.1
idna==2.6
ipdb==0.11
ipykernel==4.8.2
ipython==6.2.1
ipython-genutils==0.2.0
ipywidgets==7.1.2
jedi==0.11.1
Jinja2==2.10
jsonschema==2.6.0
jupyter==1.0.0
jupyter-client==5.2.3
jupyter-console==5.2.0
jupyter-core==4.4.0
MarkupSafe==1.0
mistune==0.8.3
nbconvert==5.3.1
nbformat==4.4.0
notebook==5.4.1
oauthlib==2.0.7
pandocfilters==1.4.2
parso==0.1.1
pexpect==4.4.0
pickleshare==0.7.4
prompt-toolkit==1.0.15
ptyprocess==0.5.2
Pygments==2.2.0
python-dateutil==2.7.1
python-oauth2==1.1.0
pyzmq==17.0.0
qtconsole==4.3.1
requests==2.18.4
requests-oauthlib==0.8.0
Send2Trash==1.5.0
simplegeneric==0.8.1
six==1.11.0
terminado==0.8.1
testpath==0.3.1
tornado==5.0.1
traitlets==4.3.2
urllib3==1.22
wcwidth==0.1.7
webencodings==0.5.1
widgetsnbextension==3.1.4