Last active
November 2, 2022 13:40
-
-
Save k-funk/b71d6a685201b96f50fe0a83c0e97aa6 to your computer and use it in GitHub Desktop.
Scraping Google Photos Public Album
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# inspired by https://medium.com/p/d49f037c8e3c/responses/show (hopefully the regex is updated there when this one breaks)
# also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
import logging | |
import re | |
import requests | |
logger = logging.getLogger(__name__)

# Matches image URLs served from lh3.googleusercontent.com. The pattern
# originally required the path token to be at least 139 characters long; the
# real minimum length is not known, so a looser lower bound of 128 is used.
REGEX = r"(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9\-_]{128,})"
def get_photos_from_html(html):
    """Return the photo URLs found in an album page's HTML.

    Every match of ``REGEX`` in ``html`` is collected in document order, then
    the first and the last match are dropped because they are the album cover.
    """
    pattern = re.compile(REGEX)
    every_match = pattern.findall(html)
    # first and last elements are the album cover
    return every_match[1:-1]
def get_photo_urls(album_url, timeout=10):
    """Return the photo URLs of a public Google Photos album.

    Downloads the album page, extracts the image URLs with
    ``get_photos_from_html`` and returns them in reversed order.

    Args:
        album_url: URL of the shared album page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list of image URL strings, or an empty list when the request
        fails or no photos are found. Failures are logged, never raised.
    """
    logger.info('Scraping Google Photos album at: %s', album_url)
    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(album_url, timeout=timeout)
        # Surface HTTP errors as such instead of a misleading "No photos found."
        response.raise_for_status()
        photo_urls = get_photos_from_html(response.text)
        if not photo_urls:
            raise Exception('No photos found.')
        logger.info('# of images: %s', len(photo_urls))
        photo_urls.reverse()  # makes the order appear the way it does on the website
        return photo_urls
    except Exception as err:
        # Deliberately broad: scraping is best-effort, so every failure is
        # logged and reported to the caller as an empty list.
        logger.error('Google Photos scraping failed:\n%s', err)
        return []
if __name__ == "__main__":
    # Manual run: scrape the album link below (looks like a placeholder to be
    # replaced with a real share URL) and print the resulting list.
    album_photos = get_photo_urls('https://photos.app.goo.gl/...')
    print(album_photos)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment