-
-
Save anoduck/240d0d2c40abd21a080f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
############################### README ############################### | |
# External dependencies: | |
# * libmagic | |
# * python-dateutil | |
# * python-magic | |
# * requests | |
# | |
# The shell command call at the end was tested with GNU date from GNU | |
# coreutils. Other implementations might not work (for instance, BSD | |
# date is not compatible). However, you may comment out the last | |
# section without affecting the download. | |
# | |
# The "last_update_timestamp" is reserved for the update feature, | |
# which is not implemented yet. | |
###################################################################### | |
import commands | |
import json | |
import os | |
import sys | |
import urllib | |
import dateutil.parser | |
import magic | |
import requests | |
########################### CUSTOMIZE THIS ########################### | |
page_id = "567045406742962" | |
access_token = "app_id|app_secret" # see https://developers.facebook.com/docs/facebook-login/access-tokens#apptokens | |
dest = os.path.expanduser("~/img/sns/apink-official-facebook") | |
website_title = "apink-official-facebook" | |
###################################################################### | |
if not os.path.exists(dest): | |
os.makedirs(dest) | |
# read last update time, if it is available | |
last_update_record = dest + "/last_update_timestamp" | |
if os.path.exists(last_update_record): | |
f = open(last_update_record, "r") | |
last_update_timestamp = f.readline() | |
f.close() | |
last_update_time = dateutil.parser.parse(last_update_timestamp) | |
else: | |
last_update_time = dateutil.parser.parse("1970-01-01T00:00+00:00") | |
# this function makes an API call with only an access_token (which | |
# could be just app-id|app-secret) | |
def fb_public_call(endpoint, params, access_token): | |
params["access_token"] = access_token | |
response = requests.get("https://graph.facebook.com/" + endpoint, | |
params=params) | |
return response.json() | |
# this function downloads a photo | |
# return codes are defined below | |
SUCCESS = 0 | |
FAILED_DOWNLOAD = 1 | |
UNRECOGNIZED_MIME = 2 | |
OLD_PHOTO = 255 # photo older than last update time | |
def handle_photo(photo, album_id): | |
# print information | |
photo_id = photo["id"] | |
time = dateutil.parser.parse(photo["created_time"]) | |
if time < last_update_time: | |
return OLD_PHOTO | |
time_print = time.strftime("%b %d, %Y") | |
time_full = time.strftime("%Y%m%d%H%M%S") | |
original_image = photo["images"][0] | |
height = original_image["height"] | |
width = original_image["width"] | |
format_string = "date: %s id: %s size: %sx%s" | |
print format_string % (time_print, photo_id, width, | |
height) | |
# download file | |
source_uri = original_image["source"] | |
filename = time_full + "-" + website_title + "-" + \ | |
album_id + "-" + photo_id | |
filepath = dest + "/" + filename | |
urllib.urlretrieve(source_uri, filepath) | |
# identify mime type and attach extension | |
if os.path.exists(filepath): | |
mime = magic.from_file(filepath, mime=True) | |
if mime == "image/gif": | |
newfilepath = filepath + ".gif" | |
elif mime == "image/jpeg": | |
newfilepath = filepath + ".jpg" | |
elif mime == "image/png": | |
newfilepath = filepath + ".png" | |
else: | |
err = filepath + ": error: " + \ | |
"unrecgonized image type\n" | |
sys.stderr.write(err) | |
return UNRECOGNIZED_MIME | |
os.rename(filepath, newfilepath) | |
return SUCCESS | |
else: | |
# donwload failed for whatever reason | |
err = "error: " + filename + " failed to " + \ | |
"downloaded from " + source_uri + "\n" | |
sys.stderr.write(err) | |
return FAILED_DOWNLOAD | |
# this function handles an album, i.e., download newly added photos | |
# since the last update | |
def handle_album(album): | |
# print album info | |
album_id = album["id"] | |
format_string = "downloading album \"%s\" " + \ | |
"(album id: %s; photo count: %s)" | |
print format_string % (album["name"], album_id, | |
album["count"]) | |
print "-" * 80 | |
# retrieve photos in the album | |
photos_response = fb_public_call(album["id"] + "/photos", | |
params, access_token) | |
while True: | |
for photo in photos_response["data"]: | |
if handle_photo(photo, album_id) == OLD_PHOTO: | |
# already encountered old photo in this album | |
# no need to look further into the past | |
return | |
if "next" in photos_response["paging"]: | |
next_uri = photos_response["paging"]["next"] | |
photos_response = requests.get(next_uri).json() | |
else: | |
break | |
params = {} | |
# retrieve albums | |
albums_response = fb_public_call(page_id + "/albums", params, | |
access_token); | |
while True: | |
for album in albums_response["data"]: | |
handle_album(album) | |
if "next" in albums_response["paging"]: | |
next_uri = albums_response["paging"]["next"] | |
albums_response = requests.get(next_uri).json() | |
else: | |
break | |
# update feature yet to be implemented | |
# create a file "last_update_timestamp" for future use | |
f = open(last_update_record, "w") | |
f.write(commands.getoutput("date -u --iso-8601=seconds")) | |
f.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment