Last active
January 30, 2023 18:28
-
-
Save bombarie/c543ea3347bcf49e811e4d6a2ed4414a to your computer and use it in GitHub Desktop.
Reconstructs Flickr library based on exported metadata (which factually has all the data but sucks as an offline backup)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 17 jan 2023 - parsing downloaded Flickr data (image dumps and metadata) in order to restore the albums and re-date the images to original date | |
# import required modules | |
import os | |
import json | |
import shutil | |
import filedate | |
''' | |
[ INTRO ] | |
This Python script aims to recreate your Flickr library as you had it. It requires you to request an export of your account's data.
The export consists of your account data (metadata files only) and a collection of zips containing the pictures and videos of
your account, organized more or less randomly.
When I did this in December 2022, it really annoyed me that all the images and videos were randomly organized. Plus, the date the photo was | |
created or uploaded was lost. However, in the provided 'account data' download all the knowledge is present, it just needs to be reapplied | |
to your downloaded collection. | |
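In essence this script recreates the albums your images belonged to. It also sets the created, modified and accessed dates of each
copied file to the photo's 'date_taken' value from the metadata, falling back to its 'date_imported' value when 'date_taken' is empty.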
[ DEPENDENCIES ] | |
filedate | |
- pip install filedate | |
- https://pypi.org/project/filedate/ | |
[ INSTRUCTIONS ] | |
Unzip all your downloaded files (account data and all images/videos zips) into one folder. | |
Below are a few variables that you will need to fill in manually as the naming on your end will be different than it was for me. See the | |
comments. | |
While it runs, the script may print a lot of messages to the console.
[ DISCLAIMER ] | |
I really don't know too much about Python. This script could most likely be x times more efficient. | |
I'm happy it worked for me and hopefully it will work for you too :) | |
''' | |
# Folder that holds the per-photo metadata JSON files
# (named 'photo_xxxxxxxxx.json', e.g. 'photo_108214041.json').
directory = "72157721417080014_89350bcd6a9e_part1"

# Every folder that contains your unzipped images/videos. Flickr may split the
# media over several zips, so list one entry per unzipped folder and adjust the
# number of entries to your own download.
fileDirs = [
    "data-download-1",
    "data-download-2",
    "data-download-3",
    "data-download-4",
    "data-download-5",
]

# Folder the script writes its output into.
outputDir = "output-dir"
# Nothing below this point needs editing; the script should just do its thing.
print('Wait until finished, mmkay? 2200 files on a macbook pro m1 took about 15 seconds. You mileage may vary.')

# Create the output directory up front when it is not there yet.
output_dir_missing = not os.path.exists(outputDir)
if output_dir_missing:
    print(f'output directory {outputDir} did not exist >> creating...')
    os.makedirs(outputDir)
# Rebuild the library: for every 'photo_<id>.json' metadata file, copy the media
# file(s) whose name carries that id into one folder per album (or into the
# output root when the photo is in no album) and re-date the copies.


def _index_media_files(media_dirs):
    """Map each run of ASCII digits in a media filename to the files containing it.

    The directories are listed once here instead of once per metadata file.
    Indexing whole digit runs (rather than testing for a substring) keeps a
    short id such as '123' from matching a file whose name contains '91234'.
    NOTE(review): assumes the export embeds the photo id in the filename as a
    number of its own (e.g. 'name_<id>_o.jpg') - confirm against your download.

    Args:
        media_dirs: iterable of directory paths holding the images/videos.

    Returns:
        dict mapping a digit string to the list of file paths whose name
        contains it, in directory order and then listing order.

    Raises:
        OSError: if one of the directories cannot be listed.
    """
    import re  # local import: only this helper needs it

    digit_run = re.compile(r'[0-9]+')
    index = {}
    for media_dir in media_dirs:
        for media_name in os.listdir(media_dir):
            media_path = os.path.join(media_dir, media_name)
            if not os.path.isfile(media_path):
                continue
            # set(): index a file once even if the same number repeats in its name
            for run in set(digit_run.findall(media_name)):
                index.setdefault(run, []).append(media_path)
    return index


def _safe_album_dirname(title):
    """Return *title* as a single directory name (no path separators, no '.'/'..')."""
    name = str(title)
    for separator in ('/', os.sep, os.altsep):
        if separator:
            name = name.replace(separator, '_')
    if name in ('.', '..'):
        # would otherwise resolve to the output folder itself or to its parent
        name = name.replace('.', '_')
    return name


def _original_date(metadata):
    """Return the 'date_taken' value, else 'date_imported', else None."""
    return metadata.get('date_taken') or metadata.get('date_imported') or None


def _copy_with_date(source_path, target_dir, date):
    """Copy *source_path* into *target_dir* and stamp the copy with *date* (if any)."""
    copied_path = shutil.copy2(source_path, target_dir)
    if date:
        filedate.File(copied_path).set(created=date, modified=date, accessed=date)
    return copied_path


def restore_library(metadata_dir, media_dirs, output_dir):
    """Recreate album folders under *output_dir* from the exported metadata.

    Args:
        metadata_dir: directory containing the 'photo_<id>.json' files.
        media_dirs: iterable of directories containing the images/videos.
        output_dir: existing directory that receives the copies.

    Raises:
        OSError: if a directory cannot be listed or a file cannot be copied.
        KeyError: if a metadata file has no 'id', or an album has no 'title'.
        json.JSONDecodeError: if a 'photo_*.json' file is not valid JSON.
    """
    media_index = _index_media_files(media_dirs)

    for filename in os.listdir(metadata_dir):
        # skip anything that isn't a 'photo_xxxxxxx.json' file
        if not (filename.startswith('photo_') and filename.endswith('.json')):
            continue
        metadata_path = os.path.join(metadata_dir, filename)
        if not os.path.isfile(metadata_path):
            continue

        # 'with' closes the handle; JSON text is read as UTF-8 regardless of locale
        with open(metadata_path, encoding='utf-8') as metadata_file:
            data = json.load(metadata_file)

        matches = media_index.get(str(data['id']), [])
        if not matches:
            continue

        date = _original_date(data)
        albums = data.get('albums') or []
        # one target folder per album; photos without an album go to the root
        target_dirs = [
            os.path.join(output_dir, _safe_album_dirname(album['title']))
            for album in albums
        ] or [output_dir]
        for target_dir in target_dirs:
            os.makedirs(target_dir, exist_ok=True)

        for media_path in matches:
            for target_dir in target_dirs:
                _copy_with_date(media_path, target_dir, date)


# Guarded so the helpers above can be imported without starting a run.
if __name__ == '__main__':
    restore_library(directory, fileDirs, outputDir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment