Forked from nicolevanderhoeven/downloadfirebase.py
Last active
January 12, 2023 07:07
-
-
Save seltzered/f93cd6dbe7db28ac820591e7ee14b820 to your computer and use it in GitHub Desktop.
Download Roam images (Firebase) and Google Docs images (googleusercontent) locally for use with a Logseq/Obsidian vault. This fork names images after the page they appear on and assumes that images without a file extension are PNGs, rather than not downloading them at all.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Opens the files in a directory, prints the Firebase/googleusercontent URLs it finds, downloads them, and replaces the links with links to the new local files.
# To use, set the variable vaultDir to your vault's root directory.
# Downloaded files are automatically put in ../assets (Logseq style) - change the newFilePath variable if you want to change this.
import calendar
import glob
import os
import re
import time

import requests
# Set to True to only print what would be downloaded and renamed: no request
# is sent and no file is created or modified.
dryRun = False

# SET YOUR VAULT DIRECTORY HERE - whatever is a directory full of markdown files
vaultDir = '/path/to/my/logseq/pages'

# When generating an image filename from the page filename, remove special
# characters (e.g. '[' and ']' for titles that also have [[blockrefs]]).
# This is a regular expression; every match is deleted from the prefix.
filenamePrefixStripCharPattern = r'\[|\]'

# Migration descriptor added to every generated filename, just in case you
# ever need to do post-processing on only these migrated images.
filenameMigrationDescriptor = 'fromRoam'

# Suffix used when no file extension is found in the link. It is appended
# directly after the image counter, so such files end in e.g. "-0ASSUMEDPNG.png".
defaultAssumedExt = 'ASSUMEDPNG.png'

# Placeholder initial values for the working variables of the processing
# loop, so that every name exists before the loop starts.
firebaseShort = 'none'
fullRead = 'none'
fileFullPath = ''
fullTempFilePath = ''
i = 0  # running count of links rewritten to local files
ext = ''
# Walk through all files in all directories within the specified vault
# directory. For every Firebase / googleusercontent link found, download the
# asset into ../assets (relative to vaultDir) and rewrite the link in the
# file so it points at the local copy.

# Link patterns. A URL ends at whitespace or at the ')' / '}' that closes the
# markdown link or {{pdf: ...}} embed around it, so several links on one line
# are matched separately and the closing bracket is never part of the URL.
# Group 1 of the Firebase pattern is the URL without its "?alt=..." query.
_FIREBASE_LINK = re.compile(
    r'(https://firebasestorage[^\s)}]*?)\?alt[^\s)}]*(?=[)}])')
_GOOGLE_LINK = re.compile(r'https://[^\s)]*googleusercontent[^\s)]*(?=\))')

# Timeout in seconds passed to requests.get for each download.
_DOWNLOAD_TIMEOUT = 30


def _asset_extension(short_url):
    """Return the extension (with leading dot) found in the last five
    characters of *short_url*, or defaultAssumedExt when there is none."""
    _, dot, suffix = short_url[-5:].rpartition('.')  # e.g. "k.png" / ".jpeg"
    if dot and suffix:
        return '.' + suffix
    return defaultAssumedExt


def _find_asset_links(text):
    """Return (url, extension) pairs for every asset link in *text*, in order
    of appearance. googleusercontent links carry no extension, so they always
    get defaultAssumedExt."""
    found = [(m.start(), m.group(0), _asset_extension(m.group(1)))
             for m in _FIREBASE_LINK.finditer(text)]
    found += [(m.start(), m.group(0), defaultAssumedExt)
              for m in _GOOGLE_LINK.finditer(text)]
    found.sort()
    return [(url, extension) for _, url, extension in found]


def _download(url, destination):
    """Fetch *url* and write the response body to *destination*.

    Returns True on success. On a network error or an HTTP error status
    nothing is written and False is returned, so the caller can leave the
    original link in place instead of pointing it at a broken file.
    """
    try:
        response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print('download failed, leaving link untouched: ' + str(err))
        return False
    with open(destination, 'wb') as output_file:
        output_file.write(response.content)
    return True


# Directory that the '../assets/...' links resolve to from inside vaultDir.
assetsDir = os.path.join(vaultDir, '..', 'assets')

for subdir, dirs, files in os.walk(vaultDir):
    for file in files:
        print("filename: " + file)
        fileFullPath = os.path.join(subdir, file)
        # surrogateescape + newline='' round-trip undecodable bytes and the
        # original line endings unchanged when the file is written back.
        with open(fileFullPath, encoding='utf-8', errors='surrogateescape',
                  newline='') as fhand:
            data = fhand.read()

        # Image names are based on the page's filename minus stripped chars.
        filenamePrefix = re.sub(filenamePrefixStripCharPattern, '',
                                os.path.splitext(file)[0])
        fileImageCount = 0
        attempted = set()   # URLs already handled in this file
        replacements = {}   # downloaded URL -> new local link

        for resolvedUrl, ext in _find_asset_links(data):
            if resolvedUrl in attempted:
                continue  # same asset linked twice: download it only once
            attempted.add(resolvedUrl)

            print('\nfirebasestorage or googleusercontent asset link found')
            print('link: ' + resolvedUrl)
            print('\nextension: ' + ext)
            newFilePath = ('../assets/' + filenamePrefix + '-'
                           + filenameMigrationDescriptor + '-'
                           + str(fileImageCount) + ext)
            print("\ncreating image:" + newFilePath)
            print(resolvedUrl + '>>>' + newFilePath)

            if not dryRun:
                # Create assets folder if it doesn't exist
                os.makedirs(assetsDir, exist_ok=True)
                if not _download(resolvedUrl,
                                 os.path.join(vaultDir, newFilePath)):
                    continue
                replacements[resolvedUrl] = newFilePath
            fileImageCount = fileImageCount + 1

        if replacements:
            # Longest URLs first, so a URL that is a prefix of another one
            # cannot clobber part of the longer link.
            for url in sorted(replacements, key=len, reverse=True):
                data = data.replace(url, replacements[url])
            # Write to a temp file next to the original, then swap it in, so
            # an interrupted run never leaves a half-written markdown file.
            fullTempFilePath = os.path.join(subdir, 'temp_' + file)
            with open(fullTempFilePath, 'w', encoding='utf-8',
                      errors='surrogateescape', newline='') as temp_file:
                temp_file.write(data)
            os.replace(fullTempFilePath, fileFullPath)
            i = i + len(replacements)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment