Skip to content

Instantly share code, notes, and snippets.

@seltzered
Forked from nicolevanderhoeven/downloadfirebase.py
Last active January 12, 2023 07:07
Show Gist options
  • Save seltzered/f93cd6dbe7db28ac820591e7ee14b820 to your computer and use it in GitHub Desktop.
Save seltzered/f93cd6dbe7db28ac820591e7ee14b820 to your computer and use it in GitHub Desktop.
Download Roam images (Firebase) and Google Docs images (googleusercontent) locally for use with a Logseq/Obsidian vault. This fork uses page-based image names and assumes that images without a file extension are PNGs instead of skipping them.
# Opens files in directory, outputs firebase URLs to a file, downloads them, and replaces the links with a link to the new files.
# To use, replace PATH in the variable vaultDir with your vault's root directory.
# This automatically puts filenames in ../assets (Logseq style) - change the newFilePath variable if you want to change this
import re
import glob
import os
import requests
import calendar
import time
# When True, only report what would be downloaded/rewritten; touch nothing.
dryRun = False
#SET YOUR VAULT DIRECTORY HERE - whatever is a directory full of markdown files
vaultDir = '/path/to/my/logseq/pages'
# When generating image file with filename,
# remove special characters.
# (e.g. '[' and ']' for titles that also have [[blockrefs]])
filenamePrefixStripCharPattern = r'\[|\]'
# add a migration descriptor to the filename,
# just in case you need to ever do post-processing
# on only these migrated images.
filenameMigrationDescriptor = 'fromRoam'
#default assumed image extension if one isn't found
defaultAssumedExt = 'ASSUMEDPNG.png'
# Seconds to wait on a stalled connection before giving up on one asset.
downloadTimeoutSeconds = 60

# Characters allowed inside an asset URL. Stopping at whitespace, brackets,
# braces and quotes keeps the match from running on to the last ')' or '}'
# of the line, which is what a greedy '(.*)' would do.
urlCharClass = r'[^\s(){}<>"\']'
# Group 1 is the part between 'https://firebasestorage' and '?alt'; it ends
# with the stored object's name and is used to guess the file extension.
firebaseLinkPattern = re.compile(
    r'https://firebasestorage(' + urlCharClass + r'*?)\?alt' + urlCharClass + r'*')
# Images pasted from a Google Doc; these URLs carry no file extension.
googleLinkPattern = re.compile(
    r'https://' + urlCharClass + r'*googleusercontent' + urlCharClass + r'*')


def guessExtension(firebaseShort):
    """Return the file extension (with leading dot) for a Firebase URL.

    Only the last five characters of ``firebaseShort`` (the URL without its
    query string) are inspected, e.g. ``a.png`` or ``.jpeg``. When they do not
    contain a dot followed by at least one character, ``defaultAssumedExt`` is
    returned instead.
    """
    match = re.search(r'(.*)\.(.+)', firebaseShort[-5:])
    if match is None:
        return defaultAssumedExt
    return '.' + match.group(2)


def findAssetLinks(line):
    """Return ``[(url, ext), ...]`` for every asset link found in ``line``.

    Firebase storage links (which must carry an ``?alt`` query) and
    googleusercontent links are both collected, in order of appearance.
    ``ext`` is the extension to use for the downloaded copy.
    """
    found = []
    for match in firebaseLinkPattern.finditer(line):
        firebaseShort = 'https://firebasestorage' + match.group(1)
        found.append((match.start(), match.group(0), guessExtension(firebaseShort)))
    for match in googleLinkPattern.finditer(line):
        found.append((match.start(), match.group(0), defaultAssumedExt))
    found.sort()
    return [(url, ext) for _, url, ext in found]


def buildAssetLink(file, imageIndex, ext):
    """Return the vault-relative link for image number ``imageIndex`` of ``file``.

    The name is built from the page's file name (extension removed and the
    characters matched by ``filenamePrefixStripCharPattern`` stripped), the
    migration descriptor, the image index and ``ext``.
    """
    filenamePrefix = os.path.splitext(file)[0]
    filenamePrefix = re.sub(filenamePrefixStripCharPattern, '', filenamePrefix)
    newFilePath = ('../assets/' + filenamePrefix + '-' + filenameMigrationDescriptor
                   + '-' + str(imageIndex) + ext)
    return newFilePath


def applyReplacements(data, replacements):
    """Return ``data`` with every URL key of ``replacements`` swapped for its value.

    Longer URLs are substituted first so that a URL which is a prefix of
    another one cannot corrupt the longer link.
    """
    for url in sorted(replacements, key=len, reverse=True):
        data = data.replace(url, replacements[url])
    return data


def downloadAsset(resolvedUrl, newFilePath):
    """Download ``resolvedUrl`` to ``newFilePath`` (relative to ``vaultDir``).

    Returns True when the file was written. Network or HTTP errors are
    reported and yield False, so the caller can leave the original link in
    place rather than pointing the page at an error body saved as an image.
    """
    try:
        response = requests.get(resolvedUrl, timeout=downloadTimeoutSeconds)
        response.raise_for_status()
    except requests.RequestException as err:
        print('download failed, link left unchanged: ' + resolvedUrl + ' (' + str(err) + ')')
        return False
    localPath = os.path.join(vaultDir, newFilePath)
    # Derive the assets folder from the very path that is written to, so the
    # folder that gets created is always the one the file lands in.
    os.makedirs(os.path.dirname(localPath), exist_ok=True)
    with open(localPath, 'wb') as output_file:
        output_file.write(response.content)
    return True


def migrateFile(subdir, file):
    """Download the remote assets linked from one file and relink them.

    ``subdir`` is the directory containing ``file``. Returns the number of
    distinct links that were migrated (or, when ``dryRun`` is set, that would
    have been). The file is only rewritten when at least one asset was
    downloaded and ``dryRun`` is False.
    """
    fileFullPath = os.path.join(subdir, file)
    replacements = {}  # remote URL -> local link, one entry per distinct URL
    with open(fileFullPath, errors='ignore') as fhand:
        for line in fhand:
            if ('firebasestorage' not in line) and ('googleusercontent' not in line):
                continue
            print('\nfirebasestorage or googleusercontent asset link found')
            for resolvedUrl, ext in findAssetLinks(line):
                if resolvedUrl in replacements:
                    # Same asset referenced again: reuse the first download.
                    continue
                newFilePath = buildAssetLink(file, len(replacements), ext)
                print('link: ' + resolvedUrl)
                print('\nextension: ' + ext)
                print("\ncreating image:" + newFilePath)
                print(resolvedUrl + '>>>' + newFilePath)
                if dryRun or downloadAsset(resolvedUrl, newFilePath):
                    replacements[resolvedUrl] = newFilePath

    if dryRun or not replacements:
        return len(replacements)

    with open(fileFullPath, errors='ignore') as fullRead:
        data = fullRead.read()
    data = applyReplacements(data, replacements)
    # Write next to the original and swap it in, so an interrupted run never
    # leaves a half-written page behind.
    fullTempFilePath = os.path.join(subdir, 'temp_' + file)
    with open(fullTempFilePath, 'wt') as temp_file:
        temp_file.write(data)
    os.replace(fullTempFilePath, fileFullPath)
    return len(replacements)


# Walk through all files in all directories within the specified vault directory
for subdir, _dirs, files in os.walk(vaultDir):
    for file in files:
        print("filename: " + file)
        migrateFile(subdir, file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment