-
-
Save nicolevanderhoeven/b25c97a8a68ea97e7bdf5ee674fdaec4 to your computer and use it in GitHub Desktop.
# Opens every file in the vault directory, finds Firebase URLs, downloads the files they point to, and replaces the links with links to the new local files.
# To use, replace PATH in the variable vaultDir with your vault's root directory.
# Downloaded files are automatically saved in /assets - change the newFilePath variable if you want to change this.
import calendar
import glob
import os
import re
import time

import requests
# Root directory of the vault to migrate. Replace '/PATH' with your own path.
vaultDir = '/PATH'

# Matches one Firebase storage download URL. The character classes stop at
# whitespace, quotes and brackets, so the ')' that closes a Markdown link or
# the '}}' that closes a {{pdf: ...}} / {{audio: ...}} component is never
# swallowed into the URL (the cause of the '{{pdf: assets/x.pdf}' breakage).
firebaseLinkPattern = re.compile(
    r'https://firebasestorage[^\s(){}\[\]"<>]*\?alt[^\s(){}\[\]"<>]*'
)


def assetExtension(firebaseUrl):
    """Return the extension of the stored file, including the dot, or ''.

    Ex: '...%2FLy4Wel-rjk.png?alt=media&token=...' -> '.png'
        '...%2F9-veaCy5jy.png.enc?alt=media&token=...' -> '.enc'
    """
    # Drop the query string, then keep only the last path component. Firebase
    # encodes the object's own '/' separators as %2F, so split on both forms.
    objectPath = firebaseUrl.split('?', 1)[0]
    objectName = re.split(r'%2[Ff]|/', objectPath)[-1]
    return os.path.splitext(objectName)[1]


def replaceFirebaseLinks(text, localize):
    """Return text with every Firebase URL replaced by its local path.

    localize is called once per distinct URL and must return the replacement
    string, or None to leave that URL untouched (e.g. the download failed).
    Every occurrence of the same URL gets the same replacement.
    """
    localPaths = {}

    def substitute(match):
        url = match.group(0)
        if url not in localPaths:
            localPaths[url] = localize(url)
        replacement = localPaths[url]
        return url if replacement is None else replacement

    return firebaseLinkPattern.sub(substitute, text)


def downloadAsset(firebaseUrl, vaultRoot, index):
    """Download one Firebase file into the vault.

    Returns the new path relative to vaultRoot (this is what gets written
    into the notes), or None if the download failed.
    """
    try:
        r = requests.get(firebaseUrl, timeout=60)
        # Without this check an HTTP error page would be saved as the asset
        # and the working remote link would be lost.
        r.raise_for_status()
    except requests.RequestException as error:
        print('Skipping ' + firebaseUrl + ': ' + str(error))
        return None
    timestamp = calendar.timegm(time.gmtime())
    # Change newFilePath to store the downloaded files somewhere else.
    newFilePath = 'assets/' + str(timestamp) + '_' + str(index) + assetExtension(firebaseUrl)
    localPath = os.path.join(vaultRoot, newFilePath)
    # Create the target folder if it doesn't exist
    os.makedirs(os.path.dirname(localPath), exist_ok=True)
    with open(localPath, 'wb') as output_file:
        output_file.write(r.content)
    return newFilePath


def migrateVault(vaultRoot):
    """Download every Firebase file linked from the vault and relink the notes.

    Walks all files in all directories below vaultRoot. Returns the number of
    files downloaded.
    """
    vaultRoot = os.path.expanduser(vaultRoot)  # allow '~/MyVault'
    downloaded = 0

    def localize(firebaseUrl):
        nonlocal downloaded
        newFilePath = downloadAsset(firebaseUrl, vaultRoot, downloaded)
        if newFilePath is not None:
            downloaded += 1
        return newFilePath

    for subdir, dirs, files in os.walk(vaultRoot):
        for fileName in files:
            fileFullPath = os.path.join(subdir, fileName)
            # Read everything and close the file before touching it again:
            # replacing a file that still has an open handle fails on Windows.
            # errors='ignore' drops undecodable bytes, so non-text files can
            # be scanned without crashing; newline='' keeps line endings.
            with open(fileFullPath, encoding='utf-8', errors='ignore', newline='') as fhand:
                original = fhand.read()
            if 'firebasestorage' not in original:
                continue
            updated = replaceFirebaseLinks(original, localize)
            if updated == original:
                continue
            # Write next to the original (names cannot collide across
            # folders), then swap it in with a single rename.
            fullTempFilePath = fileFullPath + '.tmp'
            with open(fullTempFilePath, 'w', encoding='utf-8', newline='') as temp_file:
                temp_file.write(updated)
            os.replace(fullTempFilePath, fileFullPath)
    return downloaded


if __name__ == '__main__':
    migrateVault(vaultDir)
Glad it worked for you! :) Happy to help.
Hi Nicole
Thanks for providing this script.
I've encountered some issues running the script, it shows
"ModuleNotFoundError: No module named 'requests'
Christies-MBP:python ChristieW$ python3 /Users/ChristieW/Desktop/python/downloadfirebase.py
Traceback (most recent call last):
File "/Users/ChristieW/Desktop/python/downloadfirebase.py", line 8, in <module>
import requests
ModuleNotFoundError: No module named 'requests'"
I'm really new to python, could you please help with this?
Cheers
Hi @londonshiba ! It sounds like the issue may be that you have more than one version of Python on your laptop, and the wrong one is being picked up as the default. You could try the suggestions on this page to see if they work!
Hi Nicole, thank you for your great script! It also works very well with Logseq whose default attachment storage path is just /assets.
@kaoskey Oh, that's great! That is a happy accident. Great to know it would work with Logseq too! I'm glad it worked for you. :)
Excellent, thank you! This saved me a lot of time!
Hi Nicole, thank you very much for sharing your work.
One question: I see a lot of files with the extension .enc which I cannot open at all, not even after changing the extension to .png, etc.
I found that in firebase the original file name was xxxxx.png.enc (see example below) :
https://firebasestorage.googleapis.com/v0/b/firescript-577a2.appspot.com/o/imgs%2Fapp%2Fturkito%2F9-veaCy5jy.png.enc?alt=media&token=72c259b5-bded-4d1f-b6ef-d2146b8f1755
Any idea? Thank you.
EDIT: I think it might be related to encryption in Roam. Never mind, thanks again.
Thank you so much for this @nicolevanderhoeven ! Getting my firebase images over from Roam to Logseq has been killing me. I am a total noob, but I think I at least have the script running now.
Hoping for a bit of guidance.
When I run, it creates an assets folder and puts one image in there correctly but then stops.
Any ideas? (I might be doing something really obvious wrong.)
ERROR:
PermissionError: [WinError 5] Access is denied: '~/MyDirectory//temp_2021_07_31.md'
It worked for me, thanks!
Thank you so much for this! Made a fork for anyone trying to download roam images and desiring filenames based on page titles (e.g. 'my-page-1.png' vs. 'timestampnum-1.png' ) : https://gist.github.com/seltzered/f93cd6dbe7db28ac820591e7ee14b820
Hi Nicole, thanks for making this and making it available.
I'm trying to download voice memos (audio files) with this script but I fear I'm out of my depth. I'm a very novice programmer. If this is fairly straightforward, I would love any tips. If it's more complicated, I'll get more experience before I attempt further. Thanks for your time.
Hi Nicole,
I was able to get the audio files downloaded but the links in the MD files aren't working for PDFs or audio files. Here's what I'm seeing:
{{pdf: assets/1686594009_3.pdf}
{{audio: assets/1686594007_0.wav}
Again, if this is a simple fix, great. If not, still appreciate your time.
Thank you Nicole, the script worked perfectly!