-
-
Save nicolevanderhoeven/b25c97a8a68ea97e7bdf5ee674fdaec4 to your computer and use it in GitHub Desktop.
# Walks every file in the vault, downloads each Firebase-hosted file it links to, and replaces the link with a link to the local copy.
# To use, replace PATH in the variable vaultDir with your vault's root directory.
# Downloaded files are saved in /assets - change the newFilePath variable if you want to change this.
import calendar
import glob
import os
import re
import sys
import time

import requests
# Root directory of the vault to process. '~' is expanded when the script runs.
vaultDir = '/PATH'

# Matches one complete Firebase storage download URL (it must carry the
# "?alt=..." query, as Roam's links do). The match stops at whitespace or at
# any closing delimiter, so the ')' of a Markdown link and the '}}' of a
# {{pdf: ...}} / {{audio: ...}} embed are never swallowed into the URL, and
# several links on the same line are matched separately. The trailing
# lookbehind keeps sentence punctuation right after a bare URL out of the match.
FIREBASE_URL = re.compile(
    r'https://firebasestorage[^\s?(){}\[\]<>"\']*'
    r'\?alt[^\s(){}\[\]<>"\']*(?<![.,;:!?])'
)


def firebase_extension(url):
    """Return the extension (with its leading dot) of the file behind *url*.

    The query string is ignored and only the last component of the object
    name is inspected. Returns '' when the name has no extension.
    """
    objectPath = url.split('?', 1)[0]
    # Firebase percent-encodes the '/' inside the object name as %2F.
    objectName = re.split(r'/|%2F', objectPath, flags=re.IGNORECASE)[-1]
    return os.path.splitext(objectName)[1]


def localize_firebase_links(text, fetch):
    """Return *text* with every Firebase URL replaced by a local path.

    *fetch* is called once per distinct URL and must return the replacement
    path, or None to leave that URL untouched (for example, when the download
    failed).
    """
    replacements = {}

    def swap(match):
        url = match.group(0)
        if url not in replacements:
            replacements[url] = fetch(url)
        return replacements[url] or url

    return FIREBASE_URL.sub(swap, text)


def download_asset(url, index, vault):
    """Download *url* into the vault and return its vault-relative path.

    *index* is a run-wide counter that keeps file names unique when several
    files are downloaded within the same second. Returns None (after printing
    a warning to stderr) when the download fails, so an error page is never
    saved in place of the real file.
    """
    try:
        r = requests.get(url, timeout=60)
        r.raise_for_status()
    except requests.RequestException as err:
        print('Could not download {}: {}'.format(url, err), file=sys.stderr)
        return None

    timestamp = calendar.timegm(time.gmtime())
    # Path of the new local file, relative to the vault root.
    newFilePath = 'assets/' + str(timestamp) + '_' + str(index) + firebase_extension(url)

    target = os.path.join(vault, newFilePath)
    # Create the destination folder if it doesn't exist
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'wb') as output_file:
        output_file.write(r.content)
    return newFilePath


def localize_file(path, vault, first_index):
    """Download the Firebase files linked from *path* and relink them.

    Returns the number of files downloaded. The file is rewritten only when
    at least one link was replaced.
    """
    # surrogateescape + newline='' round-trip any bytes and line endings
    # unchanged, so nothing other than the links is altered in the file.
    try:
        with open(path, encoding='utf-8', errors='surrogateescape', newline='') as note:
            original = note.read()
    except OSError as err:
        print('Could not read {}: {}'.format(path, err), file=sys.stderr)
        return 0

    if 'firebasestorage' not in original:
        return 0

    downloaded = []

    def fetch(url):
        newFilePath = download_asset(url, first_index + len(downloaded), vault)
        if newFilePath is not None:
            downloaded.append(newFilePath)
        return newFilePath

    updated = localize_firebase_links(original, fetch)
    if updated != original:
        # Write to a temp file beside the original and swap it in only once
        # it is complete. The original is already closed at this point, which
        # os.replace requires on Windows.
        tempPath = os.path.join(os.path.dirname(path), 'temp_' + os.path.basename(path))
        with open(tempPath, 'w', encoding='utf-8', errors='surrogateescape', newline='') as temp_file:
            temp_file.write(updated)
        os.replace(tempPath, path)
    return len(downloaded)


def migrate_vault(vault):
    """Process every file under *vault*; return the number of files downloaded."""
    i = 0
    # Walk through all files in all directories within the vault directory
    for subdir, dirs, files in os.walk(vault):
        for filename in files:
            i += localize_file(os.path.join(subdir, filename), vault, i)
    return i


def main():
    """Resolve vaultDir, fail loudly if it is wrong, then migrate the vault."""
    vault = os.path.abspath(os.path.expanduser(vaultDir))
    if not os.path.isdir(vault):
        # os.walk silently yields nothing for a missing directory, which
        # would make a mistyped path look like a successful run.
        sys.exit('vaultDir is not an existing directory: ' + vault)
    count = migrate_vault(vault)
    print('Downloaded {} file(s).'.format(count))


if __name__ == '__main__':
    main()
Hey Josh! Hmmm, could you try moving the script outside of the Obsidian vault? As long as you're setting PATH
to the absolute filepath of your Obsidian vault (ie /Users/josh/Documents/Obsidian
) in the script, you shouldn't need to run the script from within the vault. Also, are there any errors that are printed to the terminal?
Hey Nicole, thanks for the help - it seems it was the path format I used that wasn't working. The script is downloading the firebase assets now!
Thanks for the help :)
Just reporting that the script worked perfectly for all my PDF's, including in subfolders. However I've noticed it leaves out .pptx
& .txt
files (and probably others) that I uploaded to Roam. I don't have many of them so I'm not bothered personally, but it may be worth knowing about. I've not learnt Python yet but I imagine it's probably pretty simple to sort out myself by changing the section if '{{pdf:'
to another extension or format.
Anyway, thanks again for all the help - please feel welcome to close this issue :)
Thank you Nicole, the script worked perfectly!
Glad it worked for you! :) Happy to help.
Hi Nicole
Thanks for providing this script.
I've encountered some issues running the script, it shows
"ModuleNotFoundError: No module named 'requests'
Christies-MBP:python ChristieW$ python3 /Users/ChristieW/Desktop/python/downloadfirebase.py
Traceback (most recent call last):
File "/Users/ChristieW/Desktop/python/downloadfirebase.py", line 8, in
import requests
ModuleNotFoundError: No module named 'requests'"
I'm really new to python, could you please help with this?
Cheers
Hi @londonshiba ! It sounds like the issue may be that you have more than one version of Python on your laptop, and the wrong one is being picked up as the default. You could try the suggestions on this page to see if they work!
Hi Nicole, thank you for your great script! It also works very well with Logseq whose default attachment storage path is just /assets.
@kaoskey Oh, that's great! That is a happy accident. Great to know it would work with Logseq too! I'm glad it worked for you. :)
Excellent, thank you! This saved me a lot of time!
Hi Nicole, thank you very much for sharing your work.
One question: I see a lot of files with the extension .enc which I can not open at all not even changing the extension to png, etc.
I found that in firebase the original file name was xxxxx.png.enc (see example below) :
https://firebasestorage.googleapis.com/v0/b/firescript-577a2.appspot.com/o/imgs%2Fapp%2Fturkito%2F9-veaCy5jy.png.enc?alt=media&token=72c259b5-bded-4d1f-b6ef-d2146b8f1755
Any idea? Thank you.
EDIT: I think it might be related to encryption in Roam. Never mind, thanks again.
Thank you so much for this @nicolevanderhoeven ! Getting my firebase images over from Roam to Logseq has been killing me. I am a total noob, but I think I at least have the script running now.
Hoping for a bit of guidance.
When I run, it creates an assets folder and puts one image in there correctly but then stops.
Any ideas? (I might be doing something really obvious wrong.)
ERROR:
PermissionError: [WinError 5] Access is denied: '~/MyDirectory//temp_2021_07_31.md'
It worked for me, thanks!
Thank you so much for this! Made a fork for anyone trying to download roam images and desiring filenames based on page titles (e.g. 'my-page-1.png' vs. 'timestampnum-1.png' ) : https://gist.github.com/seltzered/f93cd6dbe7db28ac820591e7ee14b820
Hi Nicole, thanks for making this and making it available.
I'm trying to download voice memos (audio files) with this script but I fear I'm out of my depth. I'm a very novice programmer. If this is fairly straightforward, I would love any tips. If it's more complicated, I'll get more experience before I attempt further. Thanks for your time.
Hi Nicole,
I was able to get the audio files downloaded but the links in the MD files aren't working for PDFs or audio files. Here's what I'm seeing:
{{pdf: assets/1686594009_3.pdf}
{{audio: assets/1686594007_0.wav}
Again, if this is a simple fix, great. If not, still appreciate your time.
Hi Nicole, thanks for writing this script. I'm just wondering if I'm doing something wrong; the script runs something, but doesn't seem to output anything to assets?
The process I'm following is to download the file, chuck the script into the obsidian vault, use vim to edit the path to (/Documents/Obsidian) or (~/Documents/Obsidian) and then
cd
into the vault to run python3 downloadfirebase.py
as instructed... but an assets folder is neither created nor output to. I'm wondering if you can help as your script is the only one I've found to transfer my files from firebase to Obsidian? Thanks!