seltzered · January 12, 2023 07:07
diff --git a/downloadRoamFirebase.py b/downloadRoamFirebase.py
 # Scans every file under the vault directory for Roam-hosted Firebase (and googleusercontent) asset URLs, prints them, downloads the assets, and replaces the links with links to the downloaded local files.
 # To use, set the variable vaultDir below to your vault's root directory.
 # Downloaded files are automatically placed in ../assets (Logseq style) - change the newFilePath variable if you want to change this.

 import re
 import glob
 import os
 import requests
 import calendar
 import time

 # Set to True to only print what would be downloaded and renamed;
 # downloads, directory creation and file writes are all guarded by this flag.
 dryRun = False

 # SET YOUR VAULT DIRECTORY HERE - any directory full of markdown files.
 vaultDir = '/path/to/my/logseq/pages'

 # Migration descriptor inserted into every generated asset filename,
 # in case post-processing is ever needed on only these migrated images.
 filenameMigrationDescriptor = 'fromRoam'

 # Regex of special characters removed from the note's name when it is used
 # as the asset filename prefix
 # (e.g. '[' and ']' for titles that also have [[blockrefs]]).
 filenamePrefixStripCharPattern = r'\[|\]'

 # Extension assumed when none can be found. It is appended to the filename
 # as-is, so such assets end in e.g. '-0ASSUMEDPNG.png'.
 defaultAssumedExt = 'ASSUMEDPNG.png'

 # Working variables for the processing loop, initialised with placeholders.
 firebaseShort = fullRead = 'none'
 fileFullPath = fullTempFilePath = ext = ''
 i = 0

 # Walk through all files in all directories within the specified vault directory.
 # For every file: find each Firebase / googleusercontent asset link, download it
 # into the assets folder, then rewrite the file once with the links pointing at
 # the local copies. With dryRun set, only the planned actions are printed.

 # Matches one asset URL placed inside a markdown link "(...)" or, for Firebase,
 # a "{{pdf: ...}}" block. The URL stops at the first whitespace, ')' or '}' so
 # several links (or other bracketed text) on one line are never glued into a
 # single bogus URL. 'firebasePath' is the part between the host prefix and
 # '?alt'; it is only set for Firebase links.
 assetUrlPattern = re.compile(
     r'https://firebasestorage(?P<firebasePath>[^\s?)}]*)\?alt[^\s)}]*(?=[)}])'
     r'|https://[^\s)}]*googleusercontent[^\s)}]*(?=\))'
 )

 # Folder the downloads are written to; links in the notes use the relative
 # form '../assets/<name>'.
 assetsDir = os.path.join(vaultDir, '..', 'assets')

 for subdir, dirs, files in os.walk(vaultDir):
     for file in files:
         print("filename: " + file)
         fileFullPath = os.path.join(subdir, file)
         # Read the whole note up front and close it before anything is replaced.
         # UTF-8 is stated explicitly (instead of the platform default) and
         # newline='' keeps the note's original line endings on rewrite.
         with open(fileFullPath, encoding='utf-8', errors='ignore', newline='') as fhand:
             data = fhand.read()

         filenamePrefix = os.path.splitext(file)[0]
         filenamePrefix = re.sub(filenamePrefixStripCharPattern, '', filenamePrefix)

         # URL -> local relative path, so a URL repeated in the note is
         # downloaded only once.
         localPathByUrl = {}
         fileImageCount = 0
         for link in assetUrlPattern.finditer(data):
             resolvedUrl = link.group(0)
             if resolvedUrl in localPathByUrl:
                 continue
             print('\nfirebasestorage or googleusercontent asset link found')
             print('link: ' + resolvedUrl)

             firebasePath = link.group('firebasePath')
             if firebasePath is not None:
                 # Firebase link: take the extension from the last 5 characters
                 # of the URL without its query string (e.g. 'a.png' / '.jpeg').
                 firebaseShort = 'https://firebasestorage' + firebasePath
                 reg = re.search(r'(.*)\.(.+)', firebaseShort[-5:])
                 if reg:
                     ext = '.' + reg.group(2)
                 else:
                     ext = defaultAssumedExt
             else:
                 # googleusercontent link (e.g. an image copied over from a
                 # google doc): the URL carries no extension.
                 ext = defaultAssumedExt
             print('\nextension: ' + ext)

             assetName = filenamePrefix + '-' + filenameMigrationDescriptor + '-' + str(fileImageCount) + ext
             newFilePath = '../assets/' + assetName
             print("\ncreating image:" + newFilePath)
             print(resolvedUrl + '>>>' + newFilePath)

             if not dryRun:
                 try:
                     r = requests.get(resolvedUrl, timeout=60)
                     # Never save an HTTP error page as if it were the asset.
                     r.raise_for_status()
                 except requests.RequestException as err:
                     # Keep the original link in the note when the download fails.
                     print('download failed, keeping original link: ' + str(err))
                     continue
                 # Create assets folder if it doesn't exist
                 os.makedirs(assetsDir, exist_ok=True)
                 with open(os.path.join(assetsDir, assetName), 'wb') as output_file:
                     output_file.write(r.content)

             localPathByUrl[resolvedUrl] = newFilePath
             fileImageCount = fileImageCount + 1

         if (not dryRun) and localPathByUrl:
             # Write the updated note to a temp file next to the original and
             # swap it in, so a failed write cannot truncate the note.
             fullTempFilePath = os.path.join(subdir, 'temp_' + file)
             with open(fullTempFilePath, 'wt', encoding='utf-8', newline='') as temp_file:
                 temp_file.write(assetUrlPattern.sub(
                     lambda m: localPathByUrl.get(m.group(0), m.group(0)), data))
             os.replace(fullTempFilePath, fileFullPath)
             i = i + len(localPathByUrl)
	# Scans every file under the vault directory for Roam-hosted Firebase (and googleusercontent) asset URLs, prints them, downloads the assets, and replaces the links with links to the downloaded local files.
	# To use, set the variable vaultDir below to your vault's root directory.
	# Downloaded files are automatically placed in ../assets (Logseq style) - change the newFilePath variable if you want to change this.

	import re
	import glob
	import os
	import requests
	import calendar
	import time

	# Set to True to only print what would be downloaded and renamed.
	dryRun = False

	#SET YOUR VAULT DIRECTORY HERE - whatever is a directory full of markdown files
	vaultDir = '/path/to/my/logseq/pages'

	# When generating image file with filename,
	# remove special characters.
	# (e.g. '[' and ']' for titles that also have [[blockrefs]])
	# The alternation must stay unescaped: r'\[\|\]' would only match the
	# literal three-character text '[|]' and leave brackets in place.
	filenamePrefixStripCharPattern = r'\[|\]'

	# add a migration descriptor to the filename,
	# just in case you need to ever do post-processing
	# on only these migrated images.
	filenameMigrationDescriptor = 'fromRoam'

	#default assumed image extension if one isn't found
	defaultAssumedExt = 'ASSUMEDPNG.png'

	# Working variables for the processing loop, initialised with placeholders.
	firebaseShort = 'none'
	fullRead = 'none'
	fileFullPath = ''
	fullTempFilePath = ''
	i = 0
	ext = ''

	# Walk through all files in all directories within the specified vault directory.
	# For every file: find each Firebase / googleusercontent asset link, download it
	# into the assets folder, then rewrite the file once with the links pointing at
	# the local copies. With dryRun set, only the planned actions are printed.

	# Matches one asset URL placed inside a markdown link "(...)" or, for Firebase,
	# a "{{pdf: ...}}" block. The URL stops at the first whitespace, ')' or '}' so
	# several links (or other bracketed text) on one line are never glued into a
	# single bogus URL. 'firebasePath' is the part between the host prefix and
	# '?alt'; it is only set for Firebase links.
	assetUrlPattern = re.compile(
	    r'https://firebasestorage(?P<firebasePath>[^\s?)}]*)\?alt[^\s)}]*(?=[)}])'
	    r'|https://[^\s)}]*googleusercontent[^\s)}]*(?=\))'
	)

	# Folder the downloads are written to; links in the notes use the relative
	# form '../assets/<name>'.
	assetsDir = os.path.join(vaultDir, '..', 'assets')

	for subdir, dirs, files in os.walk(vaultDir):
	    for file in files:
	        print("filename: " + file)
	        fileFullPath = os.path.join(subdir, file)
	        # Read the whole note up front and close it before anything is replaced.
	        # UTF-8 is stated explicitly (instead of the platform default) and
	        # newline='' keeps the note's original line endings on rewrite.
	        with open(fileFullPath, encoding='utf-8', errors='ignore', newline='') as fhand:
	            data = fhand.read()

	        filenamePrefix = os.path.splitext(file)[0]
	        filenamePrefix = re.sub(filenamePrefixStripCharPattern, '', filenamePrefix)

	        # URL -> local relative path, so a URL repeated in the note is
	        # downloaded only once.
	        localPathByUrl = {}
	        fileImageCount = 0
	        for link in assetUrlPattern.finditer(data):
	            resolvedUrl = link.group(0)
	            if resolvedUrl in localPathByUrl:
	                continue
	            print('\nfirebasestorage or googleusercontent asset link found')
	            print('link: ' + resolvedUrl)

	            firebasePath = link.group('firebasePath')
	            if firebasePath is not None:
	                # Firebase link: take the extension from the last 5 characters
	                # of the URL without its query string (e.g. 'a.png' / '.jpeg').
	                firebaseShort = 'https://firebasestorage' + firebasePath
	                reg = re.search(r'(.*)\.(.+)', firebaseShort[-5:])
	                if reg:
	                    ext = '.' + reg.group(2)
	                else:
	                    ext = defaultAssumedExt
	            else:
	                # googleusercontent link (e.g. an image copied over from a
	                # google doc): the URL carries no extension.
	                ext = defaultAssumedExt
	            print('\nextension: ' + ext)

	            assetName = filenamePrefix + '-' + filenameMigrationDescriptor + '-' + str(fileImageCount) + ext
	            newFilePath = '../assets/' + assetName
	            print("\ncreating image:" + newFilePath)
	            print(resolvedUrl + '>>>' + newFilePath)

	            if not dryRun:
	                try:
	                    r = requests.get(resolvedUrl, timeout=60)
	                    # Never save an HTTP error page as if it were the asset.
	                    r.raise_for_status()
	                except requests.RequestException as err:
	                    # Keep the original link in the note when the download fails.
	                    print('download failed, keeping original link: ' + str(err))
	                    continue
	                # Create assets folder if it doesn't exist
	                os.makedirs(assetsDir, exist_ok=True)
	                with open(os.path.join(assetsDir, assetName), 'wb') as output_file:
	                    output_file.write(r.content)

	            localPathByUrl[resolvedUrl] = newFilePath
	            fileImageCount = fileImageCount + 1

	        if (not dryRun) and localPathByUrl:
	            # Write the updated note to a temp file next to the original and
	            # swap it in, so a failed write cannot truncate the note.
	            fullTempFilePath = os.path.join(subdir, 'temp_' + file)
	            with open(fullTempFilePath, 'wt', encoding='utf-8', newline='') as temp_file:
	                temp_file.write(assetUrlPattern.sub(
	                    lambda m: localPathByUrl.get(m.group(0), m.group(0)), data))
	            os.replace(fullTempFilePath, fileFullPath)
	            i = i + len(localPathByUrl)
No results found