Created
December 20, 2019 12:16
-
-
Save ksamuel/fb3af1345626cb66c474f38d8f037405 to your computer and use it in GitHub Desktop.
A quick-and-dirty Python script to download all Firefox bookmarks as standalone static pages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
usage: download_bookmarks.py [-h] [--concurrency [CONCURRENCY]] [--directory DIRECTORY] bookmarks

positional arguments:
  bookmarks             The path to the sqlite db file containing
                        the bookmarks. It's the places.sqlite file
                        in your default profile dir.

optional arguments:
  -h, --help            show this help message and exit
  --concurrency [CONCURRENCY], -c [CONCURRENCY]
                        Max number of bookmarks to process in parallel
  --directory DIRECTORY, -d DIRECTORY
                        Directory to store the downloaded files. Will be recursively created if it doesn't exist. Otherwise,
                        a temp dir will be used.
"""
import argparse | |
import asyncio | |
import sqlite3 | |
import sys | |
import time | |
from pathlib import Path | |
from tempfile import TemporaryDirectory | |
# Abort early on interpreters older than the minimum supported version.
if sys.version_info < (3, 8):
    sys.exit("This script requires Python 3.8 or higher")
async def download(url, directory):
    """Mirror a single page (and its requisites) into ``directory`` using wget.

    Args:
        url: Address of the page to download.
        directory: Destination directory handed to wget's ``-P`` option.

    Returns:
        None. Progress and failures are reported on stdout; a failed download
        never raises, so one bad bookmark does not abort the others.
    """
    print(f"Downloading: {url} - START")
    failure = None
    try:
        # Pass an argument list instead of a shell string: bookmark URLs
        # routinely contain "&", "?", ";" or spaces, which a shell would
        # interpret (breaking the command or executing arbitrary code).
        # "--" stops option parsing so a URL starting with "-" is not
        # mistaken for a wget flag.
        proc = await asyncio.create_subprocess_exec(
            "wget",
            "-p",
            "-k",
            "-P",
            str(directory),
            "--",
            url,
            stderr=asyncio.subprocess.PIPE,
        )
    except OSError as e:
        # Typically wget is not installed or not executable.
        failure = str(e)
    else:
        _, stderr = await proc.communicate()
        # wget writes its regular log to stderr, so non-empty stderr is not
        # an error indicator; the exit status is.
        if proc.returncode != 0:
            failure = stderr.decode(errors="replace")
    if failure is not None:
        print(f"Downloading: {url} - ERROR")
        print(f"\n[stderr]\n{failure}")
    print(f"Downloading: {url} - DONE")
async def main():
    """Parse the command line and download every Firefox bookmark.

    Reads the bookmark URLs from the ``places.sqlite`` file given on the
    command line, then runs up to ``--concurrency`` ``download`` calls at a
    time, storing the results in ``--directory`` (or in a freshly created
    temporary directory when no directory is given).

    Raises:
        SystemExit: on invalid arguments, when the output directory cannot be
            created, when the bookmark file is missing or misnamed, or when
            the database is locked (Firefox still running).
        sqlite3.OperationalError: for database errors other than a lock.
    """
    # Local import: the file only imports ``TemporaryDirectory`` at top level.
    from tempfile import mkdtemp

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bookmarks",
        help="The path to the sqlite db file containing the bookmarks. It's the places.sqlite file in your default profile dir.",
    )
    parser.add_argument(
        "--concurrency",
        "-c",
        type=int,
        nargs="?",
        help="Max number of bookmarks to process in parallel",
        default=40,
        # With nargs="?", a bare "-c" would otherwise yield None and crash
        # later in the comparison against the number of running tasks.
        const=40,
    )
    parser.add_argument(
        "--directory",
        "-d",
        help="Directory to store the downloaded files. Will be recursively created if it doesn't exist. Otherwise, a temp dir will be used.",
    )
    args = parser.parse_args()

    if args.concurrency < 1:
        parser.error("--concurrency must be at least 1")

    try:
        if args.directory:
            directory = Path(args.directory)
            directory.mkdir(exist_ok=True, parents=True)
        else:
            # mkdtemp() creates a directory that persists after the script
            # ends. TemporaryDirectory().name would be deleted as soon as the
            # unreferenced TemporaryDirectory object is finalized.
            directory = Path(mkdtemp())
    except OSError as e:
        sys.exit(f"Error while creating the output directory: {e}")

    bookmark_file = Path(args.bookmarks)
    if not bookmark_file.is_file():
        sys.exit(f'Cannot find "{bookmark_file}"')
    if bookmark_file.name != "places.sqlite":
        sys.exit(
            f'The bookmark file should be a "places.sqlite" file, got "{bookmark_file}"'
        )

    # A sqlite3 connection used as a context manager only manages the
    # transaction, it does not close the connection; close it explicitly.
    con = sqlite3.connect(bookmark_file)
    try:
        # A set removes URLs bookmarked more than once.
        urls = {
            url
            for (url,) in con.execute(
                """
                SELECT url from moz_places,moz_bookmarks
                WHERE moz_places.id=moz_bookmarks.fk;
                """
            )
        }
    except sqlite3.OperationalError as e:
        if "locked" in str(e):
            sys.exit("Close Firefox before running this script")
        raise
    finally:
        con.close()

    total = len(urls)
    print(f"Ready to process {total} bookmarks")
    print(f"Saving results in: {directory}")
    if not args.directory:
        # Leave time to note the temp dir (or abort) before output scrolls.
        print("Starting in 5 secs")
        await asyncio.sleep(5)

    running_tasks = set()
    for i, url in enumerate(urls, 1):
        if len(running_tasks) >= args.concurrency:
            # Block until at least one slot frees up, keeping only the
            # still-pending tasks.
            _, running_tasks = await asyncio.wait(
                running_tasks, return_when=asyncio.FIRST_COMPLETED
            )
        # asyncio.wait() needs Task objects: passing bare coroutines is
        # deprecated since 3.8 and rejected by Python 3.11+.
        running_tasks.add(asyncio.ensure_future(download(url, directory)))
        print(f"Progress {i}/{total}")

    # asyncio.wait() raises ValueError on an empty set (no bookmarks).
    if running_tasks:
        await asyncio.wait(running_tasks)
    print(f"Results saved in: {directory}")
# Only start downloading when executed as a script, so that importing this
# module (e.g. to reuse ``download``) has no side effects.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment