Skip to content

Instantly share code, notes, and snippets.

@jasonm23
Created June 20, 2025 08:45
Show Gist options
  • Save jasonm23/8ac4155aa718890f1756a380edae2e53 to your computer and use it in GitHub Desktop.
Save jasonm23/8ac4155aa718890f1756a380edae2e53 to your computer and use it in GitHub Desktop.
Get bookmarks directly from Firefox's places.sqlite (work in progress)
import sqlite3
import os
import json
import argparse
import datetime
import sys # For sys.platform
import shutil # For copying database, if enabled
import configparser # For parsing profiles.ini
def get_firefox_profile_path(profile_name=None, newest=False):
    """
    Resolve the directory of the Firefox profile to read.

    Profiles are discovered from ``profiles.ini`` in the per-OS Firefox data
    directory. When that file is absent, the directory that holds the profile
    folders is scanned for sub-directories containing ``places.sqlite``.
    Only profiles that actually contain a ``places.sqlite`` are considered.

    Selection order:
      1. ``profile_name`` - matched against the profile's display name or the
         base name of its directory.
      2. ``newest`` - the profile whose ``places.sqlite`` has the most recent
         modification time.
      3. The profile flagged ``Default=1`` in ``profiles.ini``.

    Args:
        profile_name (str, optional): Profile name or profile directory name.
        newest (bool): Pick the most recently modified profile.

    Returns:
        str: Path to the selected profile directory.

    Raises:
        OSError: The current platform is not Linux, macOS or Windows.
        FileNotFoundError: The Firefox data directory is missing, the requested
            profile does not exist, or no profile could be selected.
    """
    # base_path is the directory that holds profiles.ini. On macOS and Windows
    # the profile folders live one level below it, in "Profiles"; the "Path"
    # entries of profiles.ini are relative to base_path on every platform.
    if sys.platform.startswith('linux'):
        base_path = os.path.expanduser("~/.mozilla/firefox/")
        profiles_subdir = ""
    elif sys.platform == 'darwin':  # macOS
        base_path = os.path.expanduser("~/Library/Application Support/Firefox/")
        profiles_subdir = "Profiles"
    elif sys.platform == 'win32':  # Windows
        base_path = os.path.join(os.environ.get('APPDATA', ''), "Mozilla", "Firefox")
        profiles_subdir = "Profiles"
    else:
        raise OSError(f"Unsupported operating system: {sys.platform}")

    if not os.path.isdir(base_path):
        raise FileNotFoundError(f"Firefox profiles base directory not found: '{base_path}'")

    # Entries: {'name': str, 'path': str, 'is_default': bool, 'last_modified': float}
    available_profiles = []

    def add_profile(name, path, is_default):
        """Record the profile if it contains a places.sqlite database."""
        places_sqlite_path = os.path.join(path, "places.sqlite")
        if os.path.exists(places_sqlite_path):
            available_profiles.append({
                'name': name,
                'path': path,
                'is_default': is_default,
                'last_modified': os.path.getmtime(places_sqlite_path),
            })

    profiles_ini_path = os.path.join(base_path, "profiles.ini")
    if os.path.exists(profiles_ini_path):
        # interpolation=None: profile names are free text and may contain '%',
        # which the default interpolation would reject when the value is read.
        config = configparser.ConfigParser(interpolation=None)
        config.read(profiles_ini_path, encoding="utf-8")
        for section in config.sections():
            if not section.startswith("Profile"):
                continue
            path_segment = config.get(section, 'Path', fallback=None)
            if not path_segment:
                continue  # Malformed section without a path; nothing to use.
            is_relative = config.getboolean(section, 'IsRelative', fallback=True)
            profile_abs_path = os.path.normpath(
                os.path.join(base_path, path_segment) if is_relative else path_segment
            )
            add_profile(
                config.get(section, 'Name', fallback=os.path.basename(profile_abs_path)),
                profile_abs_path,
                config.getboolean(section, 'Default', fallback=False),
            )
    else:
        # No profiles.ini: treat every sub-directory holding a places.sqlite as
        # a profile. The default profile cannot be determined this way.
        scan_dir = os.path.join(base_path, profiles_subdir) if profiles_subdir else base_path
        if os.path.isdir(scan_dir):
            for entry in os.listdir(scan_dir):
                candidate = os.path.normpath(os.path.join(scan_dir, entry))
                if os.path.isdir(candidate):
                    add_profile(entry, candidate, False)

    if profile_name:
        for p in available_profiles:
            if p['name'] == profile_name or os.path.basename(p['path']) == profile_name:
                print(f"Using specified profile: '{p['name']}' at '{p['path']}'")
                return p['path']
        raise FileNotFoundError(f"Firefox profile '{profile_name}' not found.")

    if newest:
        if not available_profiles:
            raise FileNotFoundError("No Firefox profiles with places.sqlite found to determine newest.")
        newest_profile = max(available_profiles, key=lambda p: p['last_modified'])
        print(f"Using newest profile: '{newest_profile['name']}' (last modified: {datetime.datetime.fromtimestamp(newest_profile['last_modified']).isoformat()}) at '{newest_profile['path']}'")
        return newest_profile['path']

    # Neither a name nor --newest-profile: fall back to the profile marked default.
    for p in available_profiles:
        if p['is_default']:
            print(f"Using default profile: '{p['name']}' at '{p['path']}'")
            return p['path']

    if not available_profiles:
        raise FileNotFoundError("No Firefox profiles found containing places.sqlite.")

    # Several candidates but nothing to choose between them: list them for the user.
    print("No specific profile requested, no --newest-profile flag, and no default profile found in profiles.ini.", file=sys.stderr)
    print("Available profiles:", file=sys.stderr)
    for p in available_profiles:
        print(f" - {p['name']} (Path: {p['path']}, Last Modified: {datetime.datetime.fromtimestamp(p['last_modified']).isoformat()})", file=sys.stderr)
    raise FileNotFoundError("Please specify a profile using --profile or --newest-profile.")
def convert_firefox_timestamp(microseconds):
    """
    Convert a Firefox timestamp to an ISO 8601 string in UTC.

    Firefox stores timestamps as microseconds since the Unix epoch.

    Args:
        microseconds (int | float | None): Microseconds since 1970-01-01 UTC.

    Returns:
        str | None: Timezone-aware ISO 8601 string (``+00:00`` offset), or
        None when the input is None, not numeric, or outside the range that
        ``datetime`` can represent.
    """
    if microseconds is None:
        return None
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    try:
        # Epoch + timedelta keeps integer microseconds exact and does not go
        # through the platform's time_t conversion, so negative and very large
        # values behave the same on every OS.
        return (epoch + datetime.timedelta(microseconds=microseconds)).isoformat()
    except (ValueError, OverflowError, TypeError):
        # Out-of-range, NaN or non-numeric values have no usable date.
        return None
def _child_sort_key(node):
    """Order siblings by position, then folders first, then case-insensitive name."""
    position = node.get('position')
    return (
        position if position is not None else float('inf'),  # Missing position sorts last
        0 if node['type'] == 'folder' else 1,
        (node['name'] or '').lower(),  # Untitled items have name None
    )


def build_tree(flat_data, initial_root_ids):
    """
    Build a hierarchical tree from flat bookmark rows using their parent IDs.

    Args:
        flat_data (list): Dictionaries with the keys 'id', 'parent', 'ItemType',
            'ItemName', 'Href', 'Tags', 'dateAdded', 'lastModified' and
            'position'.
        initial_root_ids (list): IDs of the rows that become the top-level
            nodes of the result. IDs that are not present in flat_data are
            skipped.

    Returns:
        list: One node dictionary per found root ID, in the order given. Each
        node has 'type', 'name', 'id' (as a string), 'add_date',
        'last_modified' and 'position'; bookmarks also have 'href' and, when
        tagged, 'tags'. Nodes with children carry a 'children' list sorted by
        position; root nodes always have a 'children' key.
    """
    nodes_map = {}
    for item in flat_data:
        item_type = item['ItemType'].lower()
        node = {
            "type": item_type,
            "name": item['ItemName'],
            "id": str(item['id']),  # ID as string
            "add_date": convert_firefox_timestamp(item['dateAdded']),
            "last_modified": convert_firefox_timestamp(item['lastModified']),
            "position": item['position'],
        }
        if item_type == 'bookmark':
            node["href"] = item['Href']
            if item['Tags']:
                tags = [tag.strip() for tag in item['Tags'].split(',')]
                node["tags"] = [tag for tag in tags if tag]
        nodes_map[item['id']] = node

    # Attach every node to its parent when the parent is part of the data set.
    for item in flat_data:
        parent_id = item['parent']
        # A row that names itself as parent would create a circular structure
        # that cannot be serialised, so it is left unattached.
        if parent_id is None or parent_id == item['id']:
            continue
        parent_node = nodes_map.get(parent_id)
        if parent_node is not None:
            parent_node.setdefault("children", []).append(nodes_map[item['id']])

    # Sort siblings so the output is stable and follows the order Firefox shows.
    for node in nodes_map.values():
        if node.get('children'):
            node['children'].sort(key=_child_sort_key)

    final_tree = []
    for root_id in initial_root_ids:
        root_node = nodes_map.get(root_id)
        if root_node is not None:
            root_node.setdefault("children", [])  # Roots always expose 'children'
            final_tree.append(root_node)
    return final_tree
def extract_firefox_bookmarks_and_tags(db_path, start_folder_name=None, all_bookmarks=False):
    """
    Read a folder subtree (or all standard roots) from a Firefox places.sqlite.

    Rows are collected with a recursive query that starts at the chosen root
    folder(s) and follows ``moz_bookmarks.parent`` downwards, keeping bookmarks
    (type 1) and folders (type 2). Tags are resolved the way Firefox stores
    them: every tag is a folder below the tags root (guid ``tags________``)
    and a tagged URL has an entry in that folder pointing at the same
    ``moz_places`` row, so tags are matched through the bookmark's ``fk``.

    Args:
        db_path (str): Path to the places.sqlite database file.
        start_folder_name (str, optional): Title of the folder to start from.
            Any folder with that title matches; when several do, the one with
            the lowest id is used and a warning is printed.
        all_bookmarks (bool): Start from the standard root folders with IDs
            2, 3 and 5 instead. Takes precedence over start_folder_name.

    Returns:
        tuple: (rows, root_ids). Each row is a dict with the keys 'id',
        'ItemName', 'ItemType', 'Href', 'parent', 'dateAdded', 'lastModified',
        'position', 'Tags' (comma-separated string or None), 'level' and
        'FullPath'. Both lists are empty when nothing could be extracted;
        errors are reported on stderr rather than raised.
    """
    flat_results = []
    initial_root_ids = []

    # sqlite3.connect() would silently create an empty database for a missing path.
    if not os.path.exists(db_path):
        print(f"Error: Database file not found at '{db_path}'", file=sys.stderr)
        return flat_results, initial_root_ids

    conn = None
    try:
        conn = sqlite3.connect(db_path, timeout=10)  # Timeout rides out brief locks
        cursor = conn.cursor()

        # Cheap probe on the table we need, so a locked database is reported
        # with a specific hint before the expensive query runs.
        try:
            cursor.execute("SELECT 1 FROM moz_bookmarks LIMIT 1;")
            cursor.fetchone()
        except sqlite3.OperationalError as oe_precheck:
            if "database is locked" in str(oe_precheck).lower():
                print(f"SQLite Pre-check Error: Database is locked. (Details: {oe_precheck})", file=sys.stderr)
                print("Please ensure Firefox is closed or use the --copy-db option.", file=sys.stderr)
            else:
                print(f"SQLite Pre-check Error: Failed initial database access. (Details: {oe_precheck})", file=sys.stderr)
                print("This indicates a problem with accessing the database, not necessarily a simple lock.", file=sys.stderr)
            return flat_results, initial_root_ids

        # Work out which folder ids anchor the recursive query.
        if all_bookmarks:
            # ID 2: Bookmarks Menu, ID 3: Bookmarks Toolbar, ID 5: Unsorted Bookmarks.
            # Keep only those that really exist as folders in this database.
            cursor.execute(
                "SELECT id FROM moz_bookmarks WHERE id IN (2, 3, 5) AND type = 2 ORDER BY id;"
            )
            initial_root_ids = [row[0] for row in cursor.fetchall()]
            if not initial_root_ids:
                print("Warning: Standard Firefox root bookmark folders (IDs 2, 3, 5) not found or not folders. Cannot extract all bookmarks.", file=sys.stderr)
                return flat_results, initial_root_ids
            print(f"Extracting all bookmarks from Firefox root folders (IDs: {', '.join(map(str, initial_root_ids))})")
        elif start_folder_name:
            cursor.execute(
                "SELECT id FROM moz_bookmarks WHERE title = ? AND type = 2 ORDER BY id;",
                (start_folder_name,)
            )
            matching_ids = [row[0] for row in cursor.fetchall()]
            if not matching_ids:
                print(f"Error: Folder '{start_folder_name}' not found in bookmarks.", file=sys.stderr)
                return flat_results, initial_root_ids
            if len(matching_ids) > 1:
                print(f"Warning: {len(matching_ids)} folders are named '{start_folder_name}'; using ID {matching_ids[0]}.", file=sys.stderr)
            initial_root_ids = [matching_ids[0]]
            print(f"Extracting bookmarks from specific folder: '{start_folder_name}' (ID: {initial_root_ids[0]})")
        else:
            print("Error: No specific folder name or --all-bookmarks flag provided.", file=sys.stderr)
            return flat_results, initial_root_ids

        # Only the number of "?" placeholders is formatted into the SQL text;
        # the ids themselves are passed as bound parameters.
        anchor_placeholders = ", ".join("?" for _ in initial_root_ids)
        sql_query = f"""
            WITH RECURSIVE BookmarkHierarchy AS (
                -- Anchor member: the selected root folder(s)
                SELECT
                    mb.id, mb.type, mb.parent, mb.title,
                    mp.url,
                    mb.position,
                    mb.fk,
                    mb.dateAdded, mb.lastModified,
                    1 AS level,
                    COALESCE(mb.title, '') AS path_titles
                FROM moz_bookmarks AS mb
                LEFT JOIN moz_places AS mp ON mb.fk = mp.id
                WHERE mb.id IN ({anchor_placeholders})
                UNION ALL
                -- Recursive member: bookmarks and sub-folders of the rows found so far
                SELECT
                    child_mb.id, child_mb.type, child_mb.parent, child_mb.title,
                    child_mp.url,
                    child_mb.position,
                    child_mb.fk,
                    child_mb.dateAdded, child_mb.lastModified,
                    parent_item.level + 1,
                    -- COALESCE: a NULL title would otherwise null the whole path
                    parent_item.path_titles || ' -> ' || COALESCE(child_mb.title, '')
                FROM BookmarkHierarchy AS parent_item
                JOIN moz_bookmarks AS child_mb ON child_mb.parent = parent_item.id
                LEFT JOIN moz_places AS child_mp ON child_mb.fk = child_mp.id
                WHERE child_mb.type IN (1, 2)
            )
            SELECT
                bh.id,
                bh.title AS ItemName,
                CASE bh.type WHEN 1 THEN 'bookmark' WHEN 2 THEN 'folder' ELSE 'unknown' END AS ItemType,
                bh.url AS Href,
                bh.parent,
                bh.dateAdded,
                bh.lastModified,
                bh.position,
                item_tags.concated_tags AS Tags,
                bh.level,
                bh.path_titles AS FullPath
            FROM BookmarkHierarchy AS bh
            -- One row per tagged place with its distinct tag names joined by ', '
            LEFT JOIN (
                SELECT
                    place_id,
                    GROUP_CONCAT(tag_title, ', ') AS concated_tags
                FROM (
                    SELECT DISTINCT
                        tagged.fk AS place_id,
                        tag.title AS tag_title
                    FROM moz_bookmarks AS tagged
                    JOIN moz_bookmarks AS tag ON tagged.parent = tag.id
                    JOIN moz_bookmarks AS tags_root ON tag.parent = tags_root.id
                    WHERE tags_root.guid = 'tags________'
                      AND tag.type = 2
                      AND tag.title IS NOT NULL
                      AND tagged.fk IS NOT NULL
                    ORDER BY tag_title
                ) AS distinct_place_tags
                GROUP BY place_id
            ) AS item_tags ON bh.type = 1 AND bh.fk = item_tags.place_id
            WHERE bh.type IN (1, 2)
            ORDER BY FullPath, ItemType DESC, ItemName;
        """
        cursor.execute(sql_query, initial_root_ids)
        column_names = [description[0] for description in cursor.description]
        flat_results.extend(dict(zip(column_names, row)) for row in cursor.fetchall())
    except sqlite3.OperationalError as e:
        if "database is locked" in str(e).lower():
            print(f"SQLite Operational Error: Database became locked during main query. (Details: {e})", file=sys.stderr)
            print("Please ensure Firefox is closed or use the --copy-db option.", file=sys.stderr)
        else:
            print(f"SQLite Query Error during bookmark extraction: {e}", file=sys.stderr)
            print("This indicates an issue with the SQL query itself.", file=sys.stderr)
    except sqlite3.Error as e:
        print(f"Database error occurred: {e}", file=sys.stderr)
    except Exception as e:
        # Last-resort boundary: this function reports problems instead of raising.
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
    finally:
        if conn:
            conn.close()
    return flat_results, initial_root_ids
def list_folders_tree_format(db_path):
    """
    Print the bookmark folder hierarchy of a places.sqlite as a text tree.

    Only folders (``moz_bookmarks.type = 2``) are shown, siblings sorted
    case-insensitively by title. Each line is drawn with box characters:
    ``├──`` for an entry that has later siblings and ``└──`` for the last one.

    Args:
        db_path (str): Path to the places.sqlite database file.

    Raises:
        SystemExit: With status 1 when the file is missing or any database
            error occurs; the reason is printed to stderr first.
    """
    # sqlite3.connect() would create an empty file for a missing path.
    if not os.path.exists(db_path):
        print(f"Error: Database file not found at '{db_path}'", file=sys.stderr)
        sys.exit(1)

    conn = None
    try:
        conn = sqlite3.connect(db_path, timeout=10)
        cursor = conn.cursor()

        # Cheap probe so a locked or unreadable database is reported up front.
        try:
            cursor.execute("SELECT 1 FROM moz_bookmarks LIMIT 1;")
            cursor.fetchone()
        except sqlite3.OperationalError as oe_precheck:
            if "database is locked" in str(oe_precheck).lower():
                print(f"SQLite Pre-check Error: Database is locked. (Details: {oe_precheck})", file=sys.stderr)
            else:
                print(f"SQLite Pre-check Error: Failed initial database access. (Details: {oe_precheck})", file=sys.stderr)
            print("Please ensure Firefox is closed or use the --copy-db option.", file=sys.stderr)
            sys.exit(1)

        cursor.execute("SELECT id, parent, title FROM moz_bookmarks WHERE type = 2;")
        folder_map = {
            folder_id: {
                'title': title if title else ' (Untitled Folder) ',
                'children': [],
                'parent': parent_id,
            }
            for folder_id, parent_id, title in cursor.fetchall()
        }

        # Fixed root folders under which user-created folders live.
        FIREFOX_ROOT_IDS = {
            2: 'Bookmarks Menu',
            3: 'Bookmarks Toolbar',
            5: 'Unsorted Bookmarks'
        }

        # Link children to parents. Iterate over a snapshot because a missing
        # standard root is added to folder_map on the fly.
        for folder_id, data in list(folder_map.items()):
            parent_id = data['parent']
            if parent_id not in folder_map and parent_id in FIREFOX_ROOT_IDS:
                folder_map[parent_id] = {
                    'title': FIREFOX_ROOT_IDS[parent_id],
                    'children': [],
                    'parent': None  # Shown as a top-level entry
                }
            if parent_id in folder_map:
                folder_map[parent_id]['children'].append(folder_id)

        # Top-level entries: folders whose parent is not a known folder and that
        # hang off the internal roots (parent 0/1 or None) or are a standard root.
        root_nodes_for_display = [
            folder_id
            for folder_id, data in folder_map.items()
            if data['parent'] not in folder_map
            and (data['parent'] is None or data['parent'] in (0, 1) or folder_id in FIREFOX_ROOT_IDS)
        ]
        root_nodes_for_display.sort(key=lambda x: folder_map[x]['title'].lower())

        def print_tree(node_id, prefix="", is_last=True):
            """Print one folder line and recurse into its sorted children."""
            node = folder_map[node_id]
            connector = "└── " if is_last else "├── "
            print(f"{prefix}{connector}{node['title']} (ID: {node_id})")
            children = sorted(node['children'], key=lambda x: folder_map[x]['title'].lower())
            # Below a last entry nothing continues, so pad with spaces;
            # otherwise keep the vertical rule running past the subtree.
            child_prefix = prefix + ("    " if is_last else "│   ")
            for i, child_id in enumerate(children):
                print_tree(child_id, child_prefix, i == len(children) - 1)

        if not root_nodes_for_display:
            print("No bookmark folders found in this profile.", file=sys.stderr)
            return

        print("\n--- Firefox Bookmark Folder Hierarchy ---")
        for i, root_id in enumerate(root_nodes_for_display):
            print_tree(root_id, "", i == len(root_nodes_for_display) - 1)
        print("---------------------------------------\n")
    except sqlite3.OperationalError as e:
        if "database is locked" in str(e).lower():
            print(f"SQLite Operational Error: Database became locked during folder listing. (Details: {e})", file=sys.stderr)
        else:
            print(f"SQLite Query Error during folder listing: {e}", file=sys.stderr)
        print("Please ensure Firefox is closed or use the --copy-db option.", file=sys.stderr)
        sys.exit(1)
    except sqlite3.Error as e:
        print(f"Database error occurred: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        if conn:
            conn.close()
# --- Main execution block ---
def _build_arg_parser():
    """Create the command-line parser for the bookmark exporter."""
    parser = argparse.ArgumentParser(
        description="Extracts hierarchical bookmark data with tags and dates from Firefox places.sqlite. "
                    "Also provides options to list folder hierarchy."
    )
    parser.add_argument(
        "--profile",
        "-p",
        type=str,
        help="Firefox profile name (e.g., 'fdsz0a65.default-release'). Overrides --newest-profile."
    )
    parser.add_argument(
        "--newest-profile",
        action="store_true",
        help="Automatically select the newest Firefox profile based on places.sqlite modification time."
    )
    parser.add_argument(
        "--folder-name",
        "-f",
        type=str,
        help="The title of the bookmark folder to extract bookmarks from (e.g., 'ocodo-links')."
    )
    parser.add_argument(
        "--all-bookmarks",
        "-a",
        action="store_true",
        help="Extract all bookmarks from Firefox's standard root folders (Bookmarks Menu, Bookmarks Toolbar, Unsorted Bookmarks). Overrides --folder-name."
    )
    parser.add_argument(
        "--output-json",
        "-o",
        type=str,
        default="bookmarks.json",
        help="Path to the output JSON file. (Default: 'bookmarks.json')"
    )
    parser.add_argument(
        "--copy-db",
        action="store_true",
        help="Create a temporary copy of places.sqlite before processing to avoid locked database issues. The copy will be deleted afterwards."
    )
    parser.add_argument(
        "--list-folders",
        "-l",
        action="store_true",
        help="List all bookmark folders in a tree-like hierarchy and exit. This will ignore all other extraction-related arguments."
    )
    return parser


def _export_bookmarks(db_path, start_folder, extract_all, output_json):
    """Extract the requested bookmarks and write them as JSON; return an exit status."""
    print(f"Attempting to extract from: {db_path}")
    flat_extracted_data, initial_root_ids = extract_firefox_bookmarks_and_tags(
        db_path,
        start_folder_name=start_folder,
        all_bookmarks=extract_all
    )
    if not flat_extracted_data:
        print("No data extracted or an error occurred during extraction.", file=sys.stderr)
        return 1
    if not initial_root_ids:
        print("No valid root IDs determined for tree building. Exiting.", file=sys.stderr)
        return 1

    hierarchical_data = build_tree(flat_extracted_data, initial_root_ids)
    if not hierarchical_data:
        print("No data in the specified folder(s) or error building tree.", file=sys.stderr)
        return 1

    try:
        with open(output_json, 'w', encoding='utf-8') as f:
            json.dump(hierarchical_data, f, indent=2, ensure_ascii=False)
    except OSError as e:
        print(f"Error: Could not write output file '{output_json}': {e}", file=sys.stderr)
        return 1
    print(f"\nExtracted data successfully saved to '{output_json}' in the specified JSON format.")
    return 0


def main(argv=None):
    """
    Run the command-line tool.

    Args:
        argv (list, optional): Argument list without the program name;
            defaults to the process arguments.

    Returns:
        int: 0 on success, 1 when arguments are unusable, the profile cannot
        be resolved, or nothing could be exported.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    # --- Profile selection: --profile, then --newest-profile, then environment ---
    selected_profile_name = args.profile
    use_newest_profile = args.newest_profile
    if not selected_profile_name and not use_newest_profile:
        profile_name_from_env = os.environ.get("FIREFOX_PROFILE_NAME")
        if not profile_name_from_env:
            print("Error: No Firefox profile specified. Please use --profile <name> or --newest-profile, or set FIREFOX_PROFILE_NAME environment variable.", file=sys.stderr)
            parser.print_help(sys.stderr)
            return 1
        selected_profile_name = profile_name_from_env
        print(f"Using profile from FIREFOX_PROFILE_NAME environment variable: '{selected_profile_name}'")

    # --- Validate the extraction mode before touching (or copying) the database ---
    start_folder = args.folder_name
    extract_all = args.all_bookmarks
    if not args.list_folders:
        if extract_all:
            if start_folder:
                print("Warning: --all-bookmarks overrides --folder-name. Extracting all bookmarks.", file=sys.stderr)
            start_folder = None
        elif not start_folder:
            print("Error: No specific folder name (--folder-name) or --all-bookmarks flag provided.", file=sys.stderr)
            print("You must specify either a folder to extract or use --all-bookmarks to get all Firefox bookmarks.", file=sys.stderr)
            parser.print_help(sys.stderr)
            return 1

    try:
        profile_dir = get_firefox_profile_path(selected_profile_name, use_newest_profile)
    except FileNotFoundError as e:
        print(f"Error: {e}", file=sys.stderr)
        print("Please ensure the profile is correct and Firefox profile directory exists.", file=sys.stderr)
        return 1
    except (OSError, configparser.Error) as e:
        print(f"Error determining Firefox profile path: {e}", file=sys.stderr)
        return 1
    db_path_original = os.path.join(profile_dir, "places.sqlite")

    final_db_path_to_use = db_path_original
    temp_db_copy_path = None
    if args.copy_db:
        if os.path.exists(db_path_original):
            temp_db_copy_path = db_path_original + ".temp_copy"
            try:
                print(f"Creating a temporary copy of places.sqlite at: {temp_db_copy_path}")
                shutil.copyfile(db_path_original, temp_db_copy_path)
                final_db_path_to_use = temp_db_copy_path
            except OSError as e:
                print(f"Warning: Could not create database copy: {e}. Attempting to use original DB path.", file=sys.stderr)
        else:
            print(f"Warning: Original database '{db_path_original}' not found for copying. Attempting to use path directly.", file=sys.stderr)

    # try/finally so the temporary copy is removed on every exit path,
    # including the sys.exit() raised inside list_folders_tree_format().
    try:
        if args.list_folders:
            print(f"Listing folders from: {final_db_path_to_use}")
            list_folders_tree_format(final_db_path_to_use)
            return 0
        return _export_bookmarks(final_db_path_to_use, start_folder, extract_all, args.output_json)
    finally:
        if temp_db_copy_path and os.path.exists(temp_db_copy_path):
            try:
                os.remove(temp_db_copy_path)
                print(f"Cleaned up temporary database copy: {temp_db_copy_path}")
            except OSError as e:
                print(f"Warning: Could not remove temporary database copy: {e}", file=sys.stderr)


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment