-
-
Save tmonjalo/33c4402b0d35f1233020bf427b5539fa to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
"""
List all Firefox tabs with title and URL

Supported input: json or jsonlz4 recovery files
Default output: title (URL)
Output format can be specified as argument
"""

import json
import os
import pathlib
import platform
import sys

import lz4.block

# Pick the directory that holds the Firefox profiles on this platform.
if platform.system() == 'Windows':
    path = pathlib.Path(os.environ['APPDATA']).joinpath('Mozilla\\Firefox\\Profiles')
elif platform.system() == 'Darwin':
    path = pathlib.Path.home().joinpath('Library/Application Support/Firefox/Profiles')
else:
    path = pathlib.Path.home().joinpath('.mozilla/firefox')

# Session recovery files of every profile whose directory name contains "default".
files = path.glob('*default*/sessionstore-backups/recovery.js*')

# Optional first argument: a %-style template that receives (title, URL).
try:
    template = sys.argv[1]
except IndexError:
    template = '%s (%s)'

for f in files:
    b = f.read_bytes()
    # Files starting with the 8-byte 'mozLz40\0' marker carry an LZ4 block
    # after it; anything else is parsed as JSON directly.
    if b[:8] == b'mozLz40\0':
        b = lz4.block.decompress(b[8:])
    j = json.loads(b)
    for w in j['windows']:
        for t in w['tabs']:
            # 'index' is 1-based; the entry at that position is the page
            # currently shown in the tab.
            i = t['index'] - 1
            entries = t['entries']
            # Require a non-negative position: a negative one would silently
            # wrap around and print the last history entry instead.
            if 0 <= i < len(entries):
                entry = entries[i]
                # Not every entry carries a title; print an empty one rather
                # than abort the whole listing with a KeyError.
                print(template % (entry.get('title', ''), entry['url']))
The location for me was actually ~/snap/firefox/common/.mozilla/firefox/. You can find your profile location by following the steps here: https://support.mozilla.org/en-US/kb/profiles-where-firefox-stores-user-data
I was looking for a way to list my open Firefox tabs and found this after trying other solutions which were outdated. I use the Firefox flatpak with single window so I asked AI to refactor @RanTalbott's script and this seems to do what I want:
#! /usr/bin/env python3
import argparse
import pathlib
import lz4.block
import json
from urllib.parse import urlparse
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Recognised options are ``--profile`` and ``--mozilla-path``; both are
    strings and both have defaults, so the script can run without arguments.
    """
    cli = argparse.ArgumentParser(description="Extract open tab titles and URLs from Firefox sessionstore backups.")
    # (flag, default value, help text) for every string-valued option.
    string_options = (
        ('--profile', 'default', "The Firefox profile name (default: 'default')."),
        ('--mozilla-path', '~/.var/app/org.mozilla.firefox/.mozilla/firefox', "The base path to the Firefox profiles directory (default: '~/.var/app/org.mozilla.firefox/.mozilla/firefox')."),
    )
    for flag, fallback, help_text in string_options:
        cli.add_argument(flag, type=str, default=fallback, help=help_text)
    return cli.parse_args()
def get_session_files(mozilla_path, profile):
    """Return an iterator over the session recovery files of matching profiles.

    ``mozilla_path`` is the profiles base directory (a leading ``~`` is
    expanded); ``profile`` is a fragment that must occur in the profile
    directory name. Every ``recovery.*`` file inside a matching profile's
    ``sessionstore-backups`` directory is yielded.
    """
    profiles_root = pathlib.Path(mozilla_path).expanduser()
    pattern = f'*{profile}*/sessionstore-backups/recovery.*'
    return profiles_root.glob(pattern)
def read_and_decompress_file(file_path):
    """Read a session file and return its decompressed content.

    Args:
        file_path: ``pathlib.Path`` of the file to read.

    Returns:
        The data produced by ``lz4.block.decompress`` for the bytes that
        follow the 8-byte ``mozLz40\\0`` marker, or ``None`` when the file
        cannot be read, does not start with that marker, or fails to
        decompress. A message is printed in each of the ``None`` cases.
    """
    # Reading and decompressing are guarded separately so that any OS-level
    # failure (missing file, permission denied, path is a directory, ...) is
    # reported instead of aborting the whole run.
    try:
        raw = file_path.read_bytes()
    except OSError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None

    if not raw.startswith(b'mozLz40\0'):
        print(f"Skipping non-LZ4 file: {file_path}")
        return None

    try:
        return lz4.block.decompress(raw[8:])
    except lz4.block.LZ4BlockError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None
def parse_json_data(data, file_path):
    """Decode ``data`` as JSON and return the resulting object.

    Args:
        data: JSON document as ``str`` or ``bytes``.
        file_path: Origin of the data; only used in the error message.

    Returns:
        The decoded object, or ``None`` (after printing a message) when the
        data is not valid JSON or, for bytes, not decodable as text.
    """
    try:
        return json.loads(data)
    # Byte input is decoded before parsing; a bad byte sequence surfaces as
    # UnicodeDecodeError, which is not a JSONDecodeError.
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error parsing JSON data from file {file_path}: {e}")
        return None
def extract_titles_from_session(session_data, unique_titles, file_path):
    """Add a ``"<host> - <title>"`` string for each tab's current page.

    Walks ``session_data['windows'][*]['tabs'][*]``, picks the history entry
    selected by the tab's 1-based ``index`` and adds its host (without a
    leading ``www.``) and title to the ``unique_titles`` set. Malformed
    windows, tabs and entries are reported with a message naming
    ``file_path`` and skipped; entries with an empty URL are skipped
    silently. Nothing is returned.
    """
    for window in session_data.get('windows', []):
        if not isinstance(window, dict):
            print(f"Invalid window structure in file {file_path}")
            continue

        for tab in window.get('tabs', []):
            if not isinstance(tab, dict):
                print(f"Invalid tab structure in file {file_path}")
                continue

            # 'index' is 1-based; a missing index yields -1 and the tab is skipped.
            position = tab.get('index', 0) - 1
            history = tab.get('entries', [])
            if not isinstance(history, list):
                print(f"Invalid entries structure in file {file_path}")
                continue
            if not (0 <= position < len(history)):
                continue

            current = history[position]
            if not isinstance(current, dict):
                print(f"Invalid entry structure in file {file_path}")
                continue

            page_title = current.get('title', 'Untitled')
            address = current.get('url', '')
            if not address:
                continue

            try:
                host = urlparse(address).netloc
                if host.startswith("www."):
                    host = host[4:]
                unique_titles.add(f"{host} - {page_title}")
            except ValueError:
                print(f"Invalid URL format in file {file_path}: {address}")
def main():
    """Print one line per distinct ``host - title`` found in the session files.

    Every file returned by ``get_session_files`` is read, decompressed and
    parsed; files that fail any of those steps are skipped. Titles are
    collected in a set, so the same page found in several files is printed
    once. The lines are printed in the set's iteration order.
    """
    options = parse_arguments()
    candidates = get_session_files(options.mozilla_path, options.profile)
    seen = set()

    for candidate in candidates:
        if not (candidate.exists() and candidate.is_file()):
            print(f"Skipping invalid or inaccessible file: {candidate}")
            continue

        payload = read_and_decompress_file(candidate)
        if payload is None:
            continue

        session = parse_json_data(payload, candidate)
        if session is None:
            continue

        extract_titles_from_session(session, seen, candidate)

    for line in seen:
        print(line)


if __name__ == "__main__":
    main()
Thanks for this script.
Does anyone know whether, and how, it is possible under Linux to get the workspace (number or name)
from a tab's title? Due to the nature of the window hierarchy (as far as I understand it), tools like
'xdotool' are unable to map a title to a workspace (it only partially works) - can this script be of help?
Here's a version that builds on it to add JSON export as well as the history for each tab:
#! /usr/bin/env python3
import argparse
import pathlib
import lz4.block
import json
from urllib.parse import urlparse
import datetime
import sys
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Recognised options are the string-valued ``--profile``,
    ``--mozilla-path`` and ``--output`` (all with defaults) and the boolean
    flag ``--show-schema``.
    """
    cli = argparse.ArgumentParser(description="Extract open tab titles and URLs from Firefox sessionstore backups.")
    # (flag, default value, help text) for every string-valued option.
    string_options = (
        ('--profile', 'default', "The Firefox profile name (default: 'default')."),
        ('--mozilla-path', '~/.var/app/org.mozilla.firefox/.mozilla/firefox', "The base path to the Firefox profiles directory (default: '~/.var/app/org.mozilla.firefox/.mozilla/firefox')."),
        ('--output', 'tabs_export.json', "Output JSON file path (default: 'tabs_export.json')."),
    )
    for flag, fallback, help_text in string_options:
        cli.add_argument(flag, type=str, default=fallback, help=help_text)
    cli.add_argument('--show-schema', action='store_true', help="Display the output JSON schema and exit.")
    return cli.parse_args()
def get_session_files(mozilla_path, profile):
    """Return an iterator over the session recovery files of matching profiles.

    ``mozilla_path`` is the profiles base directory (a leading ``~`` is
    expanded); ``profile`` is a fragment that must occur in the profile
    directory name. Every ``recovery.*`` file inside a matching profile's
    ``sessionstore-backups`` directory is yielded.
    """
    profiles_root = pathlib.Path(mozilla_path).expanduser()
    pattern = f'*{profile}*/sessionstore-backups/recovery.*'
    return profiles_root.glob(pattern)
def read_and_decompress_file(file_path):
    """Read a session file and return its decompressed content.

    Args:
        file_path: ``pathlib.Path`` of the file to read.

    Returns:
        The data produced by ``lz4.block.decompress`` for the bytes that
        follow the 8-byte ``mozLz40\\0`` marker, or ``None`` when the file
        cannot be read, does not start with that marker, or fails to
        decompress. A message is printed in each of the ``None`` cases.
    """
    # Reading and decompressing are guarded separately so that any OS-level
    # failure (missing file, permission denied, path is a directory, ...) is
    # reported instead of aborting the whole export.
    try:
        raw = file_path.read_bytes()
    except OSError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None

    if not raw.startswith(b'mozLz40\0'):
        print(f"Skipping non-LZ4 file: {file_path}")
        return None

    try:
        return lz4.block.decompress(raw[8:])
    except lz4.block.LZ4BlockError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None
def parse_json_data(data, file_path):
    """Decode ``data`` as JSON and return the resulting object.

    Args:
        data: JSON document as ``str`` or ``bytes``.
        file_path: Origin of the data; only used in the error message.

    Returns:
        The decoded object, or ``None`` (after printing a message) when the
        data is not valid JSON or, for bytes, not decodable as text.
    """
    try:
        return json.loads(data)
    # Byte input is decoded before parsing; a bad byte sequence surfaces as
    # UnicodeDecodeError, which is not a JSONDecodeError.
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error parsing JSON data from file {file_path}: {e}")
        return None
def format_timestamp(timestamp):
    """Convert a session timestamp to an ISO 8601 string, if available.

    Args:
        timestamp: Milliseconds since the Unix epoch, the unit this file's
            output schema documents for ``last_accessed`` and the one its
            sample value ``1681943790848`` is expressed in.

    Returns:
        ``None`` for a missing or zero timestamp; otherwise the local-time
        ISO 8601 representation, or ``str(timestamp)`` when the value cannot
        be converted.
    """
    if not timestamp:
        return None
    try:
        # fromtimestamp() expects seconds, so scale the milliseconds down.
        return datetime.datetime.fromtimestamp(timestamp / 1000).isoformat()
    except (ValueError, TypeError, OverflowError, OSError):
        return str(timestamp)  # Return original if conversion fails
def _scroll_position(entry):
    """Return ``{"x", "y"}`` read from ``entry["scroll"]["scroll"]``.

    Returns ``None`` when the entry has no dict-valued ``scroll`` key; a
    coordinate defaults to 0 when the inner ``scroll`` value is not a dict
    or lacks it.
    """
    scroll_data = entry.get("scroll")
    if not isinstance(scroll_data, dict):
        return None
    inner = scroll_data.get("scroll")
    if not isinstance(inner, dict):
        inner = {}
    return {"x": inner.get("x", 0), "y": inner.get("y", 0)}


def extract_tabs_from_session(session_data, file_path):
    """Collect one record per tab, including its navigation history.

    Args:
        session_data: Decoded session document; tabs are read from
            ``session_data['windows'][*]['tabs'][*]``.
        file_path: Origin of the data; only used in diagnostic messages.

    Returns:
        A list of dicts with ``window_index``, ``tab_index``, ``history``,
        ``current_index`` and, when available, ``last_accessed``,
        ``current_title``, ``current_url`` and ``domain``. Malformed
        windows, tabs and entries are reported and skipped; a tab whose
        ``entries`` value is not a list is left out entirely.
    """
    tabs_data = []

    for window_idx, window in enumerate(session_data.get('windows', [])):
        if not isinstance(window, dict):
            print(f"Invalid window structure in file {file_path}")
            continue

        for tab_idx, tab in enumerate(window.get('tabs', [])):
            if not isinstance(tab, dict):
                print(f"Invalid tab structure in file {file_path}")
                continue

            tab_data = {
                "window_index": window_idx,
                "tab_index": tab_idx,
                "history": []
            }

            # Extract last access time if available (stored unconverted)
            if "lastAccessed" in tab:
                tab_data["last_accessed"] = tab["lastAccessed"]

            # 'index' is 1-based; convert to a position in 'entries'
            current_index = tab.get('index', 0) - 1
            tab_data["current_index"] = current_index

            # Extract history entries
            entries = tab.get('entries', [])
            if not isinstance(entries, list):
                print(f"Invalid entries structure in file {file_path}")
                continue

            for entry_idx, entry in enumerate(entries):
                if not isinstance(entry, dict):
                    print(f"Invalid entry structure in file {file_path}")
                    continue

                history_entry = {
                    "entry_index": entry_idx,
                    "title": entry.get('title', 'Untitled'),
                    "url": entry.get('url', ''),
                    "is_current": entry_idx == current_index
                }

                # Extract timestamp if available
                if "lastAccessed" in entry:
                    history_entry["accessed_at"] = format_timestamp(entry.get("lastAccessed"))

                # Extract scroll position
                scroll_position = _scroll_position(entry)
                if scroll_position is not None:
                    history_entry["scroll_position"] = scroll_position

                tab_data["history"].append(history_entry)

            # Set current page info. The current entry gets the same dict
            # check as the history loop; a malformed one was already reported
            # there and must not crash the export here.
            current_entry = entries[current_index] if 0 <= current_index < len(entries) else None
            if isinstance(current_entry, dict):
                current_url = current_entry.get('url', '')
                tab_data["current_title"] = current_entry.get('title', 'Untitled')
                tab_data["current_url"] = current_url

                # Domain for convenience
                try:
                    domain = urlparse(current_url).netloc
                    if domain.startswith("www."):
                        domain = domain[4:]
                    tab_data["domain"] = domain
                except ValueError:
                    tab_data["domain"] = ""

            tabs_data.append(tab_data)

    return tabs_data
def display_schema():
    """Print the JSON schema of the export, followed by a sample document.

    Both are written to standard output as JSON indented by two spaces,
    separated by the heading "Example output structure:".
    """

    def field(kind, description, fmt=None):
        # Leaf property; keys are emitted as type, format (optional), description.
        spec = {"type": kind}
        if fmt is not None:
            spec["format"] = fmt
        spec["description"] = description
        return spec

    def visit(position, title, url, current, accessed_at, scroll_y):
        # One sample history entry; the sample scroll position is always at x == 0.
        return {
            "entry_index": position,
            "title": title,
            "url": url,
            "is_current": current,
            "accessed_at": accessed_at,
            "scroll_position": {"x": 0, "y": scroll_y},
        }

    scroll_schema = {
        "type": "object",
        "description": "Scroll position in pixels",
        "properties": {
            "x": field("integer", "Horizontal scroll position in pixels"),
            "y": field("integer", "Vertical scroll position in pixels"),
        },
    }

    history_schema = {
        "type": "array",
        "description": "Navigation history of this tab",
        "items": {
            "type": "object",
            "properties": {
                "entry_index": field("integer", "Position of this entry in the history"),
                "title": field("string", "Page title"),
                "url": field("string", "Full page URL", "uri"),
                "is_current": field("boolean", "Whether this is the currently visible page in the tab"),
                "accessed_at": field(["string", "null"], "ISO8601 timestamp of when this history entry was accessed", "date-time"),
                "scroll_position": scroll_schema,
            },
        },
    }

    tab_schema = {
        "type": "object",
        "properties": {
            "window_index": field("integer", "Index of the window containing this tab"),
            "tab_index": field("integer", "Index of this tab within its window"),
            "last_accessed": field(["integer", "null"], "Timestamp of when this tab was last accessed", "milliseconds"),
            "current_index": field("integer", "Current position in the tab's history"),
            "current_title": field("string", "Title of the current page"),
            "current_url": field("string", "Full URL of the current page", "uri"),
            "domain": field("string", "Domain name of the current page (without www prefix)"),
            "history": history_schema,
        },
    }

    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "title": "Firefox Tab Export",
        "type": "object",
        "properties": {
            "export_date": field("string", "ISO8601 timestamp of when the export was generated", "date-time"),
            "total_tabs": field("integer", "Total number of tabs exported"),
            "tabs": {
                "type": "array",
                "items": tab_schema,
            },
        },
    }

    print(json.dumps(schema, indent=2))
    print("\nExample output structure:")

    first_tab = {
        "window_index": 0,
        "tab_index": 0,
        "last_accessed": 1681943790848,
        "current_index": 1,
        "current_title": "Example Page",
        "current_url": "https://example.com/page",
        "domain": "example.com",
        "history": [
            visit(0, "Example Home", "https://example.com/", False, "2025-05-17T10:10:25.123456", 0),
            visit(1, "Example Page", "https://example.com/page", True, "2025-05-17T10:15:30.123456", 1250),
        ],
    }
    second_tab = {
        "window_index": 0,
        "tab_index": 1,
        "last_accessed": 1681943790848,
        "current_index": 0,
        "current_title": "Another Page",
        "current_url": "https://another-example.org/",
        "domain": "another-example.org",
        "history": [
            visit(0, "Another Page", "https://another-example.org/", True, "2025-05-17T10:20:15.123456", 500),
        ],
    }
    example = {
        "export_date": "2025-05-17T10:30:45.123456",
        "total_tabs": 2,
        "tabs": [first_tab, second_tab],
    }
    print(json.dumps(example, indent=2))
def main():
    """Export the tabs found in the session files to a JSON file.

    With ``--show-schema`` the schema is printed and the process exits with
    status 0. Otherwise every file returned by ``get_session_files`` is
    read, decompressed and parsed (files failing a step are skipped), and
    the tabs of all of them are concatenated into one list that is written,
    together with the export time and tab count, to ``--output``.
    """
    options = parse_arguments()

    # Schema display is a standalone mode: print and stop.
    if options.show_schema:
        display_schema()
        sys.exit(0)

    collected = []
    # NOTE(review): when the glob matches several recovery files, the tabs of
    # each one are appended, so the same tab may appear more than once.
    for session_file in get_session_files(options.mozilla_path, options.profile):
        if not (session_file.exists() and session_file.is_file()):
            print(f"Skipping invalid or inaccessible file: {session_file}")
            continue

        raw = read_and_decompress_file(session_file)
        if raw is None:
            continue

        session = parse_json_data(raw, session_file)
        if session is None:
            continue

        found = extract_tabs_from_session(session, session_file)
        if found:
            collected.extend(found)

    # Final document: metadata first, then the tab records.
    report = {
        "export_date": datetime.datetime.now().isoformat(),
        "total_tabs": len(collected),
        "tabs": collected
    }

    with open(args_output := options.output, 'w', encoding='utf-8') as sink:
        json.dump(report, sink, indent=2, ensure_ascii=False)

    print(f"Exported {len(collected)} tabs to {args_output}")


if __name__ == "__main__":
    main()
Just a guess, but maybe the version of Firefox you're running is returning something other than a string for t['entries'][i]['title'] or t['entries'][i]['url']. Maybe a list or a tuple if there's something odd about the tab? I'd do a try/except, and print out type and size info for them on error.