Last active
May 17, 2025 22:51
-
-
Save tmonjalo/33c4402b0d35f1233020bf427b5539fa to your computer and use it in GitHub Desktop.
List all Firefox tabs with title and URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
"""
List all Firefox tabs with title and URL

Supported input: json or jsonlz4 recovery files
Default output: title (URL)
Output format can be specified as argument

The optional first command-line argument is a %-style template that
receives two values, the tab title and the tab URL, in that order.
"""

import json
import os
import pathlib
import platform
import sys

import lz4.block

# Pick the directory holding the Firefox profiles for the current OS.
if platform.system() == 'Windows':
    # `os` must be imported for this branch; APPDATA is read from the environment.
    path = pathlib.Path(os.environ['APPDATA']).joinpath('Mozilla\\Firefox\\Profiles')
elif platform.system() == 'Darwin':
    path = pathlib.Path.home().joinpath('Library/Application Support/Firefox/Profiles')
else:
    path = pathlib.Path.home().joinpath('.mozilla/firefox')

# Every recovery file of every profile directory whose name contains "default".
files = path.glob('*default*/sessionstore-backups/recovery.js*')

try:
    template = sys.argv[1]
except IndexError:
    template = '%s (%s)'

for f in files:
    b = f.read_bytes()
    # Compressed files start with an 8-byte magic header followed by an LZ4 block.
    if b[:8] == b'mozLz40\0':
        b = lz4.block.decompress(b[8:])
    j = json.loads(b)
    for w in j['windows']:
        for t in w['tabs']:
            # The stored index is converted to a 0-based position in 'entries'.
            i = t['index'] - 1
            if len(t['entries']) > i:
                entry = t['entries'][i]
                # An entry may have no title; fall back to its URL
                # instead of failing with KeyError.
                print(template % (
                    entry.get('title', entry['url']),
                    entry['url']
                ))
Thanks for this script.
Does someone know if, and how, it is possible under Linux to get the workspace (number or name)
from a tab('s title)? Due to the nature of the window hierarchy (as far as I understand it), tools like
'xdotool' are unable to map a title to a workspace (it works only partially) - can this script be of help?
Here's a version that builds on it to add JSON export as well as the history for each tab:
#! /usr/bin/env python3
import argparse
import pathlib
import lz4.block
import json
from urllib.parse import urlparse
import datetime
import sys
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Recognised options: ``--profile``, ``--mozilla-path`` and ``--output``
    (all strings with defaults) plus the boolean flag ``--show-schema``.
    """
    cli = argparse.ArgumentParser(
        description="Extract open tab titles and URLs from Firefox sessionstore backups.",
    )

    # (flag, default value, help text) for each string-valued option,
    # registered in the order they should appear in --help.
    string_options = (
        (
            '--profile',
            'default',
            "The Firefox profile name (default: 'default').",
        ),
        (
            '--mozilla-path',
            '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
            "The base path to the Firefox profiles directory (default: '~/.var/app/org.mozilla.firefox/.mozilla/firefox').",
        ),
        (
            '--output',
            'tabs_export.json',
            "Output JSON file path (default: 'tabs_export.json').",
        ),
    )
    for flag, default_value, help_text in string_options:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    cli.add_argument(
        '--show-schema',
        action='store_true',
        help="Display the output JSON schema and exit.",
    )
    return cli.parse_args()
def get_session_files(mozilla_path, profile):
    """Return an iterator over the recovery files of matching profiles.

    ``mozilla_path`` is the profiles base directory (``~`` is expanded) and
    ``profile`` is a substring that the profile directory name must contain.
    """
    profiles_root = pathlib.Path(mozilla_path).expanduser()
    pattern = '*{}*/sessionstore-backups/recovery.*'.format(profile)
    return profiles_root.glob(pattern)
def read_and_decompress_file(file_path):
    """Read a session file and return its decompressed payload.

    Args:
        file_path: Path-like object exposing ``read_bytes()`` (the callers
            in this file pass ``pathlib.Path`` objects).

    Returns:
        The decompressed bytes, or ``None`` when the file cannot be read,
        does not start with the ``mozLz40\\0`` header, or fails to
        decompress. A message is printed in each of those cases.
    """
    magic = b'mozLz40\0'

    # Reading is kept in its own try block so that any OS-level failure
    # (missing file, permission denied, path is a directory, ...) is
    # reported and skipped rather than aborting the whole export.
    try:
        raw = file_path.read_bytes()
    except OSError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None

    if not raw.startswith(magic):
        print(f"Skipping non-LZ4 file: {file_path}")
        return None

    try:
        return lz4.block.decompress(raw[len(magic):])
    except (lz4.block.LZ4BlockError, ValueError) as e:
        # NOTE(review): ValueError is caught on the assumption that the lz4
        # binding reports a truncated size header that way - confirm.
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None
def parse_json_data(data, file_path):
    """Decode JSON text or bytes and return the resulting object.

    Args:
        data: ``str`` or ``bytes`` containing a JSON document.
        file_path: Origin of the data; used only in the error message.

    Returns:
        The decoded object, or ``None`` (after printing a message) when the
        data is not valid JSON or, for bytes, cannot be decoded as text.
    """
    try:
        return json.loads(data)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # json.loads decodes bytes input itself; bytes that are not valid
        # text raise UnicodeDecodeError, which is not a JSONDecodeError.
        print(f"Error parsing JSON data from file {file_path}: {e}")
        return None
def format_timestamp(timestamp):
    """Convert a session timestamp to an ISO 8601 string.

    The value is interpreted as milliseconds since the Unix epoch, which is
    the unit the export schema and example in this file use for session
    timestamps (e.g. ``1681943790848``). The result is a naive local-time
    string.

    Args:
        timestamp: Number of milliseconds since the epoch, or a falsy value.

    Returns:
        ``None`` for a falsy timestamp, the ISO 8601 string on success, or
        ``str(timestamp)`` when the value cannot be converted.
    """
    if not timestamp:
        return None
    try:
        # Milliseconds -> seconds.
        return datetime.datetime.fromtimestamp(timestamp / 1000).isoformat()
    except (ValueError, TypeError, OverflowError, OSError):
        # Non-numeric or out-of-range value (OSError is what some platforms
        # raise for the latter): hand back the original as text.
        return str(timestamp)
def _parse_scroll_pair(value):
    """Parse an ``"x,y"`` string into an ``(x, y)`` tuple of ints, else None."""
    if not isinstance(value, str):
        return None
    x_text, separator, y_text = value.partition(',')
    if not separator:
        return None
    try:
        return int(x_text), int(y_text)
    except ValueError:
        return None


def _extract_scroll_position(scroll):
    """Return ``{"x": ..., "y": ...}`` for a scroll value, or None if unusable."""
    if isinstance(scroll, dict):
        inner = scroll.get("scroll")
        if isinstance(inner, dict):
            return {"x": inner.get("x", 0), "y": inner.get("y", 0)}
        # A dict without usable coordinates is reported as the origin.
        x, y = _parse_scroll_pair(inner) or (0, 0)
        return {"x": x, "y": y}
    pair = _parse_scroll_pair(scroll)
    if pair is None:
        return None
    return {"x": pair[0], "y": pair[1]}


def _domain_of(url):
    """Return the host part of ``url`` without a leading ``www.``, or ``""``."""
    if not isinstance(url, str):
        return ""
    try:
        domain = urlparse(url).netloc
    except ValueError:
        return ""
    if domain.startswith("www."):
        domain = domain[4:]
    return domain


def _build_history_entry(entry_idx, entry, current_index):
    """Build the exported record for one history entry of a tab."""
    history_entry = {
        "entry_index": entry_idx,
        "title": entry.get('title', 'Untitled'),
        "url": entry.get('url', ''),
        "is_current": entry_idx == current_index
    }
    if "lastAccessed" in entry:
        history_entry["accessed_at"] = format_timestamp(entry.get("lastAccessed"))
    if "scroll" in entry:
        scroll_position = _extract_scroll_position(entry["scroll"])
        if scroll_position is not None:
            history_entry["scroll_position"] = scroll_position
    return history_entry


def extract_tabs_from_session(session_data, file_path):
    """Collect one record per tab from decoded session data.

    Args:
        session_data: Decoded session JSON; expected to be a dict with a
            ``windows`` list whose items hold a ``tabs`` list.
        file_path: Origin of the data; used only in diagnostic messages.

    Returns:
        A list of dicts. Each has ``window_index``, ``tab_index``,
        ``history`` and ``current_index``; ``last_accessed`` is included when
        the tab has ``lastAccessed``; ``current_title``, ``current_url`` and
        ``domain`` are included when the current index points at an existing
        entry. Items with an unexpected structure are reported with a
        printed message and skipped.

    Scroll positions are read from an entry's ``scroll`` value when it is a
    dict (nested ``scroll`` holding ``x``/``y``) or an ``"x,y"`` string.
    NOTE(review): the string form is assumed to be how Firefox serialises
    entry scroll positions - confirm against real session files.
    """
    tabs_data = []
    if not isinstance(session_data, dict):
        print(f"Invalid session structure in file {file_path}")
        return tabs_data

    windows = session_data.get('windows', [])
    if not isinstance(windows, list):
        print(f"Invalid windows structure in file {file_path}")
        return tabs_data

    for window_idx, window in enumerate(windows):
        if not isinstance(window, dict):
            print(f"Invalid window structure in file {file_path}")
            continue

        tabs = window.get('tabs', [])
        if not isinstance(tabs, list):
            print(f"Invalid tabs structure in file {file_path}")
            continue

        for tab_idx, tab in enumerate(tabs):
            if not isinstance(tab, dict):
                print(f"Invalid tab structure in file {file_path}")
                continue

            tab_data = {
                "window_index": window_idx,
                "tab_index": tab_idx,
                "history": []
            }
            if "lastAccessed" in tab:
                tab_data["last_accessed"] = tab.get("lastAccessed")

            # The stored index is shifted by one to address 'entries';
            # a missing index yields -1, which matches no entry.
            current_index = tab.get('index', 0) - 1
            tab_data["current_index"] = current_index

            entries = tab.get('entries', [])
            if not isinstance(entries, list):
                # The whole tab is dropped when its history is unusable.
                print(f"Invalid entries structure in file {file_path}")
                continue

            for entry_idx, entry in enumerate(entries):
                if not isinstance(entry, dict):
                    print(f"Invalid entry structure in file {file_path}")
                    continue
                tab_data["history"].append(
                    _build_history_entry(entry_idx, entry, current_index)
                )

            if 0 <= current_index < len(entries):
                current_entry = entries[current_index]
                # The current entry itself may be malformed; only dict
                # entries can provide the current page details.
                if isinstance(current_entry, dict):
                    tab_data["current_title"] = current_entry.get('title', 'Untitled')
                    tab_data["current_url"] = current_entry.get('url', '')
                    tab_data["domain"] = _domain_of(current_entry.get('url', ''))

            tabs_data.append(tab_data)
    return tabs_data
def display_schema():
    """Print the JSON schema of the export, followed by an example document."""

    def field(kind, text, fmt=None):
        # One schema property; keys are inserted as type, [format], description
        # so the printed key order stays stable.
        spec = {"type": kind}
        if fmt is not None:
            spec["format"] = fmt
        spec["description"] = text
        return spec

    # The schema is assembled bottom-up: scroll position -> history entry
    # -> tab -> whole document.
    scroll_schema = field("object", "Scroll position in pixels")
    scroll_schema["properties"] = {
        "x": field("integer", "Horizontal scroll position in pixels"),
        "y": field("integer", "Vertical scroll position in pixels"),
    }

    history_schema = field("array", "Navigation history of this tab")
    history_schema["items"] = {
        "type": "object",
        "properties": {
            "entry_index": field("integer", "Position of this entry in the history"),
            "title": field("string", "Page title"),
            "url": field("string", "Full page URL", "uri"),
            "is_current": field("boolean", "Whether this is the currently visible page in the tab"),
            "accessed_at": field(
                ["string", "null"],
                "ISO8601 timestamp of when this history entry was accessed",
                "date-time",
            ),
            "scroll_position": scroll_schema,
        },
    }

    tab_schema = {
        "type": "object",
        "properties": {
            "window_index": field("integer", "Index of the window containing this tab"),
            "tab_index": field("integer", "Index of this tab within its window"),
            "last_accessed": field(
                ["integer", "null"],
                "Timestamp of when this tab was last accessed",
                "milliseconds",
            ),
            "current_index": field("integer", "Current position in the tab's history"),
            "current_title": field("string", "Title of the current page"),
            "current_url": field("string", "Full URL of the current page", "uri"),
            "domain": field("string", "Domain name of the current page (without www prefix)"),
            "history": history_schema,
        },
    }

    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "title": "Firefox Tab Export",
        "type": "object",
        "properties": {
            "export_date": field(
                "string",
                "ISO8601 timestamp of when the export was generated",
                "date-time",
            ),
            "total_tabs": field("integer", "Total number of tabs exported"),
            "tabs": {"type": "array", "items": tab_schema},
        },
    }
    print(json.dumps(schema, indent=2))

    print("\nExample output structure:")

    def visit(position, title, url, current, when, y):
        # One example history entry; x is 0 in every example.
        return {
            "entry_index": position,
            "title": title,
            "url": url,
            "is_current": current,
            "accessed_at": when,
            "scroll_position": {"x": 0, "y": y},
        }

    first_tab = {
        "window_index": 0,
        "tab_index": 0,
        "last_accessed": 1681943790848,
        "current_index": 1,
        "current_title": "Example Page",
        "current_url": "https://example.com/page",
        "domain": "example.com",
        "history": [
            visit(0, "Example Home", "https://example.com/", False,
                  "2025-05-17T10:10:25.123456", 0),
            visit(1, "Example Page", "https://example.com/page", True,
                  "2025-05-17T10:15:30.123456", 1250),
        ],
    }
    second_tab = {
        "window_index": 0,
        "tab_index": 1,
        "last_accessed": 1681943790848,
        "current_index": 0,
        "current_title": "Another Page",
        "current_url": "https://another-example.org/",
        "domain": "another-example.org",
        "history": [
            visit(0, "Another Page", "https://another-example.org/", True,
                  "2025-05-17T10:20:15.123456", 500),
        ],
    }
    example = {
        "export_date": "2025-05-17T10:30:45.123456",
        "total_tabs": 2,
        "tabs": [first_tab, second_tab],
    }
    print(json.dumps(example, indent=2))
def main():
    """Run the export: gather tabs from every session file and write them as JSON.

    With ``--show-schema`` the schema is printed and the process exits with
    status 0 without reading any session file.
    """
    options = parse_arguments()

    if options.show_schema:
        display_schema()
        sys.exit(0)

    collected = []
    for session_file in get_session_files(options.mozilla_path, options.profile):
        if not (session_file.exists() and session_file.is_file()):
            print(f"Skipping invalid or inaccessible file: {session_file}")
            continue

        # Each stage signals failure with None; such files are skipped.
        payload = read_and_decompress_file(session_file)
        if payload is None:
            continue

        session = parse_json_data(payload, session_file)
        if session is None:
            continue

        found = extract_tabs_from_session(session, session_file)
        if found:
            collected.extend(found)

    # Wrap the tab list with export metadata.
    report = {
        "export_date": datetime.datetime.now().isoformat(),
        "total_tabs": len(collected),
        "tabs": collected
    }

    with open(options.output, 'w', encoding='utf-8') as out:
        json.dump(report, out, indent=2, ensure_ascii=False)

    print(f"Exported {len(collected)} tabs to {options.output}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I was looking for a way to list my open Firefox tabs and found this after trying other solutions which were outdated. I use the Firefox flatpak with single window so I asked AI to refactor @RanTalbott's script and this seems to do what I want: