Created
April 4, 2025 07:09
-
-
Save IntendedConsequence/b3579792d91b409d0b0bec6d8d34e163 to your computer and use it in GitHub Desktop.
Parse URLs and titles from Chrome browser sessions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| import os | |
| import csv | |
| import json | |
| import struct | |
| import argparse | |
| from io import BytesIO | |
| from enum import Enum, EnumMeta | |
| from urllib.parse import unquote | |
| from pathlib import Path | |
"""
SNSS Session File Parser for PyBrinf

This module parses SNSS (Chromium session) files and returns a list of
current tabs.

Script made by Manuel Cabral
Modified by YourMom for proper string padding alignment and String16 support
"""
class MetaEnum(EnumMeta):
    """EnumMeta subclass that makes ``value in SomeEnum`` test by member value."""

    def __contains__(cls, item: int) -> bool:
        """Return True when *item* is the value of one of the enum's members."""
        try:
            cls(item)
        except ValueError:
            return False
        else:
            return True
class SNSSTypeCommand(Enum, metaclass=MetaEnum):
    """Enum for SNSS command types.

    NOTE(review): the names and numeric ids presumably mirror Chromium's
    session-service command ids — confirm against the Chromium source.
    The gaps (1, 10) are ids this parser does not recognize at all.
    """
    CommandSetTabWindow = 0
    CommandSetTabIndexInWindow = 2
    CommandTabClosed = 3
    CommandWindowClosed = 4
    CommandTabNavigationPathPrunedFromBack = 5
    # The only command actually decoded by parse_commands() below.
    CommandUpdateTabNavigation = 6
    CommandSetSelectedNavigationIndex = 7
    CommandSetSelectedTabInIndex = 8
    CommandSetWindowType = 9
    CommandTabNavigationPathPrunedFromFront = 11
    CommandSetPinnedState = 12
    CommandSetExtensionAppID = 13
    CommandSetWindowBounds3 = 14
class SNSSCommand:
    """One raw record from an SNSS file: a numeric command id plus its
    undecoded payload bytes."""

    def __init__(self, id: int, content: bytes):
        """Store the command identifier and the raw content that follows it."""
        self.id, self.content = id, content
def _read_string8(content, pickle_size):
    """Read one uint32-length-prefixed UTF-8 string from *content*.

    Strings in the pickle are padded to a 4-byte boundary; the pad bytes are
    consumed but dropped. Raises a generic Exception when the declared length
    would overrun the pickle (invalid/corrupt data).
    """
    (str_length,) = struct.unpack("<I", content.read(4))
    # Pad to 32-bit alignment; (4 - n % 4) % 4 is 0 when already aligned.
    padding = (4 - str_length % 4) % 4
    # A length larger than the remaining pickle bytes cannot be valid.
    if str_length > pickle_size - content.tell():
        raise Exception("Invalid string length")
    return content.read(str_length + padding)[:str_length].decode(
        "utf-8", "ignore"
    )


def _read_string16(content, pickle_size):
    """Read one uint32-length-prefixed UTF-16 string from *content*.

    The stored length counts UTF-16 code units (2 bytes each). Unlike the
    UTF-8 reader, an overlong length returns the sentinel "undefined" instead
    of raising — titles are decoded best-effort.
    """
    (str_length,) = struct.unpack("<I", content.read(4))
    str_length *= 2  # code units -> bytes
    padding = (4 - str_length % 4) % 4
    if str_length > pickle_size - content.tell():
        return "undefined"
    # Chromium writes UTF-16 little-endian without a BOM; decode explicitly
    # as "utf-16-le" so a leading 0xFF 0xFE pair is never eaten as a BOM.
    return content.read(str_length + padding)[:str_length].decode(
        "utf-16-le", "ignore"
    )


def parse_commands(commands):
    """Extract tab navigation entries from a list of SNSS commands.

    Only CommandUpdateTabNavigation records are decoded. Returns a list of
    (tab_id, index, url, title) tuples.
    """
    output = []
    for command in commands:
        # Ignore ids that are not known SNSS commands, and everything that
        # is not a tab navigation update.
        if command.id not in SNSSTypeCommand:
            continue
        if command.id != SNSSTypeCommand.CommandUpdateTabNavigation.value:
            continue
        # The content is a Chromium "pickle": a uint32 payload size followed
        # by the payload itself.
        content = BytesIO(command.content)
        pickle_size = len(command.content)
        # Payload size header — read to advance past it; bounds checks below
        # use pickle_size directly, so the value itself is not needed.
        struct.unpack("<I", content.read(4))
        # All integers are little-endian on disk; "<I" keeps parsing correct
        # on any host byte order.
        (tab_id,) = struct.unpack("<I", content.read(4))
        (index,) = struct.unpack("<I", content.read(4))
        url = _read_string8(content, pickle_size)
        title = _read_string16(content, pickle_size)
        output.append((tab_id, index, url, title))
    return output
class InvalidSNSSFileException(Exception):
    """Raised when a file does not carry the expected 'SNSS' magic signature."""
def parse(path):
    """Parse an SNSS session file into a list of SNSSCommand records.

    On-disk layout: a 4-byte "SNSS" signature, an int32 version, then a
    sequence of records, each a uint16 size (which includes the 1-byte
    command id), a uint8 command id, and size-1 bytes of content.

    Raises InvalidSNSSFileException when the signature does not match, and a
    generic Exception on a zero-sized (likely corrupt) record.
    """
    commands = []
    # "with" guarantees the handle is closed even if parsing raises midway;
    # the original file.close() was skipped on any exception.
    with open(path, "rb") as file:
        # Determine the file size so the record loop knows where to stop.
        file.seek(0, os.SEEK_END)
        end = file.tell()
        file.seek(0, os.SEEK_SET)
        # All integers are little-endian on disk; explicit "<" formats keep
        # parsing correct regardless of host byte order.
        (signature,) = struct.unpack("<i", file.read(4))
        if signature != 0x53534E53:  # ASCII "SNSS"
            raise InvalidSNSSFileException("Invalid SNSS file")
        # Version field is read to advance the cursor but otherwise unused.
        (version,) = struct.unpack("<i", file.read(4))
        while file.tell() < end:
            (command_size,) = struct.unpack("<H", file.read(2))
            if command_size == 0:
                raise Exception("Invalid command size, maybe corrupted file")
            (command_id,) = struct.unpack("<B", file.read(1))
            # NOTE: the command id byte is counted in command_size.
            content = file.read(command_size - 1)
            commands.append(SNSSCommand(command_id, content))
    return commands
def _split_suspended_url(url):
    """Recover (url, title) from a tab-suspender link.

    Suspender links look like
    chrome-extension://<id>/suspended.html#ttl=<TITLE>&pos=_&uri=<URL>:
    the real URL follows the last "&uri="; everything between "#ttl=" and
    that marker is the (percent-encoded) title, possibly followed by a
    "&pos=" scroll-position suffix that must be dropped.
    """
    *title_parts, real_url = "".join(url.split("#ttl=")[1:]).split("&uri=")
    title = unquote("".join(title_parts).split("&pos=")[0])
    return real_url, title


def main():
    """Command-line entry point.

    Parses one SNSS file, or every plausible session file in a directory,
    and prints the collected (URL, Title) pairs as JSON, NUL-separated
    multiline records, or CSV (the default).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path", type=Path)
    parser.add_argument("--json", action="store_true")
    parser.add_argument("-0", action="store_true", help="Multiline output with 0 as entry separator")
    args = parser.parse_args()
    is_json = args.json
    # argparse stores "-0" under the attribute name "0", which is not a
    # valid identifier, so it has to be fetched with getattr().
    is_multiline = getattr(args, "0")
    path = args.path
    if not path.exists():
        print(f"File not found: {path}", file=sys.stderr)
        return
    # For directories, consider files with an extension; anything not larger
    # than the 4-byte signature cannot be a valid session file.
    if path.is_dir():
        paths = [p for p in path.glob("*.*") if p.is_file() and p.stat().st_size > 4]
    else:
        paths = [path]
    data = {}
    for session_path in paths:
        try:
            commands = parse(session_path)
        except InvalidSNSSFileException:
            print(f"File is not a valid SNSS format, skipping: {session_path}", file=sys.stderr)
            continue
        # For now only the tab navigation commands are extracted.
        rows = []
        for _, _, url, title in parse_commands(commands):
            also_title = ""
            if url.startswith(
                "chrome-extension://noogafoofpebimajpfpamcfhoaifemoa/suspended.html"
            ):
                url, also_title = _split_suspended_url(url)
            # Prefer the title the browser recorded; fall back to the one
            # recovered from a suspender URL.
            rows.append((unquote(url), title if title else also_title))
        data[session_path] = rows
    if is_json:
        json.dump(
            [{"url": url, "title": title} for rows in data.values() for url, title in rows],
            sys.stdout,
        )
    elif is_multiline:
        for rows in data.values():
            for url, title in rows:
                sys.stdout.buffer.write(f"{url}\n{title}\n\0".encode("utf8"))
    else:
        # CSV. NOTE: windows shenanigans — one would normally
        # open("file.csv", newline='') to prevent \r\r\n line endings, but
        # sys.stdout cannot be reopened here, so force the terminator instead.
        csvwriter = csv.writer(sys.stdout, lineterminator="\n")
        csvwriter.writerow(("URL", "Title"))
        for rows in data.values():
            csvwriter.writerows(rows)
| # NO WARRANTY. Was tested only on windows with vivaldi browser (should work for any chrome-based one in theory) | |
| # Can be used with fzf to search all sessions like so: | |
| # | |
| # python -X utf8 read-chrome-sessions.py "%LOCALAPPDATA%\Vivaldi\User Data\Default\Sessions" -0 | fzf --read0 | |
| # | |
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment