Last active
March 16, 2026 13:36
-
-
Save Dobby233Liu/05e643c7034c1a14840d8ffa5e3ae9e1 to your computer and use it in GitHub Desktop.
SoonerXTR if it was good and honest + FixLater
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # FixLater: Reconstructs YAFFS1 spare/oob data for image files that don't have it | |
| # Copyright (C) 2026 Liu Wenyuan | |
| # Derived from code in YAFFS; copyright (C) 2002-2018 Aleph One Ltd. | |
| # | |
| # This program is free software; you can redistribute it and/or modify it under | |
| # the terms of the GNU General Public License version 2 as published by the Free | |
| # Software Foundation. | |
| # | |
| # This program is distributed in the hope that it will be useful, but WITHOUT | |
| # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. | |
| # This is not very well documented right now, especially since the output is still too weird to be | |
| # supported by most tools; sorry | |
| # For now, just know that this: | |
| # - Is not a substitute for actually having the OOB data, especially not for archival purposes | |
| # - Makes the same assumptions as SoonerXTR "v2" about the input | |
| # See https://betawiki.net/wiki/User:OstupBurtik/Google_Sooner_(EXCA300)_proper_dumping | |
| # if you're interested in a more proper dumping methodology | |
| import os | |
| import sys | |
| from ctypes import ( | |
| LittleEndianStructure, | |
| byref, | |
| c_byte, | |
| c_char, | |
| c_int32, | |
| c_ubyte, | |
| c_uint16, | |
| c_uint32, | |
| c_uint64, | |
| memset, | |
| sizeof, | |
| ) | |
| from bitfield import make_bf # ctypes_bitfield | |
| from tqdm import tqdm | |
# Constants shared by the reader and writer loops further down in this script.
CHUNK_SIZE = 512  # 0x200: bytes of data kept from each page and written per output chunk
YAFFS_OBJECTID_ROOT = 1  # object id assigned to the first object header that is read
YAFFS_NOBJECT_BUCKETS = 256  # 0x100: the running object id is bumped up to this before incrementing
YAFFS_OBJECT_TYPE_FILE = 1  # header `type` value that is followed by file payload
class yaffs_tags(
    make_bf(
        "_yaffs_tags_base",
        [
            ("chunk_id", c_uint32, 20),
            ("serial_number", c_uint32, 2),
            ("n_bytes", c_uint32, 10),
            ("obj_id", c_uint32, 18),
            ("ecc", c_uint32, 12),
            ("unused", c_uint32, 2),
        ],
        basetype=c_uint64,  # pyright: ignore[reportArgumentType]
    )
):
    """Tags record for one chunk, stored as 64 bits of bitfields.

    A fresh instance has every byte set to 0xFF; callers then fill in the
    fields they need and call `calc_ecc()`.
    """

    def __init__(self):
        super().__init__()
        self._clear()
        self.unused = 3

    def _clear(self):
        """Overwrite the whole record with 0xFF bytes."""
        memset(byref(self), 0xFF, sizeof(self))

    def calc_ecc(self):
        """Compute and store the tags ECC, returning the stored value.

        The `ecc` field is zeroed first, then the 1-based positions of all
        set bits in the 8 raw bytes (LSB of byte 0 is position 1) are XORed
        together.
        """
        self.ecc = 0
        snapshot = bytearray(self)  # copy taken after the field was zeroed
        parity = 0
        for position in range(8 * 8):
            if snapshot[position // 8] & (1 << (position % 8)):
                parity ^= position + 1
        self.ecc = parity
        return self.ecc


assert sizeof(yaffs_tags) == 0x8
class yaffs_packed_tags1(LittleEndianStructure):
    """A `yaffs_tags` record followed by a 32-bit word that is set to all ones.

    The tags are embedded anonymously, so their bitfields can be accessed
    directly on this structure.
    """

    _pack_ = 1
    _anonymous_ = ["_tags"]
    _fields_ = [
        ("_tags", yaffs_tags),
        ("should_be_ff", c_uint32),
    ]

    def __init__(self, tags: yaffs_tags) -> None:
        super().__init__()
        self.should_be_ff = 0xFFFFFFFF
        self._tags = tags
# Per-byte lookup used by calc_ecc(): bit 0 of an entry is tested there as
# "odd number of bits in the byte"; the whole entry is XORed into the column parity.
# fmt: off
column_parity_table = (
    0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69, 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00,
    0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc, 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
    0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0, 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
    0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65, 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
    0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc, 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
    0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59, 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
    0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55, 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
    0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0, 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
    0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0, 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
    0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55, 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
    0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59, 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
    0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc, 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
    0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65, 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
    0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0, 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
    0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc, 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
    0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69, 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00
)
# fmt: on


def calc_ecc(data):
    """Compute the 3-byte ECC over the first 256 bytes of `data`.

    `data` must be indexable and yield ints in 0..255; anything past index
    255 is ignored, and fewer than 256 items raises IndexError.

    Returns a 3-item bytearray:
      [0] inverted interleave of the low nibbles of the two line parities,
      [1] inverted interleave of their high nibbles,
      [2] inverted column parity with the two lowest bits forced to 1.
    """
    col_parity = 0
    line_parity = 0
    line_parity_prime = 0
    for index in range(256):
        entry = column_parity_table[data[index]]
        col_parity ^= entry
        if entry & 0x01:  # odd number of bits in the byte
            line_parity ^= index
            line_parity_prime ^= index ^ 0xFF

    def interleaved_inverted(lowest_bit):
        # Take four consecutive bits (highest first) from each line parity and
        # alternate them: line_parity lands on output bits 7/5/3/1 and
        # line_parity_prime on bits 6/4/2/0. The result is then inverted.
        packed = 0
        for pair in range(4):
            source_bit = lowest_bit + 3 - pair
            packed |= ((line_parity >> source_bit) & 1) << (7 - 2 * pair)
            packed |= ((line_parity_prime >> source_bit) & 1) << (6 - 2 * pair)
        return packed ^ 0xFF

    return bytearray(
        (
            interleaved_inverted(0),
            interleaved_inverted(4),
            (col_parity ^ 0xFF) | 0x03,
        )
    )
class yaffs_spare(LittleEndianStructure):
    """16-byte spare record for one chunk.

    The eight tag bytes (`tb0`..`tb7`) are interleaved with the page/block
    status bytes and two 3-byte ECC fields. A fresh instance is all 0xFF.
    """

    _pack_ = 1
    _fields_ = [
        ("tb0", c_ubyte),  # offset 0
        ("tb1", c_ubyte),  # offset 1
        ("tb2", c_ubyte),  # offset 2
        ("tb3", c_ubyte),  # offset 3
        ("page_status", c_ubyte),  # offset 4
        ("block_status", c_ubyte),  # offset 5
        ("tb4", c_ubyte),  # offset 6
        ("tb5", c_ubyte),  # offset 7
        ("ecc1", c_ubyte * 3),  # offsets 8-10
        ("tb6", c_ubyte),  # offset 11
        ("tb7", c_ubyte),  # offset 12
        ("ecc2", c_ubyte * 3),  # offsets 13-15
    ]

    def __init__(self):
        super().__init__()
        self._clear()

    def _clear(self):
        """Overwrite the whole record with 0xFF bytes."""
        memset(byref(self), 0xFF, sizeof(self))

    def calc_ecc(self, data: bytes):
        """Store the ECC of `data[:256]` in `ecc1` and of `data[256:]` in `ecc2`."""
        buf = bytearray(data)
        ecc_field = c_ubyte * 3
        self.ecc1 = ecc_field(*calc_ecc(buf[:256]))
        self.ecc2 = ecc_field(*calc_ecc(buf[256:]))

    def load_tags(self, tags: "yaffs_tags"):
        """Spread the 8 raw bytes of `tags` over `tb0`..`tb7`."""
        raw = bytearray(tags)
        (
            self.tb0,
            self.tb1,
            self.tb2,
            self.tb3,
            self.tb4,
            self.tb5,
            self.tb6,
            self.tb7,
        ) = raw

    def get_tags_as_bytes(self):
        """Return `tb0`..`tb7` joined back into an 8-byte `bytes` object."""
        return bytes(
            (
                self.tb0,
                self.tb1,
                self.tb2,
                self.tb3,
                self.tb4,
                self.tb5,
                self.tb6,
                self.tb7,
            )
        )

    def get_tags(self):
        """Return the tag bytes re-parsed as a `yaffs_tags` record."""
        return yaffs_tags.from_buffer_copy(self.get_tags_as_bytes())


assert sizeof(yaffs_spare) == 0x10
YAFFS_MAX_NAME_LENGTH = 255  # the name buffer holds one more byte than this
YAFFS_MAX_ALIAS_LENGTH = 159  # the alias buffer holds one more byte than this


class yaffs_obj_hdr(LittleEndianStructure):
    """Byte-packed, little-endian layout of an object header chunk."""

    _pack_ = 1
    _fields_ = [
        ("type", c_uint32),
        # for everything
        ("parent_obj_id", c_uint32),
        ("sum_no_longer_used", c_uint16),
        ("name", c_char * (YAFFS_MAX_NAME_LENGTH + 1)),
        ("_name_padding", c_byte * 2),  # not seen in yaffs_guts.h
        # for all except hardlinks
        *(
            (stat_field, c_uint32)
            for stat_field in (
                "yst_mode",
                "yst_uid",
                "yst_gid",
                "yst_atime",
                "yst_mtime",
                "yst_ctime",
            )
        ),
        # file only
        ("file_size_low", c_uint32),
        # hardlink only
        ("equiv_id", c_int32),
        # symlink only
        ("alias", c_char * (YAFFS_MAX_ALIAS_LENGTH + 1)),
        #
        ("yst_rdev", c_uint32),
    ]
def roundr(n, step):
    """Round `n` up to a whole number of `step`-sized units (0 stays 0)."""
    whole_steps = (n - 1) // step + 1
    return whole_steps * step
# ---------------------------------------------------------------------------
# Script body: read a spare-less image, then write it back out with a
# reconstructed 16-byte spare/OOB record after every 0x200-byte chunk.
#
# Arguments (all optional, positional):
#   1. input image path            (default "mtd5_system.img")
#   2. output image path           (default "<input stem>_FIXED<input ext>")
#   3. page size of the input dump (default 0x800)
#   4. 0/1: write the NAND-style OOB layout instead of the plain spare record
# ---------------------------------------------------------------------------
fn = sys.argv[1] if len(sys.argv) > 1 else "mtd5_system.img"
fn_se = os.path.splitext(fn)
outfn = sys.argv[2] if len(sys.argv) > 2 else fn_se[0] + "_FIXED" + fn_se[1]
orig_page_size = int(sys.argv[3]) if len(sys.argv) > 3 else 0x800
write_nand_oob = bool(int(sys.argv[4])) if len(sys.argv) > 4 else True  # unyaffs2 from yaffs2utils expects this format

# Each header is taken from the first CHUNK_SIZE bytes of a page, so a smaller
# page can never supply one; reject it here with a clear message.
if orig_page_size < CHUNK_SIZE:
    sys.exit(f"page size {orig_page_size:#x} is smaller than the chunk size {CHUNK_SIZE:#x}")

EMPTY_CHUNK = b"\xff" * CHUNK_SIZE

# Entries are (data, chunk_id, n_bytes, obj_id) tuples, in output order.
chunks = []  # TODO: don't read all of it to memory
with open(fn, "rb") as f:
    obj_id = YAFFS_OBJECTID_ROOT
    while True:
        # Only the first CHUNK_SIZE bytes of each page hold the object header.
        chunk = f.read(orig_page_size)[:CHUNK_SIZE]
        if len(chunk) != CHUNK_SIZE:
            # Explicit check rather than `assert`, so it still runs under -O
            # and tells the user what is wrong with the image.
            if chunk:
                sys.exit(f"{fn}: {len(chunk)} stray byte(s) at the end of the image, expected a whole header chunk")
            break
        if chunk == EMPTY_CHUNK:
            # An all-0xFF header chunk ends the scan.
            break

        # The object header itself: chunk id 0, n_bytes 0xFFFF.
        chunks.append((chunk, 0, 0xFFFF, obj_id))

        oh = yaffs_obj_hdr.from_buffer_copy(chunk)
        if oh.type == YAFFS_OBJECT_TYPE_FILE:
            # File payload follows the header, padded to a whole number of pages.
            file_size = oh.file_size_low
            file_data = f.read(roundr(file_size, orig_page_size))[:file_size]
            if len(file_data) != file_size:
                # Without this check, the loop below would emit empty or short
                # data chunks with made-up tags for the missing bytes.
                sys.exit(
                    f"{fn}: object {obj_id} declares {file_size} byte(s) of data "
                    f"but only {len(file_data)} are left in the image"
                )
            # Data chunks are numbered from 1; the last one may be short.
            for i, part_start in enumerate(range(0, file_size, CHUNK_SIZE)):
                part = file_data[part_start : part_start + CHUNK_SIZE]
                chunks.append((part, 1 + i, len(part), obj_id))

        # Object ids are not stored in the dump, so they are regenerated here:
        # after the first object they continue from YAFFS_NOBJECT_BUCKETS + 1.
        if obj_id < YAFFS_NOBJECT_BUCKETS:
            obj_id = YAFFS_NOBJECT_BUCKETS
        obj_id += 1

with open(outfn, "wb") as f:
    for chunk, chunk_id, n_bytes, obj_id in tqdm(chunks, desc="writing chunks", unit="chunk"):
        # Pad short (final) data chunks with 0xFF up to a full chunk.
        chunk += b"\xff" * (CHUNK_SIZE - len(chunk))
        f.write(chunk)

        tags = yaffs_tags()
        tags.obj_id = obj_id
        tags.chunk_id = chunk_id
        tags.n_bytes = n_bytes
        tags.serial_number = 0
        tags.calc_ecc()

        spare = yaffs_spare()
        spare.load_tags(tags)
        spare.calc_ecc(chunk)

        if write_nand_oob:
            # FIXME
            # Layout: ecc2, ecc1[0], two 0xFF filler bytes, ecc1[1:], then the 8 tag bytes.
            ecc = bytes(
                list(spare.ecc2) + [spare.ecc1[0], 0xFF, 0xFF] + spare.ecc1[1:]
            )  # ([0xEF, 0xBE, 0xAD, 0xDE] * 2)
            assert len(ecc) == 0x8
            # that's enough for unyaffs2
            nand_oob = ecc + bytes(tags)[:8]
            assert len(nand_oob) == 0x10
            f.write(nand_oob)
        else:
            f.write(spare)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SoonerXTR: Recovers files from YAFFS1-formatted partition images that are dumped "raw" and lack | |
| # the spare areas, which is particularly a problem with current dumps of the HTC EXCA300 (sooner)'s | |
| # Android system partition | |
| # | |
| # This script doesn't properly parse the dumped filesystem, and will only work if the filesystem: | |
| # - Has a linear layout, particularly if it was originally built with mkyaffsimage | |
| # - And was never modified (aside from being fully rewritten) after flashing | |
| # It will never work with proper dumps. To work with them, use actual tools like yaffey instead | |
| # | |
| # Original author: AyeTSG | |
| # https://web.archive.org/web/20230201203251/https://github.com/AyeTSG/SoonerXTR/blob/main/SoonerXTR.py | |
| # Rewritten by: someone else who is now actually somewhat starting to understand how YAFFS1 | |
| # is laid out | |
| import os | |
| import sys | |
| from ctypes import LittleEndianStructure, c_byte, c_char, c_int32, c_uint16, c_uint32 | |
| from enum import IntEnum, auto | |
| from time import sleep | |
CHUNK_SIZE = 2048  # 0x800: bytes read from the image per object header, and the payload alignment
CHUNK_DATA_SIZE = 512  # 0x200: leading bytes of a chunk that are parsed as the object header
# In most current dumps, spare areas don't follow each chunk
# Could be because of bad dumping, but we don't have anything better as of now
SPARE_SIZE = 16  # 0x10: only used for the "image probably includes spare areas" size check
def roundr(n, step):
    """Return `n` rounded up to a multiple of `step`; 0 is returned unchanged."""
    steps_below = (n - 1) // step
    return (steps_below + 1) * step
def decode_null_terminated_string(b, encoding="ascii"):
    """Decode the part of `b` that precedes its first NUL byte.

    If `b` contains no NUL, all of it is decoded. Decoding errors raise
    UnicodeDecodeError, as with a plain `bytes.decode(encoding)` call.
    """
    text_bytes, _, _ = b.partition(b"\x00")
    return text_bytes.decode(encoding)
# https://github.com/kempniu/yaffs2/blob/master/yaffs_guts.h
class yaffs_obj_type(IntEnum):
    """Values of the `type` field at the start of an object header."""

    YAFFS_OBJECT_TYPE_UNKNOWN = 0
    YAFFS_OBJECT_TYPE_FILE = 1
    YAFFS_OBJECT_TYPE_SYMLINK = 2  # not seen in dump
    YAFFS_OBJECT_TYPE_DIRECTORY = 3
    YAFFS_OBJECT_TYPE_HARDLINK = 4  # not seen in dump
    YAFFS_OBJECT_TYPE_SPECIAL = 5  # not seen in dump
    YAFFS_OBJECT_TYPE_EOF = 0xFFFFFFFF  # I made this up for cleanliness
# Object ids that have a preset directory path in this script.
YAFFS_OBJECTID_ROOT = 1  # also the id the running object counter starts from
YAFFS_OBJECTID_LOSTNFOUND = 2
YAFFS_OBJECTID_UNLINKED = 3
YAFFS_OBJECTID_DELETED = 4

YAFFS_NOBJECT_BUCKETS = 256  # 0x100: the running object id is bumped up to this before incrementing
YAFFS_MAX_NAME_LENGTH = 255  # the name buffer holds one more byte than this
YAFFS_MAX_ALIAS_LENGTH = 159  # the alias buffer holds one more byte than this

# The six consecutive 32-bit fields that follow the name padding.
_STAT_FIELD_NAMES = ("yst_mode", "yst_uid", "yst_gid", "yst_atime", "yst_mtime", "yst_ctime")


class yaffs_obj_hdr(LittleEndianStructure):
    """Byte-packed, little-endian layout of an object header chunk."""

    _pack_ = 1
    _fields_ = (
        [
            ("type", c_uint32),
            # for everything
            ("parent_obj_id", c_uint32),
            ("sum_no_longer_used", c_uint16),
            ("name", c_char * (YAFFS_MAX_NAME_LENGTH + 1)),
            ("_name_padding", c_byte * 2),  # not seen in yaffs_guts.h
        ]
        # for all except hardlinks
        + [(stat_name, c_uint32) for stat_name in _STAT_FIELD_NAMES]
        + [
            # file only
            ("file_size_low", c_uint32),
            # hardlink only
            ("equiv_id", c_int32),
            # symlink only
            ("alias", c_char * (YAFFS_MAX_ALIAS_LENGTH + 1)),
            #
            ("yst_rdev", c_uint32),
        ]
    )
# Directory paths known before any header is read, keyed by object id.
# read_chunks() copies this mapping and extends the copy as directories are found.
PRESET_DIR_OBJ_PATHS = dict(
    (
        (YAFFS_OBJECTID_ROOT, "/"),
        (YAFFS_OBJECTID_LOSTNFOUND, "/[lost+found]/"),
        (YAFFS_OBJECTID_UNLINKED, "/[unlinked]/"),
        (YAFFS_OBJECTID_DELETED, "/[deleted]/"),
    )
)
def read_chunks(img, base_dir):
    """Walk the image linearly, then write every file it contains under `base_dir`.

    This is a generator and does its work as it is iterated. It runs in two
    passes and yields a `(step name, object id)` tuple after each object in
    each pass:

    1. "determining directory structure": read object headers (and the file
       payload that follows each file header) from `img`, numbering objects
       ourselves and recording the path of every directory.
    2. "extracting files": write each file's payload to
       `base_dir + <parent directory path> + <file name>`.

    `img` is a binary file object positioned at the start of the image;
    `base_dir` is a path prefix string (directory paths start with "/").
    """
    type_file = yaffs_obj_type.YAFFS_OBJECT_TYPE_FILE
    type_dir = yaffs_obj_type.YAFFS_OBJECT_TYPE_DIRECTORY
    type_eof = yaffs_obj_type.YAFFS_OBJECT_TYPE_EOF

    step = "determining directory structure"
    print(step)

    records = []  # one [header, object id, file payload or None] list per object
    paths_by_obj_id = dict(PRESET_DIR_OBJ_PATHS)
    next_obj_id = YAFFS_OBJECTID_ROOT  # keep track of it ourselves since we don't have the spare areas

    while True:
        page = img.read(CHUNK_SIZE)
        if len(page) < CHUNK_SIZE:
            assert len(page) == 0
            break

        header = yaffs_obj_hdr.from_buffer_copy(page[:CHUNK_DATA_SIZE])
        if header.type == type_eof:  # end of file system
            break

        record = [header, None, None]
        records.append(record)
        parent = header.parent_obj_id

        if parent == 0:
            # no parent? seen twice at the start of userdata, names are all 0xff as well
            # in system the first entry is root with 1 as parent
            # https://github.com/kempniu/yaffs2/blob/613a901a229e8a701c18f003dd0aee18453e0670/yaffs_guts.c#L3114
            next_obj_id = max(next_obj_id, YAFFS_NOBJECT_BUCKETS)
        elif header.type == type_dir:
            # -- GET DIR NAME
            name = decode_null_terminated_string(header.name)
            print(f"- DIR: {name}")
            if next_obj_id not in paths_by_obj_id:
                paths_by_obj_id[next_obj_id] = paths_by_obj_id[parent] + name + "/"
            print(f" PARENT OBJ ID: {parent}")
            print(f" OBJ ID: {next_obj_id}")
        elif header.type == type_file:
            # some files in userdata strangely tend to be zero-sized in header despite containing data,
            # so doing this won't work
            # -- GET FILE SIZE
            size = header.file_size_low
            # -- READ CONTENT TO MEMORY (assuming it comes right after the OH)
            # We don't need to align, as we already read CHUNK_SIZE bytes above
            record[2] = img.read(roundr(size, CHUNK_SIZE))[:size]

        record[1] = next_obj_id
        yield step, next_obj_id

        # Ids below YAFFS_NOBJECT_BUCKETS are skipped once the first object is done.
        next_obj_id = max(next_obj_id, YAFFS_NOBJECT_BUCKETS) + 1

    print()
    step = "extracting files"
    print(step)

    # Files under these parents get their object id appended to the file name.
    suffixed_parents = (YAFFS_OBJECTID_LOSTNFOUND, YAFFS_OBJECTID_UNLINKED, YAFFS_OBJECTID_DELETED)
    for header, obj_id, payload in records:
        if header.type != type_file:
            continue
        assert payload is not None
        parent = header.parent_obj_id

        # -- GET FILE NAME
        name = decode_null_terminated_string(header.name)
        print(f"- FILE: {name}")
        print(f" PARENT OBJ ID: {parent}")
        print(f" OBJ ID: {obj_id}")

        destination = base_dir + paths_by_obj_id[parent] + name
        if parent in suffixed_parents:
            destination += f"_{obj_id}"
        print(f" DESTINATION: {destination}")
        os.makedirs(os.path.dirname(destination), exist_ok=True)

        # -- DUMP THE CONTENT!
        with open(destination, "wb") as out:
            out.write(payload)
        yield "extracting files", obj_id
# Script body: extract the image named on the command line (or the default
# "mtd5_system.img") into a directory named after the image without its extension.
fn = "mtd5_system.img" if len(sys.argv) <= 1 else sys.argv[1]
with open(fn, "rb") as img:
    image_size = os.path.getsize(fn)
    # A size that is a multiple of chunk+spare but not of the chunk alone
    # suggests the dump still has its spare areas.
    has_spare_sized_units = image_size % (CHUNK_SIZE + SPARE_SIZE) == 0
    if image_size % CHUNK_SIZE != 0 and has_spare_sized_units:
        print("Warning: Image probably includes spare areas, try other tools like yaffey")
        print("I recommend pressing CTRL+C right now to cancel extraction")
        sleep(0.2)
    # read_chunks() is a generator; draining it is what performs the extraction.
    extraction = read_chunks(img, os.path.splitext(fn)[0])
    for _ in extraction:
        pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment