Skip to content

Instantly share code, notes, and snippets.

@Dobby233Liu
Last active March 16, 2026 13:36
Show Gist options
  • Select an option

  • Save Dobby233Liu/05e643c7034c1a14840d8ffa5e3ae9e1 to your computer and use it in GitHub Desktop.

Select an option

Save Dobby233Liu/05e643c7034c1a14840d8ffa5e3ae9e1 to your computer and use it in GitHub Desktop.
SoonerXTR if it was good and honest + FixLater
# FixLater: Reconstructs YAFFS1 spare/oob data for image files that don't have it
# Copyright (C) 2026 Liu Wenyuan
# Derived from code in YAFFS; copyright (C) 2002-2018 Aleph One Ltd.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License version 2 as published by the Free
# Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
# This is not very well documented right now, especially since the output is still too unusual
# to be supported by most tools; sorry
# For now, just know that this:
# - Is not a substitute for actually having the OOB data, especially not for archival purposes
# - Makes the same assumptions as SoonerXTR "v2" about the input
# See https://betawiki.net/wiki/User:OstupBurtik/Google_Sooner_(EXCA300)_proper_dumping
# if you're interested in a more proper dumping methodology
import os
import sys
from ctypes import (
LittleEndianStructure,
byref,
c_byte,
c_char,
c_int32,
c_ubyte,
c_uint16,
c_uint32,
c_uint64,
memset,
sizeof,
)
from bitfield import make_bf # ctypes_bitfield
from tqdm import tqdm
# Number of data bytes in every chunk written to the output image
CHUNK_SIZE = 0x200
# Object ID given to the first object header read from the input
YAFFS_OBJECTID_ROOT = 1
# Object IDs handed out after the first one start just above this value
YAFFS_NOBJECT_BUCKETS = 0x100
# Header `type` value marking a file, i.e. a header that is followed by file data
YAFFS_OBJECT_TYPE_FILE = 1
class yaffs_tags(
    make_bf(
        "_yaffs_tags_base",
        [
            ("chunk_id", c_uint32, 20),
            ("serial_number", c_uint32, 2),
            ("n_bytes", c_uint32, 10),
            ("obj_id", c_uint32, 18),
            ("ecc", c_uint32, 12),
            ("unused", c_uint32, 2),
        ],
        basetype=c_uint64,  # pyright: ignore[reportArgumentType]
    )
):
    """Bitfield view of the 8-byte YAFFS1 tag record.

    A freshly constructed instance has every bit set to 1; fields are then
    overwritten individually and ``calc_ecc`` fills in the ``ecc`` field.
    """

    def __init__(self):
        super().__init__()
        self._clear()
        self.unused = 3

    def _clear(self):
        """Set every byte of the record to 0xFF."""
        memset(byref(self), 0xFF, sizeof(self))

    def calc_ecc(self):
        """Recompute the ``ecc`` field from the rest of the record and return it.

        The field is zeroed first. Then the 1-based positions of all set bits in
        the 8-byte record (bit 0 of byte 0 is position 1) are XORed together.
        """
        self.ecc = 0
        # Snapshot taken after zeroing so the previous ECC never feeds back in
        snapshot = bytes(self)
        parity = 0
        for position in range(64):
            if (snapshot[position >> 3] >> (position & 7)) & 1:
                parity ^= position + 1
        self.ecc = parity
        return self.ecc


assert sizeof(yaffs_tags) == 0x8
class yaffs_packed_tags1(LittleEndianStructure):
    """A ``yaffs_tags`` record followed by one 32-bit filler word, packed without padding.

    ``_tags`` is declared anonymous, so the tag bitfields can be accessed directly
    on an instance of this class.
    """

    _pack_ = 1
    _anonymous_ = ["_tags"]
    _fields_ = [
        ("_tags", yaffs_tags),
        ("should_be_ff", c_uint32),
    ]

    def __init__(self, tags: yaffs_tags) -> None:
        """Store *tags* and set the filler word to all ones."""
        super().__init__()
        self.should_be_ff = 0xFFFFFFFF
        self._tags = tags
# Per-byte lookup used by calc_ecc: bit 0 of an entry tells whether the byte value has an
# odd number of set bits; the remaining bits are XOR-accumulated into the column parity.
# fmt: off
column_parity_table = (
    0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69, 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00,
    0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc, 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
    0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0, 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
    0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65, 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
    0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc, 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
    0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59, 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
    0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55, 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
    0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0, 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
    0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0, 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
    0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55, 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
    0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59, 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
    0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc, 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
    0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65, 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
    0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0, 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
    0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc, 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
    0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69, 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00
)
# fmt: on


def _interleave_parity_bits(primary, complement):
    """Interleave the low 4 bits of two values into one byte.

    Bit k of *primary* lands on output bit 2k+1 and bit k of *complement* on
    output bit 2k, for k in 0..3.
    """
    mixed = 0
    for k in range(4):
        mixed |= ((primary >> k) & 1) << (2 * k + 1)
        mixed |= ((complement >> k) & 1) << (2 * k)
    return mixed


def calc_ecc(data):
    """Return the 3-byte ECC of the first 256 bytes of *data* as a ``bytearray``.

    Result layout:
      [0] inverted interleave of bits 3..0 of the line parity and its complement
      [1] inverted interleave of bits 7..4 of the line parity and its complement
      [2] inverted column parity with the two lowest bits forced to 1

    Raises IndexError if *data* holds fewer than 256 items; anything past the
    first 256 is ignored.
    """
    column_acc = 0
    row_acc = 0
    row_acc_inv = 0
    for offset in range(256):
        entry = column_parity_table[data[offset]]
        column_acc ^= entry
        if entry & 0x01:  # odd number of bits in the byte
            row_acc ^= offset
            row_acc_inv ^= ~offset & 0xFF

    low_half = _interleave_parity_bits(row_acc, row_acc_inv)
    high_half = _interleave_parity_bits(row_acc >> 4, row_acc_inv >> 4)
    return bytearray(
        (
            ~low_half & 0xFF,
            ~high_half & 0xFF,
            (~column_acc & 0xFF) | 0x03,
        )
    )
class yaffs_spare(LittleEndianStructure):
_pack_ = 1
_fields_ = [
("tb0", c_ubyte), # 0
("tb1", c_ubyte), # 1
("tb2", c_ubyte), # 2
("tb3", c_ubyte), # 3
("page_status", c_ubyte), # 4
("block_status", c_ubyte), # 5
("tb4", c_ubyte), # 6
("tb5", c_ubyte), # 7
("ecc1", c_ubyte * 3), # 8 9 10
("tb6", c_ubyte), # 11
("tb7", c_ubyte), # 12
("ecc2", c_ubyte * 3), # 13 14 15
]
def __init__(self):
super().__init__()
self._clear()
def _clear(self):
memset(byref(self), 0xFF, sizeof(self))
def calc_ecc(self, data: bytes):
data_view = bytearray(data)
# this is cursed
self.ecc1 = type(self.ecc1).from_buffer(calc_ecc(data_view[:256]))
self.ecc2 = type(self.ecc2).from_buffer(calc_ecc(data_view[256:]))
def load_tags(self, tags: yaffs_tags):
self.tb0, self.tb1, self.tb2, self.tb3, self.tb4, self.tb5, self.tb6, self.tb7 = bytearray(tags)
def get_tags_as_bytes(self):
return bytes([self.tb0, self.tb1, self.tb2, self.tb3, self.tb4, self.tb5, self.tb6, self.tb7])
def get_tags(self):
return yaffs_tags.from_buffer_copy(self.get_tags_as_bytes())
assert sizeof(yaffs_spare) == 0x10
# Longest name / symlink target, not counting the terminating NUL
YAFFS_MAX_NAME_LENGTH = 256 - 1
YAFFS_MAX_ALIAS_LENGTH = 160 - 1


class yaffs_obj_hdr(LittleEndianStructure):
    """Leading fields of an object header chunk, laid out byte-packed (464 bytes).

    Which of the trailing fields carry meaning depends on the object type; see
    the comments on the individual groups.
    """

    _pack_ = 1
    _fields_ = [
        ("type", c_uint32),
        # --- valid for every object type ---
        ("parent_obj_id", c_uint32),
        ("sum_no_longer_used", c_uint16),
        ("name", c_char * (YAFFS_MAX_NAME_LENGTH + 1)),
        # not seen in yaffs_guts.h; the name ends at offset 266, so these two
        # bytes bring the next field to a 4-byte boundary
        ("_name_padding", c_byte * 2),
        # --- valid for everything except hardlinks ---
        ("yst_mode", c_uint32),
        ("yst_uid", c_uint32),
        ("yst_gid", c_uint32),
        ("yst_atime", c_uint32),
        ("yst_mtime", c_uint32),
        ("yst_ctime", c_uint32),
        # --- files only ---
        ("file_size_low", c_uint32),
        # --- hardlinks only ---
        ("equiv_id", c_int32),
        # --- symlinks only ---
        ("alias", c_char * (YAFFS_MAX_ALIAS_LENGTH + 1)),
        # ---
        ("yst_rdev", c_uint32),
    ]
def roundr(n, step):
    """Round *n* up to the next multiple of *step*; exact multiples (and 0) are unchanged."""
    whole_steps_below, _ = divmod(n - 1, step)
    return (whole_steps_below + 1) * step
# ---------------------------------------------------------------------------
# Script body.
# Positional command-line arguments, all optional:
#   1  input image path
#   2  output image path (default: input name with "_FIXED" before the extension)
#   3  page size of the input image in bytes (default 0x800)
#   4  0/1: write the NAND-style OOB layout instead of the raw yaffs_spare (default 1)
# ---------------------------------------------------------------------------
fn = sys.argv[1] if len(sys.argv) > 1 else "mtd5_system.img"
fn_se = os.path.splitext(fn)
outfn = sys.argv[2] if len(sys.argv) > 2 else fn_se[0] + "_FIXED" + fn_se[1]
orig_page_size = int(sys.argv[3]) if len(sys.argv) > 3 else 0x800
write_nand_oob = bool(int(sys.argv[4])) if len(sys.argv) > 4 else True  # unyaffs2 from yaffs2utils expects this format

# A page smaller than one chunk can never yield a full header; zero or negative
# sizes would make read() return nothing or the whole file.
if orig_page_size < CHUNK_SIZE:
    raise ValueError(
        f"page size {orig_page_size:#x} is smaller than the chunk size {CHUNK_SIZE:#x}"
    )

EMPTY_CHUNK = b"\xff" * CHUNK_SIZE

# Pass 1: collect (data, chunk_id, n_bytes, obj_id) for every chunk to be written.
chunks = []  # TODO: don't read all of it to memory
with open(fn, "rb") as f:
    obj_id = YAFFS_OBJECTID_ROOT
    while True:
        # Only the first CHUNK_SIZE bytes of a header page are used
        chunk = f.read(orig_page_size)[:CHUNK_SIZE]
        if len(chunk) != CHUNK_SIZE:
            # Explicit check instead of `assert`, which is stripped under -O
            if chunk:
                raise ValueError(
                    f"{fn}: image ends with a partial chunk of {len(chunk)} bytes"
                )
            break
        if chunk == EMPTY_CHUNK:
            # An all-0xFF page marks the end of the used area
            break

        # Object header: chunk_id 0, n_bytes 0xFFFF
        chunks.append((chunk, 0, 0xFFFF, obj_id))

        oh = yaffs_obj_hdr.from_buffer_copy(chunk)
        if oh.type == YAFFS_OBJECT_TYPE_FILE:
            file_size = oh.file_size_low
            # File data follows the header and is padded to a whole number of pages
            file_data = f.read(roundr(file_size, orig_page_size))[:file_size]
            if len(file_data) < file_size:
                # Best effort is kept (the missing chunks are still emitted, empty),
                # but the truncation is no longer silent.
                print(
                    f"warning: object {obj_id}: header announces {file_size} bytes "
                    f"but only {len(file_data)} could be read",
                    file=sys.stderr,
                )
            for i, part_start in enumerate(range(0, file_size, CHUNK_SIZE)):
                part = file_data[part_start : part_start + CHUNK_SIZE]
                # Data chunks are numbered from 1 within their object
                chunks.append((part, 1 + i, len(part), obj_id))

        # The first object keeps YAFFS_OBJECTID_ROOT; later IDs continue after
        # YAFFS_NOBJECT_BUCKETS.
        if obj_id < YAFFS_NOBJECT_BUCKETS:
            obj_id = YAFFS_NOBJECT_BUCKETS
        obj_id += 1

# Pass 2: write each chunk followed by 16 bytes of reconstructed spare data.
with open(outfn, "wb") as f:
    for chunk, chunk_id, n_bytes, obj_id in tqdm(chunks, desc="writing chunks", unit="chunk"):
        # Short (final) chunks are padded with 0xFF up to CHUNK_SIZE
        chunk += b"\xff" * (CHUNK_SIZE - len(chunk))
        f.write(chunk)

        tags = yaffs_tags()
        tags.obj_id = obj_id
        tags.chunk_id = chunk_id
        tags.n_bytes = n_bytes
        tags.serial_number = 0
        tags.calc_ecc()  # must come last: it covers all other tag fields

        spare = yaffs_spare()
        spare.load_tags(tags)
        spare.calc_ecc(chunk)

        if write_nand_oob:
            # FIXME
            ecc = bytes(
                list(spare.ecc2) + [spare.ecc1[0], 0xFF, 0xFF] + spare.ecc1[1:]
            )  # ([0xEF, 0xBE, 0xAD, 0xDE] * 2)
            assert len(ecc) == 0x8
            # that's enough for unyaffs2
            nand_oob = ecc + bytes(tags)[:8]
            assert len(nand_oob) == 0x10
            f.write(nand_oob)
        else:
            f.write(spare)
# SoonerXTR: Recovers files from YAFFS1-formatted partition images that are dumped "raw" and lack
# the spare areas, which is particularly a problem with current dumps of the HTC EXCA300 (sooner)'s
# Android system partition
#
# This script doesn't properly parse the dumped filesystem, and will only work if the filesystem:
# - Has a linear layout, particularly if it was originally built with mkyaffsimage
# - And was never modified (aside from being fully rewritten) after flashing
# It will never work with proper dumps. To work with them, use actual tools like yaffey instead
#
# Original author: AyeTSG
# https://web.archive.org/web/20230201203251/https://github.com/AyeTSG/SoonerXTR/blob/main/SoonerXTR.py
# Rewritten by: someone else who is now actually somewhat starting to understand how YAFFS1
# is laid out
import os
import sys
from ctypes import LittleEndianStructure, c_byte, c_char, c_int32, c_uint16, c_uint32
from enum import IntEnum, auto
from time import sleep
# Granularity in bytes of every read from the image: each object header occupies one
# unit, and file data is padded to a multiple of it
CHUNK_SIZE = 0x800
# Number of leading bytes of a header unit that are parsed as the object header
CHUNK_DATA_SIZE = 0x200
# In most current dumps, spare areas don't follow each chunk
# Could be because of bad dumping, but we don't have anything better as of now
# (only used to warn when the image size suggests that spare areas are present)
SPARE_SIZE = 0x10
def roundr(n, step):
    """Round *n* up to the next multiple of *step*; exact multiples (and 0) are unchanged."""
    full_steps, _remainder = divmod(n - 1, step)
    return step * (full_steps + 1)
def decode_null_terminated_string(b, encoding="ascii"):
    """Decode the part of *b* that precedes its first NUL byte (all of it if there is none)."""
    text_bytes, _separator, _rest = b.partition(b"\x00")
    return text_bytes.decode(encoding)
# https://github.com/kempniu/yaffs2/blob/master/yaffs_guts.h
class yaffs_obj_type(IntEnum):
    """Values of the ``type`` field of an object header."""

    YAFFS_OBJECT_TYPE_UNKNOWN = 0
    YAFFS_OBJECT_TYPE_FILE = 1
    YAFFS_OBJECT_TYPE_SYMLINK = 2  # not seen in dump
    YAFFS_OBJECT_TYPE_DIRECTORY = 3
    YAFFS_OBJECT_TYPE_HARDLINK = 4  # not seen in dump
    YAFFS_OBJECT_TYPE_SPECIAL = 5  # not seen in dump
    # I made this up for cleanliness: what the type field reads as in an erased (all 0xFF) chunk
    YAFFS_OBJECT_TYPE_EOF = 0xFFFFFFFF
# Object IDs with a fixed meaning; each one is given a path in PRESET_DIR_OBJ_PATHS
YAFFS_OBJECTID_ROOT = 1
YAFFS_OBJECTID_LOSTNFOUND = 2
YAFFS_OBJECTID_UNLINKED = 3
YAFFS_OBJECTID_DELETED = 4
# Object IDs generated while scanning are raised to this value before being incremented,
# so every object after the first gets an ID above it
YAFFS_NOBJECT_BUCKETS = 0x100
# Longest name / symlink target, not counting the terminating NUL
YAFFS_MAX_NAME_LENGTH = 256 - 1
YAFFS_MAX_ALIAS_LENGTH = 160 - 1


class yaffs_obj_hdr(LittleEndianStructure):
    """Leading fields of an object header chunk, laid out byte-packed (464 bytes).

    Which of the trailing fields carry meaning depends on the object type; see
    the comments on the individual groups.
    """

    _pack_ = 1
    _fields_ = [
        ("type", c_uint32),
        # --- valid for every object type ---
        ("parent_obj_id", c_uint32),
        ("sum_no_longer_used", c_uint16),
        ("name", c_char * (YAFFS_MAX_NAME_LENGTH + 1)),
        # not seen in yaffs_guts.h; the name ends at offset 266, so these two
        # bytes bring the next field to a 4-byte boundary
        ("_name_padding", c_byte * 2),
        # --- valid for everything except hardlinks ---
        ("yst_mode", c_uint32),
        ("yst_uid", c_uint32),
        ("yst_gid", c_uint32),
        ("yst_atime", c_uint32),
        ("yst_mtime", c_uint32),
        ("yst_ctime", c_uint32),
        # --- files only ---
        ("file_size_low", c_uint32),
        # --- hardlinks only ---
        ("equiv_id", c_int32),
        # --- symlinks only ---
        ("alias", c_char * (YAFFS_MAX_ALIAS_LENGTH + 1)),
        # ---
        ("yst_rdev", c_uint32),
    ]
# Paths of the directories whose object IDs are fixed ahead of time, keyed by object ID.
# read_chunks() starts from a copy of this mapping and adds the directories it finds;
# the paths are appended to the extraction base directory.
PRESET_DIR_OBJ_PATHS = {
    YAFFS_OBJECTID_ROOT: "/",
    YAFFS_OBJECTID_LOSTNFOUND: "/[lost+found]/",
    YAFFS_OBJECTID_UNLINKED: "/[unlinked]/",
    YAFFS_OBJECTID_DELETED: "/[deleted]/",
}
def read_chunks(img, base_dir):
    """Scan a spare-less YAFFS1 image and extract every file found in it.

    This is a generator that works in two passes and yields once per processed
    object, so that a caller can follow progress:

    1. "determining directory structure": object headers are read sequentially
       from *img*. Object IDs are not stored in the image, so they are assigned
       by counting. Directory paths are recorded, and the data of each file
       (assumed to directly follow its header) is read into memory.
    2. "extracting files": every recorded file is written below *base_dir*.

    Args:
        img: Binary file object positioned at the start of the image.
        base_dir: Prefix that the in-image paths (which start with "/") are appended to.

    Yields:
        ``(step, obj_id)`` tuples, where *step* is the name of the current pass.

    Raises:
        ValueError: If the image ends with a partial page.
        KeyError: If a directory or file refers to a parent directory that has
            not been seen.
    """
    step = "determining directory structure"
    print(step)

    chunks = []  # one [header, object ID, file data or None] entry per object
    dir_obj_paths = PRESET_DIR_OBJ_PATHS.copy()
    cur_obj_id = YAFFS_OBJECTID_ROOT  # keep track of it ourselves since we don't have the spare areas

    while True:
        chunk_data = img.read(CHUNK_SIZE)
        if len(chunk_data) < CHUNK_SIZE:
            # Explicit check instead of `assert`, which is stripped under -O
            if chunk_data:
                raise ValueError(
                    f"image ends with a partial page of {len(chunk_data)} bytes "
                    f"(expected a multiple of {CHUNK_SIZE})"
                )
            break

        oh = yaffs_obj_hdr.from_buffer_copy(chunk_data[:CHUNK_DATA_SIZE])
        if oh.type == yaffs_obj_type.YAFFS_OBJECT_TYPE_EOF:  # end of file system
            break

        entry = [oh, None, None]
        chunks.append(entry)

        parent_obj_id = oh.parent_obj_id
        print_obj_id = False
        if parent_obj_id == 0:
            # no parent? seen twice at the start of userdata, names are all 0xff as well
            # in system the first entry is root with 1 as parent
            # https://github.com/kempniu/yaffs2/blob/613a901a229e8a701c18f003dd0aee18453e0670/yaffs_guts.c#L3114
            if cur_obj_id < YAFFS_NOBJECT_BUCKETS:
                cur_obj_id = YAFFS_NOBJECT_BUCKETS
        elif oh.type == yaffs_obj_type.YAFFS_OBJECT_TYPE_DIRECTORY:
            print_obj_id = True

            # -- GET DIR NAME
            dir_name = decode_null_terminated_string(oh.name)
            print(f"- DIR: {dir_name}")

            # Preset directories (e.g. the root) keep their predefined path
            if cur_obj_id not in dir_obj_paths:
                dir_obj_paths[cur_obj_id] = dir_obj_paths[parent_obj_id] + dir_name + "/"
        elif oh.type == yaffs_obj_type.YAFFS_OBJECT_TYPE_FILE:
            # some files in userdata strangely tend to be zero-sized in header despite containing data,
            # so doing this won't work

            # -- GET FILE SIZE
            file_size_low = oh.file_size_low

            # -- READ CONTENT TO MEMORY (assuming it comes right after the OH)
            # We don't need to align, as we already read CHUNK_SIZE bytes above
            entry[2] = img.read(roundr(file_size_low, CHUNK_SIZE))[:file_size_low]

        if print_obj_id:
            print(f" PARENT OBJ ID: {parent_obj_id}")
            print(f" OBJ ID: {str(cur_obj_id)}")

        entry[1] = cur_obj_id
        yield step, cur_obj_id

        # The first object keeps YAFFS_OBJECTID_ROOT; later IDs continue after
        # YAFFS_NOBJECT_BUCKETS.
        if cur_obj_id < YAFFS_NOBJECT_BUCKETS:
            cur_obj_id = YAFFS_NOBJECT_BUCKETS
        cur_obj_id += 1

    print()

    step = "extracting files"
    print(step)

    for oh, cur_obj_id, file_data in chunks:
        if oh.type != yaffs_obj_type.YAFFS_OBJECT_TYPE_FILE:
            continue
        if file_data is None:
            # Only happens for a file header whose parent ID is 0: the first pass
            # read no data for it and there is no directory to put it in. Skip it
            # instead of aborting the whole extraction on an assertion.
            print(f"- FILE: (obj id {cur_obj_id}) has no parent, skipped")
            continue

        parent_obj_id = oh.parent_obj_id

        # -- GET FILE NAME
        file_name = decode_null_terminated_string(oh.name)
        print(f"- FILE: {file_name}")
        print(f" PARENT OBJ ID: {parent_obj_id}")
        print(f" OBJ ID: {str(cur_obj_id)}")

        path = base_dir + dir_obj_paths[parent_obj_id] + file_name
        if parent_obj_id in [YAFFS_OBJECTID_LOSTNFOUND, YAFFS_OBJECTID_UNLINKED, YAFFS_OBJECTID_DELETED]:
            # Names in these directories get the object ID appended
            path += f"_{cur_obj_id}"
        print(f" DESTINATION: {path}")

        os.makedirs(os.path.dirname(path), exist_ok=True)

        # -- DUMP THE CONTENT!
        with open(path, "wb") as f:
            f.write(file_data)

        yield step, cur_obj_id
# Script body: the image path is the only (optional) command-line argument; files are
# extracted into a directory named after the image without its extension.
fn = sys.argv[1] if len(sys.argv) > 1 else "mtd5_system.img"
with open(fn, "rb") as img:
    image_size = os.path.getsize(fn)
    fits_plain_pages = image_size % CHUNK_SIZE == 0
    fits_pages_with_spare = image_size % (CHUNK_SIZE + SPARE_SIZE) == 0
    if not fits_plain_pages and fits_pages_with_spare:
        print("Warning: Image probably includes spare areas, try other tools like yaffey")
        print("I recommend pressing CTRL+C right now to cancel extraction")
        # NOTE(review): the original indentation was lost; the pause is assumed to
        # belong to the warning branch - confirm
        sleep(0.2)

    # read_chunks is a generator; it has to be exhausted for the extraction to happen
    extraction = read_chunks(img, os.path.splitext(fn)[0])
    for _progress in extraction:
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment