Created
May 27, 2026 16:14
-
-
Save edsu/0eada612b6bc75d90f887ec689b4c4ff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Extract the final mixed track from a Tascam DAW backup disk image. | |
| Finds the audio start offset by detecting the end of the FAT region, then | |
| scans for silence gaps to locate track boundaries and extracts the last track | |
| (the stereo or mono mix) as a WAV file. | |
| Usage: | |
| python3 extract_mix.py <backup.bin> [output.wav] | |
| Options are tunable via constants below if silence detection needs adjustment. | |
| """ | |
| import sys | |
| import os | |
| import struct | |
| import wave | |
# Sample rate used for gap-length maths, duration reporting and the WAV header.
SAMPLE_RATE = 44100
CHUNK_SAMPLES = 4096 # samples to read at a time during silence scan
SILENCE_THRESH = 500 # 24-bit amplitude threshold for "silence" (out of 8388607)
MIN_GAP_SECS = 0.5 # minimum silence duration to count as a track boundary
# Byte signature looked for in the first 0x10000 bytes of the image; its
# absence only triggers a warning.
TASCAM_SIG = b"dSNGMNG"
# A scanned chunk counts as FAT when its fraction of 0xFF bytes exceeds this.
FF_THRESHOLD = 0.70
# Number of bytes read (and stepped over) per iteration of the FAT scan.
FAT_SCAN_CHUNK = 0x800
def find_audio_offset(f, size):
    """Locate the byte offset at which audio data begins in a backup image.

    The image is scanned in FAT_SCAN_CHUNK-byte steps. A chunk whose share of
    0xFF bytes exceeds FF_THRESHOLD is treated as part of the FAT region; the
    first later chunk that falls back to or below the threshold is taken as
    the start of the audio.

    Args:
        f: Seekable binary file object for the image.
        size: Total size of the image in bytes; scanning stops there.

    Returns:
        Offset of the first chunk after the 0xFF-heavy region, or None when
        no such region is found or it runs to the end of the image.
    """
    f.seek(0)
    header = f.read(0x10000)
    if TASCAM_SIG not in header:
        # Only a warning: the scan below does not depend on the signature.
        print("Warning: Tascam signature (dSNGMNG) not found — may not be a Tascam backup")

    in_fat = False
    offset = 0
    while offset < size:
        f.seek(offset)
        chunk = f.read(FAT_SCAN_CHUNK)
        if not chunk:
            break
        # bytes.count runs in C; a per-byte Python loop is far too slow when
        # the image is gigabytes long.
        ff_pct = chunk.count(b"\xff") / len(chunk)
        if ff_pct > FF_THRESHOLD:
            in_fat = True
        elif in_fat:
            return offset
        offset += FAT_SCAN_CHUNK
    return None
def read_samples(f, n):
    """Pull up to n samples from f and decode them to a list of ints.

    Each sample is three bytes, big-endian, two's-complement. Fewer than n
    samples come back when the stream runs short, and a trailing fragment
    shorter than three bytes is ignored.
    """
    data = f.read(3 * n)
    whole = len(data) // 3
    return [
        int.from_bytes(data[pos:pos + 3], "big", signed=True)
        for pos in range(0, whole * 3, 3)
    ]
def find_track_boundaries(f, audio_offset, size):
    """Return list of (start_byte, end_byte) for each track, in file coordinates.

    The audio region is read as consecutive 24-bit samples. A run of at least
    MIN_GAP_SECS worth of samples whose magnitude is at or below
    SILENCE_THRESH, followed by a louder sample, closes the current track at
    the start of the run and opens the next one at the louder sample.

    Args:
        f: Seekable binary file object for the image.
        audio_offset: Byte offset where the audio region starts.
        size: Byte position at which scanning stops.

    Returns:
        A list of (start_byte, end_byte) tuples with end_byte exclusive. The
        last entry extends to wherever scanning stopped, including any
        trailing silence.
    """
    min_gap_samples = int(MIN_GAP_SECS * SAMPLE_RATE)
    f.seek(audio_offset)
    boundaries = []
    track_start = audio_offset
    silence_run = 0
    byte_pos = audio_offset
    while byte_pos < size:
        samples = read_samples(f, CHUNK_SAMPLES)
        if not samples:
            break
        for s in samples:
            if abs(s) <= SILENCE_THRESH:
                silence_run += 1
            else:
                if silence_run >= min_gap_samples:
                    # End of a track — record it, start new one after the gap
                    gap_start_byte = byte_pos - silence_run * 3
                    # A gap at the very start of the audio region would
                    # otherwise yield a zero-length track; skip it.
                    if gap_start_byte > track_start:
                        boundaries.append((track_start, gap_start_byte))
                    track_start = byte_pos
                silence_run = 0
            byte_pos += 3
    # Final track runs to end of file
    if byte_pos > track_start:
        boundaries.append((track_start, byte_pos))
    return boundaries
def write_wav(path, f, start_byte, end_byte, channels=1):
    """Copy the 24-bit samples in [start_byte, end_byte) of f into a WAV file.

    Args:
        path: Destination WAV file path.
        f: Seekable binary file object holding big-endian 24-bit samples.
        start_byte: Offset of the first sample to copy.
        end_byte: Offset just past the last sample to copy.
        channels: Channel count written to the WAV header.
    """
    total = (end_byte - start_byte) // 3
    f.seek(start_byte)
    with wave.open(path, "w") as out:
        out.setnchannels(channels)
        out.setsampwidth(3)  # three bytes per 24-bit sample
        out.setframerate(SAMPLE_RATE)
        left = total
        while left > 0:
            want = min(CHUNK_SAMPLES, left)
            data = f.read(want * 3)
            if not data:
                break
            # Only whole samples are converted; a trailing fragment is dropped.
            usable = len(data) - len(data) % 3
            # WAV stores samples little-endian, the source is big-endian:
            # reverse every 3-byte group by swapping its outer bytes.
            swapped = bytearray(usable)
            swapped[0::3] = data[2:usable:3]
            swapped[1::3] = data[1:usable:3]
            swapped[2::3] = data[0:usable:3]
            out.writeframes(bytes(swapped))
            left -= want
def seconds(byte_count):
    """Convert a length in bytes of 3-byte samples into seconds at SAMPLE_RATE."""
    sample_count = byte_count / 3
    return sample_count / SAMPLE_RATE
def default_output_path(src):
    """Return the default WAV path for src: its extension replaced by "_mix.wav".

    Unlike a plain str.replace(".bin", ...), the result always differs from
    src, even when src has no ".bin" extension, and only the final extension
    is touched.
    """
    root, _ext = os.path.splitext(src)
    return root + "_mix.wav"


# Script entry point: locate the audio region, list the detected tracks and
# write the last one (taken to be the mix) out as a WAV file.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <backup.bin> [output.wav]")
        sys.exit(1)
    src = sys.argv[1]
    dst = sys.argv[2] if len(sys.argv) > 2 else default_output_path(src)
    # Opening the output truncates it, so refuse to write over the input image.
    if os.path.realpath(dst) == os.path.realpath(src):
        print("Error: output path is the same as the input image")
        sys.exit(1)
    size = os.path.getsize(src)
    with open(src, "rb") as f:
        print("Finding audio offset...")
        audio_offset = find_audio_offset(f, size)
        if audio_offset is None:
            print("Error: could not find audio region")
            sys.exit(1)
        print(f" Audio starts at 0x{audio_offset:08X} ({audio_offset})")
        print("Scanning for track boundaries (this may take a moment)...")
        tracks = find_track_boundaries(f, audio_offset, size)
        print(f"\nFound {len(tracks)} track(s):")
        for i, (start, end) in enumerate(tracks):
            dur = seconds(end - start)
            # The last detected track is treated as the final mix.
            label = "← mix" if i == len(tracks) - 1 else ""
            print(f" Track {i+1}: 0x{start:08X}–0x{end:08X} ({dur:.1f}s) {label}")
        if not tracks:
            print("No tracks found — try lowering SILENCE_THRESH or MIN_GAP_SECS")
            sys.exit(1)
        mix_start, mix_end = tracks[-1]
        print(f"\nExtracting mix to {dst} ...")
        write_wav(dst, f, mix_start, mix_end)
        dur = seconds(mix_end - mix_start)
        print(f" Done: {dur:.1f}s, {(mix_end - mix_start) // 1024} KB")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment