Skip to content

Instantly share code, notes, and snippets.

@glowinthedark
Forked from RavuAlHemio/exmp3.py
Created August 2, 2020 16:38
Show Gist options
  • Save glowinthedark/d64e042fb388cb9458a2e3c64dfd6dd9 to your computer and use it in GitHub Desktop.
Save glowinthedark/d64e042fb388cb9458a2e3c64dfd6dd9 to your computer and use it in GitHub Desktop.
find and extract MP3 files from a binary file
#!/usr/bin/env python3
from math import floor
import struct
# Bit rates in bits per second, keyed by the 4-bit bitrate index of the frame
# header. Indices 0b0000 and 0b1111 are intentionally absent; find_mp3 rejects
# both before looking a value up here.
mp3_bit_rates = {
    index: kbps * 1000
    for index, kbps in enumerate(
        (32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320),
        start=1,
    )
}
# Sampling rates in Hz, keyed by the 2-bit sampling-rate index of the frame
# header. Index 0b11 is intentionally absent; find_mp3 treats it as reserved.
mp3_sampling_rates = dict(enumerate((44100, 48000, 32000)))
def _mp3_frame_length(header_number):
    """Return the frame length in bytes for a valid MP3 frame header.

    *header_number* is the 4 candidate header bytes decoded as a big-endian
    unsigned 32-bit integer. Returns ``None`` when the value is not an
    acceptable MPEG-1 Layer III frame header.
    """
    # frame sync: 0b11111111 111xxxxx xxxxxxxx xxxxxxxx
    if (header_number & 0xFFE00000) != 0xFFE00000:
        return None
    # MPEG version: 0bxxxxxxxx xxxVVxxx xxxxxxxx xxxxxxxx
    # only 0b11 (MPEG1) is accepted; 0b01 is reserved, the rest are not MP3
    if (header_number & 0x00180000) >> 19 != 0b11:
        return None
    # MPEG layer: 0bxxxxxxxx xxxxxLLx xxxxxxxx xxxxxxxx
    # only 0b01 (Layer 3) is accepted; 0b00 is reserved, the rest are not MP3
    if (header_number & 0x00060000) >> 17 != 0b01:
        return None
    # bitrate: 0bxxxxxxxx xxxxxxxx BBBBxxxx xxxxxxxx
    # indices 0b0000 and 0b1111 are not in the table and are rejected
    bit_rate = mp3_bit_rates.get((header_number & 0x0000F000) >> 12)
    if bit_rate is None:
        return None
    # sampling rate: 0bxxxxxxxx xxxxxxxx xxxxRRxx xxxxxxxx
    # index 0b11 is reserved and therefore not in the table
    sampling_rate = mp3_sampling_rates.get((header_number & 0x00000C00) >> 10)
    if sampling_rate is None:
        return None
    # emphasis: 0bxxxxxxxx xxxxxxxx xxxxxxxx xxxxxxEE (0b10 is reserved)
    if (header_number & 0x00000003) == 0b10:
        return None
    # Integer division keeps the computation exact (no float rounding).
    frame_length = 144 * bit_rate // sampling_rate
    # padding: 0bxxxxxxxx xxxxxxxx xxxxxxPx xxxxxxxx adds one byte
    if header_number & 0x00000200:
        frame_length += 1
    return frame_length


def _write_mp3(prefix, counter, data):
    """Write *data* to the file named ``<prefix><counter:04>.mp3``."""
    file_name = "{0}{1:04}.mp3".format(prefix, counter)
    with open(file_name, "wb") as f:
        f.write(data)


def find_mp3(stream, prefix):
    """Scan a binary stream for MP3 data and write each run to its own file.

    The stream is searched byte by byte for a valid MPEG-1 Layer III frame
    header. Once one is found the whole frame is read, and frames that follow
    each other directly are collected into the same run. A run ends when the
    bytes after a frame are not a valid header, or when the stream ends.

    Each run is written to ``<prefix>NNNN.mp3``, where ``NNNN`` is a
    zero-padded counter starting at 0000.

    Args:
        stream: Binary file-like object providing ``read(n)``.
        prefix: String prepended to the generated output file names.

    Returns:
        None.
    """
    counter = 0
    # bytearray grows in place; repeated ``bytes +=`` would copy the whole
    # buffer for every frame.
    pending = bytearray()
    window = stream.read(4)
    # Fewer than 4 bytes means the stream is exhausted.
    while len(window) == 4:
        (header_number,) = struct.unpack(">I", window)
        frame_length = _mp3_frame_length(header_number)
        if frame_length is None:
            # Not a frame header: any run collected so far is complete.
            if pending:
                _write_mp3(prefix, counter, pending)
                counter += 1
                pending = bytearray()
            # Slide the 4-byte window forward by one byte.
            window = window[1:] + stream.read(1)
            continue
        # Valid header: take the whole frame (header + remaining bytes).
        pending += window
        pending += stream.read(frame_length - 4)
        # The next candidate header starts right after this frame.
        window = stream.read(4)
    # The stream ended while a run was still open (the MP3 data reached the
    # end of the input); write it out instead of dropping it.
    if pending:
        _write_mp3(prefix, counter, pending)
if __name__ == '__main__':
    import sys

    # Every command-line argument names an input file; the same path is used
    # as the prefix for the extracted output files.
    for input_path in sys.argv[1:]:
        with open(input_path, "rb") as source:
            find_mp3(source, input_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment