# -
# -
# Save glowinthedark/d64e042fb388cb9458a2e3c64dfd6dd9 to your computer and use it in GitHub Desktop.
# Find and extract MP3 files from a binary file.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import struct
from math import floor
# Bit rates in bits per second for MPEG-1 Layer III frames, keyed by the
# 4-bit bitrate index taken from the frame header.  Indices 0b0000 and
# 0b1111 are deliberately absent: find_mp3 rejects both of them before a
# rate is ever looked up.
mp3_bit_rates = {
    0b0001: 32000,
    0b0010: 40000,
    0b0011: 48000,
    0b0100: 56000,
    0b0101: 64000,
    0b0110: 80000,
    0b0111: 96000,
    0b1000: 112000,
    0b1001: 128000,
    0b1010: 160000,
    0b1011: 192000,
    0b1100: 224000,
    0b1101: 256000,
    0b1110: 320000,
}
# Sampling rates in Hz for MPEG-1 frames, keyed by the 2-bit sampling-rate
# index taken from the frame header.  Index 0b11 is absent because
# find_mp3 treats it as a reserved value and skips such headers.
mp3_sampling_rates = {
    0b00: 44100,
    0b01: 48000,
    0b10: 32000,
}
def _mp3_frame_length(header_number):
    """Return the length in bytes of the MP3 frame described by a header.

    Args:
        header_number: The four candidate header bytes decoded as a
            big-endian unsigned 32-bit integer.

    Returns:
        The total frame length in bytes (the 4 header bytes included), or
        ``None`` when the value is not an acceptable MPEG-1 Layer III frame
        header.
    """
    # frame sync: 0b11111111 111xxxxx xxxxxxxx xxxxxxxx
    if (header_number & 0xFFE00000) != 0xFFE00000:
        return None

    # MPEG version: 0bxxxxxxxx xxxVVxxx xxxxxxxx xxxxxxxx
    # Only 0b11 (MPEG-1) is accepted; 0b01 is reserved and the remaining
    # values are other MPEG versions (= not MP3).
    if (header_number & 0x00180000) >> 19 != 0b11:
        return None

    # MPEG layer: 0bxxxxxxxx xxxxxLLx xxxxxxxx xxxxxxxx
    # Only 0b01 (Layer 3) is accepted; 0b00 is reserved.
    if (header_number & 0x00060000) >> 17 != 0b01:
        return None

    # bitrate: 0bxxxxxxxx xxxxxxxx BBBBxxxx xxxxxxxx
    # Indices 0b0000 and 0b1111 are not in the table and are rejected here.
    bit_rate = mp3_bit_rates.get((header_number & 0x0000F000) >> 12)
    if bit_rate is None:
        return None

    # sampling rate: 0bxxxxxxxx xxxxxxxx xxxxRRxx xxxxxxxx
    # Index 0b11 is reserved, is not in the table, and is rejected here.
    sampling_rate = mp3_sampling_rates.get((header_number & 0x00000C00) >> 10)
    if sampling_rate is None:
        return None

    # emphasis: 0bxxxxxxxx xxxxxxxx xxxxxxxx xxxxxxEE (0b10 is reserved)
    if (header_number & 0x00000003) == 0b10:
        return None

    # The CRC flag, private bit, channel mode, mode extension, copyright and
    # original bits do not influence the frame length and are not inspected.
    frame_length = 144 * bit_rate // sampling_rate

    # padding: 0bxxxxxxxx xxxxxxxx xxxxxxPx xxxxxxxx adds one byte
    if header_number & 0x00000200:
        frame_length += 1
    return frame_length


def _write_mp3_file(prefix, index, data):
    """Write *data* to the file named ``<prefix><index as 4 digits>.mp3``."""
    file_name = "{0}{1:04}.mp3".format(prefix, index)
    with open(file_name, "wb") as f:
        f.write(data)


def find_mp3(stream, prefix):
    """Extract every run of consecutive MP3 frames found in *stream*.

    The stream is scanned one byte at a time for a valid MPEG-1 Layer III
    frame header.  Once a header is found, the whole frame is consumed and
    the next header is expected immediately after it; frames that follow
    each other without a gap are collected into one output file.  Output
    files are named ``<prefix>0000.mp3``, ``<prefix>0001.mp3`` and so on.

    A run of frames that reaches the end of the stream is written as well
    (previously it was dropped because the function returned on end of
    input without flushing the collected bytes).

    Args:
        stream: Binary file-like object; only ``read(n)`` is used.
        prefix: String prepended to the generated output file names.

    Returns:
        None.
    """
    counter = 0
    # bytearray gives amortised O(1) appends; bytes += would copy every time.
    pending = bytearray()
    window = stream.read(4)

    # A window shorter than four bytes means the input is exhausted.
    while len(window) == 4:
        (header_number,) = struct.unpack(">I", window)
        frame_length = _mp3_frame_length(header_number)

        if frame_length is None:
            # Not a frame header: a run of frames (if any) has just ended.
            if pending:
                _write_mp3_file(prefix, counter, pending)
                counter += 1
                pending = bytearray()
            # Slide the 4-byte window forward by one byte and keep scanning.
            window = window[1:] + stream.read(1)
            continue

        # Valid header: take the header plus the rest of the frame, then
        # look for the next header directly behind it.
        pending += window
        pending += stream.read(frame_length - 4)
        window = stream.read(4)

    # Flush frames that were still being collected when the input ended.
    if pending:
        _write_mp3_file(prefix, counter, pending)
if __name__ == '__main__':
    import sys

    # Every command-line argument is the path of an input file.  The same
    # path is passed as the output prefix, so extracted files are created
    # as <path>0000.mp3, <path>0001.mp3, ...
    for arg in sys.argv[1:]:
        with open(arg, "rb") as f:
            find_mp3(f, arg)
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment