Last active
January 20, 2025 02:39
-
-
Save JoshRosen/41bc11f1befae4e55d2fa16a90499f6a to your computer and use it in GitHub Desktop.
ChatGPT o1 generated code for dropping Info frames from MP3 files; largely untested and over-simplified, not production grade, YMMV, etc. See https://github.com/remsky/Kokoro-FastAPI/issues/57#issuecomment-2601202635 for motivating context
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct
from io import BytesIO
def is_mpeg2_l3_sync(header_int: int) -> bool:
    """
    Return True if ``header_int`` looks like a usable MPEG-2 Layer III frame header.

    ``header_int`` is the 4 header bytes interpreted as a big-endian unsigned
    integer. The header is accepted only when all of the following hold:

    - the top 11 bits are all set (frame sync, 0x7FF),
    - version_id == 2 (MPEG-2),
    - layer_index == 1 (Layer III),
    - the bitrate index is not the forbidden value 0b1111,
    - the sample-rate index is not the reserved value 0b11.

    The last two checks reject bit patterns that can never start a real frame,
    which cuts down on false positives when callers scan arbitrary bytes for
    the next frame boundary.
    """
    # Frame sync check: the 11 most significant bits must all be 1.
    if ((header_int >> 21) & 0x7FF) != 0x7FF:
        return False

    version_id = (header_int >> 19) & 0x3   # 3=MPEG-1, 2=MPEG-2, 0=MPEG-2.5
    layer_index = (header_int >> 17) & 0x3  # 1=Layer III
    if version_id != 2 or layer_index != 1:
        return False

    bitrate_index = (header_int >> 12) & 0xF     # 0xF is forbidden
    sample_rate_index = (header_int >> 10) & 0x3  # 0x3 is reserved
    return bitrate_index != 0xF and sample_rate_index != 0x3
# Bitrates (kbit/s) for MPEG-2 Layer III, indexed by the 4-bit bitrate index.
# Index 0 means "free format" (length not derivable from the header).
_MPEG2_L3_BITRATES_KBPS = (0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160)
# Sample rates (Hz) for MPEG-2, indexed by the 2-bit sample-rate index.
_MPEG2_SAMPLE_RATES_HZ = (22050, 24000, 16000)


def _mpeg2_l3_frame_length(header_int: int) -> int:
    """Return the frame length in bytes declared by an MPEG-2 L3 header, or 0 if unknown."""
    bitrate_index = (header_int >> 12) & 0xF
    sample_rate_index = (header_int >> 10) & 0x3
    padding = (header_int >> 9) & 0x1
    if bitrate_index in (0x0, 0xF) or sample_rate_index == 0x3:
        # Free-format, forbidden bitrate, or reserved sample rate: no usable length.
        return 0
    bitrate_bps = _MPEG2_L3_BITRATES_KBPS[bitrate_index] * 1000
    sample_rate_hz = _MPEG2_SAMPLE_RATES_HZ[sample_rate_index]
    # MPEG-2 Layer III carries 576 samples per frame => 576 / 8 = 72 bytes per (bps / Hz).
    return 72 * bitrate_bps // sample_rate_hz + padding


def _find_second_frame_offset(data: bytes, first_header: int) -> int:
    """Return the offset of the second MPEG-2 L3 frame in ``data``, or -1 if none is found."""
    last_start = len(data) - 4  # last offset at which a full 4-byte header still fits

    # Preferred: trust the length declared by the first header, but only when a
    # valid sync really sits at that offset. This avoids cutting the frame short
    # at a sync-like byte pattern that happens to occur inside the frame body.
    declared = _mpeg2_l3_frame_length(first_header)
    if 0 < declared <= last_start:
        if is_mpeg2_l3_sync(struct.unpack_from('>I', data, declared)[0]):
            return declared

    # Fallback: byte-wise scan from offset 1. Every sync starts with 0xFF, so
    # jump between 0xFF bytes instead of testing each offset individually.
    pos = data.find(b'\xff', 1)
    while 0 <= pos <= last_start:
        if is_mpeg2_l3_sync(struct.unpack_from('>I', data, pos)[0]):
            return pos
        pos = data.find(b'\xff', pos + 1)
    return -1


def strip_first_frame_if_info_mpeg2(data: bytes) -> bytes:
    """
    Drop the first MPEG-2 Layer III frame of ``data`` if it contains an 'Info' tag.

    1) Verify offset 0 is a valid MPEG-2 Layer III frame header.
    2) Locate the start of the *second* frame: first via the frame length
       declared in the first header (accepted only if a valid MPEG-2 L3 sync
       is found there), otherwise by scanning forward for the next sync.
    3) If the *first* frame (data[0:second_frame]) contains b'Info', remove it
       entirely by returning data[second_frame:].
    4) Otherwise, return ``data`` unchanged.

    Assumes:
    - offset 0 is indeed an MPEG-2 L3 header (no ID3 or junk).
    - we only care about dropping the first frame if it has 'Info'.
    - we do NOT handle MPEG-1 or 2.5 or ID3 in this snippet.

    Returns ``data`` itself (not a copy) whenever nothing is stripped, including
    when the input is shorter than 4 bytes or no second frame can be found.
    """
    # Must have at least 4 bytes for the first header.
    if len(data) < 4:
        return data

    first_header = struct.unpack('>I', data[:4])[0]
    if not is_mpeg2_l3_sync(first_header):
        # Not MPEG-2 L3 => do nothing.
        return data

    second_frame = _find_second_frame_offset(data, first_header)
    if second_frame < 0:
        # Never found a second frame, so the first frame's extent is unknown.
        return data

    if b'Info' in data[:second_frame]:
        # Drop the entire first frame.
        return data[second_frame:]
    return data
def strip_info_inplace(mp3_buf: BytesIO) -> None:
    """
    Strip the first-frame 'Info' VBR header from ``mp3_buf`` IN PLACE.

    The whole buffer is read, passed through strip_first_frame_if_info_mpeg2(),
    and, only if something was actually removed, the same BytesIO object is
    truncated and rewritten with the stripped bytes.

    On return the stream position is always 0, so the caller can read the
    (possibly shortened) content straight away.
    """
    # 1) Read the entire content.
    mp3_buf.seek(0)
    original_data = mp3_buf.read()

    # 2) Strip the 'Info' VBR header from the first frame, if present.
    stripped_data = strip_first_frame_if_info_mpeg2(original_data)

    # 3) Rewrite the BytesIO only when bytes were removed. Stripping only ever
    #    drops a leading slice, so a length change is a sufficient test.
    if len(stripped_data) != len(original_data):
        mp3_buf.seek(0)
        mp3_buf.truncate(0)
        mp3_buf.write(stripped_data)

    # Leave the stream positioned at the start for later reads.
    mp3_buf.seek(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Code above was generated by ChatGPT o1 with essentially no edits from me. Comments, etc., are reflective of its code style, not mine.