Skip to content

Instantly share code, notes, and snippets.

@JoshRosen
Last active January 20, 2025 02:39
Show Gist options
  • Save JoshRosen/41bc11f1befae4e55d2fa16a90499f6a to your computer and use it in GitHub Desktop.
Save JoshRosen/41bc11f1befae4e55d2fa16a90499f6a to your computer and use it in GitHub Desktop.
ChatGPT o1 generated code for dropping Info frames from MP3 files; largely untested and over-simplified, not production grade, YMMV, etc. See https://github.com/remsky/Kokoro-FastAPI/issues/57#issuecomment-2601202635 for motivating context
import struct
from io import BytesIO
def is_mpeg2_l3_sync(header_int: int) -> bool:
    """Report whether a 32-bit header word looks like an MPEG-2 Layer III frame.

    Only bits 31..17 of ``header_int`` are examined:

    * bits 31..21 -- frame sync, must be all ones (0x7FF)
    * bits 20..19 -- version id, must be 2 (3=MPEG-1, 2=MPEG-2, 0=MPEG-2.5)
    * bits 18..17 -- layer index, must be 1 (Layer III)

    The remaining header fields (protection bit, bitrate, sample rate, ...)
    are not inspected.
    """
    # One mask covers the sync, version and layer fields together.
    signature_mask = 0xFFFE0000
    # sync = 0x7FF, version id = 0b10, layer index = 0b01
    mpeg2_layer3_signature = 0xFFF20000
    return (header_int & signature_mask) == mpeg2_layer3_signature
# Bitrates in kbit/s for MPEG-2 Layer III, indexed by the header's 4-bit
# bitrate field. Index 0 means "free format" (the frame length cannot be
# derived from the header) and index 15 is invalid, so both map to None.
_MPEG2_L3_BITRATES_KBPS = (
    None, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, None,
)

# Sample rates in Hz for MPEG-2, indexed by the header's 2-bit sample-rate
# field. Index 3 is reserved.
_MPEG2_SAMPLE_RATES_HZ = (22050, 24000, 16000, None)


def _declared_frame_length(header_int: int):
    """Return the frame length in bytes declared by an MPEG-2 L3 header.

    Returns None when the header uses a free-format or invalid bitrate, or a
    reserved sample rate, because no length can be computed in those cases.
    """
    bitrate_kbps = _MPEG2_L3_BITRATES_KBPS[(header_int >> 12) & 0xF]
    sample_rate_hz = _MPEG2_SAMPLE_RATES_HZ[(header_int >> 10) & 0x3]
    if bitrate_kbps is None or sample_rate_hz is None:
        return None
    padding = (header_int >> 9) & 0x1
    # MPEG-2 Layer III frames carry 576 samples => 576 / 8 = 72 bytes per
    # (bitrate / sample_rate) unit, plus one optional padding byte.
    return 72 * bitrate_kbps * 1000 // sample_rate_hz + padding


def _find_next_sync(data: bytes, start: int):
    """Return the first offset >= start holding an MPEG-2 L3 sync, or None."""
    last_start = len(data) - 4
    # Every sync begins with 0xFF, so jump between 0xFF bytes instead of
    # testing each offset individually.
    pos = data.find(b'\xff', start)
    while 0 <= pos <= last_start:
        if is_mpeg2_l3_sync(int.from_bytes(data[pos:pos + 4], 'big')):
            return pos
        pos = data.find(b'\xff', pos + 1)
    return None


def strip_first_frame_if_info_mpeg2(data: bytes) -> bytes:
    """Drop the first MPEG-2 Layer III frame if it contains an 'Info' tag.

    Steps:
      1) Verify that offset 0 holds an MPEG-2 Layer III frame header.
      2) Locate the start of the *second* frame. The length declared by the
         first header (bitrate, sample rate, padding) is preferred, provided
         that offset really holds another MPEG-2 L3 sync. If the header gives
         no usable length (free format / reserved values) or the declared
         offset is not a sync, fall back to scanning forward for the next
         sync pattern.
      3) If the bytes of the first frame contain b'Info', return the data
         from the second frame onward; otherwise return ``data`` unchanged.

    Using the declared length avoids cutting the first frame at a spurious
    sync pattern that happens to occur inside its payload.

    Assumes:
      - offset 0 is an MPEG-2 L3 header (no ID3 tag or leading junk);
      - only the first frame is ever dropped;
      - MPEG-1, MPEG-2.5 and ID3 are not handled.

    Args:
        data: Raw MP3 bytes.

    Returns:
        ``data`` itself when nothing is stripped, otherwise a new bytes
        object starting at the second frame.
    """
    # Must have at least 4 bytes for the first header.
    if len(data) < 4:
        return data

    first_header = struct.unpack('>I', data[:4])[0]
    if not is_mpeg2_l3_sync(first_header):
        # Not MPEG-2 L3 => do nothing.
        return data

    second_frame_start = None

    # Preferred boundary: the length the first header itself declares, but
    # only if a plausible frame header really starts there.
    declared = _declared_frame_length(first_header)
    if declared is not None and declared + 4 <= len(data):
        following = int.from_bytes(data[declared:declared + 4], 'big')
        if is_mpeg2_l3_sync(following):
            second_frame_start = declared

    # Fallback: scan from offset 1 for the next sync pattern.
    if second_frame_start is None:
        second_frame_start = _find_next_sync(data, 1)

    # If no second frame was found, do nothing.
    if second_frame_start is None:
        return data

    # The tag is searched for anywhere in the first frame.
    if b'Info' in data[:second_frame_start]:
        return data[second_frame_start:]
    return data
def strip_info_inplace(mp3_buf: BytesIO) -> None:
    """Strip a leading 'Info' frame from ``mp3_buf`` IN PLACE.

    The whole buffer is read, passed through
    ``strip_first_frame_if_info_mpeg2``, and, if a frame was removed, the
    same BytesIO object is truncated and rewritten with the remaining bytes.

    Args:
        mp3_buf: Buffer holding the MP3 data; its current position is
            ignored.

    Returns:
        None. On return the buffer position is at offset 0 so the caller can
        read the (possibly shortened) data straight away.
    """
    # Read the entire content regardless of the current position.
    mp3_buf.seek(0)
    original_data = mp3_buf.read()

    stripped_data = strip_first_frame_if_info_mpeg2(original_data)

    # Stripping only ever removes a prefix, so a length change is a reliable
    # "something was removed" signal; skip the rewrite when nothing changed.
    if len(stripped_data) != len(original_data):
        mp3_buf.seek(0)
        mp3_buf.truncate(0)
        mp3_buf.write(stripped_data)

    # Leave the buffer positioned at the start for subsequent reads.
    mp3_buf.seek(0)
@JoshRosen
Copy link
Author

JoshRosen commented Jan 20, 2025

Code above was generated by ChatGPT o1 with essentially no edits from me. Comments, etc., are reflective of its code style, not mine.

⚠️ This is likely buggy and likely does not properly implement the spec. It is shared only for the sake of illustrating the general directional viability of a "drop the info frame" approach and should NOT be interpreted as a correct or valid way to do this (audio stuff is outside my core areas of expertise).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment