JoshRosen · January 20, 2025 02:39 · JoshRosen · Jan 20, 2025
diff --git a/strip_info.py b/strip_info.py
 import struct
 from io import BytesIO

 def is_mpeg2_l3_sync(header_int: int) -> bool:
    """
    Tell whether a frame header word marks an MPEG-2 Layer III frame.

    Only bits 31..17 of header_int are examined:
      - bits 31..21 must all be set (the 11-bit frame sync, 0x7FF),
      - bits 20..19 must equal 2 (version id: MPEG-2),
      - bits 18..17 must equal 1 (layer index: Layer III).
    All other bits are ignored.
    """
    # A single mask selects the sync, version and layer fields together.
    sync_version_layer_mask = (0x7FF << 21) | (0x3 << 19) | (0x3 << 17)
    # The value those fields must hold: sync=0x7FF, version_id=2, layer_index=1.
    mpeg2_layer3_pattern = (0x7FF << 21) | (2 << 19) | (1 << 17)
    return (header_int & sync_version_layer_mask) == mpeg2_layer3_pattern


 def _mpeg2_l3_frame_length(header_int: int) -> int:
    """
    Return the byte length of the MPEG-2 Layer III frame described by
    header_int, or 0 when the header does not determine one (free-format or
    reserved bitrate index, reserved sample-rate index).

    Only the bitrate, sample-rate and padding fields are read; the caller is
    expected to have checked sync/version/layer already.
    """
    # Bitrate (kbit/s) and sample-rate (Hz) tables for MPEG-2 Layer III,
    # indexed by the raw header fields. 0 marks free-format / reserved entries.
    bitrates_kbps = (0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0)
    sample_rates_hz = (22050, 24000, 16000, 0)

    kbps = bitrates_kbps[(header_int >> 12) & 0xF]
    rate_hz = sample_rates_hz[(header_int >> 10) & 0x3]
    if not kbps or not rate_hz:
        return 0

    padding = (header_int >> 9) & 0x1
    # An MPEG-2 Layer III frame holds 576 samples, i.e. 576 / 8 = 72 bytes
    # per unit of (bitrate / sample rate), plus one optional padding byte.
    return 72 * kbps * 1000 // rate_hz + padding


 def strip_first_frame_if_info_mpeg2(data: bytes) -> bytes:
    """
    Drop the first MPEG-2 Layer III frame of data when it contains 'Info'.

    1) Verify offset 0 is a valid MPEG-2 Layer III frame header.
    2) Locate the start of the *second* frame:
       - preferably at the frame length computed from the first header
         (bitrate, sample rate, padding), provided a valid MPEG-2 L3 sync
         really sits there;
       - otherwise by scanning forward from offset 1 for the next sync.
       Using the computed length first avoids cutting the frame at a byte
       pattern inside its payload that merely looks like a sync.
    3) If the first frame (data[0:second_frame]) contains b'Info', return
       data from the second frame onward.
    4) Otherwise return data unchanged (the very same object).

    Assumes:
      - offset 0 is indeed an MPEG-2 L3 header (no ID3 or junk).
      - we only care about dropping the first frame if it has 'Info'.
      - we do NOT handle MPEG-1 or 2.5 or ID3 in this snippet.

    Returns data itself when it is shorter than 4 bytes, does not start with
    an MPEG-2 L3 header, has no second frame, or has no 'Info' in the first
    frame.
    """
    # Must have at least 4 bytes for the first header
    if len(data) < 4:
        return data

    first_header = struct.unpack('>I', data[:4])[0]
    if not is_mpeg2_l3_sync(first_header):
        # Not MPEG-2 L3 => do nothing
        return data

    # Last offset at which a complete 4-byte header can still start.
    last_header_start = len(data) - 4
    second_frame = -1

    # Preferred boundary: the length the first header itself declares,
    # accepted only if a valid sync is actually found at that offset.
    frame_len = _mpeg2_l3_frame_length(first_header)
    if 0 < frame_len <= last_header_start:
        next_header = int.from_bytes(data[frame_len:frame_len + 4], byteorder='big', signed=False)
        if is_mpeg2_l3_sync(next_header):
            second_frame = frame_len

    # Fallback: scan for the next sync. A sync always begins with a 0xFF
    # byte, so hop between 0xFF bytes instead of testing every offset.
    if second_frame < 0:
        candidate = data.find(b'\xff', 1)
        while 0 < candidate <= last_header_start:
            cand_header = int.from_bytes(data[candidate:candidate + 4], byteorder='big', signed=False)
            if is_mpeg2_l3_sync(cand_header):
                second_frame = candidate
                break
            candidate = data.find(b'\xff', candidate + 1)

    # No second frame, or the first frame is an ordinary one: keep everything.
    if second_frame < 0 or b'Info' not in data[:second_frame]:
        return data

    # Drop the entire first frame
    return data[second_frame:]



 def strip_info_inplace(mp3_buf: BytesIO) -> None:
    """
    Strips the first-frame 'Info' VBR header from the given mp3_buf IN PLACE
    (same BytesIO object).

    The whole buffer is read, passed through strip_first_frame_if_info_mpeg2,
    and written back only if a frame was actually removed. In every case the
    buffer position is left at offset 0 on return.
    """
    # 1) Read the entire content
    mp3_buf.seek(0)
    original_data = mp3_buf.read()

    # 2) Strip the 'Info' VBR header from the first frame, if present
    stripped_data = strip_first_frame_if_info_mpeg2(original_data)

    # 3) Rewrite the BytesIO only when something was removed. The stripper
    #    returns either its input unchanged or a strictly shorter tail of it,
    #    so comparing lengths is enough to detect a change.
    if len(stripped_data) != len(original_data):
        mp3_buf.seek(0)
        mp3_buf.truncate(0)
        mp3_buf.write(stripped_data)

    # Leave the buffer positioned at the start for subsequent reads
    mp3_buf.seek(0)
	import struct

	def is_mpeg2_l3_sync(header_int: int) -> bool:
	    """
	    Returns True if the top 11 bits = 0x7FF (frame sync),
	    version_id == 2 (MPEG-2), and layer_index == 1 (Layer III).

	    header_int is the 4-byte frame header read as a big-endian integer;
	    only bits 31..17 are examined.
	    """
	    # Frame sync check: bits 31..21 must all be set
	    if ((header_int >> 21) & 0x7FF) != 0x7FF:
	        return False

	    version_id = (header_int >> 19) & 0x3   # 3=MPEG-1, 2=MPEG-2, 0=MPEG-2.5
	    layer_index = (header_int >> 17) & 0x3  # 1=Layer III
	    return version_id == 2 and layer_index == 1


	def _mpeg2_l3_frame_length(header_int: int) -> int:
	    """
	    Return the byte length of the MPEG-2 Layer III frame described by
	    header_int, or 0 when the header does not determine one (free-format or
	    reserved bitrate index, reserved sample-rate index).

	    Only the bitrate, sample-rate and padding fields are read; the caller is
	    expected to have checked sync/version/layer already.
	    """
	    # Bitrate (kbit/s) and sample-rate (Hz) tables for MPEG-2 Layer III,
	    # indexed by the raw header fields. 0 marks free-format / reserved entries.
	    bitrates_kbps = (0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0)
	    sample_rates_hz = (22050, 24000, 16000, 0)

	    kbps = bitrates_kbps[(header_int >> 12) & 0xF]
	    rate_hz = sample_rates_hz[(header_int >> 10) & 0x3]
	    if not kbps or not rate_hz:
	        return 0

	    padding = (header_int >> 9) & 0x1
	    # An MPEG-2 Layer III frame holds 576 samples, i.e. 576 / 8 = 72 bytes
	    # per unit of (bitrate / sample rate), plus one optional padding byte.
	    return 72 * kbps * 1000 // rate_hz + padding


	def strip_first_frame_if_info_mpeg2(data: bytes) -> bytes:
	    """
	    Drop the first MPEG-2 Layer III frame of data when it contains 'Info'.

	    1) Verify offset 0 is a valid MPEG-2 Layer III frame header.
	    2) Locate the start of the second frame:
	       - preferably at the frame length computed from the first header
	         (bitrate, sample rate, padding), provided a valid MPEG-2 L3 sync
	         really sits there;
	       - otherwise by scanning forward from offset 1 for the next sync.
	       Using the computed length first avoids cutting the frame at a byte
	       pattern inside its payload that merely looks like a sync.
	    3) If the first frame (data[0:second_frame]) contains b'Info', return
	       data from the second frame onward.
	    4) Otherwise return data unchanged (the very same object).

	    Assumes:
	      - offset 0 is indeed an MPEG-2 L3 header (no ID3 or junk).
	      - we only care about dropping the first frame if it has 'Info'.
	      - we do NOT handle MPEG-1 or 2.5 or ID3 in this snippet.
	    """
	    # Must have at least 4 bytes for the first header
	    if len(data) < 4:
	        return data

	    # Check the first header
	    first_header = struct.unpack('>I', data[:4])[0]
	    if not is_mpeg2_l3_sync(first_header):
	        # Not MPEG-2 L3 => do nothing
	        return data

	    # Last offset at which a complete 4-byte header can still start.
	    end = len(data) - 4
	    next_sync = -1

	    # Preferred boundary: the length the first header itself declares,
	    # accepted only if a valid sync is actually found at that offset.
	    frame_len = _mpeg2_l3_frame_length(first_header)
	    if 0 < frame_len <= end:
	        next_header = int.from_bytes(data[frame_len:frame_len + 4], byteorder='big', signed=False)
	        if is_mpeg2_l3_sync(next_header):
	            next_sync = frame_len

	    # Fallback: scan for the next sync. A sync always begins with a 0xFF
	    # byte, so hop between 0xFF bytes instead of testing every offset.
	    if next_sync < 0:
	        i = data.find(b'\xff', 1)
	        while 0 < i <= end:
	            cand_header = int.from_bytes(data[i:i + 4], byteorder='big', signed=False)
	            if is_mpeg2_l3_sync(cand_header):
	                # Found the start of the second frame at offset i
	                next_sync = i
	                break
	            i = data.find(b'\xff', i + 1)

	    # If we never found a second sync, or the first frame has no 'Info',
	    # keep everything
	    if next_sync < 0 or b'Info' not in data[:next_sync]:
	        return data

	    # Drop the entire first frame
	    return data[next_sync:]



	def strip_info_inplace(mp3_buf: BytesIO) -> None:
	    """
	    Strips the first-frame 'Info' VBR header from the given mp3_buf IN PLACE
	    (same BytesIO object).

	    The whole buffer is read, passed through strip_first_frame_if_info_mpeg2,
	    and written back only if a frame was actually removed. In every case the
	    buffer position is left at offset 0 on return.
	    """
	    # 1) Read the entire content
	    mp3_buf.seek(0)
	    original_data = mp3_buf.read()

	    # 2) Strip the 'Info' VBR header from the first frame, if present
	    stripped_data = strip_first_frame_if_info_mpeg2(original_data)

	    # 3) Rewrite the BytesIO only when something was removed. The stripper
	    #    returns either its input unchanged or a strictly shorter tail of it,
	    #    so comparing lengths is enough to detect a change.
	    if len(stripped_data) != len(original_data):
	        mp3_buf.seek(0)
	        mp3_buf.truncate(0)
	        mp3_buf.write(stripped_data)

	    # Seek back to start so the buffer can be read from the beginning
	    mp3_buf.seek(0)