Skip to content

Instantly share code, notes, and snippets.

@Sunchock
Last active April 13, 2024 01:14
Show Gist options
  • Save Sunchock/285b4d17561126a508086aaa2243218a to your computer and use it in GitHub Desktop.
Save Sunchock/285b4d17561126a508086aaa2243218a to your computer and use it in GitHub Desktop.
A Python module to help parse PNG files
#!/usr/bin/env python3
# This file represents the PNG file structure and its chunks
# Based on the PNG file format specification: https://www.w3.org/TR/PNG/
# NB: Some chunks are incomplete, feel free to complete them
#TODO: complete some chunks
PNG_SIGNATURE: bytes = b'\x89PNG\r\n\x1a\n'
#region CRC computation
# 256-entry lookup table for the CRC below: entry n is the value obtained by
# running the byte value n through eight single-bit steps, least-significant
# bit first, with the polynomial constant 0xEDB88320.
CRC_TABLE: list[int] = []
for n in range(256):
    c = n
    for _ in range(8):
        # Shift one bit out; fold the polynomial in only when that bit was set.
        c = (c >> 1) ^ (0xEDB88320 if c & 1 else 0)
    CRC_TABLE.append(c)
def crc(data: bytes) -> bytes:
    """Return the CRC-32 of *data* as four big-endian bytes.

    The value is the standard CRC-32 (initial value 0xFFFFFFFF, final
    complement), i.e. the same checksum the table-driven loop over
    ``CRC_TABLE`` produces, but computed by the standard library's C
    implementation instead of a per-byte Python loop.

    Args:
        data: Bytes-like object to checksum (for a PNG chunk: the chunk
            type followed by the chunk data).

    Returns:
        The 32-bit checksum encoded as 4 bytes, most significant byte first.
    """
    # Function-scope import so the module's top-level import list is untouched.
    import zlib

    # zlib.crc32 returns an unsigned 32-bit integer on Python 3.
    return zlib.crc32(data).to_bytes(4, byteorder='big')
#endregion
#region Base PNG Chunk
"""
PNG Chunk structure
"""
class PNGChunk:
    """Generic PNG chunk split into its length, type, data and CRC fields."""

    chunk_length: int  # 4 bytes
    chunk_type: str  # 4 bytes
    chunk_data: bytes  # chunk_length bytes
    chunk_CRC: bytes  # 4 bytes

    def __init__(self, chunk: bytes) -> None:
        """Slice the four chunk fields out of *chunk*.

        *chunk* must start at the chunk's length field; bytes after the CRC
        are ignored. Slicing is used throughout, so a truncated buffer
        yields short fields rather than an error.

        Raises:
            ValueError: if the 4 type bytes are not valid UTF-8.
        """
        self.chunk_length = int.from_bytes(chunk[:4], byteorder='big')
        type_field = chunk[4:8]
        try:
            self.chunk_type = type_field.decode('utf-8')
        except UnicodeDecodeError:
            self.chunk_type = "Unknown"
            raise ValueError("Unable to decode chunk type", type_field)
        data_end = 8 + self.chunk_length
        self.chunk_data = chunk[8:data_end]
        self.chunk_CRC = chunk[data_end:data_end + 4]

    def __bytes__(self) -> bytes:
        """Serialize the chunk as length + type + data + CRC."""
        return b"".join((
            self.chunk_length.to_bytes(4, byteorder='big'),
            self.chunk_type.encode('utf-8'),
            self.chunk_data,
            self.chunk_CRC,
        ))

    def __str__(self) -> str:
        """Return a one-line description of the four fields."""
        return (
            f"Chunk type: {self.chunk_type}"
            f", length: {self.chunk_length}"
            f", data: {self.chunk_data}"
            f", CRC: {self.chunk_CRC}"
        )

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Verifies, in order: the length range, that every type character is
        an ASCII letter, that the data size matches the declared length, and
        that the stored CRC matches the CRC of type + data. The first failed
        check prints a message and returns False.
        """
        if not 0 <= self.chunk_length <= 2 ** 31 - 1:
            print("Invalid chunk length")
            return False
        if any(
            not ('\x41' <= letter <= '\x5A' or '\x61' <= letter <= '\x7A')
            for letter in self.chunk_type
        ):
            print("Invalid chunk type")
            return False
        if len(self.chunk_data) != self.chunk_length:
            print("Invalid chunk data length")
            return False
        expected_crc = crc(self.chunk_type.encode('utf-8') + self.chunk_data)
        if self.chunk_CRC != expected_crc:
            print("Invalid CRC")
            return False
        return True
#endregion
#region Mandatory PNG Chunks
"""
IHDR Chunk structure
"""
class IHDRChunk(PNGChunk):
    """IHDR chunk: image dimensions and pixel-format parameters.

    The 13 data bytes are read as width (4), height (4), bit depth,
    colour type, compression method, filter method and interlace method
    (1 byte each).
    """

    width: int
    height: int
    depth: int
    color_type: int
    compression: int
    filter: int
    interlace: int

    # Bit depths the PNG specification permits for each colour type.
    _ALLOWED_DEPTHS = {
        0: (1, 2, 4, 8, 16),  # greyscale
        2: (8, 16),           # truecolour
        3: (1, 2, 4, 8),      # indexed-colour
        4: (8, 16),           # greyscale with alpha
        6: (8, 16),           # truecolour with alpha
    }

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the IHDR-specific ones.

        Raises:
            ValueError: if the chunk type bytes cannot be decoded.
            IndexError: if the chunk data holds fewer than 13 bytes.
        """
        super().__init__(chunk)
        self.width = int.from_bytes(self.chunk_data[:4], byteorder='big')
        self.height = int.from_bytes(self.chunk_data[4:8], byteorder='big')
        self.depth = self.chunk_data[8]
        self.color_type = self.chunk_data[9]
        self.compression = self.chunk_data[10]
        self.filter = self.chunk_data[11]
        self.interlace = self.chunk_data[12]

    def __str__(self) -> str:
        """Return a one-line description of the parsed IHDR fields."""
        retstr = f"IHDR: width: {self.width}"
        retstr += f", height: {self.height}"
        retstr += f", depth: {self.depth}"
        retstr += f", color_type: {self.color_type}"
        retstr += f", compression: {self.compression}"
        retstr += f", filter: {self.filter}"
        retstr += f", interlace: {self.interlace}"
        return retstr

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Only the IHDR fields are validated; this override does not run the
        length/type/CRC checks of ``PNGChunk.check_format``. The first
        failed check prints a message and returns False.
        """
        max_dimension = 2 ** 31 - 1  # PNG four-byte integers are limited to 31 bits
        if not (1 <= self.width <= max_dimension and 1 <= self.height <= max_dimension):
            print("Invalid image size")
            return False
        if self.depth not in [1, 2, 4, 8, 16]:
            print("Invalid bit depth")
            return False
        if self.color_type not in [0, 2, 3, 4, 6]:
            print("Invalid color type")
            return False
        # Each colour type only allows a subset of the bit depths.
        if self.depth not in self._ALLOWED_DEPTHS[self.color_type]:
            print("Invalid bit depth for color type")
            return False
        if self.compression != 0:
            print("Invalid compression method")
            return False
        if self.filter != 0:
            print("Invalid filter method")
            return False
        if self.interlace not in [0, 1]:
            print("Invalid interlace method")
            return False
        return True
"""
IDAT Chunk structure
"""
class IDATChunk(PNGChunk):
    """IDAT chunk; no fields beyond the generic PNGChunk ones are parsed.

    Construction is inherited unchanged from PNGChunk.
    """
"""
IEND Chunk structure
"""
class IENDChunk(PNGChunk):
    """IEND chunk; no fields beyond the generic PNGChunk ones are parsed.

    Construction is inherited unchanged from PNGChunk.
    """
#endregion
#region Optional PNG Chunks
"""
bKGD Chunk structure
"""
class bKGDChunk(PNGChunk):
    """bKGD chunk; its payload is kept raw in ``chunk_data`` (not parsed yet).

    Construction is inherited unchanged from PNGChunk.
    """
"""
cHRM Chunk structure
"""
class cHRMChunk(PNGChunk):
    """cHRM chunk: eight consecutive 4-byte big-endian integers."""

    white_point_x: int
    white_point_y: int
    red_x: int
    red_y: int
    green_x: int
    green_y: int
    blue_x: int
    blue_y: int

    # Attribute names in the order their 4-byte values appear in the data.
    _FIELDS = (
        "white_point_x", "white_point_y",
        "red_x", "red_y",
        "green_x", "green_y",
        "blue_x", "blue_y",
    )

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the eight chromaticity values."""
        super().__init__(chunk)
        for position, field in enumerate(self._FIELDS):
            start = 4 * position
            # Slicing (not indexing) means missing bytes decode as 0.
            raw_value = self.chunk_data[start:start + 4]
            setattr(self, field, int.from_bytes(raw_value, byteorder='big'))

    def __str__(self) -> str:
        """Return a one-line description of the eight parsed values."""
        described = ", ".join(
            f"{field}: {getattr(self, field)}" for field in self._FIELDS
        )
        return "cHRM: " + described
"""
gAMA Chunk structure
"""
class gAMAChunk(PNGChunk):
    """gAMA chunk: the whole data field read as one big-endian integer."""

    gamma: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the gamma value."""
        super().__init__(chunk)
        self.gamma = int.from_bytes(self.chunk_data, 'big')

    def __str__(self) -> str:
        """Return a one-line description of the gamma value."""
        return "gAMA: gamma: {}".format(self.gamma)

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Accepts gamma values from 1 to 100000 inclusive; otherwise prints a
        message and returns False.
        """
        if 1 <= self.gamma <= 100000:
            return True
        print("Invalid gamma value")
        return False
"""
iCCP Chunk structure
"""
class iCCPChunk(PNGChunk):
    """iCCP chunk; its payload is kept raw in ``chunk_data`` (not parsed yet).

    Construction is inherited unchanged from PNGChunk.
    """
"""
iTXt Chunk structure
"""
class iTXtChunk(PNGChunk):
    """iTXt chunk: international textual data.

    Data layout per the PNG specification::

        keyword (Latin-1) | NUL | compression flag | compression method |
        language tag | NUL | translated keyword (UTF-8) | NUL | text (UTF-8)

    The flag and method bytes are frequently NUL themselves, so the fields
    are peeled off one separator at a time instead of splitting the whole
    payload on every NUL byte.
    """

    keyword: str
    compressed_flag: int
    compression_method: int
    language_tag: str
    translated_keyword: str
    text: str

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the iTXt-specific ones.

        When the compression flag is 1 and the method is 0 (zlib), the text
        is inflated before decoding, so ``text`` always holds readable text.

        Raises:
            ValueError: if the chunk type, language tag, translated keyword
                or text cannot be decoded, or the compressed text cannot be
                inflated.
            IndexError: if the data ends before the flag/method bytes.
        """
        # Function-scope import so the module's top-level import list is untouched.
        import zlib

        super().__init__(chunk)
        keyword_raw, _, remainder = self.chunk_data.partition(b'\x00')
        # Keywords are Latin-1, which maps bytes to characters one-to-one.
        self.keyword = keyword_raw.decode('iso-8859-1')
        self.compressed_flag = remainder[0]
        self.compression_method = remainder[1]
        language_raw, _, remainder = remainder[2:].partition(b'\x00')
        translated_raw, _, text_raw = remainder.partition(b'\x00')
        try:
            if self.compressed_flag == 1 and self.compression_method == 0:
                text_raw = zlib.decompress(text_raw)
            self.language_tag = language_raw.decode('utf-8')
            self.translated_keyword = translated_raw.decode('utf-8')
            self.text = text_raw.decode('utf-8')
        except (UnicodeDecodeError, zlib.error) as err:
            raise ValueError("iTXtChunk: Unable to decode text") from err

    def __str__(self) -> str:
        """Return a one-line description of the parsed iTXt fields."""
        retstr = f"iTXt: keyword: {self.keyword}"
        retstr += f", compressed_flag: {self.compressed_flag}"
        retstr += f", compression_method: {self.compression_method}"
        retstr += f", language_tag: {self.language_tag}"
        retstr += f", translated_keyword: {self.translated_keyword}"
        retstr += f", text: {self.text}"
        return retstr
"""
pHYs Chunk structure
"""
class pHYsChunk(PNGChunk):
    """pHYs chunk: two 4-byte big-endian integers followed by a unit byte."""

    pixels_per_unit_x: int
    pixels_per_unit_y: int
    unit_specifier: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the pHYs-specific ones.

        Raises:
            IndexError: if the chunk data holds fewer than 9 bytes.
        """
        super().__init__(chunk)
        payload = self.chunk_data
        self.pixels_per_unit_x = int.from_bytes(payload[:4], byteorder='big')
        self.pixels_per_unit_y = int.from_bytes(payload[4:8], byteorder='big')
        self.unit_specifier = payload[8]

    def __str__(self) -> str:
        """Return a one-line description of the parsed pHYs fields."""
        return (
            f"pHYs: pixels_per_unit_x: {self.pixels_per_unit_x}"
            f", pixels_per_unit_y: {self.pixels_per_unit_y}"
            f", unit_specifier: {self.unit_specifier}"
        )

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Both densities must be at least 1 and the unit specifier must be 0
        or 1; the first failed check prints a message and returns False.
        """
        if min(self.pixels_per_unit_x, self.pixels_per_unit_y) < 1:
            print("Invalid pixels per unit")
            return False
        if self.unit_specifier not in (0, 1):
            print("Invalid unit specifier")
            return False
        return True
"""
sRGB Chunk structure
"""
class sRGBChunk(PNGChunk):
    """sRGB chunk: the first data byte is the rendering intent."""

    rendering_intent: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the rendering intent.

        Raises:
            IndexError: if the chunk data is empty.
        """
        super().__init__(chunk)
        payload = self.chunk_data
        self.rendering_intent = payload[0]

    def __str__(self) -> str:
        """Return a one-line description of the rendering intent."""
        return "sRGB: rendering_intent: {}".format(self.rendering_intent)
"""
tEXt Chunk structure
"""
class tEXtChunk(PNGChunk):
    """tEXt chunk: a Latin-1 keyword, a NUL separator, then Latin-1 text."""

    keyword: str
    text: str

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the keyword and text.

        Both parts are decoded as ISO-8859-1 (the encoding the PNG
        specification assigns to tEXt). The text starts right after the
        first NUL byte, so its position is computed on the raw bytes rather
        than on the length of a decoded string. If the data contains no NUL
        byte, all of it is the keyword and the text is empty.

        Raises:
            ValueError: if the chunk type bytes cannot be decoded.
        """
        super().__init__(chunk)
        keyword_raw, _, text_raw = self.chunk_data.partition(b'\x00')
        # ISO-8859-1 maps every byte value to a character, so neither decode can fail.
        self.keyword = keyword_raw.decode('iso-8859-1')
        self.text = text_raw.decode('iso-8859-1')

    def __str__(self) -> str:
        """Return a one-line description of the keyword and text."""
        return f"tEXt: keyword: {self.keyword}, text: {self.text}"
"""
tIME Chunk structure
"""
class tIMEChunk(PNGChunk):
    """tIME chunk: a 2-byte big-endian year followed by five 1-byte fields."""

    year: int
    month: int
    day: int
    hour: int
    minute: int
    second: int

    # Single-byte fields in the order they follow the 2-byte year.
    _BYTE_FIELDS = ("month", "day", "hour", "minute", "second")

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the timestamp fields.

        Raises:
            IndexError: if the chunk data holds fewer than 7 bytes.
        """
        super().__init__(chunk)
        self.year = int.from_bytes(self.chunk_data[:2], byteorder='big')
        for offset, field in enumerate(self._BYTE_FIELDS, start=2):
            setattr(self, field, self.chunk_data[offset])

    def __str__(self) -> str:
        """Return a one-line description of the timestamp fields."""
        parts = [f"year: {self.year}"]
        parts.extend(
            f"{field}: {getattr(self, field)}" for field in self._BYTE_FIELDS
        )
        return "tIME: " + ", ".join(parts)
"""
tpNG Chunk structure
"""
class tpNGChunk(PNGChunk):
    """tpNG chunk; its payload is kept raw in ``chunk_data`` (not parsed yet).

    Construction is inherited unchanged from PNGChunk.
    """
"""
zTXt Chunk structure
"""
class zTXtChunk(PNGChunk):
    """zTXt chunk: Latin-1 keyword, NUL, compression method, compressed text.

    The compressed stream is NOT inflated here: ``text`` (and its alias
    ``compressed_text``) hold the raw compressed bytes mapped one-to-one to
    characters through ISO-8859-1.
    """

    keyword: str
    compression_method: int
    compressed_text: str
    text: str

    def __init__(self, chunk: bytes) -> None:
        """Parse the generic chunk fields, then the zTXt-specific ones.

        The keyword is decoded as ISO-8859-1 and field positions are
        computed on the raw bytes, so non-ASCII keywords do not shift the
        offsets of the following fields.

        Raises:
            ValueError: if the chunk type bytes cannot be decoded.
            IndexError: if no compression-method byte follows the keyword.
        """
        super().__init__(chunk)
        keyword_raw, _, remainder = self.chunk_data.partition(b'\x00')
        self.keyword = keyword_raw.decode('iso-8859-1')
        self.compression_method = remainder[0]
        # ISO-8859-1 maps every byte value to a character, so this cannot fail.
        self.text = remainder[1:].decode('iso-8859-1')
        # Populate the attribute the class annotates but never assigned before.
        self.compressed_text = self.text

    def __str__(self) -> str:
        """Return a one-line description of the parsed zTXt fields."""
        retstr = f"zTXt: keyword: {self.keyword}"
        retstr += f", compression_method: {self.compression_method}"
        retstr += f", text: {self.text}"
        return retstr
#endregion
#region PNG Structure
"""
PNG file structure
"""
class PNGFile:
    """In-memory PNG file: the signature plus its chunks, grouped by role."""

    signature: bytes
    IHDR: IHDRChunk  # 25-byte header
    optional_chunks: list[PNGChunk]  # Optional chunks
    IDAT: list[IDATChunk]  # Compressed data, variable length
    IEND: IENDChunk  # End of image, 12 bytes

    def __init__(self) -> None:
        """Create an empty PNG; IHDR and IEND stay unset until assigned."""
        self.signature = PNG_SIGNATURE
        self.optional_chunks = []
        self.IDAT = []

    @classmethod
    def create_from_bytes(cls, bytes_data: bytes) -> 'PNGFile':
        """Create a PNGFile object from a PNG file bytes.

        Raises:
            ValueError: if *bytes_data* does not start with the PNG signature.
        """
        if not bytes_data.startswith(PNG_SIGNATURE):
            raise ValueError("Invalid PNG signature")
        png = cls()
        png.parse_bytes(bytes_data[len(PNG_SIGNATURE):])
        return png

    @classmethod
    def create_from_chunks(cls, chunks: list[PNGChunk]) -> 'PNGFile':
        """Create a PNGFile object from a list of PNG chunks.

        IHDR, IDAT and IEND chunks are re-parsed into their dedicated
        classes; every other chunk is stored as given.
        """
        png = cls()
        for chunk in chunks:
            kind = chunk.chunk_type
            if kind == 'IHDR':
                png.IHDR = IHDRChunk(bytes(chunk))
            elif kind == 'IDAT':
                png.IDAT.append(IDATChunk(bytes(chunk)))
            elif kind == 'IEND':
                png.IEND = IENDChunk(bytes(chunk))
            else:
                png.optional_chunks.append(chunk)
        return png

    def parse_bytes(self, bytes_data: bytes) -> None:
        """Parse PNG file bytes (signature already removed) to extract PNG chunks."""
        # Optional chunk types that have a dedicated parser class.
        optional_parsers = {
            'bKGD': bKGDChunk,
            'cHRM': cHRMChunk,
            'gAMA': gAMAChunk,
            'iCCP': iCCPChunk,
            'iTXt': iTXtChunk,
            'pHYs': pHYsChunk,
            'sRGB': sRGBChunk,
            'tEXt': tEXtChunk,
            'tIME': tIMEChunk,
            'tpNG': tpNGChunk,
            'zTXt': zTXtChunk,
        }
        while len(bytes_data) > 0:
            # Generic parse first: it yields the type and the length to skip.
            header = PNGChunk(bytes_data)
            kind = header.chunk_type
            # Mandatory chunks
            if kind == 'IHDR':
                self.IHDR = IHDRChunk(bytes_data)
            elif kind == 'IDAT':
                self.IDAT.append(IDATChunk(bytes_data))
            elif kind == 'IEND':
                self.IEND = IENDChunk(bytes_data)
            # Optional chunks
            elif kind in optional_parsers:
                self.optional_chunks.append(optional_parsers[kind](bytes_data))
            # NB: this length test exists because a PNG file was seen ending
            # with 2 NULL bytes (cause unknown); such empty chunks are skipped.
            elif len(kind) != 0:
                self.optional_chunks.append(header)
                print("Unknown chunk type was added:", kind)
            # Skip to next chunk: 12 bytes of length/type/CRC plus the data.
            bytes_data = bytes_data[12 + header.chunk_length:]

    def is_png_valid(self) -> bool:
        """Check that this object has the minimum chunks the PNG standard requires.

        Returns True if IHDR and IEND are set and at least one IDAT chunk is
        present, False otherwise.
        """
        if not (hasattr(self, 'IHDR') and hasattr(self, 'IEND')):
            return False
        return len(self.IDAT) > 0

    def to_file(self, output_path:str) -> None:
        """Write this PNG object to a file.

        Chunks are written as IHDR, all optional chunks, all IDAT chunks,
        then IEND.

        Raises:
            ValueError: if a mandatory chunk is missing.
        """
        # Check for mandatory chunks
        if not self.is_png_valid():
            raise ValueError("PNG format incorrect")
        # Write to file
        with open(output_path, 'wb') as ofile:
            ofile.write(self.signature)
            ordered_chunks = [self.IHDR, *self.optional_chunks, *self.IDAT, self.IEND]
            for chunk in ordered_chunks:
                ofile.write(bytes(chunk))
#endregion
@Sunchock
Copy link
Author

Sunchock commented Apr 7, 2024

V2 Changelog:

  • Add support for multiple IDAT chunks for bigger PNG files (max IDAT chunk size is 65,535 bytes)
  • Add some useful check_format functions to a few chunks, based on my needs
  • Fix some typos

@Sunchock
Copy link
Author

V3 Changelog:

  • Add CRC check
  • Add classmethods to create PNG with a list of chunks or bytes data
  • Add more comments
  • Improve error handling
  • Remove useless type conversions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment