Last active
April 13, 2024 01:14
-
-
Save Sunchock/285b4d17561126a508086aaa2243218a to your computer and use it in GitHub Desktop.
A Python module to help parse PNG files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Model of the PNG file structure and of the chunks it is made of.
# Follows the PNG file format specification: https://www.w3.org/TR/PNG/
# NB: several chunk classes are only partially implemented; feel free to complete them.
# TODO: complete the partially implemented chunks

# Eight bytes expected at the very start of a PNG file: 0x89, "PNG", CR, LF, 0x1A, LF.
PNG_SIGNATURE: bytes = bytes((0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A))
#region CRC computation
def _make_crc_table() -> list[int]:
    """Return the 256-entry lookup table consumed by crc().

    Entry ``i`` is the value ``i`` pushed through eight shift/XOR steps with
    the constant 0xEDB88320.
    """
    table: list[int] = []
    for value in range(256):
        remainder = value
        for _bit in range(8):
            # Shift one bit out; fold the constant in when that bit was set.
            lowest_bit_set = remainder & 1
            remainder >>= 1
            if lowest_bit_set:
                remainder ^= 0xEDB88320
        table.append(remainder)
    return table


CRC_TABLE: list[int] = _make_crc_table()


def crc(data: bytes) -> bytes:
    """Return the table-driven CRC of *data* as 4 big-endian bytes.

    The running value starts at 0xFFFFFFFF and is inverted again at the end.
    """
    register = 0xFFFFFFFF
    for byte in data:
        index = (register ^ byte) & 0xFF
        register = (register >> 8) ^ CRC_TABLE[index]
    return (register ^ 0xFFFFFFFF).to_bytes(4, byteorder='big')
#endregion
#region Base PNG Chunk
class PNGChunk:
    """Generic PNG chunk: length, type, data and CRC fields.

    Attributes:
        chunk_length: size declared in the first 4 bytes (big-endian).
        chunk_type: the 4 type bytes decoded to text, e.g. ``"IHDR"``.
        chunk_data: payload; shorter than ``chunk_length`` if the input is truncated.
        chunk_CRC: the bytes following the payload (4 when the input is complete).
    """

    chunk_length: int  # 4 bytes
    chunk_type: str  # 4 bytes
    chunk_data: bytes  # chunk_length bytes
    chunk_CRC: bytes  # 4 bytes

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk found at the start of *chunk*.

        Bytes located after the CRC are ignored, so the unparsed remainder of
        a file can be passed as is.

        Raises:
            ValueError: if the chunk type bytes cannot be decoded; the
                original UnicodeDecodeError is attached as the cause.
        """
        self.chunk_length = int.from_bytes(chunk[:4], byteorder='big')
        try:
            self.chunk_type = chunk[4:8].decode('utf-8')
        except UnicodeDecodeError as err:
            # Kept from the original so the attribute exists even on failure.
            self.chunk_type = "Unknown"
            raise ValueError("Unable to decode chunk type", chunk[4:8]) from err
        data_end = 8 + self.chunk_length
        self.chunk_data = chunk[8:data_end]
        self.chunk_CRC = chunk[data_end:data_end + 4]

    def __bytes__(self) -> bytes:
        """Serialize the chunk as length + type + data + CRC, using the stored fields."""
        return b"".join((
            self.chunk_length.to_bytes(4, byteorder='big'),
            self.chunk_type.encode('utf-8'),
            self.chunk_data,
            self.chunk_CRC,
        ))

    def __str__(self) -> str:
        """Return a one-line description of the four chunk fields."""
        return (
            f"Chunk type: {self.chunk_type}"
            f", length: {self.chunk_length}"
            f", data: {self.chunk_data}"
            f", CRC: {self.chunk_CRC}"
        )

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Verifies, in order: the declared length range, the chunk type (exactly
        four ASCII letters), that the payload has the declared length, and
        that the stored CRC matches the CRC of type + data. The first failure
        is printed and False is returned.
        """
        if not 0 <= self.chunk_length <= 2 ** 31 - 1:
            print("Invalid chunk length")
            return False
        # A truncated input yields fewer than four characters; reject it here
        # instead of relying on the CRC comparison to catch it.
        type_is_valid = len(self.chunk_type) == 4 and all(
            'A' <= char <= 'Z' or 'a' <= char <= 'z' for char in self.chunk_type
        )
        if not type_is_valid:
            print("Invalid chunk type")
            return False
        if len(self.chunk_data) != self.chunk_length:
            print("Invalid chunk data length")
            return False
        if self.chunk_CRC != crc(self.chunk_type.encode('utf-8') + self.chunk_data):
            print("Invalid CRC")
            return False
        return True
#endregion | |
#region Mandatory PNG Chunks
class IHDRChunk(PNGChunk):
    """IHDR chunk: image dimensions and encoding parameters.

    The fields are read from fixed offsets of the chunk data: width (bytes
    0-3) and height (bytes 4-7) as big-endian integers, then one byte each
    for depth, color_type, compression, filter and interlace.
    """

    width: int
    height: int
    depth: int
    color_type: int
    compression: int
    filter: int
    interlace: int

    # Bit depths the PNG specification allows for each colour type.
    _ALLOWED_DEPTHS = {
        0: (1, 2, 4, 8, 16),
        2: (8, 16),
        3: (1, 2, 4, 8),
        4: (8, 16),
        6: (8, 16),
    }

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then decode the IHDR fields.

        Raises:
            ValueError: from PNGChunk when the chunk type cannot be decoded.
            IndexError: if the chunk data is shorter than 13 bytes.
        """
        super().__init__(chunk)
        payload = self.chunk_data
        self.width = int.from_bytes(payload[:4], byteorder='big')
        self.height = int.from_bytes(payload[4:8], byteorder='big')
        self.depth = payload[8]
        self.color_type = payload[9]
        self.compression = payload[10]
        self.filter = payload[11]
        self.interlace = payload[12]

    def __str__(self) -> str:
        """Return a one-line description of the decoded IHDR fields."""
        return (
            f"IHDR: width: {self.width}"
            f", height: {self.height}"
            f", depth: {self.depth}"
            f", color_type: {self.color_type}"
            f", compression: {self.compression}"
            f", filter: {self.filter}"
            f", interlace: {self.interlace}"
        )

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Only the decoded IHDR fields are inspected. The first failure is
        printed and False is returned.
        """
        # NOTE(review): the generic PNGChunk checks (length, type, CRC) are
        # not run here -- confirm whether callers expect them for IHDR.
        max_dimension = 2 ** 31 - 1  # upper bound of a PNG four-byte integer
        if not (1 <= self.width <= max_dimension and 1 <= self.height <= max_dimension):
            print("Invalid image size")
            return False
        if self.depth not in (1, 2, 4, 8, 16):
            print("Invalid bit depth")
            return False
        if self.color_type not in self._ALLOWED_DEPTHS:
            print("Invalid color type")
            return False
        # Each colour type accepts only a subset of the bit depths.
        if self.depth not in self._ALLOWED_DEPTHS[self.color_type]:
            print("Invalid bit depth for color type")
            return False
        if self.compression != 0:
            print("Invalid compression method")
            return False
        if self.filter != 0:
            print("Invalid filter method")
            return False
        if self.interlace not in (0, 1):
            print("Invalid interlace method")
            return False
        return True
""" | |
IDAT Chunk structure | |
""" | |
class IDATChunk(PNGChunk):
    """IDAT chunk; parsed entirely by PNGChunk, no extra fields are decoded."""
""" | |
IEND Chunk structure | |
""" | |
class IENDChunk(PNGChunk):
    """IEND chunk; parsed entirely by PNGChunk, no extra fields are decoded."""
#endregion | |
#region Optional PNG Chunks
class bKGDChunk(PNGChunk):
    """bKGD chunk; parsed entirely by PNGChunk, its payload is not decoded yet."""
""" | |
cHRM Chunk structure | |
""" | |
class cHRMChunk(PNGChunk):
    """cHRM chunk: eight big-endian 4-byte integers read in order from the data.

    The values are stored as raw integers; no scaling is applied.
    """

    white_point_x: int
    white_point_y: int
    red_x: int
    red_y: int
    green_x: int
    green_y: int
    blue_x: int
    blue_y: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then decode the eight consecutive 4-byte fields."""
        super().__init__(chunk)
        payload = self.chunk_data
        (
            self.white_point_x,
            self.white_point_y,
            self.red_x,
            self.red_y,
            self.green_x,
            self.green_y,
            self.blue_x,
            self.blue_y,
        ) = (
            int.from_bytes(payload[offset:offset + 4], byteorder='big')
            for offset in range(0, 32, 4)
        )

    def __str__(self) -> str:
        """Return a one-line description of the eight decoded values."""
        field_names = (
            "white_point_x",
            "white_point_y",
            "red_x",
            "red_y",
            "green_x",
            "green_y",
            "blue_x",
            "blue_y",
        )
        described = ", ".join(f"{name}: {getattr(self, name)}" for name in field_names)
        return "cHRM: " + described
""" | |
gAMA Chunk structure | |
""" | |
class gAMAChunk(PNGChunk):
    """gAMA chunk: the whole chunk data read as one big-endian integer."""

    gamma: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then decode its entire payload into ``gamma``."""
        super().__init__(chunk)
        payload = self.chunk_data
        self.gamma = int.from_bytes(payload, byteorder='big')

    def __str__(self) -> str:
        """Return a one-line description of the gamma value."""
        return "gAMA: gamma: {}".format(self.gamma)

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Accepts gamma values from 1 to 100000 inclusive; anything else is
        reported and rejected.
        """
        # NOTE(review): the PNG specification stores gamma * 100000 and does
        # not appear to cap it at 100000 -- confirm this upper bound.
        if 1 <= self.gamma <= 100000:
            return True
        print("Invalid gamma value")
        return False
""" | |
iCCP Chunk structure | |
""" | |
class iCCPChunk(PNGChunk):
    """iCCP chunk; parsed entirely by PNGChunk, its payload is not decoded yet."""
""" | |
iTXt Chunk structure | |
""" | |
class iTXtChunk(PNGChunk):
    """iTXt chunk: keyword plus UTF-8 text, optionally zlib-compressed.

    The chunk data is read as: keyword, NUL, compression flag (1 byte),
    compression method (1 byte), language tag, NUL, translated keyword, NUL,
    text. ``text`` always holds the decoded (and, if needed, inflated) text.
    """

    keyword: str
    compressed_flag: int
    compression_method: int
    language_tag: str
    translated_keyword: str
    text: str

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then split its payload into the iTXt fields.

        Raises:
            ValueError: if the language tag, translated keyword or text cannot
                be decoded, or compressed text cannot be inflated.
            IndexError: if the payload ends before the two compression bytes.
        """
        import zlib  # local import: this module has no import section

        super().__init__(chunk)
        # Fields are cut one at a time. A global split on NUL is wrong here
        # because the flag and method bytes are usually NUL themselves.
        keyword_raw, _, rest = self.chunk_data.partition(b'\x00')
        # Latin-1 maps one byte to one character and cannot fail.
        self.keyword = keyword_raw.decode('iso-8859-1')
        self.compressed_flag = rest[0]
        self.compression_method = rest[1]
        language_raw, _, rest = rest[2:].partition(b'\x00')
        translated_raw, _, text_raw = rest.partition(b'\x00')
        try:
            self.language_tag = language_raw.decode('utf-8')
            self.translated_keyword = translated_raw.decode('utf-8')
            if self.compressed_flag == 1:
                # Only method 0 (zlib) is handled; anything else is undecodable.
                if self.compression_method != 0:
                    raise ValueError("iTXtChunk: Unable to decode text")
                text_raw = zlib.decompress(text_raw)
            self.text = text_raw.decode('utf-8')
        except (UnicodeDecodeError, zlib.error) as err:
            raise ValueError("iTXtChunk: Unable to decode text") from err

    def __str__(self) -> str:
        """Return a one-line description of all decoded iTXt fields."""
        return (
            f"iTXt: keyword: {self.keyword}"
            f", compressed_flag: {self.compressed_flag}"
            f", compression_method: {self.compression_method}"
            f", language_tag: {self.language_tag}"
            f", translated_keyword: {self.translated_keyword}"
            f", text: {self.text}"
        )
""" | |
pHYs Chunk structure | |
""" | |
class pHYsChunk(PNGChunk):
    """pHYs chunk: two big-endian 4-byte integers followed by a unit byte."""

    pixels_per_unit_x: int
    pixels_per_unit_y: int
    unit_specifier: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then decode the X value, the Y value and the unit byte.

        Raises:
            IndexError: if the chunk data is shorter than 9 bytes.
        """
        super().__init__(chunk)
        payload = self.chunk_data
        self.pixels_per_unit_x = int.from_bytes(payload[:4], byteorder='big')
        self.pixels_per_unit_y = int.from_bytes(payload[4:8], byteorder='big')
        self.unit_specifier = payload[8]

    def __str__(self) -> str:
        """Return a one-line description of the three decoded fields."""
        parts = (
            f"pixels_per_unit_x: {self.pixels_per_unit_x}",
            f"pixels_per_unit_y: {self.pixels_per_unit_y}",
            f"unit_specifier: {self.unit_specifier}",
        )
        return "pHYs: " + ", ".join(parts)

    def check_format(self) -> bool:
        """Check if the chunk has valid values.

        Both pixel densities must be at least 1 and the unit specifier must be
        0 or 1. The first failure is printed and False is returned.
        """
        if min(self.pixels_per_unit_x, self.pixels_per_unit_y) < 1:
            print("Invalid pixels per unit")
            return False
        if self.unit_specifier not in (0, 1):
            print("Invalid unit specifier")
            return False
        return True
""" | |
sRGB Chunk structure | |
""" | |
class sRGBChunk(PNGChunk):
    """sRGB chunk: exposes the first data byte as ``rendering_intent``."""

    rendering_intent: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then read the rendering intent byte.

        Raises:
            IndexError: if the chunk data is empty.
        """
        super().__init__(chunk)
        first_byte = self.chunk_data[0]
        self.rendering_intent = first_byte

    def __str__(self) -> str:
        """Return a one-line description of the rendering intent."""
        return "sRGB: rendering_intent: {}".format(self.rendering_intent)
""" | |
tEXt Chunk structure | |
""" | |
class tEXtChunk(PNGChunk):
    """tEXt chunk: a keyword and a text string separated by a NUL byte.

    Both parts are decoded as ISO-8859-1.
    """

    keyword: str
    text: str

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then split its payload at the first NUL byte.

        When no NUL byte is present the whole payload becomes the keyword and
        the text is empty.
        """
        super().__init__(chunk)
        # Splitting the raw bytes keeps the text offset right even when the
        # keyword contains non-ASCII bytes.
        keyword_raw, _, text_raw = self.chunk_data.partition(b'\x00')
        # ISO-8859-1 maps every byte to a character, so decoding cannot fail.
        self.keyword = keyword_raw.decode('iso-8859-1')
        self.text = text_raw.decode('iso-8859-1')

    def __str__(self) -> str:
        """Return a one-line description of the keyword and text."""
        return f"tEXt: keyword: {self.keyword}, text: {self.text}"
""" | |
tIME Chunk structure | |
""" | |
class tIMEChunk(PNGChunk):
    """tIME chunk: a 2-byte big-endian year followed by five single-byte fields."""

    year: int
    month: int
    day: int
    hour: int
    minute: int
    second: int

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then decode the date and time fields.

        Raises:
            IndexError: if the chunk data is shorter than 7 bytes.
        """
        super().__init__(chunk)
        payload = self.chunk_data
        self.year = int.from_bytes(payload[:2], byteorder='big')
        self.month = payload[2]
        self.day = payload[3]
        self.hour = payload[4]
        self.minute = payload[5]
        self.second = payload[6]

    def __str__(self) -> str:
        """Return a one-line description of the decoded timestamp fields."""
        field_names = ("year", "month", "day", "hour", "minute", "second")
        described = ", ".join(f"{name}: {getattr(self, name)}" for name in field_names)
        return "tIME: " + described
""" | |
tpNG Chunk structure | |
""" | |
class tpNGChunk(PNGChunk):
    """tpNG chunk; parsed entirely by PNGChunk, its payload is not decoded yet."""
""" | |
zTXt Chunk structure | |
""" | |
class zTXtChunk(PNGChunk):
    """zTXt chunk: keyword, compression method byte and compressed text.

    Attributes:
        keyword: bytes before the first NUL, decoded as ISO-8859-1.
        compression_method: the byte following that NUL.
        compressed_text: the remaining bytes, still compressed, as ISO-8859-1.
        text: the inflated text when the method is 0 and the stream is valid;
            otherwise the same value as ``compressed_text``.
    """

    keyword: str
    compression_method: int
    compressed_text: str
    text: str

    def __init__(self, chunk: bytes) -> None:
        """Parse the chunk, then split and inflate its payload.

        Raises:
            IndexError: if the compression method byte is missing.
        """
        import zlib  # local import: this module has no import section

        super().__init__(chunk)
        # Splitting the raw bytes keeps the offsets right even when the
        # keyword contains non-ASCII bytes.
        keyword_raw, _, rest = self.chunk_data.partition(b'\x00')
        self.keyword = keyword_raw.decode('iso-8859-1')
        self.compression_method = rest[0]
        compressed = rest[1:]
        # ISO-8859-1 maps every byte to a character, so decoding cannot fail.
        self.compressed_text = compressed.decode('iso-8859-1')
        inflated = None
        if self.compression_method == 0:
            try:
                inflated = zlib.decompress(compressed)
            except zlib.error:
                # Corrupt stream: keep the raw content rather than failing,
                # since construction never raised for this case before.
                inflated = None
        if inflated is None:
            self.text = self.compressed_text
        else:
            self.text = inflated.decode('iso-8859-1')

    def __str__(self) -> str:
        """Return a one-line description of the keyword, method and text."""
        return (
            f"zTXt: keyword: {self.keyword}"
            f", compression_method: {self.compression_method}"
            f", text: {self.text}"
        )
#endregion | |
#region PNG Structure
class PNGFile:
    """In-memory PNG file: signature, IHDR, optional chunks, IDAT chunks and IEND.

    ``IHDR`` and ``IEND`` are only set once a matching chunk has been loaded.
    """

    signature: bytes
    IHDR: IHDRChunk  # Header, 25 bytes
    optional_chunks: list[PNGChunk]  # Optional chunks
    IDAT: list[IDATChunk]  # Compressed data, variable length
    IEND: IENDChunk  # End of image, 12 bytes

    def __init__(self) -> None:
        """Create an empty PNG holding only the signature."""
        self.IDAT = []
        self.optional_chunks = []
        self.signature = PNG_SIGNATURE

    @classmethod
    def create_from_bytes(cls, bytes_data: bytes) -> 'PNGFile':
        """Create a PNGFile object from the bytes of a PNG file.

        Raises:
            ValueError: if *bytes_data* does not start with the PNG signature.
        """
        header, body = bytes_data[:8], bytes_data[8:]
        if header != PNG_SIGNATURE:
            raise ValueError("Invalid PNG signature")
        png = cls()
        png.parse_bytes(body)
        return png

    @classmethod
    def create_from_chunks(cls, chunks: list[PNGChunk]) -> 'PNGFile':
        """Create a PNGFile object from a list of PNG chunks.

        IHDR, IDAT and IEND chunks are re-parsed into their dedicated classes;
        every other chunk is stored unchanged in ``optional_chunks``.
        """
        png = cls()
        for chunk in chunks:
            kind = chunk.chunk_type
            if kind == 'IHDR':
                png.IHDR = IHDRChunk(bytes(chunk))
            elif kind == 'IDAT':
                png.IDAT.append(IDATChunk(bytes(chunk)))
            elif kind == 'IEND':
                png.IEND = IENDChunk(bytes(chunk))
            else:
                png.optional_chunks.append(chunk)
        return png

    def parse_bytes(self, bytes_data: bytes) -> None:
        """Parse PNG file bytes (signature already removed) to extract the chunks."""
        # Optional chunk types that have a dedicated class.
        optional_types = {
            'bKGD': bKGDChunk,
            'cHRM': cHRMChunk,
            'gAMA': gAMAChunk,
            'iCCP': iCCPChunk,
            'iTXt': iTXtChunk,
            'pHYs': pHYsChunk,
            'sRGB': sRGBChunk,
            'tEXt': tEXtChunk,
            'tIME': tIMEChunk,
            'tpNG': tpNGChunk,
            'zTXt': zTXtChunk,
        }
        remaining = bytes_data
        while remaining:
            # Generic parse first: it yields the type and the length to skip.
            chunk = PNGChunk(remaining)
            kind = chunk.chunk_type
            if kind == 'IHDR':
                self.IHDR = IHDRChunk(remaining)
            elif kind == 'IDAT':
                self.IDAT.append(IDATChunk(remaining))
            elif kind == 'IEND':
                self.IEND = IENDChunk(remaining)
            elif kind in optional_types:
                self.optional_chunks.append(optional_types[kind](remaining))
            elif len(kind) != 0:
                # An empty type means fewer than 5 bytes were left (the author
                # met a PNG file ending with 2 NULL bytes); such residue is
                # skipped instead of being stored as a chunk.
                self.optional_chunks.append(chunk)
                print("Unknown chunk type was added:", kind)
            # Skip to next chunk: 4 (length) + 4 (type) + data + 4 (CRC).
            remaining = remaining[12 + chunk.chunk_length:]

    def is_png_valid(self) -> bool:
        """Check that the minimum chunks required for a PNG are present.

        Returns True when IHDR and IEND are set and at least one IDAT chunk
        exists, False otherwise.
        """
        if not (hasattr(self, 'IHDR') and hasattr(self, 'IEND')):
            return False
        return len(self.IDAT) > 0

    def to_file(self, output_path:str) -> None:
        """Write this PNG object to a file.

        Chunks are written as: signature, IHDR, every optional chunk, every
        IDAT chunk, IEND.

        Raises:
            ValueError: if a mandatory chunk is missing.
        """
        # Check for mandatory chunks
        if not self.is_png_valid():
            raise ValueError("PNG format incorrect")
        # Write to file
        with open(output_path, 'wb') as ofile:
            ofile.write(self.signature)
            for chunk in (self.IHDR, *self.optional_chunks, *self.IDAT, self.IEND):
                ofile.write(bytes(chunk))
#endregion |
V3 Changelog:
- Add CRC check
- Add classmethods to create PNG with a list of chunks or bytes data
- Add more comments
- Improve error handling
- Remove useless type conversions
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
V2 Changelog: