Created
October 21, 2024 18:35
-
-
Save birkin/2cc6bb1da2e9eb5056b104aa49facc9e to your computer and use it in GitHub Desktop.
edit PDF DecodeParms dict
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pikepdf | |
def _dct_decode_parms(xobj):
    """Return the /DecodeParms dictionaries of *xobj* that belong to /DCTDecode.

    /Filter may be a single name or an array of names. When it is an array,
    /DecodeParms is expected to be a parallel array, so only the entries at
    the positions of /DCTDecode filters are returned. Missing, null, or
    non-dictionary entries are skipped.
    """
    filters = xobj.get('/Filter')
    if filters is None:
        return []

    # A pikepdf.Array is not a Python ``list``; it has to be recognised
    # explicitly, otherwise filter chains are never matched.
    if isinstance(filters, (pikepdf.Array, list)):
        filter_names = [str(f) for f in filters]
    else:
        filter_names = [str(filters)]

    if '/DCTDecode' not in filter_names:
        return []

    parms = xobj.get('/DecodeParms')
    if isinstance(parms, pikepdf.Array):
        # Pick the entries aligned with each /DCTDecode filter.
        selected = [
            parms[i]
            for i, name in enumerate(filter_names)
            if name == '/DCTDecode' and i < len(parms)
        ]
    elif len(filter_names) == 1:
        # Single filter with a single parameter dictionary.
        selected = [parms]
    else:
        # Several filters but a lone dictionary: it is ambiguous which filter
        # the parameters belong to, so leave them untouched.
        selected = []

    return [p for p in selected if isinstance(p, pikepdf.Dictionary)]


def edit_decodeparms(pdf_path, output_path):
    """Strip unexpected /DecodeParms keys from DCTDecode image XObjects.

    Opens the PDF at *pdf_path*, walks the /XObject entries in each page's
    own /Resources dictionary, and for every XObject that uses the
    /DCTDecode filter removes all /DecodeParms keys other than
    /ColorTransform. The result is written to *output_path*.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path the modified PDF is saved to.

    Returns:
        None. Progress is reported on stdout.
    """
    with pikepdf.open(pdf_path) as pdf:
        for page_num, page in enumerate(pdf.pages, start=1):
            resources = page.get('/Resources', {})
            xobjects = resources.get('/XObject', {})
            for xobj_name, xobj in xobjects.items():
                for decode_parms in _dct_decode_parms(xobj):
                    # Snapshot the keys first so the dictionary is not
                    # mutated while it is being iterated.
                    invalid_keys = [
                        k for k in decode_parms.keys() if k != '/ColorTransform'
                    ]
                    if not invalid_keys:
                        continue
                    print(f"Page {page_num}, XObject {xobj_name}: Removing invalid keys {invalid_keys} from DecodeParms")
                    for key in invalid_keys:
                        del decode_parms[key]  # Remove the invalid key
        # Save the modified PDF to a new file (must happen while it is open)
        pdf.save(output_path)
    print(f"Modified PDF saved to {output_path}")
# Replace with your actual file paths.
# Guarded so that importing this module does not trigger a PDF rewrite.
if __name__ == "__main__":
    edit_decodeparms('/path/to/original/HH012060_1146.pdf', '/path/to/new/HH012060_1146_modified.pdf')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment