Created
January 24, 2024 10:11
-
-
Save remorses/3368603aa5e68063a935b3ac886e393f to your computer and use it in GitHub Desktop.
Remove Adobe watermarks from a PDF with Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pypdf import PdfReader, PdfWriter | |
def remove_watermark(pdf_path, output_path):
    """Write a copy of a PDF with Adobe watermark XObjects dropped.

    Every page of ``pdf_path`` is inspected. Any entry of the page's
    ``/Resources`` -> ``/XObject`` dictionary whose
    ``/PieceInfo`` -> ``/ADBE_CompoundType`` -> ``/Private`` value equals
    ``'/Watermark'`` is deleted from that dictionary. All pages, modified
    or not, are then written to ``output_path``.

    Args:
        pdf_path: Path of the PDF to read.
        output_path: Path the resulting PDF is written to.

    Note:
        Only the resource dictionary entry is removed; the page content
        stream itself is not rewritten.
    """
    reader = PdfReader(pdf_path)
    writer = PdfWriter()

    for page in reader.pages:
        if '/Resources' in page and '/XObject' in page['/Resources']:
            xobjects = page['/Resources']['/XObject']
            # Iterate over a snapshot of the keys because entries are
            # deleted from the dictionary inside the loop.
            for name in list(xobjects.keys()):
                xobject = xobjects[name]
                try:
                    private = xobject['/PieceInfo']['/ADBE_CompoundType']['/Private']
                except (KeyError, TypeError):
                    # The XObject lacks the Adobe piece-info chain (missing
                    # key, or an intermediate value that is not a
                    # dictionary), so it is not a tagged watermark.
                    continue
                if private == '/Watermark':
                    print('found watermark')
                    del xobjects[name]
        writer.add_page(page)

    with open(output_path, 'wb') as output_file:
        writer.write(output_file)
# Example run: read 'file.pdf' and write the cleaned copy next to it.
pdf_path, output_path = 'file.pdf', 'path_to_output_pdf.pdf'
remove_watermark(pdf_path=pdf_path, output_path=output_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment