raphiz · September 28, 2015 19:52 · hossainel · Jan 10, 2024 · xilopaint · Jan 10, 2024
diff --git a/pdf_remove_watermark.py b/pdf_remove_watermark.py
 from PyPDF2 import PdfFileReader, PdfFileWriter
 from PyPDF2.pdf import ContentStream
 from PyPDF2.generic import TextStringObject, NameObject
 from PyPDF2.utils import b_

 # Text that identifies the watermark (matched as a prefix) and its replacement.
 wm_text = 'Persönliches Exemplar von'
 replace_with = ''

 # Strip the watermark text from every page of input.pdf and save the result
 # as output.pdf.  The input file is kept open until the output has been
 # written, because the writer may still pull page objects from the reader
 # while serialising; both files are closed deterministically afterwards.
 with open('input.pdf', "rb") as inputStream:
     # Load PDF into pyPDF
     source = PdfFileReader(inputStream)
     output = PdfFileWriter()

     # For each page
     for page_number in range(source.getNumPages()):
         # Get the current page and its contents
         page = source.getPage(page_number)

         # Pages without a content stream have nothing to rewrite; they are
         # copied to the output unchanged instead of raising KeyError.
         if "/Contents" in page:
             content_object = page["/Contents"].getObject()
             content = ContentStream(content_object, source)

             # Loop over all pdf elements
             for operands, operator in content.operations:
                 # You might adapt this part depending on your PDF file
                 if operator != b_("TJ"):
                     continue

                 # An empty TJ array has no first element to inspect.
                 if not operands or not operands[0]:
                     continue

                 text = operands[0][0]
                 if isinstance(text, TextStringObject) and text.startswith(wm_text):
                     # TJ takes an array operand, so rewrite the array in
                     # place rather than swapping it for a bare string.
                     operands[0][:] = [TextStringObject(replace_with)]

             # Set the modified content as content object on the page
             page[NameObject('/Contents')] = content

         # Add the page to the output
         output.addPage(page)

     # Write the stream
     with open("output.pdf", "wb") as outputStream:
         output.write(outputStream)
	from PyPDF2 import PdfFileReader, PdfFileWriter
	from PyPDF2.pdf import ContentStream
	from PyPDF2.generic import TextStringObject, NameObject
	from PyPDF2.utils import b_

	# Text that identifies the watermark (matched as a prefix) and its replacement.
	wm_text = 'Persönliches Exemplar von'
	replace_with = ''

	# Strip the watermark text from every page of input.pdf and save the result
	# as output.pdf.  The input file is kept open until the output has been
	# written, because the writer may still pull page objects from the reader
	# while serialising; both files are closed deterministically afterwards.
	with open('input.pdf', "rb") as inputStream:
	    # Load PDF into pyPDF
	    source = PdfFileReader(inputStream)
	    output = PdfFileWriter()

	    # For each page
	    for page_number in range(source.getNumPages()):
	        # Get the current page and its contents
	        page = source.getPage(page_number)

	        # Pages without a content stream have nothing to rewrite; they are
	        # copied to the output unchanged instead of raising KeyError.
	        if "/Contents" in page:
	            content_object = page["/Contents"].getObject()
	            content = ContentStream(content_object, source)

	            # Loop over all pdf elements
	            for operands, operator in content.operations:
	                # You might adapt this part depending on your PDF file
	                if operator != b_("TJ"):
	                    continue

	                # An empty TJ array has no first element to inspect.
	                if not operands or not operands[0]:
	                    continue

	                text = operands[0][0]
	                if isinstance(text, TextStringObject) and text.startswith(wm_text):
	                    # TJ takes an array operand, so rewrite the array in
	                    # place rather than swapping it for a bare string.
	                    operands[0][:] = [TextStringObject(replace_with)]

	            # Set the modified content as content object on the page
	            page[NameObject('/Contents')] = content

	        # Add the page to the output
	        output.addPage(page)

	    # Write the stream
	    with open("output.pdf", "wb") as outputStream:
	        output.write(outputStream)