arnesacnussem · July 4, 2023 15:02
diff --git a/MangaBatchRemoveAD.py b/MangaBatchRemoveAD.py
 #!python 3.10
 import argparse
 from scipy.spatial import distance_matrix
 from matplotlib import pyplot as plt
 import cv2
 import numpy as np
 import zipfile
 import os
 from tqdm import tqdm
 from PIL import Image
 from pathlib import Path
 import shutil


 def crop_blank_space(img, threshold=10):
    """Crop *img* to the bounding box of its non-white content.

    A pixel counts as content when its grayscale value is below
    ``255 - threshold``.

    Args:
        img: Colour image array; it is converted with ``cv2.COLOR_BGR2GRAY``.
        threshold: How far below pure white (255) a pixel must be to count
            as content.

    Returns:
        The slice of *img* covering all detected content, or *img* itself
        when no content is found (completely blank image).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Binary mask: 255 where the pixel is dark enough to be content, else 0.
    content_mask = 255 * (gray < 255 - threshold).astype(np.uint8)
    # Morphological opening with a 2x2 kernel filters out isolated specks.
    content_mask = cv2.morphologyEx(
        content_mask, cv2.MORPH_OPEN, np.ones((2, 2), dtype=np.uint8))

    coords = cv2.findNonZero(content_mask)
    if coords is None:
        # Nothing but blank space: there is no bounding box to crop to.
        return img

    x, y, w, h = cv2.boundingRect(coords)
    return img[y:y + h, x:x + w]


 def merge_points(points, threshold):
    """Greedily thin out points that lie close to an earlier kept point.

    Points are visited in order. Each point that has not been absorbed is
    kept, and every later point closer than *threshold* (Euclidean
    distance) to it is absorbed and dropped.

    Args:
        points: Sequence of equally sized coordinate tuples.
        threshold: Distance below which a later point is absorbed.

    Returns:
        List of the kept entries of *points*, in their original order.
        An empty input yields an empty list.
    """
    count = len(points)
    if count == 0:
        return []

    coords = np.asarray(points)
    absorbed = np.zeros(count, dtype=bool)
    kept = []

    for i in range(count):
        if absorbed[i]:
            continue
        kept.append(points[i])
        if i + 1 < count:
            # Only the distances from this kept point to the later points
            # are needed, so the full n x n matrix is never materialised.
            row = distance_matrix(coords[i:i + 1], coords[i + 1:])[0]
            absorbed[i + 1:] |= row < threshold

    return kept


 def find_template(template, target, threshold=0.8, draw=False):
    """Locate occurrences of *template* inside *target*.

    Args:
        template: Image array to search for.
        target: Image array to search in.
        threshold: Minimum ``cv2.TM_CCOEFF_NORMED`` score for a location to
            count as a match.
        draw: When true, show the matches with matplotlib. The rectangles
            are drawn on a copy, so *target* is left untouched.

    Returns:
        List of ``((left, top), (right, bottom))`` tuples of plain ints, one
        per match after nearby hits (closer than 10 px) have been merged.
        Empty when either image is None, when the template is not strictly
        shorter than the target, or when it is wider than the target.
    """
    if template is None or target is None:
        return []

    template_height, template_width = template.shape[:2]
    target_height, target_width = target.shape[:2]
    # matchTemplate needs the template to fit inside the target; the height
    # test stays strict (>=) as in the original behaviour.
    if template_height >= target_height or template_width > target_width:
        return []

    result = cv2.matchTemplate(target, template, cv2.TM_CCOEFF_NORMED)

    rows, cols = np.where(result >= threshold)
    if rows.size == 0:
        return []

    # np.where yields (row, col); merge_points gets (x, y) = (col, row).
    merged_points = merge_points(list(zip(cols, rows)), threshold=10)

    matches = []
    for x, y in merged_points:
        left, top = int(x), int(y)  # plain ints instead of numpy scalars
        matches.append(
            ((left, top), (left + template_width, top + template_height)))

    if draw:
        preview = target.copy()
        for top_left, bottom_right in matches:
            cv2.rectangle(preview, top_left, bottom_right, (0, 0, 255), 2)
        plt.imshow(preview)
        plt.show()

    return matches


 def find_first_non_white_row(image, y, direction=1, threshold=25, borderSkip=5):
    # Get the height of the image
    height = image.shape[0]

    if direction == 1:
        # Start searching upward from y
        for row in range(y - borderSkip, 0, -1):
            if not (image[row, :] >= (255 - threshold)).all():
                return row - 1
        return 0
    else:
        # Start searching downward from y
        for row in range(y + borderSkip, height):
            if not (image[row, :] >= 255 - threshold).all():
                return row + 1
        return height

    return y


 def get_crop_top_bottoms(image, point_pairs):
    """Turn match rectangles into ``(top, bottom)`` row ranges.

    For every ``(top_left, bottom_right)`` pair, the top is found by
    scanning upward from the rectangle's top edge and the bottom by
    scanning downward from its bottom edge, both with
    ``find_first_non_white_row``.

    Returns:
        List of ``(top, bottom)`` tuples, one per input pair, same order.
    """
    return [
        (
            find_first_non_white_row(image, upper_left[1]),
            find_first_non_white_row(image, lower_right[1], direction=0),
        )
        for upper_left, lower_right in point_pairs
    ]


 def remove_image_part(img, y1, y2):
    """Cut the horizontal strip ``[y1, y2)`` out of *img* and close the gap.

    Args:
        img: Image object providing ``size``, ``crop`` and ``paste``
            (a PIL image in this script). It is not modified.
        y1: First row of the strip to remove (clamped to the image).
        y2: Row just past the strip to remove (clamped to the image).

    Returns:
        A new image with the rows below the strip moved up, its height
        reduced by the strip height. When the clamped strip is empty
        (``y2 <= y1``) *img* itself is returned unchanged.
    """
    width, height = img.size

    # Clamp to the image and force plain ints (callers may pass numpy ints).
    y1 = int(max(0, min(y1, height)))
    y2 = int(max(0, min(y2, height)))
    if y2 <= y1:
        return img

    new_height = height - (y2 - y1)

    # Start from the top new_height rows; rows above y1 are already correct.
    # Working on crops avoids filling with an RGB colour tuple, which does
    # not suit single-band (e.g. grayscale) images.
    result = img.crop((0, 0, width, new_height))
    if y2 < height:
        # Overwrite everything from y1 down with the part below the strip.
        result.paste(img.crop((0, y2, width, height)), (0, y1))

    return result


 def process_image(input_file, output_file, sample):
    """Remove every watermark strip from one image file.

    The file is searched for *sample*. Without a match (or when the file
    cannot be decoded as an image) it is moved to *output_file* untouched.
    Otherwise the matched strips are cut out, the result is saved to
    *output_file* and *input_file* is deleted.

    Args:
        input_file: Path of the image to process.
        output_file: Path the processed (or untouched) file ends up at.
        sample: Watermark template image array.
    """
    img = cv2.imread(input_file)
    # cv2.imread returns None for files it cannot decode (e.g. metadata
    # files inside the archive); treat those as "no watermark".
    finds = [] if img is None else find_template(sample, img, draw=False)

    if not finds:
        os.rename(input_file, output_file)
        return

    if len(finds) > 1:
        tqdm.write(f"Found {len(finds)} watermark in {input_file}")

    # Merge overlapping row ranges so no row is removed twice.
    spans = []
    for top, bottom in sorted(get_crop_top_bottoms(img, finds)):
        if spans and top <= spans[-1][1]:
            spans[-1][1] = max(spans[-1][1], bottom)
        else:
            spans.append([top, bottom])

    # Copy the pixels and release the file handle before the file is removed.
    with Image.open(input_file) as source:
        result = source.copy()

    # Cut from the bottom up: removing a strip shifts everything below it,
    # so the coordinates of strips further up stay valid.
    for top, bottom in reversed(spans):
        result = remove_image_part(result, top, bottom)

    # save image
    if result.height == 0:
        tqdm.write(
            f"Image {input_file} will be empty, creating 1px height blank image.")
        result = Image.new("RGB", (result.width, 1), (255, 255, 255))
    result.save(output_file, quality=90)
    os.remove(input_file)

 if __name__ == '__main__':
    # Command-line entry point: strip the watermark from every image of
    # every .cbz archive in a directory and write "<name>.cbz_new" next to it.
    parser = argparse.ArgumentParser(
        description='Remove watermark strips from images inside cbz archives')

    # Both positional arguments are optional and fall back to defaults.
    parser.add_argument('watermark_sample_path', help='path to the watermark sample image',
                        default="watermark_sample.bmp", nargs='?')
    parser.add_argument(
        'directory_path', help='path to the directory containing the cbz', default="./", nargs='?')

    args = parser.parse_args()

    # Collect the .cbz files (not directories) directly inside the directory.
    cbz = sorted(
        file_name for file_name in os.listdir(args.directory_path)
        if file_name.endswith('.cbz')
        and os.path.isfile(os.path.join(args.directory_path, file_name))
    )

    tqdm.write(f"Found following file to process: \n{cbz}")
    tqdm.write("Reading watermark sample...")
    # Honour the command-line argument instead of a hard-coded file name.
    sample = cv2.imread(args.watermark_sample_path)
    if sample is None:
        parser.error(
            f"cannot read watermark sample: {args.watermark_sample_path}")
    sample = crop_blank_space(sample)
    # Preview of the cropped sample; plt.show() waits for the window to close.
    plt.imshow(sample)
    plt.show()

    input_temp_dir = "temp_input"
    output_temp_dir = "temp_output"

    for c in tqdm(cbz, desc="cbz"):
        # listdir returns bare names; resolve them against the directory.
        cbz_path = os.path.join(args.directory_path, c)

        # Start every archive from clean temp directories (best effort).
        for temp_dir in (input_temp_dir, output_temp_dir):
            shutil.rmtree(temp_dir, ignore_errors=True)
        Path(output_temp_dir).mkdir(parents=True, exist_ok=True)

        tqdm.write(f"Unpacking {c}...")
        with zipfile.ZipFile(cbz_path, 'r') as zip_ref:
            zip_ref.extractall(input_temp_dir)

        # Gather every extracted file, including those in sub-folders.
        pages = []
        for root, _, files in os.walk(input_temp_dir):
            for file in files:
                pages.append(os.path.relpath(
                    os.path.join(root, file), input_temp_dir))

        for im in tqdm(sorted(pages), desc="img"):
            im_path = os.path.join(input_temp_dir, im)
            ou_path = os.path.join(output_temp_dir, im)
            Path(ou_path).parent.mkdir(parents=True, exist_ok=True)
            process_image(im_path, ou_path, sample)

        with zipfile.ZipFile(cbz_path + "_new", 'w') as zip_file:
            for root, _, files in os.walk(output_temp_dir):
                for file in sorted(files):
                    file_path = os.path.join(root, file)
                    # Keep the path relative to the temp dir as archive name.
                    zip_file.write(
                        file_path, os.path.relpath(file_path, output_temp_dir))

        shutil.rmtree(input_temp_dir, ignore_errors=True)
        shutil.rmtree(output_temp_dir, ignore_errors=True)
	#!python 3.10
	import argparse
	from scipy.spatial import distance_matrix
	from matplotlib import pyplot as plt
	import cv2
	import numpy as np
	import zipfile
	import os
	from tqdm import tqdm
	from PIL import Image
	from pathlib import Path
	import shutil


	def crop_blank_space(img, threshold=10):
	    """Crop *img* to the bounding box of its non-white content.

	    A pixel counts as content when its grayscale value is below
	    ``255 - threshold``.

	    Args:
	        img: Colour image array; it is converted with ``cv2.COLOR_BGR2GRAY``.
	        threshold: How far below pure white (255) a pixel must be to count
	            as content.

	    Returns:
	        The slice of *img* covering all detected content, or *img* itself
	        when no content is found (completely blank image).
	    """
	    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    # Binary mask: 255 where the pixel is dark enough to be content, else 0.
	    content_mask = 255 * (gray < 255 - threshold).astype(np.uint8)
	    # Morphological opening with a 2x2 kernel filters out isolated specks.
	    content_mask = cv2.morphologyEx(
	        content_mask, cv2.MORPH_OPEN, np.ones((2, 2), dtype=np.uint8))

	    coords = cv2.findNonZero(content_mask)
	    if coords is None:
	        # Nothing but blank space: there is no bounding box to crop to.
	        return img

	    x, y, w, h = cv2.boundingRect(coords)
	    return img[y:y + h, x:x + w]


	def merge_points(points, threshold):
	    """Greedily thin out points that lie close to an earlier kept point.

	    Points are visited in order. Each point that has not been absorbed is
	    kept, and every later point closer than *threshold* (Euclidean
	    distance) to it is absorbed and dropped.

	    Args:
	        points: Sequence of equally sized coordinate tuples.
	        threshold: Distance below which a later point is absorbed.

	    Returns:
	        List of the kept entries of *points*, in their original order.
	        An empty input yields an empty list.
	    """
	    count = len(points)
	    if count == 0:
	        return []

	    coords = np.asarray(points)
	    absorbed = np.zeros(count, dtype=bool)
	    kept = []

	    for i in range(count):
	        if absorbed[i]:
	            continue
	        kept.append(points[i])
	        if i + 1 < count:
	            # Only the distances from this kept point to the later points
	            # are needed, so the full n x n matrix is never materialised.
	            row = distance_matrix(coords[i:i + 1], coords[i + 1:])[0]
	            absorbed[i + 1:] |= row < threshold

	    return kept


	def find_template(template, target, threshold=0.8, draw=False):
	    """Locate occurrences of *template* inside *target*.

	    Args:
	        template: Image array to search for.
	        target: Image array to search in.
	        threshold: Minimum ``cv2.TM_CCOEFF_NORMED`` score for a location to
	            count as a match.
	        draw: When true, show the matches with matplotlib. The rectangles
	            are drawn on a copy, so *target* is left untouched.

	    Returns:
	        List of ``((left, top), (right, bottom))`` tuples of plain ints, one
	        per match after nearby hits (closer than 10 px) have been merged.
	        Empty when either image is None, when the template is not strictly
	        shorter than the target, or when it is wider than the target.
	    """
	    if template is None or target is None:
	        return []

	    template_height, template_width = template.shape[:2]
	    target_height, target_width = target.shape[:2]
	    # matchTemplate needs the template to fit inside the target; the height
	    # test stays strict (>=) as in the original behaviour.
	    if template_height >= target_height or template_width > target_width:
	        return []

	    result = cv2.matchTemplate(target, template, cv2.TM_CCOEFF_NORMED)

	    rows, cols = np.where(result >= threshold)
	    if rows.size == 0:
	        return []

	    # np.where yields (row, col); merge_points gets (x, y) = (col, row).
	    merged_points = merge_points(list(zip(cols, rows)), threshold=10)

	    matches = []
	    for x, y in merged_points:
	        left, top = int(x), int(y)  # plain ints instead of numpy scalars
	        matches.append(
	            ((left, top), (left + template_width, top + template_height)))

	    if draw:
	        preview = target.copy()
	        for top_left, bottom_right in matches:
	            cv2.rectangle(preview, top_left, bottom_right, (0, 0, 255), 2)
	        plt.imshow(preview)
	        plt.show()

	    return matches


	def find_first_non_white_row(image, y, direction=1, threshold=25, borderSkip=5):
	# Get the height of the image
	height = image.shape[0]

	if direction == 1:
	# Start searching upward from y
	for row in range(y - borderSkip, 0, -1):
	if not (image[row, :] >= (255 - threshold)).all():
	return row - 1
	return 0
	else:
	# Start searching downward from y
	for row in range(y + borderSkip, height):
	if not (image[row, :] >= 255 - threshold).all():
	return row + 1
	return height

	return y


	def get_crop_top_bottoms(image, point_pairs):
	    """Turn match rectangles into ``(top, bottom)`` row ranges.

	    For every ``(top_left, bottom_right)`` pair, the top is found by
	    scanning upward from the rectangle's top edge and the bottom by
	    scanning downward from its bottom edge, both with
	    ``find_first_non_white_row``.

	    Returns:
	        List of ``(top, bottom)`` tuples, one per input pair, same order.
	    """
	    return [
	        (
	            find_first_non_white_row(image, upper_left[1]),
	            find_first_non_white_row(image, lower_right[1], direction=0),
	        )
	        for upper_left, lower_right in point_pairs
	    ]


	def remove_image_part(img, y1, y2):
	    """Cut the horizontal strip ``[y1, y2)`` out of *img* and close the gap.

	    Args:
	        img: Image object providing ``size``, ``crop`` and ``paste``
	            (a PIL image in this script). It is not modified.
	        y1: First row of the strip to remove (clamped to the image).
	        y2: Row just past the strip to remove (clamped to the image).

	    Returns:
	        A new image with the rows below the strip moved up, its height
	        reduced by the strip height. When the clamped strip is empty
	        (``y2 <= y1``) *img* itself is returned unchanged.
	    """
	    width, height = img.size

	    # Clamp to the image and force plain ints (callers may pass numpy ints).
	    y1 = int(max(0, min(y1, height)))
	    y2 = int(max(0, min(y2, height)))
	    if y2 <= y1:
	        return img

	    new_height = height - (y2 - y1)

	    # Start from the top new_height rows; rows above y1 are already correct.
	    # Working on crops avoids filling with an RGB colour tuple, which does
	    # not suit single-band (e.g. grayscale) images.
	    result = img.crop((0, 0, width, new_height))
	    if y2 < height:
	        # Overwrite everything from y1 down with the part below the strip.
	        result.paste(img.crop((0, y2, width, height)), (0, y1))

	    return result


	def process_image(input_file, output_file, sample):
	    """Remove every watermark strip from one image file.

	    The file is searched for *sample*. Without a match (or when the file
	    cannot be decoded as an image) it is moved to *output_file* untouched.
	    Otherwise the matched strips are cut out, the result is saved to
	    *output_file* and *input_file* is deleted.

	    Args:
	        input_file: Path of the image to process.
	        output_file: Path the processed (or untouched) file ends up at.
	        sample: Watermark template image array.
	    """
	    img = cv2.imread(input_file)
	    # cv2.imread returns None for files it cannot decode (e.g. metadata
	    # files inside the archive); treat those as "no watermark".
	    finds = [] if img is None else find_template(sample, img, draw=False)

	    if not finds:
	        os.rename(input_file, output_file)
	        return

	    if len(finds) > 1:
	        tqdm.write(f"Found {len(finds)} watermark in {input_file}")

	    # Merge overlapping row ranges so no row is removed twice.
	    spans = []
	    for top, bottom in sorted(get_crop_top_bottoms(img, finds)):
	        if spans and top <= spans[-1][1]:
	            spans[-1][1] = max(spans[-1][1], bottom)
	        else:
	            spans.append([top, bottom])

	    # Copy the pixels and release the file handle before the file is removed.
	    with Image.open(input_file) as source:
	        result = source.copy()

	    # Cut from the bottom up: removing a strip shifts everything below it,
	    # so the coordinates of strips further up stay valid.
	    for top, bottom in reversed(spans):
	        result = remove_image_part(result, top, bottom)

	    # save image
	    if result.height == 0:
	        tqdm.write(
	            f"Image {input_file} will be empty, creating 1px height blank image.")
	        result = Image.new("RGB", (result.width, 1), (255, 255, 255))
	    result.save(output_file, quality=90)
	    os.remove(input_file)


	if __name__ == '__main__':
	    # Command-line entry point: strip the watermark from every image of
	    # every .cbz archive in a directory and write "<name>.cbz_new" next to it.
	    parser = argparse.ArgumentParser(
	        description='Remove watermark strips from images inside cbz archives')

	    # Both positional arguments are optional and fall back to defaults.
	    parser.add_argument('watermark_sample_path', help='path to the watermark sample image',
	                        default="watermark_sample.bmp", nargs='?')
	    parser.add_argument(
	        'directory_path', help='path to the directory containing the cbz', default="./", nargs='?')

	    args = parser.parse_args()

	    # Collect the .cbz files (not directories) directly inside the directory.
	    cbz = sorted(
	        file_name for file_name in os.listdir(args.directory_path)
	        if file_name.endswith('.cbz')
	        and os.path.isfile(os.path.join(args.directory_path, file_name))
	    )

	    tqdm.write(f"Found following file to process: \n{cbz}")
	    tqdm.write("Reading watermark sample...")
	    # Honour the command-line argument instead of a hard-coded file name.
	    sample = cv2.imread(args.watermark_sample_path)
	    if sample is None:
	        parser.error(
	            f"cannot read watermark sample: {args.watermark_sample_path}")
	    sample = crop_blank_space(sample)
	    # Preview of the cropped sample; plt.show() waits for the window to close.
	    plt.imshow(sample)
	    plt.show()

	    input_temp_dir = "temp_input"
	    output_temp_dir = "temp_output"

	    for c in tqdm(cbz, desc="cbz"):
	        # listdir returns bare names; resolve them against the directory.
	        cbz_path = os.path.join(args.directory_path, c)

	        # Start every archive from clean temp directories (best effort).
	        for temp_dir in (input_temp_dir, output_temp_dir):
	            shutil.rmtree(temp_dir, ignore_errors=True)
	        Path(output_temp_dir).mkdir(parents=True, exist_ok=True)

	        tqdm.write(f"Unpacking {c}...")
	        with zipfile.ZipFile(cbz_path, 'r') as zip_ref:
	            zip_ref.extractall(input_temp_dir)

	        # Gather every extracted file, including those in sub-folders.
	        pages = []
	        for root, _, files in os.walk(input_temp_dir):
	            for file in files:
	                pages.append(os.path.relpath(
	                    os.path.join(root, file), input_temp_dir))

	        for im in tqdm(sorted(pages), desc="img"):
	            im_path = os.path.join(input_temp_dir, im)
	            ou_path = os.path.join(output_temp_dir, im)
	            Path(ou_path).parent.mkdir(parents=True, exist_ok=True)
	            process_image(im_path, ou_path, sample)

	        with zipfile.ZipFile(cbz_path + "_new", 'w') as zip_file:
	            for root, _, files in os.walk(output_temp_dir):
	                for file in sorted(files):
	                    file_path = os.path.join(root, file)
	                    # Keep the path relative to the temp dir as archive name.
	                    zip_file.write(
	                        file_path, os.path.relpath(file_path, output_temp_dir))

	        shutil.rmtree(input_temp_dir, ignore_errors=True)
	        shutil.rmtree(output_temp_dir, ignore_errors=True)