Last active
June 30, 2024 10:21
-
-
Save UserUnknownFactor/82f3ffc2b0e57946e4f0d621a11686cd to your computer and use it in GitHub Desktop.
Tools for mass image translation (text application was tested on PGMMV with images of the same type/size that differ only in the text area)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, csv, glob, re | |
from PIL import Image, ImageDraw, ImageFont | |
""" | |
This tool reads translations from a .csv file whose rows are formatted as
`original→translation→image_without_extension[;text_pos_x,text_pos_y;overlay_path;overlay_x,overlay_y;custom_font;font_size;font_color]`
and applies them to all images in the specified folder, after replacing the
background with a specified image at the specified coordinates. Text supports colored borders
and style tags like <b>, <i> (not enabled now) or <color="#ff0000"> in the text.
All settings are configurable only in the code.
""" | |
# Optional on-disk caching. It is enabled only when the third-party
# `diskcache` package can be imported; otherwise the script runs without it.
USE_CACHE = True
if USE_CACHE:
    try:
        import diskcache
    except ImportError:
        # Only a missing/unimportable package disables caching. A bare
        # `except:` would also hide KeyboardInterrupt and unrelated errors.
        USE_CACHE = False
# Column separator and escape character used by the translation CSV files.
DELIMITER_CHAR = '→'
ESCAPE_CHAR = '¶'

# Name under which the translation dialect is registered with the csv module.
DIALECT_TRANSLATION = "translation"
csv.register_dialect(
    DIALECT_TRANSLATION,
    delimiter=DELIMITER_CHAR,
    escapechar=ESCAPE_CHAR,
    quoting=csv.QUOTE_NONE,  # fields are never quoted; the escape char is used instead
    quotechar="\uffff",
    lineterminator='\n',
)

# Matches the inline style tags: <b>, </b>, <i>, </i>, <color=...> and </color>.
# The capturing group makes `re_tags.split()` keep the tags in its result.
re_tags = re.compile(r'''(</?[ib]>|<color=['"]?[^'" >]+['"]?>|</color>)''')
class StyledText:
    """Draws multi-line text with simple inline style tags onto a PIL image.

    Recognised markup inside the text: ``<b>``/``</b>``, ``<i>``/``</i>``,
    ``<color=...>``/``</color>``, and ``<br>``/``<BR>`` or a literal ``\\n``
    sequence as line breaks.
    """

    def __init__(self, size, regular_font_path, italic_font_path=None, bold_font_path=None, line_height_mul = 1.2):
        """Load the fonts used for rendering.

        Args:
            size: Nominal font size; it is multiplied by 1.33 before loading
                (presumably a point-to-pixel conversion -- TODO confirm).
            regular_font_path: Path of the TrueType font used by default.
            italic_font_path: Optional font for ``<i>``; falls back to regular.
            bold_font_path: Optional font for ``<b>``; falls back to regular.
            line_height_mul: Line height as a multiple of the scaled size.
        """
        size = size * 1.33
        self.line_height = size * line_height_mul  # Adjust line height as needed
        self.size = size
        self.current_font = self.regular_font = ImageFont.truetype(regular_font_path, size)
        if italic_font_path:
            self.italic_font = ImageFont.truetype(italic_font_path, size)
        else:
            self.italic_font = self.regular_font
        if bold_font_path:
            self.bold_font = ImageFont.truetype(bold_font_path, size)
        else:
            self.bold_font = self.regular_font

    def get_text_width(self, text):
        """Return the rendered width of `text` in the currently active font."""
        dummy_draw = ImageDraw.Draw(Image.new('RGB', (1, 1)))
        bbox = dummy_draw.textbbox((0, 0), text, font=self.current_font)
        return bbox[2] - bbox[0]

    @staticmethod
    def draw_text_with_border(draw, position, text, font, fill, border_color, border_width):
        """ Draws text with a border around it.

        The border is produced by drawing the text in `border_color` at every
        offset within `border_width` pixels, then the text itself on top.
        """
        if border_color is not None and border_width:
            x, y = position
            for dx in range(-border_width, border_width+1):
                for dy in range(-border_width, border_width+1):
                    draw.text((x+dx, y+dy), text, font=font, fill=border_color)
        draw.text(position, text, font=font, fill=fill)

    def draw_text(self, position, text, img, base_color, border_color, border_width, alignment='center'):
        """Draw `text` on `img`, honouring style tags and alignment.

        Args:
            position: Either a point ``(x, y)`` or a rectangle
                ``(x1, y1, x2, y2)``. A point forces ``'left top'`` alignment.
            text: Text to draw; may contain the supported style tags.
            img: PIL image that is drawn on in place.
            base_color: Fill colour used outside of ``<color>`` tags.
            border_color: Colour of the text outline, or None for no outline.
            border_width: Outline width in pixels.
            alignment: String containing horizontal (``left``/``center``/
                ``right``) and vertical (``top``/``middle``/``bottom``)
                keywords; ``center`` also implies vertical centering.

        Raises:
            ValueError: If `position` has fewer than 2 or exactly 3
                coordinates, or the alignment lacks a usable keyword.
        """
        # Validate before touching any element, so that a malformed position
        # raises the documented ValueError and not an IndexError.
        if len(position) < 2:
            raise ValueError(f"Point must have at least 2 coordinates, not {position}")
        if len(position) == 3:
            raise ValueError(f"Full rectangle must have 4 coordinates, not {position}")

        draw = ImageDraw.Draw(img)
        current_color = base_color  # Initialize current color with base color
        color_stack = []  # Stack to keep track of colors
        text = text.replace('\\n', '\n')
        text = text.replace('<br>', '\n').replace('<BR>', '\n')
        lines = text.split('\n')  # Split the text into lines

        # Unpack the rectangle; used format is [x1, y1, x2, y2] instead of [x1, y1, w, h]
        rect_left = position[0]
        rect_top = position[1]
        if len(position) > 2:
            rect_right = position[2]
            rect_bottom = position[3]
            rect_width = rect_right - rect_left
            rect_height = rect_bottom - rect_top
        else:
            # A bare point has no extent to align within.
            alignment = 'left top'

        is_left = 'left' in alignment
        is_center = 'center' in alignment
        is_right = 'right' in alignment
        is_top = 'top' in alignment
        is_bottom = 'bottom' in alignment
        is_middle = is_center or 'middle' in alignment

        total_line_height = len(lines) * self.line_height
        if is_top:
            vertical_offset = rect_top
        elif is_bottom:
            vertical_offset = rect_bottom - total_line_height
        elif is_middle:
            vertical_offset = rect_top + (rect_height - total_line_height) // 2
        else:
            raise ValueError("Invalid vertical alignment specified")

        for i, line in enumerate(lines):
            # Width is measured on the tag-free line, in the font active at
            # the start of the line.
            clear_line = re_tags.sub('', line)
            if is_left:
                horizontal_offset = rect_left
            elif is_center:
                horizontal_offset = rect_left + (rect_width - self.get_text_width(clear_line)) // 2
            elif is_right:
                horizontal_offset = rect_right - self.get_text_width(clear_line)
            else:
                raise ValueError("Invalid horizontal alignment specified")
            vertical_line_offset = vertical_offset
            for segment in re_tags.split(line):
                if segment == '<i>':
                    self.current_font = self.italic_font
                elif segment == '<b>':
                    self.current_font = self.bold_font
                elif segment.startswith('<color='):
                    color = re.search(r'(?<=[<]color=)(?:[\'"]?)([^\'" >]+)', segment).group(1)
                    color_stack.append(current_color)  # Push the current color onto the stack
                    current_color = color  # Update current color
                elif segment == '</color>':
                    # Pop color from stack or revert to base color
                    current_color = color_stack.pop() if color_stack else base_color
                elif segment == '</i>' or segment == '</b>':
                    self.current_font = self.regular_font
                else:
                    if segment:  # Check if segment is not empty
                        self.draw_text_with_border(draw, (horizontal_offset, vertical_line_offset), segment, font=self.current_font, fill=current_color, border_color=border_color, border_width=border_width)
                        horizontal_offset += self.get_text_width(segment)
            vertical_offset += self.line_height
def get_font_path(font_name):
    """Locate a TrueType font file by base name.

    The current working directory is searched first, then
    ``C:/Windows/Fonts``. Any extension in `font_name` is dropped and the
    glob pattern ``<base>*ttf`` is used, so the first match is returned.

    Args:
        font_name: Font file name, with or without extension.

    Returns:
        The path of the first matching file, or None when nothing matches.
    """
    pattern = os.path.splitext(font_name)[0] + "*ttf"
    candidates = (
        # os.path.join keeps the ".\\" prefix on Windows but also works on
        # POSIX, where a hard-coded backslash is not a path separator.
        os.path.join(".", pattern),
        "C:/Windows/Fonts/" + pattern,
    )
    for candidate in candidates:
        font_paths = glob.glob(candidate)
        if font_paths:
            return font_paths[0]
    return None
def read_text_data(csv_file):
    """Load all rows of a translation CSV file.

    The file is decoded as UTF-8 (a leading BOM is tolerated) and parsed with
    the registered translation dialect.

    Returns:
        A list with one list of column strings per row.
    """
    with open(csv_file, mode='r', encoding='utf-8-sig', newline='') as source:
        rows = list(csv.reader(source, DIALECT_TRANSLATION))
    return rows
def make_area_transparent(image, box):
    """Zero the alpha channel inside `box`, leaving other alpha untouched.

    Args:
        image: A PIL.Image object in RGBA mode; it is modified in place.
        box: A tuple (left, upper, right, lower) defining the rectangle to
            make fully transparent.

    Returns:
        The same image object, for convenience.

    Raises:
        ValueError: If the image is not in RGBA mode.
    """
    if image.mode != 'RGBA':
        raise ValueError("Image must be in RGBA mode")

    dimensions = image.size
    # White marks the pixels whose alpha will be replaced.
    region = Image.new("L", dimensions, 0)
    ImageDraw.Draw(region).rectangle(box, fill=255)

    existing_alpha = image.getchannel('A')
    cleared = Image.new('L', dimensions, 0)
    # Inside the region take the cleared (0) alpha, elsewhere keep the old one.
    image.putalpha(Image.composite(cleared, existing_alpha, region))
    return image
# Directory that backs the on-disk cache.
CACHE_DIR = '__pycache__'

if USE_CACHE:
    # `cache` only exists when caching is enabled; the is_*_changed helpers
    # below rely on it.
    cache = diskcache.Cache(CACHE_DIR)
def is_file_changed(file_path):
    """Report whether a file's modification time differs from the cached one.

    When the file is new to the cache or its mtime differs, the cache entry
    is updated and True is returned; otherwise False.
    """
    current_mtime = os.path.getmtime(file_path)
    remembered_mtime = cache.get(file_path)
    if remembered_mtime is not None and remembered_mtime == current_mtime:
        return False
    cache.set(file_path, current_mtime)
    return True
def is_line_changed(file_name, text_line):
    """Report whether `text_line` differs from the value cached for `file_name`.

    On a difference the cache entry is overwritten with the new text and True
    is returned; otherwise the cache is left alone and False is returned.
    """
    if cache.get(file_name) == text_line:
        return False
    cache.set(file_name, text_line)
    return True
def make_tuple_dim(tuple_string: str, dim: int=2, default: int|str|None=0, separator: str=',', to_int: bool=True): | |
if tuple_string is None or not isinstance(tuple_string, str): | |
return tuple() | |
tuple_string = tuple_string.split(separator) | |
tuple_string = tuple_string[:dim] + [default] * max(0, dim - len(tuple_string)) | |
if to_int: | |
tuple_string = tuple(int(v) for v in tuple_string) | |
if len(tuple_string) > 3 and tuple_string[2] == 0 and tuple_string[3]==0: | |
return tuple_string[:2] | |
return tuple_string | |
return tuple(tuple_string) | |
def process_images(input_folder, output_folder, rectangle, default_overlay_img_path=None, csv_file='data.csv',
                   font_name='arial.ttf', font_size=20, text_color=(0, 0, 0),
                   border_color=(255, 255, 255), border_width=2,
                   default_overlay_position=(0, 0)):
    """Apply translated text (and optional overlays) to images in a folder.

    Each CSV row is ``original→translation→comment`` where the comment is
    ``image_without_extension[;text_pos;overlay_path;overlay_pos;custom_font;
    font_size;font_color]``. Rows that name the same image are applied to
    that image in file order.

    Args:
        input_folder: Folder holding the source ``.png``/``.jpg`` images.
        output_folder: Folder that receives the processed images.
        rectangle: Default text point/rectangle for rows without a position.
        default_overlay_img_path: Overlay pasted when no row of an image
            names its own overlay; None/empty disables it.
        csv_file: Path of the translation CSV file.
        font_name: Default font file name.
        font_size: Default font size.
        text_color: Default text colour.
        border_color: Text outline colour.
        border_width: Text outline width in pixels.
        default_overlay_position: Paste position used when an overlay has no
            position of its own.

    Raises:
        FileNotFoundError: If the default font or a row's custom font cannot
            be located.
    """
    # if USE_CACHE and not is_file_changed(csv_file): return
    text_data = read_text_data(csv_file)

    # Check the lookup result before abspath(): abspath(None) raises TypeError
    # and would hide the real problem.
    font_path = get_font_path(font_name)
    if not font_path:
        raise FileNotFoundError(f"Font {font_name} not found in system.")
    font_path = os.path.abspath(font_path)
    print(f"Default font: {font_path}")

    # Group rows by image name so several rows can target one image.
    grouped_data = dict()
    for row in text_data:
        if len(row) < 3 or not row[2]:
            print(f"Error in row: {row}")
            continue
        _, text, comment = row[:3]  # this is standard translation format of filetranslate
        (filename, text_position, overlay_img_path, overlay_position,
         custom_font, custom_font_size, custom_font_color) = make_tuple_dim(comment, 7, None, ';', False)
        # Blank optional fields are treated as "not given".
        text_position = make_tuple_dim(text_position or None, 4)
        overlay_position = make_tuple_dim(overlay_position or None)

        entry = grouped_data.setdefault(filename, {
            'texts': [],
            'overlays': [],
            'fonts': [],
        })

        # A custom colour applies to its own row only; it must not replace
        # the default colour for the rows that follow.
        row_color = custom_font_color if custom_font_color else text_color
        entry['texts'].append((text, text_position, row_color))

        # Record explicit overlays only, so an image without any can fall
        # back to the default overlay later.
        if overlay_img_path:
            entry['overlays'].append((overlay_img_path, overlay_position or default_overlay_position))

        if custom_font:
            row_font_path = get_font_path(custom_font)
            if not row_font_path:
                raise FileNotFoundError(f"Font {custom_font} not found in system.")
            row_font_path = os.path.abspath(row_font_path)
        else:
            row_font_path = font_path
        row_font_size = int(custom_font_size) if custom_font_size else font_size
        entry['fonts'].append(StyledText(row_font_size, row_font_path))

    print("Applying translation and overlays...")
    os.makedirs(output_folder, exist_ok=True)
    for filename, data in grouped_data.items():
        texts = data['texts']

        # Prefer a .png source and fall back to .jpg.
        image_path = None
        image_type = None
        for extension in ('.png', '.jpg'):
            candidate = os.path.join(input_folder, filename + extension)
            if os.path.exists(candidate):
                image_path, image_type = candidate, extension
                break
        if image_path is None:
            print(f"Image not found for: {filename}")
            continue

        # One cache entry per image covering all of its text rows; caching
        # rows one by one under the same key would always look "changed".
        if USE_CACHE and not is_line_changed(filename, repr(texts)):
            continue

        img = Image.open(image_path).convert("RGBA")

        overlays = data['overlays']
        if not overlays and default_overlay_img_path:
            overlays = [(default_overlay_img_path, default_overlay_position)]
        for overlay_img_path, overlay_position in overlays:
            overlay_img = Image.open(overlay_img_path).convert("RGBA")
            img.paste(overlay_img, overlay_position)

        for (text, position, row_color), font in zip(texts, data['fonts']):
            font.draw_text(position or rectangle, text, img, row_color,
                           border_color=border_color, border_width=border_width)

        output_path = os.path.join(output_folder, filename + image_type)
        if image_type == '.jpg':
            # JPEG has no alpha channel; saving an RGBA image would fail.
            img = img.convert("RGB")
        img.save(output_path)
        print(f"{output_path}")
def main():
    """Run the translation applier with the settings hard-coded below."""
    # os.path.join gives the same strings as the backslash literals on
    # Windows while also producing valid paths on other systems.
    input_folder = os.path.join('Resources', 'img')
    output_folder = os.path.join('translation_out', 'Resources', 'img')
    overlay_img_path = 'overlay_image.png'
    csv_file = 'overlay_image_strings.csv'
    font_name = 'arial.ttf'  # Specify the base name of the font
    text_color = 'white'
    border_color = '#00ff00'
    border_width = 2
    font_size = 20
    overlay_position = (100, 50)  # Position to paste the overlay image
    # Coords to draw the text: 2 items = position, 4 = box
    rectangle = (100, 20, 500, 80)  # using a target rectangle instead

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    print(f"The translations file is: {csv_file}")
    print(f"The overlay file is: {overlay_img_path}")
    process_images(input_folder, output_folder, rectangle, overlay_img_path, csv_file, font_name, font_size, text_color, border_color, border_width, overlay_position)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, csv, re, glob | |
# Column separator and escape character used by the translation CSV files.
DELIMITER_CHAR = '→'
ESCAPE_CHAR = '¶'

# Name under which the translation dialect is registered with the csv module.
DIALECT_TRANSLATION = "translation"
csv.register_dialect(
    DIALECT_TRANSLATION,
    delimiter=DELIMITER_CHAR,
    escapechar=ESCAPE_CHAR,
    quoting=csv.QUOTE_NONE,  # fields are never quoted; the escape char is used instead
    quotechar="\uffff",
    lineterminator='\n',
)
def natural_sort_key(s):
    """Natural sort key function for sorting filenames with embedded numbers.

    The string is split on runs of ASCII digits. Digit runs become ints and
    the text between them is lower-cased, so ``"f2"`` sorts before ``"f10"``.

    Returns:
        A list alternating text (even indexes) and int (odd indexes).
    """
    # With a capturing group, re.split alternates text and digit runs, so the
    # index parity identifies the numbers. Testing str.isdigit() instead would
    # also accept characters such as '²' that int() rejects.
    return [
        int(chunk) if index % 2 else chunk.lower()
        for index, chunk in enumerate(re.split(r'([0-9]+)', s))
    ]
def merge_csvs(directory, mask, output_file):
    """Merges CSV files in a directory into a single CSV file.

    Every matching file contributes one output row: the first column of all
    its rows joined with a literal ``\\n``, the second column of all its rows
    that have one joined the same way, and the file's base name with the
    ``_strings.csv`` suffix removed.

    Args:
        directory: The directory to scan (recursively).
        mask: The file mask to match (e.g., "*.csv").
        output_file: The name of the output CSV file.
    """
    # The output file may itself match the mask; it must not be merged into
    # itself, so it is removed from the input list.
    output_abs = os.path.normcase(os.path.abspath(output_file))
    files_found = [
        path
        for path in glob.glob(os.path.join(directory, '**', mask), recursive=True)
        if os.path.normcase(os.path.abspath(path)) != output_abs
    ]
    files_found = sorted(files_found, key=natural_sort_key)

    with open(output_file, 'w', newline='', encoding='utf-8-sig') as outfile:
        writer = csv.writer(outfile, DIALECT_TRANSLATION)
        for filename in files_found:
            with open(filename, 'r', newline='', encoding='utf-8-sig') as infile:
                # A csv reader can be iterated only once, so keep the rows;
                # empty rows are skipped because they have no first column.
                rows = [row for row in csv.reader(infile, DIALECT_TRANSLATION) if row]
            originals = '\\n'.join(row[0] for row in rows)
            translations = '\\n'.join(row[1] for row in rows if len(row) > 1)
            writer.writerow([originals, translations, os.path.basename(filename).replace('_strings.csv', '')])
if __name__ == "__main__":
    # Merge every "*_strings.csv" file found under the current directory
    # into one "overlay_image_strings.csv" file.
    merge_csvs(
        directory='.',
        mask='*_strings.csv',
        output_file='overlay_image_strings.csv',
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import cv2 # pip install cv2 | |
import numpy as np # pip install numpy | |
def remove_watermark(image, template, threshold=0.8):
    """Paint over every region of `image` that matches `template`.

    Args:
        image (numpy.ndarray): Image to clean; it is modified in place.
        template (numpy.ndarray): BGR sub-image to look for; it is converted
            to grayscale before matching.
        threshold (float): Minimum normalised correlation score for a
            location to count as a match.

    Returns:
        numpy.ndarray: The same `image` array with matches filled in white.
    """
    gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    height, width = gray_template.shape

    # Score every placement of the template over the image.
    scores = cv2.matchTemplate(image, gray_template, cv2.TM_CCOEFF_NORMED)
    match_rows, match_cols = np.where(scores >= threshold)

    # np.where yields (row, col); rectangles need (x, y) = (col, row).
    for x, y in zip(match_cols, match_rows):
        cv2.rectangle(image, (x, y), (x + width, y + height), (255, 255, 255), -1)
    return image
def preprocess_for_ocr(image_path):
    """Prepare an image with a coloured background and borders for OCR.

    Args:
        image_path (str): The path to the input image.

    Returns:
        numpy.ndarray: The binarised, inverted single-channel image.
    """
    low, high = 130, 255

    # 1. Read the file directly as grayscale.
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # 2. Inverted threshold as a first pass at removing the background.
    _, inverted = cv2.threshold(gray, low, high, cv2.THRESH_BINARY_INV)

    # 3. Morphological opening to drop small noise.
    element = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    opened = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, element, iterations=2)

    # 4. Fill the outer contours to build a mask of the text region.
    outlines, _ = cv2.findContours(opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    text_mask = np.zeros_like(gray)
    cv2.drawContours(text_mask, outlines, -1, 255, -1)

    # 5. Keep only the masked part of the grayscale image.
    masked = cv2.bitwise_and(gray, gray, mask=text_mask)

    # 6. Denoise. Positional arguments after the destination are:
    #    h                  - filter strength; larger removes more noise but
    #                         also more detail.
    #    templateWindowSize - size in pixels of the patch used to compute
    #                         weights; should be odd.
    #    searchWindowSize   - size in pixels of the window used for the
    #                         weighted average; should be odd.
    smoothed = cv2.fastNlMeansDenoising(masked, None, 7, 7, 21)

    # 7. Binarise again and invert the result.
    _, binary = cv2.threshold(smoothed, low, high, cv2.THRESH_BINARY)
    return cv2.bitwise_not(binary)
# Folder with the source images and folder that receives the results.
input_folder = "img"
output_folder = "for_ocr"
os.makedirs(output_folder, exist_ok=True)

# Load watermark template
template_image_path = 'watermark_text.png'  # Path to the bad image template (like series of dots that messes with OCR)
watermark_template = cv2.imread(template_image_path)

# Iterate through all files in the folder
for filename in os.listdir(input_folder):
    # Compare the extension case-insensitively so that files such as
    # "SCAN.PNG" or "photo.JPG" are not silently skipped.
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    image_path = os.path.join(input_folder, filename)
    preprocessed_image = preprocess_for_ocr(image_path)
    cleaned_image = remove_watermark(preprocessed_image, watermark_template)
    output_path = os.path.join(output_folder, filename)
    cv2.imwrite(output_path, cleaned_image)
    print(f"Preprocessed and saved: {output_path}")

print("Preprocessing complete!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment