Created
June 6, 2026 15:20
-
-
Save Kraballa/121b983dc39c75de9023cb166a7fb5b9 to your computer and use it in GitHub Desktop.
OCR tool for Japanese VNs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # OCR Town - A simple OCR application using MeikiOCR and Tkinter | |
| # ---------- | |
| # LICENSE: MIT | |
| # ---------- | |
| # HOW TO USE | |
| # ---------- | |
| # 0. put script in a location where you're ok with it generating 2 html files | |
| # 1. run script with `python ocrtown.py` | |
| # 2. gui opens, select program with the selector | |
| # 3. make sure the program is fully visible, then click clip | |
| # 4. the recognised text is written to an html file. I recommend opening it with a live server so it updates in browser automatically | |
| # 5. use yomitan/whatever to process the text further | |
| # ---------- | |
| # LIBRARIES | |
| # ---------- | |
| # the program uses the libraries below, with a short explanation of each: | |
| # 1. cv2 (OpenCV), Numpy and PIL for screenshot-taking and image processing | |
| # 2. the excellent meikiOCR for optical character recognition | |
| # 3. pywinctl (find windows), tkinter for the gui | |
| # 4. jinja2 to render html | |
| import cv2 | |
| import numpy as np | |
| from meikiocr import MeikiOCR | |
| import tkinter as tk | |
| import pywinctl as pwc | |
| from PIL import ImageGrab | |
| import os | |
| from jinja2 import Environment, FileSystemLoader | |
# Single shared OCR engine instance, created once when the script starts.
ocr = MeikiOCR()

# Write a default template on first run only; an existing template.html is
# left untouched so it can be customised by hand.
if not os.path.exists('template.html'):
    # Explicit UTF-8: jinja2's FileSystemLoader reads templates as UTF-8 by
    # default, so the file is written with the same encoding regardless of
    # the platform's default.
    with open('template.html', 'w', encoding='utf-8') as f:
        f.write("""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>OCR Result</title>
<style>
body {
font-family: "Segoe UI", sans-serif;
margin: auto;
max-width: 400px;
padding: 5px;
}
</style>
</head>
<body>
<h3>OCR Result</h3>
{% for line in lines %}
<p>{{ line }}</p>
{% endfor %}
</body>
</html>
""")

# Templates are looked up in the current working directory, which is also
# where template.html was created above.
# autoescape=True: the rendered lines are arbitrary recognised text, so
# characters such as <, > and & must be HTML-escaped instead of being
# interpreted as markup by the browser.
env = Environment(loader=FileSystemLoader('.'), autoescape=True)
template = env.get_template('template.html')
def extract_text_lines(ocr_output):
    """Return the non-empty ``'text'`` values of the OCR entries, in order.

    Args:
        ocr_output: iterable of mappings, each indexed with ``['text']``.

    Returns:
        A list with the ``'text'`` value of every entry whose text is truthy
        (entries with an empty string are skipped).
    """
    return [entry['text'] for entry in ocr_output if entry['text']]


def find_window(title):
    """Return the first window reported for ``title``, or ``None``.

    Args:
        title: window title to look up with ``pwc.getWindowsWithTitle``.

    Returns:
        The first match, or ``None`` when ``title`` is empty (nothing has
        been picked in the dropdown) or no window is reported for it.
    """
    if not title:
        return None
    matches = pwc.getWindowsWithTitle(title)
    # Indexing [0] directly would raise IndexError when the window has been
    # closed since the list was built; report "not found" instead.
    return matches[0] if matches else None


if __name__ == "__main__":
    root = tk.Tk()
    root.title("OCR Town")
    root.geometry("400x300")

    label = tk.Label(root, text="OCR Town", font=("Arial", 24))
    label.pack(pady=10)

    # Dropdown with the titles of all windows found at startup; windows
    # without a title are left out.
    programs = [w.title for w in pwc.getAllWindows()]
    programs = [t for t in programs if t]
    selected_program = tk.StringVar()
    # OptionMenu needs at least one value argument; fall back to a single
    # empty entry so the GUI still opens when no titled window exists.
    dropdown = tk.OptionMenu(root, selected_program, *(programs or [""]))
    dropdown.pack(pady=10)

    def on_button_click():
        """Screenshot the selected window, OCR it and write output.html.

        When no matching window is found, a message is printed and
        output.html is rendered with an empty list of lines.
        """
        title = selected_program.get()
        window = find_window(title)
        results = []
        if window:
            print("taking screenshot of window ", title)
            # Grab only the window's client frame from the screen.
            rect = window.getClientFrame()
            img = ImageGrab.grab(rect)
            # NOTE(review): ImageGrab.grab presumably raises on failure
            # rather than returning a falsy value; kept as a defensive check.
            if not img:
                print("failed to take screenshot of window ", title)
                return
            # PIL delivers RGB; convert to OpenCV's BGR channel order before
            # handing the array to the OCR engine.
            img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
            output = ocr.run_ocr(img)
            results = extract_text_lines(output)
            print('\n'.join(results))
        else:
            print("no window found with title ", title)

        # Render the recognised lines and save them as UTF-8 encoded HTML.
        rendered = template.render(lines=results)
        with open("output.html", "wb") as f:
            f.write(rendered.encode('utf-8'))

    button = tk.Button(root, text="Clip", command=on_button_click, font=("Arial", 36))
    button.pack(pady=10)

    root.mainloop()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's my setup. As you can see, there is a small issue with the OCR'd text. Don't expect it to be perfect, but MeikiOCR is best in class. The clearer the text is, the better. Here in PCSX2 the text isn't super clean, so it may occasionally run into issues.
