Created
January 13, 2019 15:29
-
-
Save aonemd/a4895dbdbdf9b7a0bc47bcb051aae5b8 to your computer and use it in GitHub Desktop.
A Python implementation of https://gist.github.com/aonemd/7bb3c4760d9e47a9ce8e270198cb40a0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""OCR every image under ./memes and report how many yielded text.

For each file found by walking ./memes, the image is converted to
greyscale, thresholded to 1-bit black/white, written to ./tmp/ under the
same file name, and the thresholded copy is passed to Tesseract.  The
recognised text (or the original path when nothing was recognised) is
printed.

NOTE: despite the counter names, this script does not rename anything on
disk; it only prints what was recognised and tallies the results.
"""
import os

from PIL import Image  # pip install Pillow
import pytesseract  # pip install pytesseract

newly_renamed = 0  # files for which OCR produced non-empty text
not_renamed = 0    # files with no recognised text, or that were skipped

# The thresholded copies are saved here; create the directory up front so
# the first save does not fail when it is missing.
os.makedirs('./tmp/', exist_ok=True)

for subdir, _dirs, files in os.walk('./memes'):
    for file in files:
        src_path = os.path.join(subdir, file)
        # Files with the same name in different sub-directories share one
        # temp path; each copy is consumed right after it is written.
        tmp_path = os.path.join('./tmp/', file)

        print("************************************")
        print(src_path)

        try:
            # Context manager closes the source file handle once the
            # thresholded copy has been computed.
            with Image.open(src_path) as original:
                # Greyscale, then map near-white pixels (>= 249) to white
                # and everything else to black.
                bw = original.convert('L').point(
                    lambda x: 0 if x < 249 else 250, '1')
            bw.save(tmp_path)
        except (OSError, ValueError) as err:
            # Pillow raises OSError for unreadable / non-image files and
            # ValueError when the extension maps to no known output format.
            # Skip the file instead of aborting the whole run.
            print(f"skipped (could not process image): {err}")
            not_renamed += 1
            continue

        # OCR is deliberately outside the try block so that a missing or
        # broken Tesseract installation still surfaces as an error.
        with Image.open(tmp_path) as img:
            new_text = pytesseract.image_to_string(img).strip()

        if new_text:
            newly_renamed += 1
        else:
            not_renamed += 1
            new_text = src_path  # fall back to the existing path
        print(new_text)

print("**********************************************")
print(f"{newly_renamed} files newly renamed vs. {not_renamed} still have the same old name.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment