Created
April 6, 2021 00:19
-
-
Save black7375/243f0179328d45d78e43fb56653c6302 to your computer and use it in GitHub Desktop.
Font File to hdf5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import h5py | |
import PIL, PIL.ImageFont, PIL.Image, PIL.ImageDraw, PIL.ImageChops, PIL.ImageOps | |
import os | |
import random | |
import string | |
import numpy | |
import sys | |
# -------------------- Convert to Numpy Array --------------------
# Side lengths, in pixels, of the square glyph images that are stored.
w = 64
h = 64

# Base size of the large scratch canvas each glyph is first rendered on;
# the smaller of the two is also used as the font size.
w0 = 256
h0 = 256

# Character sets rendered for every font.
s_ascii = string.ascii_uppercase + string.ascii_lowercase + string.digits
# Alternative, shorter Hangul sample set:
# s_hangul = "가나다라마바사아자차카타파하"
s_hangul = "독창적인스물네자로만들어진표음문자로서고유하고특별해지켜져야한다"

# All-white reference canvas; a rendered glyph is diffed against it to find
# the bounding box of its ink.
blank = PIL.Image.new('L', (5 * w0, 3 * h0), 255)
def read_font(fn, chars=s_ascii):
    """Render every character of ``chars`` with the font file ``fn``.

    Each character is drawn on a large white canvas, cropped to its own
    horizontal extent and to the vertical extent shared by all characters
    (so baselines stay aligned), scaled by one common factor so the largest
    glyph fits in ``w`` x ``h``, centred on a white ``w`` x ``h`` square and
    finally inverted so that the background is 0.

    Args:
        fn: Path of the font file, passed to ``PIL.ImageFont.truetype``.
        chars: Iterable of characters to render.

    Returns:
        An integer ``numpy`` array of shape ``(len(chars), h, w)`` holding
        ``255 - pixel`` for every glyph image. Empty ``chars`` gives an
        empty ``(0, h, w)`` array.

    Raises:
        ValueError: If a character produces no visible pixels (for example
            because the font has no drawable glyph for it).
        Any exception raised by ``PIL.ImageFont.truetype`` when the font
        cannot be loaded is propagated unchanged.
    """
    font = PIL.ImageFont.truetype(fn, min(w0, h0))
    if not chars:
        # Nothing to render; avoids dividing by a zero max_width below.
        return numpy.empty((0, h, w), dtype=int)

    canvas_size = (w0 * 5, h0 * 3)

    # We need to make sure we scale down the fonts but preserve the vertical
    # alignment, so one vertical extent is shared by all characters.
    min_ly = float('inf')
    max_hy = float('-inf')
    max_width = 0
    imgs = []
    for char in chars:
        print('...', char)
        # Draw character
        img = PIL.Image.new("L", canvas_size, 255)
        draw = PIL.ImageDraw.Draw(img)
        # Explicit black ink so the glyph always contrasts with the white
        # canvas regardless of the drawing default.
        draw.text((w0, h0), char, font=font, fill=0)
        # Get bounding box
        bbox = PIL.ImageChops.difference(img, blank).getbbox()
        if bbox is None:
            # getbbox() returns None when nothing differs from the blank
            # canvas; fail with a clear message instead of an unpack error.
            raise ValueError(
                'font %s draws nothing for character %r' % (fn, char))
        lx, ly, hx, hy = bbox
        min_ly = min(min_ly, ly)
        max_hy = max(max_hy, hy)
        max_width = max(max_width, hx - lx)
        imgs.append((lx, hx, img))

    crop_height = max_hy - min_ly
    print('crop dims:', crop_height, max_width)
    scale_factor = min(1.0 * h / crop_height, 1.0 * w / max_width)

    data = []
    for lx, hx, img in imgs:
        img = img.crop((lx, min_ly, hx, max_hy))
        # Resize to smaller
        new_width = (hx - lx) * scale_factor
        new_height = crop_height * scale_factor
        # Clamp to one pixel: int() truncation of a very narrow glyph would
        # otherwise request a zero-sized image. LANCZOS is the filter that
        # the removed ANTIALIAS constant used to alias.
        img = img.resize((max(1, int(new_width)),
                          max(1, int(new_height))), PIL.Image.LANCZOS)
        # Expand to square
        img_sq = PIL.Image.new('L', (w, h), 255)
        offset_x = (w - new_width) / 2
        offset_y = (h - new_height) / 2
        print(offset_x, offset_y)
        img_sq.paste(img, (int(offset_x), int(offset_y)))
        # Convert to numpy array and invert so the background becomes 0.
        matrix = 255 - numpy.array(img_sq, dtype=int)
        data.append(matrix)
    return numpy.array(data)
# -------------------- Get Files --------------------
def get_ttfs(d='../dataset/fonts'):
    """Yield the path of every ``.ttf`` / ``.otf`` file found under ``d``.

    The directory tree is walked recursively. The extension is matched
    case-insensitively so that files such as ``Font.TTF`` are not skipped,
    and directories and files are visited in sorted order so repeated runs
    produce the fonts in the same order.

    Args:
        d: Root directory to search.

    Yields:
        ``os.path.join(dirpath, filename)`` for each matching file.
    """
    for dirpath, dirnames, filenames in os.walk(d):
        # Sorting in place also fixes the order in which os.walk descends.
        dirnames.sort()
        for filename in sorted(filenames):
            if filename.lower().endswith(('.ttf', '.otf')):
                yield os.path.join(dirpath, filename)
def new_dataset(f, dshape, label):
    """Create the dataset ``label`` in ``f`` and return ``f``.

    The dataset starts with shape ``dshape``, uses ``dshape`` as its chunk
    shape, stores unsigned 8-bit values and may grow without limit along
    its first axis (every other axis is fixed at its ``dshape`` size).
    """
    growable = (None,) + dshape[1:]
    f.create_dataset(
        label,
        shape=dshape,
        dtype='u1',
        chunks=dshape,
        maxshape=growable,
    )
    return f
def new_file(path, dshape, label):
    """Create (truncating if present) the HDF5 file at ``path`` with one dataset.

    The file is opened in ``'w'`` mode, the dataset ``label`` of shape
    ``dshape`` is added through ``new_dataset``, and the open file is
    returned.
    """
    handle = h5py.File(path, 'w')
    return new_dataset(handle, dshape, label)
def get_h5py(path='fonts.hdf5', dshape=(1,), label=None):
    """Open the HDF5 file at ``path``, creating it when it does not exist.

    An existing file is opened in ``'r+'`` mode and returned untouched.
    Only when nothing exists at ``path`` is a new file created (through
    ``new_file``) together with a dataset ``label`` of shape ``dshape``.

    The existence test looks at ``path`` alone. Previously a second,
    hard-coded test for ``'fonts.hdf5'`` could route an existing file at a
    different ``path`` to ``new_file``, which truncates it.

    Args:
        path: Location of the HDF5 file.
        dshape: Initial dataset shape, used only when the file is created.
        label: Dataset name, used only when the file is created.

    Returns:
        The open ``h5py.File``; the caller is responsible for closing it.
    """
    if os.path.exists(path):
        return h5py.File(path, 'r+')
    return new_file(path, dshape, label)
# -------------------- Create Dataset --------------------
def create_dataset(path='../dataset/fonts', chars=s_ascii, label='ascii',
                   h5dfP='fonts.hdf5',):
    """Render every font under ``path`` and append it to an HDF5 dataset.

    Each font found by ``get_ttfs(path)`` is rendered with ``read_font``
    and stored as one row of the dataset ``label`` inside the file
    ``h5dfP``. The file and the dataset are created when missing. Fonts
    that cannot be rendered are reported and skipped.

    Args:
        path: Directory that is searched recursively for font files.
        chars: Characters rendered for every font.
        label: Name of the dataset inside the HDF5 file.
        h5dfP: Path of the HDF5 file.
    """
    # Dataset attribute remembering how many rows hold real font data.
    count_attr = 'num_fonts'

    dshape = (1, len(chars), h, w)
    f = get_h5py(h5dfP, dshape, label)
    try:
        try:
            dset = f[label]
        except KeyError:
            f = new_dataset(f, dshape, label)
            dset = f[label]

        if count_attr in dset.attrs:
            i = int(dset.attrs[count_attr])
        elif dset.shape[0] == 1:
            # File without the counter: a single row is taken to be the
            # empty placeholder row of a freshly created dataset. This is
            # ambiguous with "exactly one font stored", which is why the
            # counter attribute is written below.
            i = 0
        else:
            i = dset.shape[0]

        for fn in get_ttfs(path):
            print(fn)
            try:
                data = read_font(fn, chars=chars)
            except Exception as exc:
                # Skip unreadable fonts, but let KeyboardInterrupt and
                # SystemExit through so the run can still be aborted.
                print('was not able to read', fn, '-', exc)
                continue
            print(data.shape)
            dset.resize((i + 1,) + dshape[1:])
            dset[i] = data
            i += 1
            dset.attrs[count_attr] = i
            f.flush()
    finally:
        # Close the file even when a write fails or the run is interrupted.
        f.close()
def file_dataset(path, h5dfP='fonts.hdf5'):
    """Render the single font at ``path`` into row 0 of the ``'fonts'`` dataset.

    The font is rendered with ``read_font`` using its default character
    set (``s_ascii``). The HDF5 file ``h5dfP`` and its ``'fonts'`` dataset
    are created when missing. If the font cannot be rendered a message is
    printed and the HDF5 file is not opened or modified.

    Args:
        path: Path of the font file to render.
        h5dfP: Path of the HDF5 file.
    """
    label = 'fonts'
    try:
        data = read_font(path)
    except Exception as exc:
        print("Can't able to read", path, '-', exc)
        return

    # read_font() above used its default character set, s_ascii.
    dshape = (1, len(s_ascii), h, w)
    f = get_h5py(h5dfP, dshape, label)
    try:
        try:
            dset = f[label]
        except KeyError:
            # Existing file that does not contain the dataset yet.
            f = new_dataset(f, dshape, label)
            dset = f[label]
        dset[0] = data
        f.flush()
    finally:
        f.close()
if __name__ == "__main__":
    # Build one dataset per character set from the same font directory.
    for charset, name in ((s_ascii, 'ascii'), (s_hangul, 'hangul')):
        create_dataset(path='../font/fonts', chars=charset, label=name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I worked on this three years ago, for:
https://gall.dcinside.com/mgallery/board/view/?id=github&no=20449
https://gall.dcinside.com/mgallery/board/view/?id=github&no=20460