Skip to content

Instantly share code, notes, and snippets.

@hiepph
Last active August 6, 2019 08:11
Show Gist options
  • Save hiepph/def4e6378c3cf17f1ecfa9d70d22dde7 to your computer and use it in GitHub Desktop.
Save hiepph/def4e6378c3cf17f1ecfa9d70d22dde7 to your computer and use it in GitHub Desktop.
pypeln sample
from PIL import Image
from tqdm import tqdm
import sys
import os
from pypeln import process as pr
# python filter.py train 8
# Dataset split to check; also the folder that holds its images and the stem
# of the index file ``<phase>.txt``.
phase = sys.argv[1]

# (image path, label) pairs collected from the tab-separated index file.
l = []

# Read all lines up front so tqdm can show a total, and use a context manager
# so the index file is closed as soon as it has been read.
with open(f'{phase}.txt') as index_file:
    index_lines = index_file.readlines()

for line in tqdm(index_lines):
    line = line.strip()
    if not line:
        # A blank line (e.g. a trailing newline at end of file) carries no
        # entry; skip it instead of failing on the tuple unpacking below.
        continue
    im_name, label = line.split('\t')
    l.append((os.path.join(phase, im_name), label))
def try_to_read(pair):
    """Check that one image can be decoded and, if so, record it as valid.

    Args:
        pair: ``(im_path, label)`` tuple, where ``im_path`` is the image file
            to open and ``label`` is the text stored alongside it.

    Returns:
        None. For a readable image the line ``im_path<TAB>label`` is appended
        to ``{phase}_new.txt``; for a missing or undecodable image only an
        error message is printed.
    """
    im_path, label = pair
    try:
        # The conversion makes Pillow actually decode the pixel data, which is
        # what surfaces damaged files; the context manager releases the
        # underlying file handle afterwards.
        with Image.open(im_path) as im:
            im.convert('L')
    except FileNotFoundError:
        print('ERROR: not found', im_path)
    except OSError:
        print('ERROR: corrupted image', im_path)
    else:
        # Kept outside the ``try`` so a failure to write the output list is
        # not misreported as a corrupted image.
        with open(f'{phase}_new.txt', 'a') as out_file:
            out_file.write(f'{im_path}\t{label}\n')
# Second CLI argument: number of pypeln workers used to check the images.
n_workers = int(sys.argv[2])

# Apply try_to_read to every (path, label) pair, then drive the stage to
# completion.
stage = pr.each(try_to_read, l, workers=n_workers)
pr.run(stage)
import sys
import text_renderer
import os
import random
import uuid
import cv2
# from pypeln import process as pr
from pypeln import thread as pr
# 100 copies, 30 cores
# python msynth.py 100 30
out_dir = './mjsynth/'  # root folder for generated images and label files
n_copy = int(sys.argv[1])  # first CLI argument: repetitions of the word list
n_dir = 100  # number of numbered sub-folders images are spread across

# Disabled one-off setup kept from the original script: create the numbered
# sub-folders and touch one label file per folder.
# for i in range(n_dir):
#     idx = str(i+1)
#     d = os.path.join(out_dir, idx)
#     os.makedirs(d, exist_ok=True)
#     open(os.path.join(out_dir, idx+'.txt'), 'w') # touch

# Load the vocabulary with a context manager so the file handle is closed.
# NOTE(review): the file is opened with the platform default encoding --
# confirm that matches how viet.txt is stored.
with open('viet.txt') as vocab_file:
    vocab_lines = vocab_file.readlines()

# Same selection as before: drop the final character of every line (presumably
# the newline), discard the last entry, then repeat the list n_copy times.
words = [entry[:-1] for entry in vocab_lines][:-1] * n_copy
def gen_and_save(text):
    """Render ``text`` to an image and record it under a random sub-folder.

    Args:
        text: string handed to ``text_renderer.gen``.

    Returns:
        None. On success the image is saved as ``out_dir/<d_id>/<uuid>.jpg``
        and the line ``<d_id>/<uuid>.jpg<TAB>label`` is appended to
        ``out_dir/<d_id>.txt``. Nothing is recorded when the renderer returns
        no image or when the image could not be saved.
    """
    # Sub-folder names are 1-based: "1" .. str(n_dir).
    d_id = str(random.randrange(0, n_dir) + 1)
    im_id = str(uuid.uuid4())
    sys.stdout.write(d_id + ' ')  # echo the chosen folder id

    im, label = text_renderer.gen(text)
    if im is None:
        return

    # Make sure the target folder exists; exist_ok tolerates several workers
    # creating the same folder at once.
    os.makedirs(os.path.join(out_dir, d_id), exist_ok=True)

    im_path = os.path.join(d_id, im_id + '.jpg')
    # cv2.imwrite reports failure through its return value, so check it:
    # otherwise the label file would reference an image that was never saved.
    if not cv2.imwrite(os.path.join(out_dir, im_path), im):
        sys.stderr.write(f'ERROR: failed to write {im_path}\n')
        return

    # Append the label line and close the handle right away.
    with open(os.path.join(out_dir, d_id + '.txt'), 'a') as label_file:
        label_file.write(f'{im_path}\t{label}\n')
# Second CLI argument: number of pypeln workers used for rendering.
n_workers = int(sys.argv[2])

# Apply gen_and_save to every word, then drive the stage to completion.
stage = pr.each(gen_and_save, words, workers=n_workers)
pr.run(stage)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment