Last active
May 16, 2019 15:47
-
-
Save precious/cf978a70191d3f6fcfae18862ff64a08 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from shapely.geometry import Polygon, Point | |
from PIL import Image | |
import os | |
import sys | |
import csv | |
import pickle | |
import json | |
import random | |
import string | |
# Command-line handling: the script requires exactly three positional
# arguments. With any other argument count the usage text is printed to
# stderr and the process exits with status 1.
if len(sys.argv) != 4:
    # Backslash-newline pairs inside the literal are line continuations, so
    # the long example lines are emitted as single lines.
    print('''===
usage: python {0} <source_data_file> <image_base_dir> <output_dir>
---
example 1: python3 {0} "/Users/vsevolodkulaga/Downloads/36a76ac517d1ae845c1b63e74a4cbf1aa2222a4e_P13 (1).txt" \
/Users/vsevolodkulaga/projects/ct /Users/vsevolodkulaga/projects/ct/out
(so if your relative image path is "images/13/00021.png" and its absolute path is \
"/Users/vsevolodkulaga/projects/ct/images/13/00021.png", you should specify image_base_dir \
parameter "/Users/vsevolodkulaga/projects/ct")
==='''.format(sys.argv[0]), file=sys.stderr)
    sys.exit(1)

# Module-level names read later by main().
source_data_file, image_base_dir, output_dir = sys.argv[1:]
def make_image_array(image_path):
    """Load an image file into a NumPy array, flattening colour to grayscale.

    Args:
        image_path: path of the image file to open with PIL.

    Returns:
        The image data as a NumPy array. If the decoded array has more than
        two dimensions (e.g. a multi-channel image), the image is converted
        to PIL mode ``'L'`` first so the result is two-dimensional;
        otherwise the decoded array is returned as-is.
    """
    # Image.open keeps the underlying file open; the context manager makes
    # sure the handle is released once the pixel data has been copied out.
    with Image.open(image_path) as image:
        image_array_2d = np.asarray(image)
        if image_array_2d.ndim > 2:  # need to convert to grayscale
            image_array_2d = np.asarray(image.convert('L'))
    return image_array_2d
def process_polygon(polygon_vertices, shape, target_array_2d=None):
    """Rasterize a polygon into a 0/1 mask array.

    Every cell ``[i][j]`` for which ``Point(i, j)`` is contained in the
    polygon is set to 1. Cells outside the polygon are left untouched, so
    calling this repeatedly with the same ``target_array_2d`` accumulates
    the union of several polygons.

    NOTE(review): the first array axis is used as the polygon's x coordinate
    and the second as y. Confirm this matches the coordinate convention of
    the annotation data (image arrays are commonly indexed [row][column]).

    Args:
        polygon_vertices: coordinate sequence passed to
            ``shapely.geometry.Polygon``.
        shape: shape of the mask to allocate when ``target_array_2d`` is None.
        target_array_2d: optional existing mask to draw into; it is modified
            in place.

    Returns:
        The mask array (``target_array_2d`` itself when one was supplied,
        otherwise a new ``int8`` array of the given shape).
    """
    polygon = Polygon(polygon_vertices)
    if target_array_2d is None:
        target_array_2d = np.zeros(shape, dtype=np.int8)
    if polygon.is_empty:
        # Nothing to draw; an empty geometry has no usable bounds.
        return target_array_2d

    minx, miny, maxx, maxy = polygon.bounds
    # contains() excludes the boundary, so only integer indices strictly
    # inside the bounding box can match. Clip that window to the array so
    # the loops visit just the candidate cells instead of the whole array.
    i_start = max(0, int(np.floor(minx)) + 1)
    i_stop = min(target_array_2d.shape[0], int(np.ceil(maxx)))
    j_start = max(0, int(np.floor(miny)) + 1)
    j_stop = min(target_array_2d.shape[1], int(np.ceil(maxy)))

    for i in range(i_start, i_stop):
        for j in range(j_start, j_stop):
            # Only ever set cells: writing 0 for misses would erase cells
            # filled by an earlier polygon whose bounding box overlaps.
            if polygon.contains(Point(i, j)):
                target_array_2d[i][j] = 1
    return target_array_2d
def main():
    """Convert annotation records into one pickle file per image.

    Reads ``source_data_file`` as a ``;``-delimited CSV whose columns are
    number, name, image_relative_path, json_data and timestamp. The
    ``json_data`` column is parsed as JSON and dispatched on its ``type``:

    * ``'global'``: stores the record's label under the ``'label'`` key.
    * ``'polygon'``: loads the image (once per image, stored under
      ``'volume'``) and rasterizes the polygon into a mask stored under the
      polygon's own label; polygons sharing a label share one mask array.
    * anything else: reported on stderr and skipped.

    Each per-image dict is then pickled to
    ``<output_dir>/<image key>__<suffix>.pkl``, where the image key is the
    relative path with ``/`` replaced by ``_`` and the 7-character random
    suffix is shared by all files written in one run.

    Exits with status 1 if ``image_base_dir`` is not a directory or a
    referenced image file does not exist.
    """
    header = ['number', 'name', 'image_relative_path', 'json_data', 'timestamp']
    if not os.path.isdir(image_base_dir):
        print('ERROR!', image_base_dir, 'is not a directory!', file=sys.stderr)
        sys.exit(1)

    # Make sure the output location exists before doing the slow polygon
    # rasterization, so a missing directory cannot fail the run at the end.
    os.makedirs(output_dir, exist_ok=True)

    images_dicts = {}
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for file objects.
    with open(source_data_file, newline='') as input_file:
        reader = csv.DictReader(input_file, header, delimiter=';')
        for row in reader:
            data = json.loads(row['json_data'])
            relative_path = row['image_relative_path']
            image_dict_key = relative_path.replace('/', '_')
            current_image_dict = images_dicts.setdefault(image_dict_key, {})
            record_type = data['type']

            if record_type == 'global':
                current_image_dict['label'] = data['label']
            elif record_type == 'polygon':
                label = data['label']
                print('processing polygon with label "{0}" for {1}'.format(label, relative_path))
                image_full_path = os.path.join(image_base_dir, relative_path)
                if not os.path.isfile(image_full_path):
                    print('ERROR!',
                          'No such file: "{0}". Is this a correct images dir: "{1}"?'.format(image_full_path,
                                                                                             image_base_dir),
                          file=sys.stderr)
                    sys.exit(1)
                # Load the image only once per image key.
                if 'volume' not in current_image_dict:
                    current_image_dict['volume'] = make_image_array(image_full_path)
                # NOTE(review): a polygon labelled 'label' or 'volume' would
                # collide with the reserved keys of this dict - confirm such
                # labels cannot occur in the annotation data.
                current_image_dict[label] = process_polygon(data['data'],
                                                            current_image_dict['volume'].shape,
                                                            current_image_dict.get(label))
            else:
                print('skipping unknown record type:', record_type, file=sys.stderr)

    # write everything to files
    random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=7))
    for image_dict_key, current_image_dict in images_dicts.items():
        output_path = os.path.join(output_dir, '{0}__{1}.pkl'.format(image_dict_key, random_suffix))
        with open(output_path, 'wb') as output_file:
            pickle.dump(current_image_dict, output_file)
        print('=> generated file', output_path)
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment