Last active
March 2, 2017 02:13
-
-
Save hiszpanski/3e27e698e2df2307236f92ee313f82d9 to your computer and use it in GitHub Desktop.
Simple tool for pulling images from an S3 bucket and cropping them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Tool for fetching and cropping sight images | |
""" | |
import boto3 | |
import os | |
import sys | |
# boto3 S3 client shared by fetch()
s3 = boto3.client('s3')

# Directory containing this script (after resolving the real path)
cwd = os.path.split(os.path.realpath(__file__))[0]

# Images downloaded from S3 are stored in this sub-directory
fetchdir = os.path.join(cwd, 'fetch')

# Sight crops are written to this sub-directory
cropdir = os.path.join(cwd, 'crop')
def fetch(json, bucket):
    """Download the image referenced by a Sight JSON object from S3.

    Arguments:
        json: JSON object returned by Sight; its 'serial_number' and 'tag'
            entries form the S3 key, and 'tag' names the local file
        bucket: S3 bucket to download from
    """
    # Make sure the download directory is there before writing into it
    if not os.path.exists(fetchdir):
        os.makedirs(fetchdir)

    # The S3 key has the form "<serial_number>/<tag>"
    key = "{serial_number}/{tag}".format(
        serial_number=json['serial_number'],
        tag=json['tag'])

    # The local copy keeps the tag as its file name
    destination = os.path.join(fetchdir, json['tag'])

    s3.download_file(bucket, key, destination)
def _pixel_box(obj, width, height):
    """Convert an object's fractional centre/size box to clamped pixel corners."""
    # Values are converted with float(); the original code notes they arrive
    # as strings
    cx = float(obj['x'])
    cy = float(obj['y'])
    w = float(obj['width'])
    h = float(obj['height'])

    x0 = max(0, int((cx - w / 2) * width))
    y0 = max(0, int((cy - h / 2) * height))
    # PIL crop boxes exclude the right/lower edge, so clamp to the full image
    # size; clamping to size-1 would drop the last pixel column/row
    x1 = min(int((cx + w / 2) * width), width)
    y1 = min(int((cy + h / 2) * height), height)
    return x0, y0, x1, y1


def crop(json, min_confidence=0, min_height=0):
    """Crop detected objects out of a previously fetched image.

    Opens the image named json['tag'] in the fetch sub-directory and saves
    one crop per accepted object into the crop sub-directory. Each crop is
    named "<tag stem>-<x0>_<y0>_<x1>_<y1><tag extension>".

    Arguments:
        json: JSON object returned by Sight; reads 'tag' and 'objects', and
            each object's 'confidence', 'x', 'y', 'width' and 'height'
        min_confidence: objects with a lower confidence are skipped;
            None means no threshold
        min_height: objects whose crop is fewer pixels tall are skipped;
            None means no threshold
    """
    from PIL import Image

    # argparse hands over None when an option is omitted; comparing an int
    # with None raises TypeError on Python 3, so normalise first
    if min_confidence is None:
        min_confidence = 0
    if min_height is None:
        min_height = 0

    # Create sub-directory, if does not exist
    if not os.path.exists(cropdir):
        os.makedirs(cropdir)

    tag = json['tag']
    # splitext keeps the leading dot in the extension
    stem, ext = os.path.splitext(tag)

    try:
        with Image.open(os.path.join(fetchdir, tag)) as im:
            # Image dimensions (in pixels)
            width, height = im.size

            # Iterate through objects (i.e. boxes); tolerate a null list
            for o in json['objects'] or ():
                # Skip objects whose confidence is less than threshold
                if int(o['confidence']) < min_confidence:
                    continue

                x0, y0, x1, y1 = _pixel_box(o, width, height)

                # An empty box cannot be saved as an image
                if x1 <= x0 or y1 <= y0:
                    continue

                # Skip objects shorter than the requested minimum height
                if y1 - y0 < min_height:
                    continue

                # ext already carries its dot, so no extra '.' in the pattern
                name = os.path.join(
                    cropdir,
                    '{filename}-{x0}_{y0}_{x1}_{y1}{ext}'.format(
                        filename=stem, x0=x0, y0=y0, x1=x1, y1=y1, ext=ext))

                # Crop and save
                im.crop((x0, y0, x1, y1)).save(name)
    except IOError:
        # Raised when the image cannot be opened (or a crop cannot be
        # written); kept best-effort so one bad image does not stop a batch
        print('File {0} not found'.format(tag))
    except TypeError:
        # Malformed record (e.g. a null coordinate); report it instead of
        # dropping the image silently
        print('Skipping {0}: malformed object data'.format(tag))
def _has_confident_object(record, min_confidence):
    """Return True if any object in record reaches the confidence threshold."""
    # Uses >= so the filter agrees with crop(), which only skips objects
    # strictly below the threshold; also tolerates an empty or null list
    return any(int(o['confidence']) >= min_confidence
               for o in record['objects'] or ())


def _confident_records(logfile, min_confidence):
    """Yield JSON records from logfile holding a sufficiently confident object."""
    from json import loads

    with open(logfile) as log:
        # Iterate lazily instead of reading the whole log into memory
        for line in log:
            # Blank lines (e.g. a trailing newline) carry no record
            if not line.strip():
                continue
            # The JSON payload is the sixth whitespace-separated field; limit
            # the split so whitespace inside the payload does not break it
            _, _, _, _, _, data = line.split(None, 5)
            record = loads(data)
            if _has_confident_object(record, min_confidence):
                yield record


if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser()
    subparsers = parser.add_subparsers(dest='subcommand')
    # Python 3 treats sub-commands as optional by default; insist on one so a
    # bare invocation does not silently do nothing
    subparsers.required = True

    # Fetch sub-command parser
    fetch_parser = subparsers.add_parser('fetch')
    # Without a bucket the download cannot work, so fail at parse time
    fetch_parser.add_argument('-b', '--bucket', type=str, required=True)
    # Default of 0 keeps the threshold an int when the option is omitted
    fetch_parser.add_argument('-mc', '--min-confidence', type=int, default=0)
    fetch_parser.add_argument('logfile')

    # Crop sub-command parser
    crop_parser = subparsers.add_parser('crop')
    crop_parser.add_argument('-mc', '--min-confidence', type=int, default=0)
    crop_parser.add_argument('-mh', '--min-height', type=int, default=0)
    crop_parser.add_argument('logfile')

    args = parser.parse_args()

    if args.subcommand == 'fetch':
        # Only images with an object at or above min confidence are fetched
        for record in _confident_records(args.logfile, args.min_confidence):
            fetch(record, args.bucket)
    elif args.subcommand == 'crop':
        # Only images with an object at or above min confidence are cropped
        for record in _confident_records(args.logfile, args.min_confidence):
            crop(record,
                 min_confidence=args.min_confidence,
                 min_height=args.min_height)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment