Skip to content

Instantly share code, notes, and snippets.

@hiszpanski
Last active March 2, 2017 02:13
Show Gist options
  • Save hiszpanski/3e27e698e2df2307236f92ee313f82d9 to your computer and use it in GitHub Desktop.
Save hiszpanski/3e27e698e2df2307236f92ee313f82d9 to your computer and use it in GitHub Desktop.
Simple tool for pulling images from an S3 bucket and cropping them
"""
Tool for fetching and cropping sight images
"""
import boto3
import os
import sys
# Module-level S3 client, created at import time and used by fetch()
s3 = boto3.client('s3')
# Directory containing this script (symlinks resolved by realpath). Despite
# the name, this is not the process's current working directory.
cwd = os.path.dirname(os.path.realpath(__file__))
# Sub-directory (next to this script) into which fetch() downloads images
# and from which crop() reads them
fetchdir = os.path.join(cwd, 'fetch')
# Sub-directory (next to this script) into which crop() writes object crops
cropdir = os.path.join(cwd, 'crop')
def fetch(json, bucket):
    """Download the image described by a decoded JSON record from S3.

    The object is read from the key ``<serial_number>/<tag>`` and stored
    locally as ``<fetchdir>/<tag>``. ``fetchdir`` is created on first use.

    Arguments:
        json: Decoded JSON object returned by Sight; its 'serial_number' and
            'tag' entries are used to build the S3 key and local filename
        bucket: Name of the S3 bucket to download from
    """
    # Make sure the download directory is there before writing into it
    if not os.path.exists(fetchdir):
        os.makedirs(fetchdir)

    # Work out where the image lives remotely and where it goes locally
    serial_number = json['serial_number']
    tag = json['tag']
    key = "{serial_number}/{tag}".format(serial_number=serial_number, tag=tag)
    destination = os.path.join(fetchdir, tag)

    # Download file
    s3.download_file(bucket, key, destination)
def crop(json, min_confidence=0, min_height=0):
    """Crop detected objects out of a previously fetched image.

    Opens ``<fetchdir>/<tag>`` and, for every entry of ``json['objects']``
    that passes both thresholds, saves the object's bounding box as its own
    file in ``cropdir`` (created on first use). Each crop is named
    ``<tag stem>-<x0>_<y0>_<x1>_<y1><tag extension>``.

    Arguments:
        json: Decoded JSON object with a 'tag' filename and an 'objects'
            list. Each object supplies 'confidence', 'x', 'y', 'width' and
            'height'; x/y are used as the box centre and all four values as
            fractions of the image dimensions.
        min_confidence: Objects whose integer confidence is below this value
            are skipped. None is treated as 0 (no threshold).
        min_height: Objects whose clamped pixel height is below this value
            are skipped. None is treated as 0 (no threshold).

    Failures are reported on stdout instead of being raised: an IOError
    (image cannot be opened, or a crop cannot be written) and a TypeError
    (malformed field values) both abandon the current image.
    """
    from PIL import Image

    # The command line passes None when a threshold flag is omitted. Treat
    # that as "no threshold" rather than relying on Python 2's None ordering,
    # which raises TypeError on Python 3.
    if min_confidence is None:
        min_confidence = 0
    if min_height is None:
        min_height = 0

    # Create sub-directory, if does not exist
    if not os.path.exists(cropdir):
        os.makedirs(cropdir)

    try:
        # Open image
        with Image.open(os.path.join(fetchdir, json['tag'])) as im:
            # Get image dimensions (in pixels)
            width, height = im.size

            # splitext() keeps the leading dot on the extension ('.jpg'), so
            # the filename template below must not add another one
            stem, ext = os.path.splitext(json['tag'])

            # Iterate through objects (i.e. boxes); a record whose 'objects'
            # is None simply has nothing to crop
            for o in json['objects'] or ():
                # Skip objects whose confidence is less than threshold
                if int(o['confidence']) < min_confidence:
                    continue

                # Get object coordinates (converting from string)
                x = float(o['x'])
                y = float(o['y'])
                w = float(o['width'])
                h = float(o['height'])

                # Convert from fractional centre/size to pixel corners,
                # clamped to the image.
                # NOTE(review): Pillow crop boxes exclude the right/lower
                # edge, so clamping to width-1/height-1 drops the last pixel
                # column/row of boxes touching the border. Kept as-is to
                # preserve existing crops; confirm whether this is intended.
                x0 = max(0, int((x - w / 2) * width))
                y0 = max(0, int((y - h / 2) * height))
                x1 = min(int((x + w / 2) * width), width - 1)
                y1 = min(int((y + h / 2) * height), height - 1)

                # Skip objects shorter than min_height pixels
                if y1 - y0 < min_height:
                    continue

                # Compute new filename for object crop
                name = os.path.join(
                    cropdir,
                    '{filename}-{x0}_{y0}_{x1}_{y1}{ext}'.format(
                        filename=stem, x0=x0, y0=y0, x1=x1, y1=y1, ext=ext))

                # Crop and save
                im.crop((x0, y0, x1, y1)).save(name)
    except IOError:
        # Usually the image was never fetched; an IOError while saving a
        # crop also ends up here. Parenthesised single-argument print works
        # on both Python 2 and Python 3.
        print('File {0} not found'.format(json['tag']))
    except TypeError:
        # Best effort: give up on this image, but say so instead of hiding it
        print('Skipping {0}: malformed object data'.format(json['tag']))
def _read_records(logfile):
    """Yield the decoded JSON payload of every usable line in a log file."""
    from json import loads

    with open(logfile) as log:
        for line in log:
            # Each line is five whitespace-separated prefix fields followed
            # by the JSON payload. Limiting the split keeps whitespace inside
            # the JSON from breaking it into extra fields.
            fields = line.split(None, 5)
            if len(fields) < 6:
                # Blank or truncated line: there is no payload to decode
                continue
            yield loads(fields[5])


def _has_confident_object(record, min_confidence):
    """Tell whether any object in record strictly exceeds min_confidence."""
    objects = record['objects']
    if not objects:
        return False
    # No threshold given on the command line: any object qualifies
    if min_confidence is None:
        return True
    return any(int(o['confidence']) > min_confidence for o in objects)


def main(argv=None):
    """Command-line entry point: fetch or crop the images listed in a log.

    Arguments:
        argv: Argument list to parse; None means use sys.argv[1:]

    For each log record with at least one object exceeding --min-confidence,
    the 'fetch' sub-command downloads the image from the given bucket and the
    'crop' sub-command cuts the detected objects out of the fetched image.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser()
    subparsers = parser.add_subparsers(dest='subcommand')
    # Python 3 makes sub-commands optional by default; require one so a bare
    # invocation reports a usage error instead of silently doing nothing
    subparsers.required = True

    # Fetch sub-command parser
    fetch_parser = subparsers.add_parser('fetch')
    # Downloading is impossible without a bucket, so fail at parse time
    fetch_parser.add_argument('-b', '--bucket', type=str, required=True)
    fetch_parser.add_argument('-mc', '--min-confidence', type=int)
    fetch_parser.add_argument('logfile')

    # Crop sub-command parser
    crop_parser = subparsers.add_parser('crop')
    crop_parser.add_argument('-mc', '--min-confidence', type=int)
    crop_parser.add_argument('-mh', '--min-height', type=int)
    crop_parser.add_argument('logfile')

    args = parser.parse_args(argv)

    for record in _read_records(args.logfile):
        # Skip images without objects exceeding min confidence
        if not _has_confident_object(record, args.min_confidence):
            continue
        if args.subcommand == 'fetch':
            fetch(record, args.bucket)
        elif args.subcommand == 'crop':
            # Omitted thresholds arrive as None; pass 0 so the comparisons
            # inside crop() are between numbers
            crop(record,
                 min_confidence=args.min_confidence or 0,
                 min_height=args.min_height or 0)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment