Nelsonochoam · August 28, 2017 21:15
diff --git a/coding_challenge.py b/coding_challenge.py
 """
 Osprey Code Challenge

 We have a report of ~1000 images taken by 10 cameras in the past month.
 Each image metadata object contains a 'camera_id' and 'size' property.
 For example:

    {
        "camera_id": 1,
        "size": 42048
    }

 We would like to be able to query this data to answer some simple
 questions:
    - Which cameras have used the most data?
    - Which cameras have the highest number of images?
    - What are the largest images per camera?

 Please create an ImageManager class to support these needs. The class
 should take in a path to a local JSON file when instantiated. The
 ImageManager class should implement the following methods:
    - get_cameras_with_most_images(reverse=False): returns an iterable
        of cameras sorted by the number of images taken per camera
        (descending).
    - get_cameras_with_most_data_usage(reverse=False): returns an
        iterable of cameras sorted by the sum of sizes of images
        taken per camera (descending).
    - get_largest_images(reverse=False): returns an iterable of the
        largest images taken by each camera, sorted by size (descending).
        Note that only one image will be returned per camera.

 Each method takes an optional reverse keyword argument. If set to True, the
 iterable returned will be in reversed order.

 Please do your best to consider performance and legibility in your
 implementation. Assume that the solution might be used on datasets up to
 10 million images. Additionally, assume that this code will be going in
 to production.  You may choose to use either Python 2.7.x or Python
 3.x in your solution.

 Please provide unit tests to confirm your solution works.
 """

 import json
 from collections import Counter, defaultdict


 # Write your implementation here:
 class ImageManager(object):
     """Answer per-camera aggregate queries over an image metadata report.

     The report is a JSON document whose top-level object has an ``images``
     key holding a list of ``{"camera_id": ..., "size": ...}`` objects. Each
     query makes a single pass over that list and returns a list of dicts,
     largest value first unless ``reverse=True`` is passed.

     Runs unchanged on Python 2.7 and Python 3.
     """

     def __init__(self, path):
         """Load the image metadata list from the JSON file at ``path``.

         Raises whatever ``open`` / ``json.load`` raise for an unreadable or
         malformed file, and ``KeyError`` when the top-level object has no
         ``images`` key.
         """
         with open(path) as report:
             self.data = json.load(report)['images']

     @staticmethod
     def _ranked(per_camera, field, reverse):
         """Return ``[{'camera_id': id, field: value}, ...]`` sorted by value.

         The order is descending by default; ``reverse=True`` makes it
         ascending.
         """
         rows = [{'camera_id': camera_id, field: value}
                 for camera_id, value in per_camera.items()]
         # sorted() stays stable when reverse=True, so this orders ties exactly
         # like sorting on the negated value, without needing to negate it.
         return sorted(rows, key=lambda row: row[field], reverse=not reverse)

     def get_cameras_with_most_images(self, reverse=False):
         """Return cameras ranked by how many images each one took.

         Each element is ``{'camera_id': ..., 'count': ...}``. Ordered from
         most to fewest images, or fewest to most when ``reverse`` is true.
         """
         image_counts = Counter(item['camera_id'] for item in self.data)
         return self._ranked(image_counts, 'count', reverse)

     def get_cameras_with_most_data_usage(self, reverse=False):
         """Return cameras ranked by the summed size of their images.

         Each element is ``{'camera_id': ..., 'size': ...}`` where ``size`` is
         the total over that camera's images. Ordered from largest to smallest
         total, or smallest to largest when ``reverse`` is true.
         """
         usage = defaultdict(int)
         for item in self.data:
             usage[item['camera_id']] += item['size']
         return self._ranked(usage, 'size', reverse)

     def get_largest_images(self, reverse=False):
         """Return the largest image size recorded for each camera.

         Each element is ``{'camera_id': ..., 'size': ...}``, one per camera.
         Ordered from largest to smallest, or smallest to largest when
         ``reverse`` is true.
         """
         largest = {}
         for item in self.data:
             camera_id = item['camera_id']
             size = item['size']
             # Seed with the first size seen instead of assuming 0, so the
             # result is always a size that actually occurs in the data.
             if camera_id not in largest or size > largest[camera_id]:
                 largest[camera_id] = size
         return self._ranked(largest, 'size', reverse)
	"""
	Osprey Code Challenge

	We have a report of ~1000 images taken by 10 cameras in the past month.
	Each image metadata object contains a 'camera_id' and 'size' property.
	For example:

	    {
	        "camera_id": 1,
	        "size": 42048
	    }

	We would like to be able to query this data to answer some simple
	questions:
	- Which cameras have used the most data?
	- Which cameras have the highest number of images?
	- What are the largest images per camera?

	Please create an ImageManager class to support these needs. The class
	should take in a path to a local JSON file when instantiated. The
	ImageManager class should implement the following methods:
	    - get_cameras_with_most_images(reverse=False): returns an iterable
	        of cameras sorted by the number of images taken per camera
	        (descending).
	    - get_cameras_with_most_data_usage(reverse=False): returns an
	        iterable of cameras sorted by the sum of sizes of images
	        taken per camera (descending).
	    - get_largest_images(reverse=False): returns an iterable of the
	        largest images taken by each camera, sorted by size (descending).
	        Note that only one image will be returned per camera.

	Each method takes an optional reverse keyword argument. If set to True, the
	iterable returned will be in reversed order.

	Please do your best to consider performance and legibility in your
	implementation. Assume that the solution might be used on datasets up to
	10 million images. Additionally, assume that this code will be going in
	to production. You may choose to use either Python 2.7.x or Python
	3.x in your solution.

	Please provide unit tests to confirm your solution works.
	"""

	import json
	from collections import Counter, defaultdict


	# Write your implementation here:
	class ImageManager(object):
	    """Answer per-camera aggregate queries over an image metadata report.

	    The report is a JSON document whose top-level object has an ``images``
	    key holding a list of ``{"camera_id": ..., "size": ...}`` objects. Each
	    query makes a single pass over that list and returns a list of dicts,
	    largest value first unless ``reverse=True`` is passed.

	    Runs unchanged on Python 2.7 and Python 3.
	    """

	    def __init__(self, path):
	        """Load the image metadata list from the JSON file at ``path``.

	        Raises whatever ``open`` / ``json.load`` raise for an unreadable or
	        malformed file, and ``KeyError`` when the top-level object has no
	        ``images`` key.
	        """
	        with open(path) as report:
	            self.data = json.load(report)['images']

	    @staticmethod
	    def _ranked(per_camera, field, reverse):
	        """Return ``[{'camera_id': id, field: value}, ...]`` sorted by value.

	        The order is descending by default; ``reverse=True`` makes it
	        ascending.
	        """
	        rows = [{'camera_id': camera_id, field: value}
	                for camera_id, value in per_camera.items()]
	        # sorted() stays stable when reverse=True, so this orders ties exactly
	        # like sorting on the negated value, without needing to negate it.
	        return sorted(rows, key=lambda row: row[field], reverse=not reverse)

	    def get_cameras_with_most_images(self, reverse=False):
	        """Return cameras ranked by how many images each one took.

	        Each element is ``{'camera_id': ..., 'count': ...}``. Ordered from
	        most to fewest images, or fewest to most when ``reverse`` is true.
	        """
	        image_counts = Counter(item['camera_id'] for item in self.data)
	        return self._ranked(image_counts, 'count', reverse)

	    def get_cameras_with_most_data_usage(self, reverse=False):
	        """Return cameras ranked by the summed size of their images.

	        Each element is ``{'camera_id': ..., 'size': ...}`` where ``size`` is
	        the total over that camera's images. Ordered from largest to smallest
	        total, or smallest to largest when ``reverse`` is true.
	        """
	        usage = defaultdict(int)
	        for item in self.data:
	            usage[item['camera_id']] += item['size']
	        return self._ranked(usage, 'size', reverse)

	    def get_largest_images(self, reverse=False):
	        """Return the largest image size recorded for each camera.

	        Each element is ``{'camera_id': ..., 'size': ...}``, one per camera.
	        Ordered from largest to smallest, or smallest to largest when
	        ``reverse`` is true.
	        """
	        largest = {}
	        for item in self.data:
	            camera_id = item['camera_id']
	            size = item['size']
	            # Seed with the first size seen instead of assuming 0, so the
	            # result is always a size that actually occurs in the data.
	            if camera_id not in largest or size > largest[camera_id]:
	                largest[camera_id] = size
	        return self._ranked(largest, 'size', reverse)