rmela · August 28, 2020 03:22
diff --git a/coding challenge b/coding challenge
 Coding challenge.

 Create a DataCapture class that queries statistics about an input stream of numbers.

 Assume that the input stream consists of positive integers in the range 0 to 1000.

 Class must implement the following methods:

    add(n) - add an input element to collection in O(1) time
    build() - create summary information on the collection in O(log(n)) time
    less(n) - report the number of input elements less than N in O(1) time
    greater(n) - report the number of input elements less than N in O(1) time
    between(n1,n2) - report the number of input between N1 and N1 in O(1) time

 I actually completed this one in well under the required time because it's not that far
 off from things I sometimes need to do at work.

 To run the code:

  python3 runme.py

 or

  python3 test.py
diff --git a/solution.py b/solution.py
 import collections

 class Stats:
    def __init__(self):
        self.occurrences = 0
        self.greater = 0
        self.less = 0

    def __str__(self):
        return "occurrences={0} greater={1} less={2}".format( self.occurrences, self.greater, self.less )

 class DataCapture:
    def __init__(self):
        self.stats = collections.defaultdict( Stats )
        self.size = 0
        self.max = 0

    def add(self, n):
        """Add a number to the collection to be analyzed"""
        stats = self.stats[n]
        stats.occurrences += 1
        self.size += 1
        if n > self.max:
            self.max = n

    def build_stats( self ):
        """Generate cached statistics information for O(1) statistics queries"""
        less = 0
        greater = self.size
        for n in range( self.max + 1  ):
            stats = self.stats[n]
            greater = greater - stats.occurrences
            stats.greater = greater
            stats.less = less
            less = less + stats.occurrences
            
    def __str__(self):
        """Print string representation of built DataCapture stats.   Not practical for much beyond coding challenge example"""
        arr = map( lambda key: "n={0} {1}".format( key, self.stats[key]  ), sorted(self.stats ) )
        return "\n".join(arr)

    def between(self,start,finish):
        """Return the number of entries with a value between start and finish"""
        start = self.stats[start]
        finish = self.stats[finish]
        return self.size - finish.greater - start.less

    def less( self, n ):
        """Return the number of entries with a value less than n"""
        return self.stats[n].less

    def greater( self, n ):
        """Return the number of entries with a value greater than n"""
        return self.stats[n].greater

diff --git a/test.py b/test.py
 #!/usr/bin/env python3
 import sys
 sys.path.append('.')
 import unittest
 import stats



 def createDataCollector( datasource ):
    dc = stats.DataCapture()
    for n in map(int, datasource): dc.add( n )
    dc.build_stats()
    return dc

 class Foo( unittest.TestCase ):

    def setUp(self):
        self.dc = createDataCollector( [3, 9, 3, 4, 6 ] )

    def test_less(self):
        self.assertEqual( self.dc.less(4), 2 )

    def test_greater(self):
        self.assertEqual( self.dc.greater(5), 2 )

    def test_between(self):
        self.assertEqual( self.dc.between( 3, 6 ), 4  )

 if __name__ == '__main__':
    unittest.main()
	Coding challenge.

	Create a DataCapture class that queries statistics about an input stream of numbers.

	Assume that the input stream consists of positive integers in the range 0 to 1000.

	Class must implement the following methods:

	add(n) - add an input element to collection in O(1) time
	build() - create summary information on the collection in O(log(n)) time
	less(n) - report the number of input elements less than N in O(1) time
	greater(n) - report the number of input elements less than N in O(1) time
	between(n1,n2) - report the number of input between N1 and N1 in O(1) time

	I actually completed this one in well under the required time because it's not that far
	off from things I sometimes need to do at work.

	To run the code:

	python3 runme.py

	or

	python3 test.py
	import collections

	class Stats:
	def __init__(self):
	self.occurrences = 0
	self.greater = 0
	self.less = 0

	def __str__(self):
	return "occurrences={0} greater={1} less={2}".format( self.occurrences, self.greater, self.less )

	class DataCapture:
	def __init__(self):
	self.stats = collections.defaultdict( Stats )
	self.size = 0
	self.max = 0

	def add(self, n):
	"""Add a number to the collection to be analyzed"""
	stats = self.stats[n]
	stats.occurrences += 1
	self.size += 1
	if n > self.max:
	self.max = n

	def build_stats( self ):
	"""Generate cached statistics information for O(1) statistics queries"""
	less = 0
	greater = self.size
	for n in range( self.max + 1 ):
	stats = self.stats[n]
	greater = greater - stats.occurrences
	stats.greater = greater
	stats.less = less
	less = less + stats.occurrences

	def __str__(self):
	"""Print string representation of built DataCapture stats. Not practical for much beyond coding challenge example"""
	arr = map( lambda key: "n={0} {1}".format( key, self.stats[key] ), sorted(self.stats ) )
	return "\n".join(arr)

	def between(self,start,finish):
	"""Return the number of entries with a value between start and finish"""
	start = self.stats[start]
	finish = self.stats[finish]
	return self.size - finish.greater - start.less

	def less( self, n ):
	"""Return the number of entries with a value less than n"""
	return self.stats[n].less

	def greater( self, n ):
	"""Return the number of entries with a value greater than n"""
	return self.stats[n].greater
	#!/usr/bin/env python3
	import sys
	sys.path.append('.')
	import unittest
	import stats



	def createDataCollector( datasource ):
	dc = stats.DataCapture()
	for n in map(int, datasource): dc.add( n )
	dc.build_stats()
	return dc

	class Foo( unittest.TestCase ):

	def setUp(self):
	self.dc = createDataCollector( [3, 9, 3, 4, 6 ] )

	def test_less(self):
	self.assertEqual( self.dc.less(4), 2 )

	def test_greater(self):
	self.assertEqual( self.dc.greater(5), 2 )

	def test_between(self):
	self.assertEqual( self.dc.between( 3, 6 ), 4 )

	if __name__ == '__main__':
	unittest.main()