Created
September 22, 2022 14:00
-
-
Save alanbchristie/25dca2d4d0b43e0f195d88007ecc095b to your computer and use it in GitHub Desktop.
Optimised group compression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """A recursive Python 3 utility to combine ('contract') a numerical sequence of | |
| 'objects' ensuring that combinations of consecutive objects only occurs once. | |
| The sequence of objects is referred to by a numerical index with a minimum value of | |
| '1' and a contiguous unbroken sequence of values up to 'N'. | |
| This optimises the calculation of different combinations by ensuring that | |
| no two consecutive combinations are computed more than once. The user is expected to | |
| iteratively combine 'M' groups of objects using different 'partitions'. | |
| Given a sequence of 100 'objects' and a maximum of 3 groups the user can calculate | |
| the combination of objects got the combinations of objects form 1-25, 26-80 and | |
| 81-100 like this: - | |
| >>> contractor = Contractor(num_groups=3, max_objects=100) | |
| >>> contraction_1 = contractor.contract(1, 25) | |
| >>> contraction_2 = contractor.contract(26, 80) | |
| >>> contraction_3 = contractor.contract(81, 100) | |
| The advantage of the implementation provided here is that the contraction calculations | |
| for any two consecutive object pairs will only be performed once, on the understanding | |
| that the actual contraction calculation is too computationally expensive to repeat. | |
| This example stores combinations in memory. | |
| """ | |
| # -------------------------------------------------------------------------------------- | |
| # MIT License | |
| # | |
| # Copyright (c) 2022 Alan B. Christie | |
| # | |
| # Permission is hereby granted, free of charge, to any person obtaining a copy | |
| # of this software and associated documentation files (the "Software"), to deal | |
| # in the Software without restriction, including without limitation the rights | |
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| # copies of the Software, and to permit persons to whom the Software is | |
| # furnished to do so, subject to the following conditions: | |
| # | |
| # The above copyright notice and this permission notice shall be included in all | |
| # copies or substantial portions of the Software. | |
| # | |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| # SOFTWARE. | |
| # -------------------------------------------------------------------------------------- | |
| from typing import Any, Dict, Tuple | |
| def get_object(index: int) -> Any: | |
| """Returns the objects at the given index (1..n). In this trivial case | |
| the object is simply a number which is the value of the index. | |
| """ | |
| assert index > 0 | |
| return index | |
| def contract(a: Any, b: int) -> Any: | |
| """Application-specific contraction logic. | |
| Here we simply add the two values. They could be objects that | |
| are contracted in other ways. Regardless, contract them and return the result. | |
| """ | |
| return a + get_object(b) | |
| class Contractor: | |
| """The contractor is a class to optimise tha calculation of the combination of | |
| groups of objects ensuring no two consecutive calculations are performed | |
| more than once. | |
| """ | |
| def __init__(self, *, groups: int, objects: int): | |
| """Initialise the contractor. | |
| __contractions is a map of contractions indexed by the combination range's | |
| start and finish value, i.e. '1-7'. | |
| """ | |
| self.__contractions: Dict[str, int] = {} | |
| self.__num_groups: int = groups | |
| self.__max_objects: int = objects | |
| # Start by build the 'backwards' contractions, e.g. for | |
| # a sequence with max_position of 10 and 3 groups | |
| # we build 7 contractions 9-10, 8-10, 7-10, 6-10, 5-10, 4-10, 3-10 | |
| # The 'start' for the final group cannot be less than groups, | |
| # i.e. if we have 3 groups then the earliest group 3 can start | |
| # is at index 3. | |
| start: int = groups | |
| finish: int = objects | |
| _ = self.contract(start, finish, reverse=True) | |
| def contract(self, a: int, b: int, *, reverse: bool = False) -> Any: | |
| """Calculate the contraction for the contiguous sequence | |
| 'a' to 'b' (inclusive). 'a' must be greater than 0 and not greater than 'b' | |
| and 'b' must be less than or equal to the 'max_objects'. | |
| 'reverse' build the right-hand side, and is run automatically | |
| when the object is initialised. | |
| """ | |
| assert a > 0 | |
| assert a <= b | |
| assert b <= self.__max_objects | |
| if a == b: | |
| # Only one member in the group. | |
| # Nothing to calculate, nothing to store. | |
| return get_object(a) | |
| target: str = f"{a}-{b}" | |
| if target in self.__contractions: | |
| # We've already calculated this range (a-b) | |
| return self.__contractions[target] | |
| if a + 1 == b: | |
| # A new consecutive pair (e.g. 4-5) | |
| # Doesn't matter if it's reverse or not | |
| value = contract(get_object(a), b) | |
| else: | |
| # A new non-consecutive range (e.g. 1-5) | |
| if reverse: | |
| # Shrink from the left | |
| new_a: int = a + 1 | |
| value = contract(self.contract(new_a, b, reverse=True), a) | |
| else: | |
| # Shrink from the right | |
| new_b: int = b - 1 | |
| value = contract(self.contract(a, new_b), b) | |
| self.__contractions[target] = value | |
| return value | |
| @property | |
| def size(self): | |
| """Return the size of the object. | |
| """ | |
| return len(self.__contractions) | |
| if __name__ == "__main__": | |
| # Prepare a contractor for three groups and a sequence of 1 to 100. | |
| # This results in 97 initial contractions. | |
| num_groups: int = 3 | |
| max_objects: int = 100 | |
| c: Contractor = Contractor(groups=num_groups, objects=max_objects) | |
| assert c.size == 97 | |
| # Now just calculate contraction groups. | |
| # No contraction combination should be calculated more than once. | |
| # Last (largest) group should already be done, | |
| # i.e. calculating anything from the last group shouldn't incur any | |
| # additional contractions... | |
| a: int = num_groups | |
| b: int = max_objects | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 5_047 | |
| assert c.size == 97 | |
| # In the next call (the largest left-hand sequence for a given number of | |
| # groups) will result in a large number of contractions being built. | |
| a: int = 1 | |
| b: int = max_objects - num_groups + 1 | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 4_851 | |
| assert c.size == 194 | |
| # Do the same thing again - no new contraction will be calculated, | |
| # we've done this sequence already. | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 4_851 | |
| assert c.size == 194 | |
| # Here sequence 4 to 10 has not been seen before | |
| # so 6 new contractions need to be calculated (4-5, 4-6, 4-7, 4-8, 4-9, 4-10) | |
| a = 4 | |
| b = 10 | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 49 | |
| assert c.size == 200 | |
| # Getting same group again but nothing is needed. | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 49 | |
| assert c.size == 200 | |
| # Here sequence 3 to 9 has not been seen before | |
| # so 4 new contractions need to be calculated (3-4, 3-5, 3-6, 3-7) | |
| a = 3 | |
| b = 7 | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 25 | |
| assert c.size == 204 | |
| # We've done 4-7 earlier... | |
| a = 4 | |
| b = 7 | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 22 | |
| assert c.size == 204 | |
| # We have not done 5-7 (that will add two new contractions)... | |
| a = 5 | |
| b = 7 | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == 18 | |
| assert c.size == 206 | |
| # Groups of 1 result in no contractions | |
| # Contract something we've not seen before | |
| a = max_objects | |
| b = max_objects | |
| result = c.contract(a, b) | |
| print(f"{a}-{b} = {result} ({c.size} contractions)") | |
| assert result == max_objects | |
| assert c.size == 206 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment