Created
August 14, 2024 16:43
-
-
Save davipatti/9f06ed8dae6c6da32d49d57a760d0e85 to your computer and use it in GitHub Desktop.
Find maximal subsets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import defaultdict | |
from itertools import chain | |
def find_maximal_subsets(sets: list[set]) -> set[frozenset]: | |
""" | |
Find the maximal subsets of items that always appear together in a group of sets. | |
Args: | |
sets (list[set]): Group of sets. | |
Returns: | |
set[frozenset]: A set of frozensets representing the maximal subsets of items that always | |
appear together. | |
""" | |
# Step 1: Create a dictionary to map each item to the sets it appears in | |
item_to_sets = defaultdict(set) | |
for i, s in enumerate(sets): | |
for item in s: | |
item_to_sets[item].add(i) | |
# Step 2: Identify all possible combinations of items and track their occurrences | |
comb_to_sets = defaultdict(set) | |
for s in sets: | |
for size in range(2, len(s) + 1): | |
for comb in combinations(s, size): | |
comb_set = frozenset(comb) | |
for i, s2 in enumerate(sets): | |
if comb_set.issubset(s2): | |
comb_to_sets[comb_set].add(i) | |
# Step 3: Identify maximal sets of items that always appear together | |
valid_subsets = set() | |
for comb, indices in comb_to_sets.items(): | |
if all(item_to_sets[item] == indices for item in comb): | |
valid_subsets.add(comb) | |
# Step 4: Filter out subsets that are part of larger valid subsets | |
maximal_subsets = set(valid_subsets) | |
for subset in valid_subsets: | |
for larger_subset in valid_subsets: | |
if subset != larger_subset and subset.issubset(larger_subset): | |
maximal_subsets.discard(subset) | |
# Step 5: Add singletons that are not part of any valid subset | |
all_items_in_subsets = set(chain.from_iterable(maximal_subsets)) | |
all_items = set(item for s in sets for item in s) | |
singletons = {item for item in all_items if item not in all_items_in_subsets} | |
# Convert singletons to frozensets and add to maximal_subsets | |
maximal_subsets.update(frozenset([singleton]) for singleton in singletons) | |
return maximal_subsets |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import unittest | |
from find_maximal_subsets import find_maximal_subsets | |
class TestFindMaximalSubsets(unittest.TestCase): | |
def test_empty(self): | |
self.assertEqual(find_maximal_subsets([]), set()) | |
def test_singletons(self): | |
self.assertEqual(find_maximal_subsets([{1}, {2}]), {frozenset([1]), frozenset([2])}) | |
def test_pair_found_together(self): | |
""" | |
1 and 2 are always found together. | |
""" | |
result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}]) | |
expect = {frozenset([1, 2]), frozenset([3]), frozenset([4])} | |
self.assertEqual(result, expect) | |
def test_pair_found_together_with_extra(self): | |
""" | |
1 and 2 are always found together, but sometimes with 5 as well. | |
""" | |
result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}, {1, 2, 5}]) | |
expect = {frozenset([1, 2]), frozenset([3]), frozenset([4]), frozenset([5])} | |
self.assertEqual(result, expect) | |
def test_pair_not_always_found_together(self): | |
""" | |
Here F and G are sometimes found together, but F occurs alone once. | |
""" | |
result = find_maximal_subsets([{"F"}, {"F", "G"}, {"F", "G"}]) | |
expect = {frozenset(["F"]), frozenset(["G"])} | |
self.assertEqual(result, expect) | |
def test_larger_example(self): | |
result = find_maximal_subsets([{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}]) | |
expect = { | |
frozenset([1, 2]), | |
frozenset([4, 5]), | |
frozenset([3]), | |
frozenset([6]), | |
frozenset([7]), | |
} | |
self.assertEqual(result, expect) | |
def test_larger_example_with_group_of_5(self): | |
""" | |
Same as `test_larger_example` except here there's also a group of (8, 9, 10, 11, 12) that is | |
found together. | |
""" | |
result = find_maximal_subsets( | |
[{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}, {3, 8, 9, 10, 11, 12}] | |
) | |
expect = { | |
frozenset([1, 2]), | |
frozenset([4, 5]), | |
frozenset([3]), | |
frozenset([6]), | |
frozenset([7]), | |
frozenset([8, 9, 10, 11, 12]), | |
} | |
self.assertEqual(result, expect) | |
def test_larger_example_with_group_of_5_twice(self): | |
""" | |
Like `test_larger_example_with_group_of_5` except here the group of 5 occurs twice. | |
""" | |
result = find_maximal_subsets( | |
[ | |
{1, 2, 3}, | |
{1, 2}, | |
{1, 2, 4, 5}, | |
{4, 5, 6}, | |
{6, 7}, | |
{6, 7}, | |
{3, 8, 9, 10, 11, 12}, | |
{8, 9, 10, 11, 12, 1, 2}, | |
] | |
) | |
expect = { | |
frozenset([1, 2]), | |
frozenset([4, 5]), | |
frozenset([3]), | |
frozenset([6]), | |
frozenset([7]), | |
frozenset([8, 9, 10, 11, 12]), | |
} | |
self.assertEqual(result, expect) | |
if __name__ == "__main__": | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment