Skip to content

Instantly share code, notes, and snippets.

@davipatti
Created August 14, 2024 16:43
Show Gist options
  • Save davipatti/9f06ed8dae6c6da32d49d57a760d0e85 to your computer and use it in GitHub Desktop.
Save davipatti/9f06ed8dae6c6da32d49d57a760d0e85 to your computer and use it in GitHub Desktop.
Find maximal subsets
#!/usr/bin/env python3
from collections import defaultdict
from itertools import chain
def find_maximal_subsets(sets: list[set]) -> set[frozenset]:
"""
Find the maximal subsets of items that always appear together in a group of sets.
Args:
sets (list[set]): Group of sets.
Returns:
set[frozenset]: A set of frozensets representing the maximal subsets of items that always
appear together.
"""
# Step 1: Create a dictionary to map each item to the sets it appears in
item_to_sets = defaultdict(set)
for i, s in enumerate(sets):
for item in s:
item_to_sets[item].add(i)
# Step 2: Identify all possible combinations of items and track their occurrences
comb_to_sets = defaultdict(set)
for s in sets:
for size in range(2, len(s) + 1):
for comb in combinations(s, size):
comb_set = frozenset(comb)
for i, s2 in enumerate(sets):
if comb_set.issubset(s2):
comb_to_sets[comb_set].add(i)
# Step 3: Identify maximal sets of items that always appear together
valid_subsets = set()
for comb, indices in comb_to_sets.items():
if all(item_to_sets[item] == indices for item in comb):
valid_subsets.add(comb)
# Step 4: Filter out subsets that are part of larger valid subsets
maximal_subsets = set(valid_subsets)
for subset in valid_subsets:
for larger_subset in valid_subsets:
if subset != larger_subset and subset.issubset(larger_subset):
maximal_subsets.discard(subset)
# Step 5: Add singletons that are not part of any valid subset
all_items_in_subsets = set(chain.from_iterable(maximal_subsets))
all_items = set(item for s in sets for item in s)
singletons = {item for item in all_items if item not in all_items_in_subsets}
# Convert singletons to frozensets and add to maximal_subsets
maximal_subsets.update(frozenset([singleton]) for singleton in singletons)
return maximal_subsets
#!/usr/bin/env python3
import unittest
from find_maximal_subsets import find_maximal_subsets
class TestFindMaximalSubsets(unittest.TestCase):
def test_empty(self):
self.assertEqual(find_maximal_subsets([]), set())
def test_singletons(self):
self.assertEqual(find_maximal_subsets([{1}, {2}]), {frozenset([1]), frozenset([2])})
def test_pair_found_together(self):
"""
1 and 2 are always found together.
"""
result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}])
expect = {frozenset([1, 2]), frozenset([3]), frozenset([4])}
self.assertEqual(result, expect)
def test_pair_found_together_with_extra(self):
"""
1 and 2 are always found together, but sometimes with 5 as well.
"""
result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}, {1, 2, 5}])
expect = {frozenset([1, 2]), frozenset([3]), frozenset([4]), frozenset([5])}
self.assertEqual(result, expect)
def test_pair_not_always_found_together(self):
"""
Here F and G are sometimes found together, but F occurs alone once.
"""
result = find_maximal_subsets([{"F"}, {"F", "G"}, {"F", "G"}])
expect = {frozenset(["F"]), frozenset(["G"])}
self.assertEqual(result, expect)
def test_larger_example(self):
result = find_maximal_subsets([{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}])
expect = {
frozenset([1, 2]),
frozenset([4, 5]),
frozenset([3]),
frozenset([6]),
frozenset([7]),
}
self.assertEqual(result, expect)
def test_larger_example_with_group_of_5(self):
"""
Same as `test_larger_example` except here there's also a group of (8, 9, 10, 11, 12) that is
found together.
"""
result = find_maximal_subsets(
[{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}, {3, 8, 9, 10, 11, 12}]
)
expect = {
frozenset([1, 2]),
frozenset([4, 5]),
frozenset([3]),
frozenset([6]),
frozenset([7]),
frozenset([8, 9, 10, 11, 12]),
}
self.assertEqual(result, expect)
def test_larger_example_with_group_of_5_twice(self):
"""
Like `test_larger_example_with_group_of_5` except here the group of 5 occurs twice.
"""
result = find_maximal_subsets(
[
{1, 2, 3},
{1, 2},
{1, 2, 4, 5},
{4, 5, 6},
{6, 7},
{6, 7},
{3, 8, 9, 10, 11, 12},
{8, 9, 10, 11, 12, 1, 2},
]
)
expect = {
frozenset([1, 2]),
frozenset([4, 5]),
frozenset([3]),
frozenset([6]),
frozenset([7]),
frozenset([8, 9, 10, 11, 12]),
}
self.assertEqual(result, expect)
if __name__ == "__main__":
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment