davipatti · August 14, 2024 16:43
diff --git a/find_maximal_subsets.py b/find_maximal_subsets.py
 #!/usr/bin/env python3

 from collections import defaultdict
 from itertools import chain


 def find_maximal_subsets(sets: list[set]) -> set[frozenset]:
    """
    Find the maximal subsets of items that always appear together in a group of sets.

    Args:
        sets (list[set]): Group of sets.

    Returns:
        set[frozenset]: A set of frozensets representing the maximal subsets of items that always
            appear together.
    """
    # Step 1: Create a dictionary to map each item to the sets it appears in
    item_to_sets = defaultdict(set)
    for i, s in enumerate(sets):
        for item in s:
            item_to_sets[item].add(i)

    # Step 2: Identify all possible combinations of items and track their occurrences
    comb_to_sets = defaultdict(set)

    for s in sets:
        for size in range(2, len(s) + 1):
            for comb in combinations(s, size):
                comb_set = frozenset(comb)
                for i, s2 in enumerate(sets):
                    if comb_set.issubset(s2):
                        comb_to_sets[comb_set].add(i)

    # Step 3: Identify maximal sets of items that always appear together
    valid_subsets = set()
    for comb, indices in comb_to_sets.items():
        if all(item_to_sets[item] == indices for item in comb):
            valid_subsets.add(comb)

    # Step 4: Filter out subsets that are part of larger valid subsets
    maximal_subsets = set(valid_subsets)
    for subset in valid_subsets:
        for larger_subset in valid_subsets:
            if subset != larger_subset and subset.issubset(larger_subset):
                maximal_subsets.discard(subset)

    # Step 5: Add singletons that are not part of any valid subset
    all_items_in_subsets = set(chain.from_iterable(maximal_subsets))
    all_items = set(item for s in sets for item in s)
    singletons = {item for item in all_items if item not in all_items_in_subsets}

    # Convert singletons to frozensets and add to maximal_subsets
    maximal_subsets.update(frozenset([singleton]) for singleton in singletons)

    return maximal_subsets
diff --git a/test.py b/test.py
 #!/usr/bin/env python3

 import unittest
 from find_maximal_subsets import find_maximal_subsets


 class TestFindMaximalSubsets(unittest.TestCase):
    def test_empty(self):
        self.assertEqual(find_maximal_subsets([]), set())

    def test_singletons(self):
        self.assertEqual(find_maximal_subsets([{1}, {2}]), {frozenset([1]), frozenset([2])})

    def test_pair_found_together(self):
        """
        1 and 2 are always found together.
        """
        result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}])
        expect = {frozenset([1, 2]), frozenset([3]), frozenset([4])}
        self.assertEqual(result, expect)

    def test_pair_found_together_with_extra(self):
        """
        1 and 2 are always found together, but sometimes with 5 as well.
        """
        result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}, {1, 2, 5}])
        expect = {frozenset([1, 2]), frozenset([3]), frozenset([4]), frozenset([5])}
        self.assertEqual(result, expect)

    def test_pair_not_always_found_together(self):
        """
        Here F and G are sometimes found together, but F occurs alone once.
        """
        result = find_maximal_subsets([{"F"}, {"F", "G"}, {"F", "G"}])
        expect = {frozenset(["F"]), frozenset(["G"])}
        self.assertEqual(result, expect)

    def test_larger_example(self):
        result = find_maximal_subsets([{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}])
        expect = {
            frozenset([1, 2]),
            frozenset([4, 5]),
            frozenset([3]),
            frozenset([6]),
            frozenset([7]),
        }
        self.assertEqual(result, expect)

    def test_larger_example_with_group_of_5(self):
        """
        Same as `test_larger_example` except here there's also a group of (8, 9, 10, 11, 12) that is
        found together.
        """
        result = find_maximal_subsets(
            [{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}, {3, 8, 9, 10, 11, 12}]
        )
        expect = {
            frozenset([1, 2]),
            frozenset([4, 5]),
            frozenset([3]),
            frozenset([6]),
            frozenset([7]),
            frozenset([8, 9, 10, 11, 12]),
        }
        self.assertEqual(result, expect)

    def test_larger_example_with_group_of_5_twice(self):
        """
        Like `test_larger_example_with_group_of_5` except here the group of 5 occurs twice.
        """
        result = find_maximal_subsets(
            [
                {1, 2, 3},
                {1, 2},
                {1, 2, 4, 5},
                {4, 5, 6},
                {6, 7},
                {6, 7},
                {3, 8, 9, 10, 11, 12},
                {8, 9, 10, 11, 12, 1, 2},
            ]
        )
        expect = {
            frozenset([1, 2]),
            frozenset([4, 5]),
            frozenset([3]),
            frozenset([6]),
            frozenset([7]),
            frozenset([8, 9, 10, 11, 12]),
        }
        self.assertEqual(result, expect)

 if __name__ == "__main__":
    unittest.main()
	#!/usr/bin/env python3

	from collections import defaultdict
	from itertools import chain


	def find_maximal_subsets(sets: list[set]) -> set[frozenset]:
	"""
	Find the maximal subsets of items that always appear together in a group of sets.

	Args:
	sets (list[set]): Group of sets.

	Returns:
	set[frozenset]: A set of frozensets representing the maximal subsets of items that always
	appear together.
	"""
	# Step 1: Create a dictionary to map each item to the sets it appears in
	item_to_sets = defaultdict(set)
	for i, s in enumerate(sets):
	for item in s:
	item_to_sets[item].add(i)

	# Step 2: Identify all possible combinations of items and track their occurrences
	comb_to_sets = defaultdict(set)

	for s in sets:
	for size in range(2, len(s) + 1):
	for comb in combinations(s, size):
	comb_set = frozenset(comb)
	for i, s2 in enumerate(sets):
	if comb_set.issubset(s2):
	comb_to_sets[comb_set].add(i)

	# Step 3: Identify maximal sets of items that always appear together
	valid_subsets = set()
	for comb, indices in comb_to_sets.items():
	if all(item_to_sets[item] == indices for item in comb):
	valid_subsets.add(comb)

	# Step 4: Filter out subsets that are part of larger valid subsets
	maximal_subsets = set(valid_subsets)
	for subset in valid_subsets:
	for larger_subset in valid_subsets:
	if subset != larger_subset and subset.issubset(larger_subset):
	maximal_subsets.discard(subset)

	# Step 5: Add singletons that are not part of any valid subset
	all_items_in_subsets = set(chain.from_iterable(maximal_subsets))
	all_items = set(item for s in sets for item in s)
	singletons = {item for item in all_items if item not in all_items_in_subsets}

	# Convert singletons to frozensets and add to maximal_subsets
	maximal_subsets.update(frozenset([singleton]) for singleton in singletons)

	return maximal_subsets
	#!/usr/bin/env python3

	import unittest
	from find_maximal_subsets import find_maximal_subsets


	class TestFindMaximalSubsets(unittest.TestCase):
	def test_empty(self):
	self.assertEqual(find_maximal_subsets([]), set())

	def test_singletons(self):
	self.assertEqual(find_maximal_subsets([{1}, {2}]), {frozenset([1]), frozenset([2])})

	def test_pair_found_together(self):
	"""
	1 and 2 are always found together.
	"""
	result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}])
	expect = {frozenset([1, 2]), frozenset([3]), frozenset([4])}
	self.assertEqual(result, expect)

	def test_pair_found_together_with_extra(self):
	"""
	1 and 2 are always found together, but sometimes with 5 as well.
	"""
	result = find_maximal_subsets([{1, 2}, {1, 2}, {3}, {4}, {1, 2, 5}])
	expect = {frozenset([1, 2]), frozenset([3]), frozenset([4]), frozenset([5])}
	self.assertEqual(result, expect)

	def test_pair_not_always_found_together(self):
	"""
	Here F and G are sometimes found together, but F occurs alone once.
	"""
	result = find_maximal_subsets([{"F"}, {"F", "G"}, {"F", "G"}])
	expect = {frozenset(["F"]), frozenset(["G"])}
	self.assertEqual(result, expect)

	def test_larger_example(self):
	result = find_maximal_subsets([{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}])
	expect = {
	frozenset([1, 2]),
	frozenset([4, 5]),
	frozenset([3]),
	frozenset([6]),
	frozenset([7]),
	}
	self.assertEqual(result, expect)

	def test_larger_example_with_group_of_5(self):
	"""
	Same as `test_larger_example` except here there's also a group of (8, 9, 10, 11, 12) that is
	found together.
	"""
	result = find_maximal_subsets(
	[{1, 2, 3}, {1, 2}, {1, 2, 4, 5}, {4, 5, 6}, {6, 7}, {6, 7}, {3, 8, 9, 10, 11, 12}]
	)
	expect = {
	frozenset([1, 2]),
	frozenset([4, 5]),
	frozenset([3]),
	frozenset([6]),
	frozenset([7]),
	frozenset([8, 9, 10, 11, 12]),
	}
	self.assertEqual(result, expect)

	def test_larger_example_with_group_of_5_twice(self):
	"""
	Like `test_larger_example_with_group_of_5` except here the group of 5 occurs twice.
	"""
	result = find_maximal_subsets(
	[
	{1, 2, 3},
	{1, 2},
	{1, 2, 4, 5},
	{4, 5, 6},
	{6, 7},
	{6, 7},
	{3, 8, 9, 10, 11, 12},
	{8, 9, 10, 11, 12, 1, 2},
	]
	)
	expect = {
	frozenset([1, 2]),
	frozenset([4, 5]),
	frozenset([3]),
	frozenset([6]),
	frozenset([7]),
	frozenset([8, 9, 10, 11, 12]),
	}
	self.assertEqual(result, expect)

	if __name__ == "__main__":
	unittest.main()