Skip to content

Instantly share code, notes, and snippets.

@dsoprea
Last active October 11, 2024 03:02
Show Gist options
  • Save dsoprea/3febfacc1b0093a2e1a95c86cf94e2b3 to your computer and use it in GitHub Desktop.
Save dsoprea/3febfacc1b0093a2e1a95c86cf94e2b3 to your computer and use it in GitHub Desktop.
Prevent duplicate keys in dictionaries
"""
Copyright 2024 Dustin Oprea
MIT LICENSE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
import yaml
class _UniqueCheckedLoader(yaml.SafeLoader):
    """SafeLoader variant whose map constructor rejects duplicated keys.

    The override only takes effect once `construct_yaml_map` has been
    registered for the 'tag:yaml.org,2002:map' tag with `add_constructor`;
    PyYAML dispatches through its constructor table, not by method name.
    """

    def construct_yaml_map(self, node):
        """Construct a dict from a mapping node, refusing duplicated keys.

        This is a generator, following PyYAML's two-step construction
        protocol: the (still empty) dict is yielded first so that it can be
        registered for the node, and only then is it populated. That ordering
        is what allows a mapping to contain an alias to itself.

        Args:
            node: The mapping node; `node.value` is a list of
                (key_node, value_node) pairs.

        Yields:
            The dict for this mapping (filled in after it is yielded).

        Raises:
            Exception: If the same scalar key text occurs more than once in
                this mapping. Raised before anything is yielded.
        """

        seen_keys = set()
        duplicated_keys = set()

        for key_node, _value_node in node.value:
            # Only scalar keys carry a (hashable) string in `.value`. For
            # sequence/mapping keys `.value` is a list, which cannot go into a
            # set; those keys are left to `construct_mapping` to deal with.
            if not isinstance(key_node, yaml.ScalarNode):
                continue

            key_text = key_node.value

            if key_text in seen_keys:
                duplicated_keys.add(key_text)
            else:
                seen_keys.add(key_text)

        if duplicated_keys:
            raise Exception(
                "One or more map keys are duplicated: {}".format(
                    sorted(duplicated_keys)))

        # Hand out the dict *before* building the children, so that a
        # recursive reference to this node resolves to this same dict.
        data = {}
        yield data

        value = self.construct_mapping(node)
        data.update(value)
def load_and_assert_uniqueness(x):
    """Parse YAML from `x`, failing if any mapping repeats a key.

    The duplicate check lives in `_UniqueCheckedLoader.construct_yaml_map`.
    That constructor is (re-)registered for the standard map tag on every
    call, so the check is active regardless of what happened to the loader
    class beforehand.

    Args:
        x: The YAML input, passed straight through to `yaml.load`.

    Returns:
        Whatever `yaml.load` produces for `x` with the checking loader.
    """

    loader_cls = _UniqueCheckedLoader

    # PyYAML looks constructors up by tag, so the overriding method has to be
    # installed explicitly for plain maps.
    loader_cls.add_constructor(
        'tag:yaml.org,2002:map',
        loader_cls.construct_yaml_map
    )

    return yaml.load(x, Loader=loader_cls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment