# GitHub Gist by @dsoprea (dsoprea/81989bc9502ffb3286946aba75f42840)
# Last active September 13, 2024 18:12
#
# Tools to diff data
"""
Copyright 2024 Dustin Oprea
MIT LICENSE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
def dynamic_retrieve(cached_entities, want_ids, entities_getter):
    """Return the wanted entities, fetching only those we don't yet have.

    `cached_entities`: A dictionary of IDs to entities. It will start as an
    empty dictionary but will fill over time between successive calls. It is
    updated in place with every pair produced by `entities_getter`.

    `want_ids`: An iterable of (hashable) IDs. Duplicates are tolerated.

    `entities_getter`:
        A callback that takes a list of IDs and returns a list/generator of 2-
        tuples of IDs and entities. It is called at most once per call, and
        only if at least one wanted ID is missing from the cache. Each missing
        ID is passed exactly once, in the order it was first seen.

    Returns a dictionary of IDs to entities, built from the cache hits plus
    every pair produced by `entities_getter`.
    """

    # TODO(dustin): Add test

    final_entities = {}

    # A dict is used as an insertion-ordered set so that an ID repeated in
    # `want_ids` is only requested from the getter once.
    missing_entity_ids = {}

    for id_ in want_ids:
        try:
            final_entities[id_] = cached_entities[id_]
        except KeyError:
            missing_entity_ids[id_] = None

    if missing_entity_ids:
        retrieved_entities = entities_getter(list(missing_entity_ids))

        # Everything the getter hands back is both remembered for future calls
        # and included in this call's result.
        for id_, entity in retrieved_entities:
            cached_entities[id_] = entity
            final_entities[id_] = entity

    return final_entities
def compare_lists(a, b):
    """Diff two collections of hashable items.

    `a`: The "before" iterable.
    `b`: The "after" iterable.

    Returns a 3-tuple of lists `(created, removed, unchanged)`:

    - `created`: items present in `b` but not in `a`, in `b` order
    - `removed`: items present in `a` but not in `b`, in `a` order
    - `unchanged`: items present in both, in `a` order

    Each item appears at most once in the output, regardless of how many
    times it occurs in the inputs.
    """

    # dict.fromkeys() de-duplicates like set() but remembers first-seen order,
    # which keeps the output stable from run to run. It also consumes each
    # input exactly once, so generators are fine.
    a_unique = dict.fromkeys(a)
    b_unique = dict.fromkeys(b)

    created = [item for item in b_unique if item not in a_unique]
    removed = [item for item in a_unique if item not in b_unique]
    unchanged = [item for item in a_unique if item in b_unique]

    return created, removed, unchanged
def compare_dictionaries(a, b):
    """Diff two dictionaries key by key.

    `a`: The "before" dictionary.
    `b`: The "after" dictionary.

    Returns a 4-tuple of dictionaries `(created, removed, changed, unchanged)`:

    - `created`: keys only in `b`, mapped to their value in `b`
    - `removed`: keys only in `a`, mapped to their value in `a`
    - `changed`: keys in both whose values differ, mapped to a 2-tuple of
      `(old_value, new_value)`
    - `unchanged`: keys in both whose values match, mapped to that value

    Values only need to support `==`; they do not have to be hashable, so
    lists, dictionaries, and sets can be compared.
    """

    created = {}
    removed = {}
    changed = {}
    unchanged = {}

    for key, old_value in a.items():
        try:
            new_value = b[key]
        except KeyError:
            removed[key] = old_value
            continue

        # Identity is checked before equality, the same way Python containers
        # compare their members, so a value that is not equal to itself
        # (e.g. a NaN) still counts as unchanged when it is the same object.
        if old_value is new_value or old_value == new_value:
            unchanged[key] = old_value
        else:
            changed[key] = (old_value, new_value)

    for key, new_value in b.items():
        if key not in a:
            created[key] = new_value

    return created, removed, changed, unchanged
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment