-
-
Save mahmoud/db02d16ac89fa401b968 to your computer and use it in GitHub Desktop.
""" | |
This is an extension of the technique first detailed here:
http://sedimental.org/remap.html#add_common_keys
In short, it calls remap on each container, back to front, using the accumulating
previous values as the default for the current iteration.
""" | |
from boltons.iterutils import remap, get_path, default_enter, default_visit | |
# Baseline layer: the other sample layers below are merged on top of it.
defaults = {
    'host': '127.0.0.1',
    'port': 8000,
    'endpoints': {
        'persistence': {'host': '127.0.0.1', 'port': 8888},
        'cache': {'host': '127.0.0.1', 'port': 8889},
    },
    'owners': {'secondary': ['alice']},
    'zones': [{'a': 1}],
    'notes': ['this is the default'],
}

# Second layer: overrides some scalars, adds new keys and contributes to lists.
overlay = {
    'host': '127.0.0.1',
    'port': 8080,
    'endpoints': {
        'persistence': {'host': '10.2.2.2', 'port': 5433},
    },
    'overlay_version': '5.0',
    'owners': {
        'primary': ['bob'],
        'secondary': ['charles'],
    },
    'zones': [{'a': 2}],
    'notes': ['this is the overlay'],
}

# Third layer: touches exactly one deeply nested leaf.
cache_host_override = {
    'endpoints': {
        'cache': {'host': '127.0.0.2'},
    },
}
def remerge(target_list, sourced=False):
    """Merge a list of containers (e.g., dicts) into one new container.

    The merge is driven by ``boltons.iterutils.remap``: each container is
    remapped in list order, and the result accumulated so far is reused as
    the parent for matching paths. Containers later in the list take
    precedence (last-wins). Lists are not overridden but extended.

    When the value already merged at a path and the incoming container are
    of unrelated types (for example ``None`` or a scalar in an earlier
    layer versus a dict in a later one), the incoming container replaces
    the earlier value instead of being merged into it.

    :param target_list: list of containers, or, with *sourced*, a list of
        ``(name, container)`` pairs.
    :param sourced: when true, also build a source map.
    :return: the merged top-level container; with *sourced*, a tuple
        ``(merged, source_map)`` where ``source_map`` maps each visited
        path (a tuple of keys) to the name of the last container that
        supplied it.
    """
    if not sourced:
        # Give every container a throw-away name so a single loop below
        # serves both calling conventions.
        target_list = [(id(t), t) for t in target_list]

    ret = None
    source_map = {}

    def remerge_enter(path, key, value):
        fresh_parent, new_items = default_enter(path, key, value)
        if new_items is False:
            # default_enter declined to traverse this value (scalar, string,
            # ...): there is nothing to merge into, the value just overwrites.
            return fresh_parent, new_items

        candidate = fresh_parent
        if ret and not path and key is None:
            # Top level: keep filling the container built by earlier layers.
            candidate = ret
        try:
            candidate = get_path(ret, path + (key,))
        except KeyError:
            # Path not merged yet (or nothing merged at all).
            pass

        # Only merge into an existing value of a related container type;
        # otherwise (None, scalar, dict-vs-list, ...) the new layer wins.
        if (candidate is fresh_parent
                or isinstance(candidate, type(fresh_parent))
                or isinstance(fresh_parent, type(candidate))):
            new_parent = candidate
        else:
            new_parent = fresh_parent

        if isinstance(value, list):
            # lists are purely additive. See https://github.com/mahmoud/boltons/issues/81
            new_parent.extend(value)
            new_items = []  # items were copied wholesale; do not traverse them
        return new_parent, new_items

    def record_source(path, key, value):
        # t_name is the loop variable below; it is read at call time, so
        # each path ends up attributed to the layer currently being merged.
        source_map[path + (key,)] = t_name
        return True

    remerge_visit = record_source if sourced else default_visit

    for t_name, target in target_list:
        ret = remap(target, enter=remerge_enter, visit=remerge_visit)

    if not sourced:
        return ret
    return ret, source_map
def main():
    """Merge the three sample layers, check a few merged values, print results."""
    from pprint import pprint

    named_layers = [
        ('defaults', defaults),
        ('overlay', overlay),
        ('cache_host_override', cache_host_override),
    ]
    merged, source_map = remerge(named_layers, sourced=True)

    # Top-level scalars: the overlay wins over the defaults.
    assert merged['host'] == '127.0.0.1'
    assert merged['port'] == 8080

    # Nested endpoints: each leaf comes from the last layer that set it.
    persistence = merged['endpoints']['persistence']
    assert persistence['host'] == '10.2.2.2'
    assert persistence['port'] == 5433
    cache = merged['endpoints']['cache']
    assert cache['host'] == '127.0.0.2'
    assert cache['port'] == 8889

    # Keys that only exist in a later layer are carried over.
    assert merged['overlay_version'] == '5.0'

    pprint(merged)
    pprint(source_map)
    print(len(source_map), 'paths')


if __name__ == '__main__':
    main()
{'endpoints': {'cache': {'host': '127.0.0.2', 'port': 8889}, | |
'persistence': {'host': '10.2.2.2', 'port': 5433}}, | |
'host': '127.0.0.1', | |
'notes': ['this is the default', 'this is the overlay'], | |
'overlay_version': '5.0', | |
'owners': {'primary': ['bob'], 'secondary': ['alice', 'charles']}, | |
'port': 8080, | |
'zones': [{'a': 1}, {'a': 2}]} | |
{('endpoints',): 'cache_host_override', | |
('endpoints', 'cache'): 'cache_host_override', | |
('endpoints', 'cache', 'host'): 'cache_host_override', | |
('endpoints', 'cache', 'port'): 'defaults', | |
('endpoints', 'persistence'): 'overlay', | |
('endpoints', 'persistence', 'host'): 'overlay', | |
('endpoints', 'persistence', 'port'): 'overlay', | |
('host',): 'overlay', | |
('notes',): 'overlay', | |
('overlay_version',): 'overlay', | |
('owners',): 'overlay', | |
('owners', 'primary'): 'overlay', | |
('owners', 'secondary'): 'overlay', | |
('port',): 'overlay', | |
('zones',): 'overlay'} | |
(15, 'paths') |
@pleasantone, that is very strange. I updated the gist with the case I tried and the behavior I see. Really not sure how this could have happened.
Oh wait, I see one difference. On Twitter, we'd said a dict with a list in it, but here we have a list with a dict in it. OK, let me try that.
(I got the reproduction, will debug this evening.)
Aaaand fixed. The description, process, etc. is all here in Boltons issue #81.
This is great, thank you!
Hi. This gist seems to work fine. Is it possible to get it into boltons.dictutils?
Here is my humble contribution (merge-list flag + unit tests):
# Third Party Libraries
from boltons.iterutils import default_enter
from boltons.iterutils import default_visit
from boltons.iterutils import get_path
from boltons.iterutils import remap
from structlog import get_logger
# Module-level logger obtained from structlog's get_logger().
# NOTE(review): `log` is not referenced by the code visible here - confirm it is still needed.
log = get_logger()
# Names exported by `from <module> import *`: only `remerge`.
__all__ = ["remerge"]
def remerge(target_list, sourced=False, replace_lists=False):  # noqa: C901
    """Merge a list of dicts.

    Takes a list of containers (e.g., dicts) and merges them using
    boltons.iterutils.remap. Containers later in the list take
    precedence (last-wins).

    By default (``replace_lists=False``), values of type ``list`` are not
    replaced: the items of later lists are appended to the earlier ones.
    With ``replace_lists=True`` a later list replaces the earlier one; the
    result holds a shallow copy, not the caller's own list object.

    When the value already merged at a path and the incoming container are
    of unrelated types (for example ``None`` or a scalar in an earlier
    layer versus a dict in a later one), the incoming container replaces
    the earlier value instead of being merged into it.

    By default, returns a new, merged top-level container.

    With the *sourced* option, `remerge` expects a list of
    (*name*, *container*) pairs, and returns ``(merged, source_map)``:
    ``source_map`` is a dictionary mapping each visited path (a tuple of
    keys) to the name of the last container that supplied it.

    Example:

    .. code-block:: python

        merged, source_map = remerge([('defaults', defaults),
                                      ('overlay', overlay),
                                      ('cache_host_override', cache_host_override),
                                      ],
                                     sourced=True)
    """
    # Discussion in:
    #   https://gist.github.com/pleasantone/c99671172d95c3c18ed90dc5435ddd57
    # Final gist in:
    #   https://gist.github.com/mahmoud/db02d16ac89fa401b968
    if not sourced:
        # Give every container a throw-away name so a single loop below
        # serves both calling conventions.
        target_list = [(id(t), t) for t in target_list]

    ret = None
    source_map = {}

    def remerge_enter(path, key, value):
        fresh_parent, new_items = default_enter(path, key, value)
        if new_items is False:
            # default_enter declined to traverse this value (scalar, string,
            # ...): there is nothing to merge into, the value just overwrites.
            return fresh_parent, new_items

        candidate = fresh_parent
        if ret and not path and key is None:
            # Top level: keep filling the container built by earlier layers.
            candidate = ret
        try:
            candidate = get_path(ret, path + (key, ))
        except KeyError:
            # Path not merged yet (or nothing merged at all).
            pass

        # Only merge into an existing value of a related container type;
        # otherwise (None, scalar, dict-vs-list, ...) the new layer wins.
        if (candidate is fresh_parent
                or isinstance(candidate, type(fresh_parent))
                or isinstance(fresh_parent, type(candidate))):
            new_parent = candidate
        else:
            new_parent = fresh_parent

        if isinstance(value, list):
            if replace_lists:
                # Start from an empty list so the earlier content is dropped
                # and the caller's list is copied rather than aliased.
                new_parent = fresh_parent
            # Otherwise lists are purely additive.
            # See https://github.com/mahmoud/boltons/issues/81
            new_parent.extend(value)
            new_items = []  # items were copied wholesale; do not traverse them
        return new_parent, new_items

    def record_source(path, key, _value):
        # t_name is the loop variable below; it is read at call time, so
        # each path ends up attributed to the layer currently being merged.
        source_map[path + (key, )] = t_name
        return True

    remerge_visit = record_source if sourced else default_visit

    for t_name, target in target_list:
        ret = remap(target, enter=remerge_enter, visit=remerge_visit)

    if not sourced:
        return ret
    return ret, source_map
Unit test:
# coding: utf-8
# Standard Library
from pprint import pprint
# Gitlab Project Configurator Modules
from gpc.helpers.remerge import remerge
def test_override_string():
    """A string set by a later layer replaces the one from an earlier layer."""
    base_layer = {'key_to_override': 'value_from_defaults'}
    top_layer = {'key_to_override': 'value_from_first_override'}
    wanted = {'key_to_override': 'value_from_first_override'}

    # Named layers: the merge result and the per-path origin are both returned.
    named_layers = [('defaults', base_layer), ('first_override', top_layer)]
    result, origins = remerge(named_layers, sourced=True)
    assert result == wanted
    assert origins == {('key_to_override', ): 'first_override'}

    # Anonymous layers: only the merge result is returned.
    assert remerge([base_layer, top_layer], sourced=False) == wanted
def test_override_subdict():
    """Leaves nested two dicts deep are overridden by the later layer."""

    def nested(text, number):
        # Build the two-level structure shared by both layers and the expectation.
        leaves = {'key_to_override': text, 'integer_to_override': number}
        return {'subdict': {'other_subdict': leaves}}

    base_layer = nested('value_from_defaults', 2222)
    top_layer = nested('value_from_first_override', 5555)
    wanted = nested('value_from_first_override', 5555)

    result, origins = remerge(
        [('defaults', base_layer), ('first_override', top_layer)],
        sourced=True,
    )
    assert result == wanted

    branch = ('subdict', 'other_subdict')
    assert origins == {
        ('subdict', ): 'first_override',
        branch: 'first_override',
        branch + ('integer_to_override', ): 'first_override',
        branch + ('key_to_override', ): 'first_override',
    }

    assert remerge([base_layer, top_layer], sourced=False) == wanted
def test_override_list_append():
    """By default the items of a later list are appended to the earlier list."""
    base_layer = {'list_to_append': [{'a': 1}]}
    top_layer = {'list_to_append': [{'b': 1}]}
    wanted = {'list_to_append': [{'a': 1}, {'b': 1}]}

    result, origins = remerge(
        [('defaults', base_layer), ('first_override', top_layer)],
        sourced=True,
    )
    assert result == wanted
    assert origins == {('list_to_append', ): 'first_override'}

    assert remerge([base_layer, top_layer], sourced=False) == wanted
def test_override_list_replace():
    """With replace_lists=True a later list replaces the earlier one."""
    base_layer = {'list_to_replace': [{'a': 1}]}
    top_layer = {'list_to_replace': [{'b': 1}]}
    wanted = {'list_to_replace': [{'b': 1}]}

    result, origins = remerge(
        [('defaults', base_layer), ('first_override', top_layer)],
        sourced=True,
        replace_lists=True,
    )
    assert result == wanted
    assert origins == {('list_to_replace', ): 'first_override'}

    unsourced = remerge([base_layer, top_layer], sourced=False, replace_lists=True)
    assert unsourced == wanted
def test_complex_dict():
    """Three layers mixing scalars, a list and nested dicts merge as expected."""
    defaults = {
        'key_to_override': 'value_from_defaults',
        'integer_to_override': 1111,
        'list_to_append': [{'a': 1}],
        'subdict': {
            'other_subdict': {
                'key_to_override': 'value_from_defaults',
                'integer_to_override': 2222,
            },
            'second_subdict': {
                'key_to_override': 'value_from_defaults',
                'integer_to_override': 3333,
            },
        },
    }
    first_override = {
        'key_to_override': 'value_from_first_override',
        'integer_to_override': 4444,
        'list_to_append': [{'b': 2}],
        'subdict': {
            'other_subdict': {
                'key_to_override': 'value_from_first_override',
                'integer_to_override': 5555,
            },
        },
        'added_in_first_override': 'some_string',
    }
    second_override = {
        'subdict': {
            'second_subdict': {'key_to_override': 'value_from_second_override'},
        },
    }

    named_layers = [
        ('defaults', defaults),
        ('first_override', first_override),
        ('second_override', second_override),
    ]
    merged, source_map = remerge(named_layers, sourced=True)

    # Dump both results to ease debugging when an assertion below fails.
    print("")
    print("'merged' dictionary:")
    pprint(merged)
    print("")
    pprint(source_map)
    print(len(source_map), 'paths')

    # Top-level scalars come from the first override.
    assert merged['key_to_override'] == 'value_from_first_override'
    assert merged['integer_to_override'] == 4444

    # Each nested leaf comes from the last layer that defined it.
    other = merged['subdict']['other_subdict']
    assert other['key_to_override'] == 'value_from_first_override'
    assert other['integer_to_override'] == 5555
    second = merged['subdict']['second_subdict']
    assert second['key_to_override'] == 'value_from_second_override'
    assert second['integer_to_override'] == 3333

    # New keys are added, lists are concatenated.
    assert merged['added_in_first_override'] == 'some_string'
    assert merged["list_to_append"] == [{'a': 1}, {'b': 2}]
Thank you very much for this very helpful gift. I'm out of my depth with respect to your code above but I did bolt on remerge to the cranky machine I'm making with python. My results were great until I passed it, as the first argument, a dictionary with keys of None type. That caused a breakdown. To get the thing running again I only had to create empty dictionaries for those keys first, but I thought you'd like to know that keys having value of None may need attention.
My problem dictionaries looked something like this:
- {'info': None, 'settings': None}
- {'info': {'measures': 3, 'clef': 'Treble'}, 'settings': {'format':{....}, 'processing': {....}}}
.
..but I'm not positive I had more than one level of nesting in the second dictionary, short on sleep.
@gsemet, I wouldn't mind having it in boltons (though probably in iterutils, just for ease of dependence), we could continue the review process there if you want to prepare a PR.
@JoanEliot, that's true, you need the structures to roughly match. Maybe it makes sense for `None`s to be overridden, but by that same token, it might make sense to preprocess one side to remove `None`s? I could go either way. I'm glad you got it to work!
Thank you for this utility.
I believe that functions that change their return-type based on their inputs,
are hard to work with, even if it's only for debugging aid.
Also needed provenance information for merged lists,
so i did the following changes:
- refact: feed the `source_map` when calling the function, to be populated only if not `None`; now the return type is always the same.
- enh: when merging lists, `source_map` lists all mergers (important when debugging).
- optimize: do not create a new `remerge_visit()` closure on each(!) container to merge, but decide it up-front.
- optimize: don't recreate the input containers with a dummy `id()` just to fit the `remerge_visit()` for when `source_map` is asked; actually don't even override the default enter-function when no `source_map` is asked, simply run a simpler loop.
- refact: renamed `target_list` --> `*containers`, so as not to have to create the list-of-containers when calling it.
- refact: combine trivial `else` code with `try-except`.
- Doc: terse sphinx docstring with a doctested example.
- Doc: explain that the input-dicts order is NOT preserved in the results when `source_map` is asked.
- [edit:] dropped the `replace_lists` argument (my apologies, it just wasn't necessary to me, and this was copy-pasted from my project).
def remerge(*containers, source_map: dict = None):
    """
    Merge recursively dicts or lists with :func:`boltons.iterutils.remap()`.

    Lists are extended rather than overridden. When the value already merged
    at a path and the incoming container are of unrelated types (for example
    ``None`` or a scalar in an earlier container versus a dict in a later
    one), the incoming container replaces the earlier value.

    :param containers:
        the dicts or lists to merge; later ones take precedence (last-wins).
        If `source_map` is given, these must be 2-tuples of
        ``(name, container)``.
    :param source_map:
        If given, it must be a dictionary, and the `containers` args must be
        2-tuples like ``(name, container)``.
        It is populated in place with a mapping from each visited path (a
        tuple of keys) to the name of the last container supplying it; for
        list values, to the list of names of all containers that contributed.

        .. Warning::
            if `source_map` is given, the order of input dictionaries is NOT
            preserved in the results (important if your code relies on
            PY3.7 stable dictionaries).
    :return:
        a new, merged top-level container (``None`` when called without
        containers).

    - Adapted from https://gist.github.com/mahmoud/db02d16ac89fa401b968
    - Discussion in: https://gist.github.com/pleasantone/c99671172d95c3c18ed90dc5435ddd57

    **Example**

        >>> defaults = {
        ...     'subdict': {
        ...         'as_is': 'hi',
        ...         'overridden_key1': 'value_from_defaults',
        ...         'overridden_key2': 2222,
        ...         'merged_list': ['hi', {'untouched_subdict': 'v1'}],
        ...     }
        ... }
        >>> overrides = {
        ...     'subdict': {
        ...         'overridden_key1': 'overridden value',
        ...         'overridden_key2': 5555,
        ...         'merged_list': ['there'],
        ...     }
        ... }
        >>> source_map = {}
        >>> merged = remerge(
        ...     ("defaults", defaults),
        ...     ("overrides", overrides),
        ...     source_map=source_map)
        >>> merged == {
        ...     'subdict': {
        ...         'as_is': 'hi',
        ...         'overridden_key1': 'overridden value',
        ...         'overridden_key2': 5555,
        ...         'merged_list': ['hi', {'untouched_subdict': 'v1'}, 'there'],
        ...     }
        ... }
        True
        >>> source_map == {
        ...     ('subdict', 'as_is'): 'defaults',
        ...     ('subdict', 'overridden_key1'): 'overrides',
        ...     ('subdict', 'overridden_key2'): 'overrides',
        ...     ('subdict', 'merged_list'): ['defaults', 'overrides'],
        ...     ('subdict',): 'overrides',
        ... }
        True
    """
    ret = None

    def remerge_enter(path, key, value):
        fresh_parent, new_items = default_enter(path, key, value)
        if new_items is False:
            # default_enter declined to traverse this value (scalar, string,
            # ...): there is nothing to merge into, the value just overwrites.
            return fresh_parent, new_items

        candidate = fresh_parent
        if ret and not path and key is None:
            # Top level: keep filling the container built by earlier layers.
            candidate = ret
        try:
            candidate = get_path(ret, path + (key,))
        except KeyError:
            # Path not merged yet (or nothing merged at all).
            pass

        # Only merge into an existing value of a related container type;
        # otherwise (None, scalar, dict-vs-list, ...) the new layer wins.
        if (candidate is fresh_parent
                or isinstance(candidate, type(fresh_parent))
                or isinstance(fresh_parent, type(candidate))):
            new_parent = candidate
        else:
            new_parent = fresh_parent

        if isinstance(value, list):
            # lists are purely additive. See https://github.com/mahmoud/boltons/issues/81
            new_parent.extend(value)
            new_items = []  # items were copied wholesale; do not traverse them
        return new_parent, new_items

    if source_map is None:
        # No provenance requested: plain containers, default visit.
        for cont in containers:
            ret = remap(cont, enter=remerge_enter)
        return ret

    def remerge_visit(path, key, value):
        # t_name is the loop variable below, read at call time.
        full_path = path + (key,)
        if isinstance(value, list):
            contributors = source_map.get(full_path)
            if isinstance(contributors, list):
                contributors.append(t_name)
            else:
                # First list seen at this path (or it used to hold a non-list).
                source_map[full_path] = [t_name]
        else:
            source_map[full_path] = t_name
        return True

    for t_name, cont in containers:
        ret = remap(cont, enter=remerge_enter, visit=remerge_visit)
    return ret
Unit-tests (without replace-list :-()
# coding: utf-8
# Standard Library
from pprint import pprint
# Gitlab Project Configurator Modules
# from gpc.helpers.remerge import remerge
def test_override_string():
    """A string set by a later container replaces the earlier one."""
    base_layer = {"key_to_override": "value_from_defaults"}
    top_layer = {"key_to_override": "value_from_first_override"}
    wanted = {"key_to_override": "value_from_first_override"}

    # Named containers: provenance is collected into the dict passed in.
    origins = {}
    result = remerge(
        ("defaults", base_layer),
        ("first_override", top_layer),
        source_map=origins,
    )
    assert result == wanted
    assert origins == {("key_to_override",): "first_override"}

    # Plain containers: no provenance requested.
    assert remerge(base_layer, top_layer, source_map=None) == wanted
def test_override_subdict():
    """Leaves nested two dicts deep are overridden by the later container."""

    def nested(text, number):
        # Build the two-level structure shared by both inputs and the expectation.
        leaves = {"key_to_override": text, "integer_to_override": number}
        return {"subdict": {"other_subdict": leaves}}

    base_layer = nested("value_from_defaults", 2222)
    top_layer = nested("value_from_first_override", 5555)
    wanted = nested("value_from_first_override", 5555)

    origins = {}
    result = remerge(
        ("defaults", base_layer),
        ("first_override", top_layer),
        source_map=origins,
    )
    assert result == wanted

    branch = ("subdict", "other_subdict")
    assert origins == {
        ("subdict",): "first_override",
        branch: "first_override",
        branch + ("integer_to_override",): "first_override",
        branch + ("key_to_override",): "first_override",
    }

    assert remerge(base_layer, top_layer, source_map=None) == wanted
def test_override_list_append():
    """Later list items are appended, and provenance names every contributor."""
    defaults = {"list_to_append": [{"a": 1}]}
    first_override = {"list_to_append": [{"b": 1}]}
    source_map = {}
    merged = remerge(
        ("defaults", defaults),
        ("first_override", first_override),
        source_map=source_map,
    )
    expected_merged = {"list_to_append": [{"a": 1}, {"b": 1}]}
    assert merged == expected_merged
    # For list values this remerge variant records the names of all
    # containers that contributed, in merge order - not just the last one.
    assert source_map == {("list_to_append",): ["defaults", "first_override"]}

    merged = remerge(defaults, first_override, source_map=None)
    assert merged == expected_merged

    # The input lists themselves must not have been extended.
    assert defaults == {"list_to_append": [{"a": 1}]}
    assert first_override == {"list_to_append": [{"b": 1}]}
def test_complex_dict():
    """Three containers mixing scalars, a list and nested dicts merge as expected."""
    base_layer = {
        "key_to_override": "value_from_defaults",
        "integer_to_override": 1111,
        "list_to_append": [{"a": 1}],
        "subdict": {
            "other_subdict": {
                "key_to_override": "value_from_defaults",
                "integer_to_override": 2222,
            },
            "second_subdict": {
                "key_to_override": "value_from_defaults",
                "integer_to_override": 3333,
            },
        },
    }
    middle_layer = {
        "key_to_override": "value_from_first_override",
        "integer_to_override": 4444,
        "list_to_append": [{"b": 2}],
        "subdict": {
            "other_subdict": {
                "key_to_override": "value_from_first_override",
                "integer_to_override": 5555,
            }
        },
        "added_in_first_override": "some_string",
    }
    top_layer = {
        "subdict": {
            "second_subdict": {"key_to_override": "value_from_second_override"},
        }
    }

    source_map = {}
    merged = remerge(
        ("defaults", base_layer),
        ("first_override", middle_layer),
        ("second_override", top_layer),
        source_map=source_map,
    )

    # Dump both results to ease debugging when an assertion below fails.
    print("")
    print("'merged' dictionary:")
    pprint(merged)
    print("")
    pprint(source_map)
    print(len(source_map), "paths")

    # Top-level scalars come from the first override.
    assert merged["key_to_override"] == "value_from_first_override"
    assert merged["integer_to_override"] == 4444

    # Each nested leaf comes from the last container that defined it.
    other = merged["subdict"]["other_subdict"]
    assert other["key_to_override"] == "value_from_first_override"
    assert other["integer_to_override"] == 5555
    second = merged["subdict"]["second_subdict"]
    assert second["key_to_override"] == "value_from_second_override"
    assert second["integer_to_override"] == 3333

    # New keys are added, lists are concatenated.
    assert merged["added_in_first_override"] == "some_string"
    assert merged["list_to_append"] == [{"a": 1}, {"b": 2}]
@ankostis Cool! Glad this old gem continues to provide utility. Thanks for sharing :)
Realized the code was a bit inefficient, so i did these two changes on the code above:
- optimize: do not create a new `remerge_visit()` closure on each(!) container to merge, but decide it up-front.
- optimize: don't recreate the input containers with a dummy `id()` just to fit the `remerge_visit()` for when `source_map` is asked;
I kept the edited code in the same comment, above, for future reference.
[edit:] but I'm still bugged by the handling of `None` when extending lists :-(
There is a bug (limitation) in this implementation, it will not work for lists inside your configuration (python 3.5, boltons 16.4.1)
If you have a list of values, remap will create a circular reference:
for example, add
to
overlay
and you will get the following output: