Last active
February 3, 2022 19:04
-
-
Save tzaffi/c14aa3af79ea7d09170a8727029b5d95 to your computer and use it in GitHub Desktop.
YAYD - Yet Another YAML (or JSON) Differ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import OrderedDict | |
from copy import deepcopy | |
import json | |
from typing import List, Union | |
# Side selectors accepted by deep_diff's `extras_only` argument.
L = "left"
R = "right"
def deep_diff(
    x: Union[dict, list],
    y: Union[dict, list],
    exclude_keys: Union[List[str], None] = None,
    overlaps_only: bool = False,
    extras_only: Union[str, None] = None,
    arraysets: bool = False,
) -> Union[dict, list, None]:
    """
    Take the deep diff of JSON-like dictionaries (or lists).

    Args:
        x: the "left" structure.
        y: the "right" structure.
        exclude_keys: dict keys that are skipped at every nesting level.
        overlaps_only: skip dict keys that exist on one side only.
            Requires ``arraysets=True``.
        extras_only: ``"left"`` or ``"right"``; report only dict keys / list
            elements that exist on that side alone. Mismatching leaf values
            are not reported in this mode. Requires ``arraysets=True`` and is
            incompatible with ``overlaps_only``.
        arraysets: compare lists as sets (order and duplicates ignored)
            instead of position by position.

    Returns:
        ``None`` when no difference is found. Otherwise a structure mirroring
        the inputs whose differing leaves are ``[left, right]`` pairs (or bare
        values for ``extras_only`` list elements), passed through
        ``sort_json``. A positional list diff has one slot per index, holding
        ``None`` where the two sides agree.

    Raises:
        AssertionError: on an inconsistent combination of flags.
        TypeError: with ``arraysets=True`` when a list holds an unhashable
            element.
    """
    senseless = "it doesn't make sense to "
    if overlaps_only:
        assert (
            arraysets
        ), f"{senseless}diff overlaps only when not considering arrays as sets"
    if extras_only:
        assert (
            arraysets
        ), f"{senseless}have extras_only={extras_only} when not considering arrays as sets"
        assert (
            not overlaps_only
        ), f"{senseless}have extras_only={extras_only} when diffing overlaps only"

    if exclude_keys is None:
        exclude_keys = []
    right_extras = extras_only == R
    left_extras = extras_only == L

    def ordered(items):
        # Set iteration order depends on string hashing, which varies between
        # interpreter runs; sort so that repeated runs give the same list.
        try:
            return sorted(items)
        except TypeError:
            # elements of mixed, mutually unorderable types
            return sorted(items, key=repr)

    def dd(x, y):
        if x == y:
            return None

        # awkward, but handles subclasses of dict/list:
        if not (
            isinstance(x, (list, dict))
            and (isinstance(x, type(y)) or isinstance(y, type(x)))
        ):
            return [x, y] if not extras_only else None

        if isinstance(x, dict):
            d = type(x)()  # handles OrderedDict's as well
            # keys that live on one side only
            for k in x.keys() ^ y.keys():
                if k in exclude_keys or overlaps_only:
                    continue
                if (k in x and right_extras) or (k in y and left_extras):
                    continue
                d[k] = [deepcopy(x[k]), None] if k in x else [None, deepcopy(y[k])]
            # keys shared by both sides: recurse
            for k in x.keys() & y.keys():
                if k in exclude_keys:
                    continue
                next_d = dd(x[k], y[k])
                if next_d is None:
                    continue
                d[k] = next_d
            return d if d else None

        # assume a list:
        if not arraysets:
            d = [dd(x_val, y_val) for x_val, y_val in zip(x, y)]
            shared = len(d)
            if overlaps_only:
                d.extend([None] * (max(len(x), len(y)) - shared))
            else:
                # At most one of these tails is non-empty. The left tail used
                # to be dropped when the left list was the longer one.
                d.extend([x_val, None] for x_val in x[shared:])
                d.extend([None, y_val] for y_val in y[shared:])
        else:  # will raise error if contains a non-hashable element
            sx, sy = set(x), set(y)
            if extras_only:
                d = ordered(sx - sy) if left_extras else ordered(sy - sx)
            elif overlaps_only:
                d = [[e, None] for e in sorted(x) if e not in sy]
                d += [[None, e] for e in sorted(y) if e not in sx]
            else:
                d = [[e, None] if e in sx else [None, e] for e in ordered(sx ^ sy)]

        return None if all(e is None for e in d) else d

    return sort_json(dd(x, y))
def is_diff_array(da: list) -> bool:
    """
    Decide whether `da` looks like a `[left, right]` diff leaf.

    True for a two-element list where exactly one side is None, or where the
    two sides are not both lists and not both dicts. False for any other
    length and for `[None, None]`.
    """
    if len(da) != 2:
        return False
    if da == [None, None]:
        return False
    if None in da:
        return True

    both_lists = all(isinstance(side, list) for side in da)
    both_dicts = all(isinstance(side, dict) for side in da)
    return not (both_lists or both_dicts)
def sort_json(d: Union[dict, list], sort_lists: bool = False):
    """
    Return a copy of a JSON-like structure with dict keys in sorted order.

    Args:
        d: a dict, a list, or a leaf value (returned unchanged).
        sort_lists: when True, lists at every nesting level are sorted as
            well; a list whose elements cannot be ordered keeps its order.

    Returns:
        Dicts become `OrderedDict`s with sorted keys, lists become new lists,
        and both are processed recursively.
    """
    if isinstance(d, list):
        items = d
        if sort_lists:
            try:
                items = sorted(d)
            except TypeError:
                # e.g. a list of dicts: nothing sensible to sort by
                items = d
        return [sort_json(x, sort_lists) for x in items]

    if isinstance(d, dict):
        # Build from pairs rather than **kwargs so non-string keys work too.
        return OrderedDict((k, sort_json(d[k], sort_lists)) for k in sorted(d))

    return d
def jdump(jd, only_objs=False):
    """
    Serialize `jd` to compact JSON (no spaces after separators).

    With `only_objs=True`, values that are not a list, dict or str are
    returned unchanged instead of being serialized.
    """
    passthrough = only_objs and not isinstance(jd, (list, dict, str))
    return jd if passthrough else json.dumps(jd, separators=(",", ":"))
def prettify_diff(
    json_diff: Union[dict, list, int, str, None],
    src: str = "",
    tgt: str = "",
    suppress_bs: bool = True,
    value_limit: int = None,
):
    """
    Rewrite every `[left, right]` diff leaf as `[{tgt: left}, {src: right}]`.

    Args:
        json_diff: a diff structure; leaves are detected by `is_diff_array`.
        src: label attached to the right-hand value of each leaf.
        tgt: label attached to the left-hand value of each leaf.
        suppress_bs: when True, list/dict/str values are dumped to compact
            JSON strings and, if one side of the leaf is None, strings longer
            than `value_limit` are cut and suffixed with "...".
        value_limit: maximum string length before truncation; None disables
            truncation.

    Returns:
        The relabelled structure, passed through `sort_json`.
    """

    def truncate(value):
        too_long = (
            isinstance(value, str)
            and value_limit is not None
            and len(value) > value_limit
        )
        return value[:value_limit] + "..." if too_long else value

    def compact(left, right):
        left = jdump(left, only_objs=True)
        right = jdump(right, only_objs=True)
        if None in (left, right):
            # one-sided leaf: shorten the surviving value
            return truncate(left), truncate(right)
        # both sides present: keep them in full
        return left, right

    def walk(node):
        if isinstance(node, dict):
            return {key: walk(val) for key, val in node.items()}
        if not isinstance(node, list):
            return node
        if not is_diff_array(node):
            return [walk(item) for item in node]
        left, right = node
        if suppress_bs:
            left, right = compact(left, right)
        return [{tgt: left}, {src: right}]

    return sort_json(walk(json_diff))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import atexit | |
import json | |
from pathlib import Path | |
from typing import List | |
import yaml | |
from git import Repo | |
from .json_diff import deep_diff, prettify_diff | |
# Names of the supported diff reports.
NEW = "new"
OVERLAP = "overlap"
DROPPED = "dropped"
FULL = "full"
DIFF_TYPES = [NEW, OVERLAP, DROPPED, FULL]

# These are the diff reports that will be run and compared/asserted against:
ASSERTIONS = [DROPPED, FULL]

# Only compare swagger "definitions":
MODELS_ONLY = True

# All paths are resolved against the current working directory.
REPO_DIR = Path.cwd()
INDEXER_SWGR = REPO_DIR.joinpath("api", "indexer.oas2.json")
GOAL_DIR = REPO_DIR.joinpath("third_party", "go-algorand")
ALGOD_SWGR = GOAL_DIR.joinpath("daemon", "algod", "api", "algod.oas2.json")
REPORTS_DIR = REPO_DIR.joinpath("parity", "reports")

# Flipped by print_git_info_once after its first run.
already_printed = False
def print_git_info_once():
    """
    Print the HEAD commit hashes of the repos at REPO_DIR and GOAL_DIR.

    Only the first call prints; later calls return immediately because the
    module-level `already_printed` flag has been set.
    """
    global already_printed
    if not already_printed:
        already_printed = True
        indexer_head = Repo(REPO_DIR).git.rev_parse("HEAD")
        goal_head = Repo(GOAL_DIR).git.rev_parse("HEAD")
        print(f"""Finished comparing:
* Indexer Swagger {INDEXER_SWGR} for commit hash {indexer_head}
* Algod Swagger {ALGOD_SWGR} for commit hash {goal_head}
""")
def tsetup():
    """
    Load both swagger files and build the list of keys to ignore.

    Also registers `print_git_info_once` to run at interpreter exit.

    Returns:
        A tuple `(exclude, indexer, algod)`: the keys to exclude from diffs,
        followed by the parsed Indexer and Algod swagger documents. When
        MODELS_ONLY is set, each document is reduced to its "definitions"
        entry.
    """
    atexit.register(print_git_info_once)

    exclude = [
        "basePath",
        "consumes",
        "host",
        "info",
        "paths",
        "produces",
        "security",
        "securityDefinitions",
        "schemes",
        "diff_types",
        "x-algorand-format",
        "x-go-name",
    ]

    def load_swagger(path):
        # Decode explicitly as UTF-8 instead of the platform default encoding.
        with open(path, "r", encoding="utf-8") as f:
            spec = json.load(f)
        return spec["definitions"] if MODELS_ONLY else spec

    indexer = load_swagger(INDEXER_SWGR)
    algod = load_swagger(ALGOD_SWGR)

    return exclude, indexer, algod
def get_report_path(diff_type, for_write=False):
    """
    Return the YAML report path for `diff_type` inside REPORTS_DIR.

    With `for_write=True` the file name gets an "_OUT" suffix before the
    extension, so freshly written reports use a different file from the one
    without the suffix.
    """
    suffix = ""
    if for_write:
        suffix = "_OUT"
    return REPORTS_DIR / f"algod2indexer_{diff_type}{suffix}.yml"
def save_yaml(diff, diff_type):
    """Dump `diff` as YAML to the "_OUT" report file for `diff_type` and print where it went."""
    destination = get_report_path(diff_type, for_write=True)
    with open(destination, "w") as out:
        rendered = yaml.dump(diff, indent=2, sort_keys=True, width=2000)
        out.write(rendered)
    print(f"\nsaved json diff to {destination}")
def yamlize(diff):
    """
    Prettify `diff` with ALGOD/INDEXER labels and a 30-character value limit,
    then rebuild it out of builtin dicts and lists.

    The rebuild replaces any dict subclass with a plain dict, presumably so
    that the YAML dump emits ordinary mappings.
    """

    def plain(node):
        if isinstance(node, list):
            return [plain(item) for item in node]
        if isinstance(node, dict):
            return {key: plain(val) for key, val in node.items()}
        return node

    pretty = prettify_diff(diff, src="ALGOD", tgt="INDEXER", value_limit=30)
    return plain(pretty)
def generate_diff(source, target, excludes, diff_type):
    """
    Diff `target` (left) against `source` (right) for one report flavour.

    `diff_type` must be one of DIFF_TYPES. Lists are always compared as sets,
    and the result is passed through `yamlize`.
    """
    assert (
        diff_type in DIFF_TYPES
    ), f"Unrecognized diff_type [{diff_type}] not in {DIFF_TYPES}"

    # Overlaps - existing fields that have been modified from algod ---> indexer
    overlaps_only = diff_type == OVERLAP

    if diff_type == NEW:
        # Additions - fields that have been introduced in indexer
        extras_only = "left"
    elif diff_type == DROPPED:
        # Removals - fields that have been deleted in indexer
        extras_only = "right"
    else:
        extras_only = None
        if not overlaps_only:
            # Full Diff - anything that's different
            assert diff_type == FULL

    raw_diff = deep_diff(
        target,
        source,
        exclude_keys=excludes,
        overlaps_only=overlaps_only,
        extras_only=extras_only,
        arraysets=True,
    )
    return yamlize(raw_diff)
def save_reports(*reports) -> None:
    """
    Generate a YAML report showing differences between Algod's API and Indexer's API.

    Possible `reports` diff_types are:
    "overlap" - show only modifications to features that Algod and Indexer have in common
    "new" - focus on features added to Indexer and missing from Algod
    "dropped" (recommended) - focus on features that are present in Algod but dropped in Indexer
    "full" (recommended) - show all differences
    """
    excludes, indexer_swgr, algod_swgr = tsetup()
    for report_kind in reports:
        save_yaml(
            generate_diff(algod_swgr, indexer_swgr, excludes, report_kind),
            report_kind,
        )
def test_parity(reports: List[str] = ASSERTIONS, save_new: bool = True):
    """
    Check that the stored parity reports still match the swagger files.

    For each report in reports:
    1. load the pre-existing yaml report into `old_diff`
    2. re-generate the equivalent report by comparing `algod_swgr` with `indexer_swgr`
    3. compute the `diff_of_diffs` between these two reports
    4. assert that there is no diff

    When `save_new` is True, the regenerated reports are first written to
    their "_OUT" files via `save_reports`.
    """
    excludes, indexer_swgr, algod_swgr = tsetup()

    if save_new:
        save_reports(*reports)

    for diff_type in reports:
        ypath = get_report_path(diff_type, for_write=False)
        with open(ypath, "r") as f:
            old_diff = yaml.safe_load(f)
        new_diff = generate_diff(algod_swgr, indexer_swgr, excludes, diff_type)
        diff_of_diffs = deep_diff(old_diff, new_diff)
        assert (
            diff_of_diffs is None
        ), f"""UNEXPECTED CHANGE IN {ypath}. Differences are:
{json.dumps(diff_of_diffs,indent=2)}
"""
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from cmath import exp | |
from copy import deepcopy | |
from .json_diff import deep_diff | |
def test_deep_diff():
    """Exercise deep_diff on flat dicts, nested dicts, positional lists and lists-as-sets."""

    def check(expected, actual):
        assert expected == actual, f"expected: {expected} v. actual: {actual}"

    # flat dicts: a changed value, then an identical copy
    d1 = {"dad": 55, "mom": 56}
    d2 = {"mom": 55, "dad": 55}
    check({"mom": [56, 55]}, deep_diff(d1, d2))
    check(None, deep_diff(d1, deepcopy(d1)))

    # a scalar replaced by a nested structure
    mom_info = {
        "age": 56,
        "profession": "MD",
        "hobbies": ["ballet", "opera", {"football": "american"}, "racecar driving"],
    }
    d3 = {"dad": 55, "mom": mom_info}
    check({"mom": [56, mom_info]}, deep_diff(d1, d3))

    # a key missing on the right
    d4 = {"mom": mom_info}
    check({"dad": [55, None]}, deep_diff(d3, d4))

    # changes buried inside nested dicts and lists
    d5 = {
        "dad": 55,
        "mom": {
            "age": 56,
            "profession": "Programmer",
            "hobbies": ["ballet", "opera", {"football": "british"}, "racecar driving"],
        },
    }
    check(
        {
            "mom": {
                "profession": ["MD", "Programmer"],
                "hobbies": [None, None, {"football": ["american", "british"]}, None],
            }
        },
        deep_diff(d3, d5),
    )

    # top-level lists, compared position by position
    a1 = ["hello", "world", {"I": "wish"}, "you", {"all": "the best"}]
    a2 = ["hello", "world", {"I": "wish"}, "you", {"all": "the very best"}]
    check(
        [None, None, None, None, {"all": ["the best", "the very best"]}],
        deep_diff(a1, a2),
    )

    # lists as sets, overlaps only
    a3 = ["hello", "world", "I", "wish", "you", "good", "times"]
    a4 = ["world", "hello", "you", "good", "timesies", "wish"]
    check(
        [["I", None], ["times", None], [None, "timesies"]],
        deep_diff(a3, a4, overlaps_only=True, arraysets=True),
    )

    # the same pair of lists: positional versus set comparison
    s1 = ["alice", "bob", "cassie", "deandrea", "elbaz"]
    s2 = ["bob", "alice", "cassie", "deandrea", "elbaz", "farber"]
    check(
        [["alice", "bob"], ["bob", "alice"], None, None, None, [None, "farber"]],
        deep_diff(s1, s2),
    )
    check([[None, "farber"]], deep_diff(s1, s2, arraysets=True))

    # a swagger-shaped example with one changed description
    real1 = {
        "definitions": {
            "Account": {
                "properties": {
                    "sig-type": {
                        "description": "Indicates what type of signature is used by this account, must be one of:\n* sig\n* msig\n* lsig\n* or null if unknown"
                    }
                }
            }
        }
    }
    real2 = {
        "definitions": {
            "Account": {
                "properties": {
                    "sig-type": {
                        "description": "Indicates what type of signature is used by this account, must be one of:\n* sig\n* msig\n* lsig",
                    }
                }
            }
        }
    }
    sig_type_1 = real1["definitions"]["Account"]["properties"]["sig-type"]
    sig_type_2 = real2["definitions"]["Account"]["properties"]["sig-type"]
    expected = deepcopy(real2)
    expected["definitions"]["Account"]["properties"]["sig-type"]["description"] = [
        sig_type_1["description"],
        sig_type_2["description"],
    ]
    check(expected, deep_diff(real1, real2))
    check(None, deep_diff(real1, real2, extras_only="left", arraysets=True))

    # a renamed key inside a list element
    fb1 = {"FANG": [{"Facebook": {"price": 330}}]}
    fb2 = {"FANG": [{"Meta": {"price": 290}}]}
    check(
        {"FANG": [{"Facebook": [{"price": 330}, None], "Meta": [None, {"price": 290}]}]},
        deep_diff(fb1, fb2),
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Account: | |
properties: | |
min-balance: | |
- INDEXER: null | |
- ALGOD: '{"description":"MicroAlgo bala...' | |
required: | |
- min-balance | |
ApplicationParams: | |
required: | |
- creator | |
BuildVersion: | |
- INDEXER: null | |
- ALGOD: '{"properties":{"branch":{"type...' | |
DryrunRequest: | |
- INDEXER: null | |
- ALGOD: '{"description":"Request data t...' | |
DryrunSource: | |
- INDEXER: null | |
- ALGOD: '{"description":"DryrunSource i...' | |
DryrunState: | |
- INDEXER: null | |
- ALGOD: '{"description":"Stores the TEA...' | |
DryrunTxnResult: | |
- INDEXER: null | |
- ALGOD: '{"description":"DryrunTxnResul...' | |
ErrorResponse: | |
- INDEXER: null | |
- ALGOD: '{"description":"An error respo...' | |
ParticipationKey: | |
- INDEXER: null | |
- ALGOD: '{"description":"Represents a p...' | |
PendingTransactionResponse: | |
- INDEXER: null | |
- ALGOD: '{"description":"Details about ...' | |
Version: | |
- INDEXER: null | |
- ALGOD: '{"description":"algod version ...' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Notable:
deep_diff() - the workhorse
prettify_diff() - trim long string values, unless they are a true diff (the values exist in both versions and are different)
save_yaml() - save the diff as yaml
generate_diff() - driver that handles all four diff output variants:
NEW - ignore values that were dropped, focusing only on values in the right but not the left
OVERLAP - only common non-null values that differ
DROPPED - ignore values that are new, focusing only on values that are in the left but not the right
FULL - all of the above