|
from __future__ import annotations |
|
|
|
""" |
|
This module wraps difftastic's `--display json` to provide more output types:

- more easily consumable JSON
- HTML table
|
""" |
|
|
|
import dataclasses |
|
import html |
|
import json |
|
import os |
|
import subprocess |
|
import sys |
|
from pathlib import Path |
|
|
|
|
|
def run(args, env: dict | None = None, environ=True, **kwargs):
    """Run the ``difft`` executable and return the completed process.

    Args:
        args: Command-line arguments placed after the ``difft`` program name.
        env: Extra environment variables for the child process. The mapping
            is copied and never modified.
        environ: When true, the child environment starts from ``os.environ``
            and ``env`` is layered on top; when false only ``env`` is used.
        **kwargs: Forwarded to ``subprocess.run``. ``check`` defaults to
            ``True`` and ``encoding`` to ``"utf8"`` unless given.

    Returns:
        The ``subprocess.CompletedProcess`` returned by ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and ``difft``
            exits with a non-zero status.
        OSError: If the ``difft`` executable cannot be started.
    """
    # Always build a fresh dict so the defaults applied below never leak into
    # the mapping owned by the caller.
    child_env = dict(env) if env else {}
    if environ:
        child_env = {**os.environ, **child_env}
    child_env.setdefault("DFT_SYNTAX_HIGHLIGHT", "off")
    child_env.setdefault("DFT_COLOR", "always")

    kwargs.setdefault("check", True)
    kwargs.setdefault("encoding", "utf8")
    return subprocess.run(["difft", *args], env=child_env, **kwargs)
|
|
|
|
|
def get_json(args, **kwargs):
    """Run difftastic with ``--display=json`` and return the decoded output.

    Args:
        args: Arguments for ``difft`` (typically the old and the new path,
            followed by any extra options).
        **kwargs: Forwarded to ``run``. An ``env`` mapping given here is
            merged with the ``DFT_UNSTABLE=yes`` setting this function needs
            instead of clashing with it.

    Returns:
        The object parsed from difft's standard output.

    Example output (annotated; the ``//`` remarks are not part of the data)::

        {
          "language": "yaml",
          "status": "changed",
          "path": "path/to/new/file",
          "chunks": [
            [ // caution: this (second-level) list seems to be unordered,
              // it is different each time!
              {
                // either "lhs" or "rhs" or both are present
                // "lhs" is the old file
                // "rhs" is the new file (same as .path)
                // caution: the path of the old file is not available anywhere
                // caution: the full content of the line is not part of the
                // output; the input file has to be read separately to find
                // the lines
                "lhs": {
                  "line_number": 1,
                  "changes": [
                    {
                      // caution: this can contain a lot of single-character
                      // entries of the same type (aka highlight)
                      "content": "matching-content",
                      "start": 1,
                      "end": 2,
                      "highlight": "string|normal|..."
                    },
                    ...
                  ]
                },
                "rhs": {
                  // see lhs
                }
              }
            ],
            ...
          ]
        }
    """
    # A caller-supplied env used to collide with the keyword below and raise
    # TypeError; merge it, keeping DFT_UNSTABLE forced on.
    # NOTE(review): presumably difft only emits JSON with DFT_UNSTABLE set - confirm.
    env = {**(kwargs.pop("env", None) or {}), "DFT_UNSTABLE": "yes"}
    proc = run(
        ["--color=never", "--display=json", *args],
        env=env,
        stdout=subprocess.PIPE,
        **kwargs,
    )
    return json.loads(proc.stdout)
|
|
|
|
|
@dataclasses.dataclass
class DiffRowPiece:
    """A contiguous fragment of one line together with its change state."""

    # Text of the fragment.
    content: str
    # True when the fragment is part of a reported change.
    changed: bool
    # Highlight name attached to a changed fragment; "" for unchanged text.
    highlight: str
|
|
|
|
|
@dataclasses.dataclass
class DiffRowEntry:
    """One side (old or new) of a diff row."""

    # Index of the line in its file, or None when this side has no line.
    line_number: int | None = None
    # Fragments that make up the line, in order.
    pieces: list[DiffRowPiece] = dataclasses.field(default_factory=list)

    @property
    def changed(self):
        """Whether at least one piece of this entry is marked as changed."""
        for piece in self.pieces:
            if piece.changed:
                return True
        return False
|
|
|
|
|
@dataclasses.dataclass
class DiffRow:
    """A single row of the side-by-side diff: the old and the new entry."""

    # Entry taken from the old file.
    old: DiffRowEntry
    # Entry taken from the new file.
    new: DiffRowEntry
    # Number of rows to the nearest changed row; 0 for a changed row itself.
    change_distance: int

    @property
    def changed(self):
        """Whether this row itself is a changed row.

        Changed rows are the ones given a distance of 0, so the test is
        ``== 0`` (the previous ``> 0`` reported exactly the opposite).
        """
        return self.change_distance == 0
|
|
|
|
|
@dataclasses.dataclass
class Diff:
    """Row-aligned, side-by-side view of difftastic's diff of two files.

    Construction reads both files from disk and combines their text with the
    change spans found in difftastic's JSON output to build ``diff_rows``.
    """

    # Parsed difftastic JSON for the file pair; only used by __post_init__.
    difft_data: dataclasses.InitVar[dict]
    old_path: Path
    new_path: Path

    # Copied from difft_data["language"] and difft_data["status"].
    language: str = dataclasses.field(init=False)
    status: str = dataclasses.field(init=False)
    # Rows of the side-by-side view, top to bottom.
    diff_rows: list[DiffRow] = dataclasses.field(init=False)

    def __post_init__(self, difft_data):
        """Read both files and build ``diff_rows`` from ``difft_data``.

        Raises:
            KeyError: If ``difft_data`` lacks ``"status"`` or ``"language"``.
            IndexError: If a reported line number is outside its file.
            OSError: If one of the files cannot be read.
        """
        self.status = difft_data["status"]
        self.language = difft_data["language"]
        if self.old_path:
            self.old_path = Path(self.old_path)
        if self.new_path:
            self.new_path = Path(self.new_path)

        old_lines = self.old_path.read_text().splitlines()
        new_lines = self.new_path.read_text().splitlines()

        # Line index -> pieces, one mapping per side.
        lhs_pieces: dict[int, list[DiffRowPiece]] = {}
        rhs_pieces: dict[int, list[DiffRowPiece]] = {}
        sides = (("lhs", old_lines, lhs_pieces), ("rhs", new_lines, rhs_pieces))

        # Tolerate a missing "chunks" key instead of raising KeyError.
        # NOTE(review): presumably difft omits it when nothing changed - confirm.
        for chunk in difft_data.get("chunks") or ():
            for entry in chunk:
                for key, lines, known in sides:
                    side = entry.get(key)
                    if not side:
                        continue
                    lineno = side["line_number"]
                    known[lineno] = self._make_pieces(
                        lines[lineno], side.get("changes") or ()
                    )

        def pieces_at(lineno, lines, known):
            """Return (pieces, changed) for a line; ([], False) past the end."""
            if lineno not in known:
                in_range = 0 <= lineno < len(lines)
                known[lineno] = self._make_pieces(lines[lineno]) if in_range else []
            found = known[lineno]
            return found, any(piece.changed for piece in found)

        rows: list[DiffRow] = []
        lhs_lineno = 0
        rhs_lineno = 0
        # Continue until BOTH files are consumed. The previous condition
        # compared the larger cursor with the longer file and could stop while
        # one side still had lines left.
        while lhs_lineno < len(old_lines) or rhs_lineno < len(new_lines):
            lhs, lhs_changed = pieces_at(lhs_lineno, old_lines, lhs_pieces)
            rhs, rhs_changed = pieces_at(rhs_lineno, new_lines, rhs_pieces)

            # When only one side changed, the row shows that side alone and
            # the other side waits, so its current line is shown on a later row.
            rhs_waits = lhs_changed and not rhs_changed
            lhs_waits = rhs_changed and not lhs_changed

            # A side is shown when it still has a line and is not waiting.
            # Testing "pieces is empty" here would drop the number of blank lines.
            lhs_shown = lhs_lineno < len(old_lines) and not lhs_waits
            rhs_shown = rhs_lineno < len(new_lines) and not rhs_waits

            rows.append(
                DiffRow(
                    old=DiffRowEntry(
                        line_number=lhs_lineno if lhs_shown else None,
                        pieces=lhs if lhs_shown else [],
                    ),
                    new=DiffRowEntry(
                        line_number=rhs_lineno if rhs_shown else None,
                        pieces=rhs if rhs_shown else [],
                    ),
                    change_distance=0,  # filled in below
                )
            )
            if not lhs_waits:
                lhs_lineno += 1
            if not rhs_waits:
                rhs_lineno += 1

        flags = [row.old.changed or row.new.changed for row in rows]
        for row, distance in zip(rows, self._change_distances(flags)):
            row.change_distance = distance
        self.diff_rows = rows

    @staticmethod
    def _make_pieces(line: str, changes=()) -> list[DiffRowPiece]:
        """Split ``line`` into changed and unchanged pieces.

        Directly adjacent changes with the same highlight are merged into one
        piece; a change that follows unchanged text always starts a new piece.
        """
        # NOTE(review): assumes "start"/"end" index characters of ``line`` -
        # confirm against difftastic for non-ASCII input.
        pieces: list[DiffRowPiece] = []
        idx = 0
        # Offsets have to be walked left to right; sort defensively.
        for change in sorted(changes, key=lambda item: item["start"]):
            start = change["start"]
            if start > idx:
                pieces.append(
                    DiffRowPiece(content=line[idx:start], changed=False, highlight="")
                )
            last = pieces[-1] if pieces else None
            if (
                last is not None
                and last.changed
                and last.highlight == change["highlight"]
            ):
                # Only the immediately preceding piece is a merge candidate,
                # so text is never merged across an unchanged gap.
                last.content += change["content"]
            else:
                pieces.append(
                    DiffRowPiece(
                        content=change["content"],
                        changed=True,
                        highlight=change["highlight"],
                    )
                )
            idx = change["end"]
        if idx < len(line):
            pieces.append(DiffRowPiece(content=line[idx:], changed=False, highlight=""))
        return pieces

    @staticmethod
    def _change_distances(changed: list[bool]) -> list[int]:
        """Return, per row, the number of rows to the nearest changed row.

        Changed rows get 0. When no row is changed every row gets
        ``len(changed)``, which is larger than any distance inside the list.
        """
        far = len(changed)
        distances = [far] * len(changed)
        # Forward pass: distance to the closest change above.
        since = far
        for index, flag in enumerate(changed):
            since = 0 if flag else min(since + 1, far)
            distances[index] = since
        # Backward pass: keep the smaller of above/below.
        since = far
        for index in range(len(changed) - 1, -1, -1):
            since = 0 if changed[index] else min(since + 1, far)
            distances[index] = min(distances[index], since)
        return distances

    @classmethod
    def create(cls, old: Path, new: Path, args=(), **kwargs):
        """Run difftastic on ``old`` and ``new`` and build a ``Diff`` from it.

        Args:
            old: Path of the old file.
            new: Path of the new file.
            args: Extra command-line arguments for ``difft``.
            **kwargs: Forwarded to ``get_json``.
        """
        data = get_json([old, new, *args], **kwargs)
        return cls(difft_data=data, old_path=old, new_path=new)

    def get_rows(self, context: int | None = None):
        """Return the diff rows, optionally limited to the area around changes.

        Args:
            context: Maximum distance (in rows) from a changed row for a row
                to be included; ``None`` returns every row.
        """
        if context is None:
            return self.diff_rows
        # Rows NEAR a change are wanted, hence "<=" (this was ">=" before,
        # which selected the rows far away from any change).
        return [row for row in self.diff_rows if row.change_distance <= context]

    def as_dict(self, *, context: int | None = None):
        """Return the selected rows as a list of plain dictionaries."""
        return [dataclasses.asdict(row) for row in self.get_rows(context=context)]

    def as_html_table(self, context: int | None = None) -> str:
        """Render the selected rows as a standalone HTML page with one table.

        Columns are: old code, old line number, new line number, new code.
        Line numbers are displayed 1-based; a side without a line shows ".".
        """

        def render_pieces(pieces, cls: str):
            yield f"<td class='{cls} code'><code>"
            for piece in pieces:
                text = html.escape(piece.content)
                if piece.changed:
                    # The highlight name ends up inside an attribute: escape it.
                    kind = html.escape(piece.highlight)
                    yield f"<span class='highlight highlight-{kind}'>{text}</span>"
                else:
                    yield text
            yield "</code></td>"

        def render_lineno(entry, cls: str):
            label = "." if entry.line_number is None else str(entry.line_number + 1)
            # Use the entry's own change flag; counting pieces missed lines
            # that consist of a single, fully changed piece.
            if entry.changed:
                label = f"<span class='highlight'>{label}</span>"
            yield f"<td class='{cls} lineno'>{label}</td>"

        def _gen():
            # TODO: use HTML builder such as https://github.com/tvst/htbuilder ?
            yield "<!doctype html>"
            yield "<html>"
            yield "<head>"
            yield "<style>"
            yield """
            * {
                font-family: monospace;
            }
            table, th, td {
                border-collapse: collapse;
                vertical-align: top;
            }
            table {
                width: 100%;
                margin-left: auto;
                margin-right: auto;
            }
            tbody {
                border: 1px solid grey;
            }
            .new.code, .old.code {
                width: min-content;
            }
            .new.lineno, .old.lineno {
                width: min-content;
            }
            code {
                display: block;
                white-space: pre-wrap;
            }
            .old .highlight {
                color: red;
            }
            .new .highlight {
                color: green;
            }
            .lineno .highlight {
                font-weight: bolder;
            }
            """
            yield "</style>"
            yield "</head>"
            yield "<body>"
            yield "<table style='border-collapse: collapse'>"
            yield "<thead>"
            yield "<tr>"
            yield "<th>Old</th>"
            yield "<th>No.</th>"
            yield "<th>No.</th>"
            yield "<th>New</th>"
            yield "</tr>"
            yield "</thead>"
            yield "<tbody>"
            for row in self.get_rows(context=context):
                yield "<tr>"
                yield from render_pieces(row.old.pieces, "old")
                yield from render_lineno(row.old, "old")
                yield from render_lineno(row.new, "new")
                yield from render_pieces(row.new.pieces, "new")
                yield "</tr>"
            yield "</tbody>"
            yield "</table>"
            yield "</body>"
            yield "</html>"

        return "".join(_gen())
|
|
|
|
|
def main(*args: str):
    """Command-line entry point: ``PROG OLD NEW OUTPUT [DIFFT_ARGS...]``.

    Args:
        *args: Full argument vector including the program name in first
            position; ``sys.argv`` is used when nothing is passed. ``OUTPUT``
            is ``"html"`` or ``"json"``; remaining items go to ``difft``.

    Exits with status 2 when arguments are missing and with status 1 when
    ``OUTPUT`` is not a known output type. The result is written to stdout.
    """
    argv = list(args or sys.argv)
    if len(argv) < 4:
        prog = argv[0] if argv else "prog"
        print(
            f"Usage: {prog} OLD NEW html|json [DIFFT_ARGS...]",
            file=sys.stderr,
        )
        sys.exit(2)
    _, old, new, output, *extra = argv

    # Reject an unknown output type before spending time on running difft.
    if output not in ("html", "json"):
        print(f"Unknown output: {output}", file=sys.stderr)
        sys.exit(1)

    diff = Diff.create(Path(old), Path(new), extra)
    if output == "html":
        print(diff.as_html_table())
    else:
        json.dump(diff.as_dict(), sys.stdout, indent=2)
|
|
|
|
|
# Script entry point: hand the complete argument vector (program name
# included) to main() when the file is executed directly.
if __name__ == "__main__":
    main(*sys.argv)