Skip to content

Instantly share code, notes, and snippets.

@nazarewk
Last active July 14, 2024 07:39
Show Gist options
  • Save nazarewk/4969a93669a9bb048409f306bdb1a3b0 to your computer and use it in GitHub Desktop.
Save nazarewk/4969a93669a9bb048409f306bdb1a3b0 to your computer and use it in GitHub Desktop.
difftastic wrapper providing HTML and easier consumable JSON outputs

This is a pure-Python script which builds on top of difftastic's `--display json` to provide:

  • more ergonomic and ready to use JSON output
  • slightly styled HTML table (mimics side-by-side)

It addresses the following shortcomings of the current (v0.51.1) JSON output:

  • line-scoped changes within block-scoped chunks are unordered (different on each run); they had to be sorted by rhs.line_number, then by lhs.line_number
  • there seems to be a lot of consecutive single-character entries of the same type (highlight) for some (probably unknown?) languages; those are merged

addresses Wilfred/difftastic#45

example HTML output:

image

from __future__ import annotations
"""
This module wraps difftastic's `--display json` to provide more output types:
- easier consumable JSON
- HTML table
"""
import dataclasses
import html
import json
import os
import subprocess
import sys
from pathlib import Path
def run(args, env: dict | None = None, environ=True, **kwargs):
    """Run the ``difft`` executable and return the ``subprocess.run`` result.

    Args:
        args: Command-line arguments placed after ``difft``.
        env: Extra environment variables for the child process. The mapping
            is never modified; a fresh dict is built for the child.
        environ: When true, ``os.environ`` is used as the base environment
            and ``env`` is layered on top of it; when false only ``env`` is
            passed on.
        **kwargs: Forwarded to ``subprocess.run``. ``check`` defaults to
            ``True`` and ``encoding`` to ``"utf8"`` unless given.

    Returns:
        The ``subprocess.CompletedProcess`` produced by ``subprocess.run``.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true (the default) and
            ``difft`` exits with a non-zero status.
    """
    base_env = os.environ if environ else {}
    # Always build a new dict so the defaults below never leak into the
    # caller's mapping (the previous code mutated ``env`` when environ=False).
    child_env = {**base_env, **(env or {})}
    child_env.setdefault("DFT_SYNTAX_HIGHLIGHT", "off")
    child_env.setdefault("DFT_COLOR", "always")

    kwargs.setdefault("check", True)
    kwargs.setdefault("encoding", "utf8")
    return subprocess.run(["difft", *args], env=child_env, **kwargs)
def get_json(args, **kwargs):
    """Run difftastic with ``--display=json`` and return the parsed output.

    ``args`` are appended after ``--color=never --display=json``; ``kwargs``
    are forwarded to ``run``. ``DFT_UNSTABLE=yes`` is set for the child
    process (presumably required to enable the JSON display - confirm
    against the difftastic version in use).

    Annotated example of the structure that comes back::

        {
          "language": "yaml",
          "status": "changed",
          "path": "path/to/new/file",
          "chunks": [
            [ // caution: this (second-level) list seems to be unordered,
              // it is different each time!
              {
                // either "lhs" or "rhs" or both are present
                // "lhs" is the old file
                // "rhs" is the new file (same as .path)
                // caution: there is nowhere to access the path to the
                //          old file from
                // caution: there is nowhere to access full content of the
                //          line from the output; it is required to read the
                //          input file separately and find the lines
                "lhs": {
                  "line_number": 1,
                  "changes": [
                    {
                      // caution: this can contain a lot of single-character
                      //          entries of same type aka highlight
                      "content": "matching-content",
                      "start": 1,
                      "end": 2,
                      "highlight": "string|normal|..."
                    },
                    ...
                  ]
                },
                "rhs": {
                  // see lhs
                }
              }
            ],
            ...
          ]
        }
    """
    difft_args = ["--color=never", "--display=json", *args]
    completed = run(
        difft_args,
        env={"DFT_UNSTABLE": "yes"},
        stdout=subprocess.PIPE,
        **kwargs,
    )
    return json.loads(completed.stdout)
@dataclasses.dataclass
class DiffRowPiece:
    """A contiguous fragment of one source line."""

    # Text of the fragment.
    content: str
    # True when the fragment was built from a difftastic ``changes`` entry.
    changed: bool
    # The ``highlight`` value of the change; "" for unchanged fragments.
    highlight: str


@dataclasses.dataclass
class DiffRowEntry:
    """One side (old or new) of a rendered diff row."""

    # Zero-based line number, or None when this side has no line in the row.
    line_number: int | None = None
    # Fragments making up the line, in left-to-right order.
    pieces: list[DiffRowPiece] = dataclasses.field(default_factory=list)

    @property
    def changed(self) -> bool:
        """Whether at least one fragment of this line is marked as changed."""
        return any(piece.changed for piece in self.pieces)


@dataclasses.dataclass
class DiffRow:
    """A side-by-side row pairing an old line with a new line."""

    old: DiffRowEntry
    new: DiffRowEntry
    # Number of rows to the nearest changed row (0 for a changed row).
    change_distance: int

    @property
    def changed(self) -> bool:
        """Whether either side of the row contains a changed fragment.

        Derived from the pieces rather than from ``change_distance``; the
        previous ``change_distance > 0`` test was inverted (changed rows are
        the ones at distance 0).
        """
        return self.old.changed or self.new.changed


@dataclasses.dataclass
class Diff:
    """Line-aligned, side-by-side view built from difftastic's JSON output.

    Construction reads both files, splits them into lines, attaches the
    per-line changes reported in ``difft_data`` and aligns old and new lines
    into ``diff_rows``.

    Args:
        difft_data: Parsed difftastic JSON for the file pair. ``status`` and
            ``language`` are required; ``chunks`` may be missing or empty.
        old_path: Path of the old file (left-hand side).
        new_path: Path of the new file (right-hand side).
    """

    difft_data: dataclasses.InitVar[dict]
    old_path: Path
    new_path: Path
    language: str = dataclasses.field(init=False)
    status: str = dataclasses.field(init=False)
    diff_rows: list[DiffRow] = dataclasses.field(init=False)

    # Stylesheet embedded verbatim into the HTML output (not a dataclass
    # field: it carries no annotation).
    _HTML_STYLE = """
* {
font-family: monospace;
}
table, th, td {
border-collapse: collapse;
vertical-align: top;
}
table {
width: 100%;
margin-left: auto;
margin-right: auto;
}
tbody {
border: 1px solid grey;
}
.new.code, .old.code {
width: min-content;
}
.new.lineno, .old.lineno {
width: min-content;
}
code {
display: block;
white-space: pre-wrap;
}
.old .highlight {
color: red;
}
.new .highlight {
color: green;
}
.lineno .highlight {
font-weight: bolder;
}
"""

    def __post_init__(self, difft_data):
        self.status = difft_data["status"]
        self.language = difft_data["language"]
        self.old_path = Path(self.old_path)
        self.new_path = Path(self.new_path)
        # Decode as UTF-8 to stay consistent with how difftastic's own output
        # is decoded by ``run``.
        old_lines = self.old_path.read_text(encoding="utf-8").splitlines()
        new_lines = self.new_path.read_text(encoding="utf-8").splitlines()

        lhs_pieces: dict[int, list[DiffRowPiece]] = {}
        rhs_pieces: dict[int, list[DiffRowPiece]] = {}
        # Tolerate a missing "chunks" key so a diff without reported changes
        # does not raise KeyError.
        for chunk in difft_data.get("chunks") or []:
            for line in chunk:
                self._register(line.get("lhs"), old_lines, lhs_pieces)
                self._register(line.get("rhs"), new_lines, rhs_pieces)

        self.diff_rows = self._align_rows(
            old_lines, new_lines, lhs_pieces, rhs_pieces
        )
        self._assign_change_distances(self.diff_rows)

    @staticmethod
    def _make_pieces(line: str, changes=()) -> list[DiffRowPiece]:
        """Split ``line`` into changed and unchanged fragments.

        Directly adjacent changes sharing the same highlight are merged into
        one piece. Changes separated by unchanged text are never merged, so
        fragments always stay in left-to-right order.
        """
        pieces: list[DiffRowPiece] = []
        cursor = 0
        # Last changed piece, kept only while the next change may extend it.
        mergeable: DiffRowPiece | None = None
        # Sort defensively; the walk below relies on ascending start offsets.
        for change in sorted(changes, key=lambda item: item["start"]):
            start: int = change["start"]
            end: int = change["end"]
            content: str = change["content"]
            highlight: str = change["highlight"]
            if cursor < start:
                pieces.append(
                    DiffRowPiece(
                        content=line[cursor:start],
                        changed=False,
                        highlight="",
                    )
                )
                # Unchanged text sits in between: do not merge across it.
                mergeable = None
            if mergeable is not None and mergeable.highlight == highlight:
                mergeable.content += content
            else:
                mergeable = DiffRowPiece(
                    content=content,
                    changed=True,
                    highlight=highlight,
                )
                pieces.append(mergeable)
            cursor = end
        if cursor < len(line):
            pieces.append(
                DiffRowPiece(content=line[cursor:], changed=False, highlight="")
            )
        return pieces

    @classmethod
    def _register(cls, entry, lines, pieces_by_line) -> None:
        """Store the pieces of one difftastic ``lhs``/``rhs`` entry, if any."""
        if not entry:
            return
        lineno = entry["line_number"]
        pieces_by_line[lineno] = cls._make_pieces(
            lines[lineno], entry.get("changes") or []
        )

    @classmethod
    def _align_rows(
        cls, old_lines, new_lines, lhs_pieces, rhs_pieces
    ) -> list[DiffRow]:
        """Walk both files in parallel and pair their lines into rows.

        A line changed on only one side gets a row of its own with the other
        side left empty; otherwise the current lines of both sides share a
        row. Every row is created with ``change_distance`` 0 as a placeholder.
        """

        def side(lineno, lines, known):
            # Returns (line exists, pieces, any piece changed).
            if lineno >= len(lines):
                return False, [], False
            pieces = known.get(lineno)
            if pieces is None:
                pieces = cls._make_pieces(lines[lineno])
            return True, pieces, any(piece.changed for piece in pieces)

        rows: list[DiffRow] = []
        lhs_no = 0
        rhs_no = 0
        # Continue until BOTH sides are exhausted; stopping once the larger
        # index reached the longer file's length could drop trailing lines of
        # the other side.
        while lhs_no < len(old_lines) or rhs_no < len(new_lines):
            lhs_present, lhs, lhs_changed = side(lhs_no, old_lines, lhs_pieces)
            rhs_present, rhs, rhs_changed = side(rhs_no, new_lines, rhs_pieces)
            # A side is held back when only the opposite side is changed.
            show_lhs = lhs_present and (lhs_changed or not rhs_changed)
            show_rhs = rhs_present and (rhs_changed or not lhs_changed)
            rows.append(
                DiffRow(
                    # Presence, not piece count, decides the line number so
                    # that empty lines (no pieces) keep their number.
                    old=DiffRowEntry(
                        line_number=lhs_no if show_lhs else None,
                        pieces=lhs if show_lhs else [],
                    ),
                    new=DiffRowEntry(
                        line_number=rhs_no if show_rhs else None,
                        pieces=rhs if show_rhs else [],
                    ),
                    change_distance=0,
                )
            )
            if show_lhs:
                lhs_no += 1
            if show_rhs:
                rhs_no += 1
        return rows

    @staticmethod
    def _assign_change_distances(rows: list[DiffRow]) -> None:
        """Set each row's ``change_distance`` to its nearest changed row.

        Changed rows get 0. When no row is changed at all, every row gets
        ``len(rows)``, which is larger than any real distance.
        """
        far = len(rows)
        for row in rows:
            row.change_distance = far
        # One sweep per direction; keep the smaller of the two distances.
        for ordered in (rows, reversed(rows)):
            distance = far
            for row in ordered:
                distance = 0 if row.changed else min(distance + 1, far)
                row.change_distance = min(row.change_distance, distance)

    @classmethod
    def create(cls, old: Path, new: Path, args=(), **kwargs):
        """Run difftastic on ``old`` and ``new`` and build a ``Diff``.

        ``args`` are extra difftastic arguments; ``kwargs`` are forwarded to
        ``get_json``.
        """
        data = get_json([old, new, *args], **kwargs)
        return cls(difft_data=data, old_path=old, new_path=new)

    def get_rows(self, context: int | None = None) -> list[DiffRow]:
        """Return the rows, optionally limited to the vicinity of changes.

        With ``context=None`` all rows are returned. Otherwise only rows at
        most ``context`` rows away from a changed row are kept (``0`` keeps
        just the changed rows).
        """
        if context is None:
            return self.diff_rows
        return [row for row in self.diff_rows if row.change_distance <= context]

    def as_dict(self, *, context: int | None = None):
        """Return the selected rows as a list of plain dictionaries."""
        return [dataclasses.asdict(row) for row in self.get_rows(context=context)]

    def as_html_table(self, context: int | None = None) -> str:
        """Render the selected rows as a standalone side-by-side HTML page."""

        # TODO: use HTML builder such as https://github.com/tvst/htbuilder ?
        def render_pieces(pieces: list[DiffRowPiece], cls: str):
            yield f"<td class='{cls} code'><code>"
            for piece in pieces:
                text = html.escape(piece.content)
                if piece.changed:
                    # The highlight name ends up inside an attribute value,
                    # so escape it as well.
                    highlight = html.escape(piece.highlight)
                    yield (
                        f"<span class='highlight highlight-{highlight}'>"
                        f"{text}</span>"
                    )
                else:
                    yield text
            yield "</code></td>"

        def render_lineno(lineno: int | None, has_changes: bool, cls: str):
            # Line numbers are stored zero-based and displayed one-based.
            entry = "." if lineno is None else str(lineno + 1)
            if has_changes:
                entry = f"<span class='highlight'>{entry}</span>"
            yield f"<td class='{cls} lineno'>{entry}</td>"

        def document():
            yield from (
                "<!doctype html>",
                "<html>",
                "<head>",
                "<style>",
                self._HTML_STYLE,
                "</style>",
                "</head>",
                "<body>",
                "<table style='border-collapse: collapse'>",
                "<thead>",
                "<tr>",
                "<th>Old</th>",
                "<th>No.</th>",
                "<th>No.</th>",
                "<th>New</th>",
                "</tr>",
                "</thead>",
                "<tbody>",
            )
            for row in self.get_rows(context=context):
                yield "<tr>"
                yield from render_pieces(row.old.pieces, "old")
                # Highlight the number whenever the line has a changed piece
                # (a fully changed line consists of a single piece).
                yield from render_lineno(
                    row.old.line_number, row.old.changed, "old"
                )
                yield from render_lineno(
                    row.new.line_number, row.new.changed, "new"
                )
                yield from render_pieces(row.new.pieces, "new")
                yield "</tr>"
            yield from ("</tbody>", "</table>", "</body>", "</html>")

        return "".join(document())
def main(*args: str):
    """Command-line entry point: ``<prog> OLD NEW html|json [difft args...]``.

    ``args`` mirrors ``sys.argv`` (program name first); when called without
    arguments ``sys.argv`` itself is used. The rendered diff is written to
    standard output.

    Exits with status 2 when fewer than three arguments follow the program
    name and with status 1 when the output format is unknown. Both checks
    happen before difftastic is run.
    """
    argv = args or sys.argv
    if len(argv) < 4:
        prog = argv[0] if argv else "difft-wrapper"
        print(
            f"Usage: {prog} OLD NEW html|json [difft args...]",
            file=sys.stderr,
        )
        sys.exit(2)
    _, old, new, output, *difft_args = argv

    # Reject an unsupported format up front instead of after the diff ran.
    if output not in ("html", "json"):
        print(f"Unknown output: {output}", file=sys.stderr)
        sys.exit(1)

    diff = Diff.create(Path(old), Path(new), difft_args)
    if output == "html":
        print(diff.as_html_table())
    else:
        json.dump(diff.as_dict(), sys.stdout, indent=2)


if __name__ == "__main__":
    main(*sys.argv)
<!doctype html><html><head><style>
* {
font-family: monospace;
}
table, th, td {
border-collapse: collapse;
vertical-align: top;
}
table {
width: 100%;
margin-left: auto;
margin-right: auto;
}
tbody {
border: 1px solid grey;
}
.new.code, .old.code {
width: min-content;
}
.new.lineno, .old.lineno {
width: min-content;
}
code {
display: block;
white-space: pre-wrap;
}
.old .highlight {
color: red;
}
.new .highlight {
color: green;
}
.lineno .highlight {
font-weight: bolder;
}
</style></head><body><table style='border-collapse: collapse'><thead><tr><th>Old</th><th>No.</th><th>No.</th><th>New</th></tr></thead><tbody><tr><td class='old code'><code>&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;</code></td><td class='old lineno'>1</td><td class='new lineno'>1</td><td class='new code'><code>&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;</code></td></tr><tr><td class='old code'><code>&lt;module type=&quot;PYTHON_MODULE&quot; version=&quot;4&quot;&gt;</code></td><td class='old lineno'>2</td><td class='new lineno'>2</td><td class='new code'><code>&lt;module type=&quot;PYTHON_MODULE&quot; version=&quot;4&quot;&gt;</code></td></tr><tr><td class='old code'><code> &lt;component name=&quot;NewModuleRootManager&quot; inherit-compiler-output=&quot;true&quot;&gt;</code></td><td class='old lineno'>3</td><td class='new lineno'>3</td><td class='new code'><code> &lt;component name=&quot;NewModuleRootManager&quot; inherit-compiler-output=&quot;true&quot;&gt;</code></td></tr><tr><td class='old code'><code> &lt;exclude-output /&gt;</code></td><td class='old lineno'>4</td><td class='new lineno'>4</td><td class='new code'><code> &lt;exclude-output /&gt;</code></td></tr><tr><td class='old code'><code><span class='highlight highlight-normal'> &lt;content url=&quot;file://$MODULE_DIR$&quot;&gt;</span></code></td><td class='old lineno'>5</td><td class='new lineno'>5</td><td class='new code'><code><span class='highlight highlight-normal'> &lt;content url=&quot;file://$MODULE_DIR$&quot; /&gt;</span></code></td></tr><tr><td class='old code'><code><span class='highlight highlight-normal'> &lt;sourceFolder url=&quot;file://$MODULE_DIR$&quot; isTestSource=&quot;false&quot; /&gt;</span></code></td><td class='old lineno'>6</td><td class='new lineno'>.</td><td class='new code'><code></code></td></tr><tr><td class='old code'><code><span class='highlight highlight-normal'> &lt;/content&gt;</span></code></td><td class='old lineno'>7</td><td 
class='new lineno'>.</td><td class='new code'><code></code></td></tr><tr><td class='old code'><code> &lt;orderEntry type=&quot;jdk&quot; jdkName=&quot;devex-utils:eid&quot; jdkType=&quot;Python SDK&quot; /&gt;</code></td><td class='old lineno'>8</td><td class='new lineno'>6</td><td class='new code'><code> &lt;orderEntry type=&quot;jdk&quot; jdkName=&quot;devex-utils:eid&quot; jdkType=&quot;Python SDK&quot; /&gt;</code></td></tr><tr><td class='old code'><code> &lt;orderEntry type=&quot;sourceFolder&quot; forTests=&quot;false&quot; /&gt;</code></td><td class='old lineno'>9</td><td class='new lineno'>7</td><td class='new code'><code> &lt;orderEntry type=&quot;sourceFolder&quot; forTests=&quot;false&quot; /&gt;</code></td></tr><tr><td class='old code'><code> &lt;/component&gt;</code></td><td class='old lineno'>10</td><td class='new lineno'>8</td><td class='new code'><code> &lt;/component&gt;</code></td></tr><tr><td class='old code'><code>&lt;/module&gt;</code></td><td class='old lineno'>11</td><td class='new lineno'>9</td><td class='new code'><code>&lt;/module&gt;</code></td></tr></tbody></table></body></html>
[
{
"old": {
"line_number": 0,
"pieces": [
{
"content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 0,
"pieces": [
{
"content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 1,
"pieces": [
{
"content": "<module type=\"PYTHON_MODULE\" version=\"4\">",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 1,
"pieces": [
{
"content": "<module type=\"PYTHON_MODULE\" version=\"4\">",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 2,
"pieces": [
{
"content": " <component name=\"NewModuleRootManager\" inherit-compiler-output=\"true\">",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 2,
"pieces": [
{
"content": " <component name=\"NewModuleRootManager\" inherit-compiler-output=\"true\">",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 3,
"pieces": [
{
"content": " <exclude-output />",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 3,
"pieces": [
{
"content": " <exclude-output />",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 4,
"pieces": [
{
"content": " <content url=\"file://$MODULE_DIR$\">",
"changed": true,
"highlight": "normal"
}
]
},
"new": {
"line_number": 4,
"pieces": [
{
"content": " <content url=\"file://$MODULE_DIR$\" />",
"changed": true,
"highlight": "normal"
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 5,
"pieces": [
{
"content": " <sourceFolder url=\"file://$MODULE_DIR$\" isTestSource=\"false\" />",
"changed": true,
"highlight": "normal"
}
]
},
"new": {
"line_number": null,
"pieces": []
},
"change_distance": 0
},
{
"old": {
"line_number": 6,
"pieces": [
{
"content": " </content>",
"changed": true,
"highlight": "normal"
}
]
},
"new": {
"line_number": null,
"pieces": []
},
"change_distance": 0
},
{
"old": {
"line_number": 7,
"pieces": [
{
"content": " <orderEntry type=\"jdk\" jdkName=\"devex-utils:eid\" jdkType=\"Python SDK\" />",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 5,
"pieces": [
{
"content": " <orderEntry type=\"jdk\" jdkName=\"devex-utils:eid\" jdkType=\"Python SDK\" />",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 8,
"pieces": [
{
"content": " <orderEntry type=\"sourceFolder\" forTests=\"false\" />",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 6,
"pieces": [
{
"content": " <orderEntry type=\"sourceFolder\" forTests=\"false\" />",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 9,
"pieces": [
{
"content": " </component>",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 7,
"pieces": [
{
"content": " </component>",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
},
{
"old": {
"line_number": 10,
"pieces": [
{
"content": "</module>",
"changed": false,
"highlight": ""
}
]
},
"new": {
"line_number": 8,
"pieces": [
{
"content": "</module>",
"changed": false,
"highlight": ""
}
]
},
"change_distance": 0
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment