Skip to content

Instantly share code, notes, and snippets.

@pgundlach
Last active January 10, 2022 01:43
Show Gist options
  • Save pgundlach/556247 to your computer and use it in GitHub Desktop.
Save pgundlach/556247 to your computer and use it in GitHub Desktop.
LuaTeX nodelist visualization
--
-- viznodelist.lua
-- speedata publisher
--
-- Written 2010-2020 by Patrick Gundlach.
-- This file is released in the spirit of the well known MIT license
-- (see https://opensource.org/licenses/MIT for more information)
--
-- visualizes nodelists using graphviz
-- usage example:
-- \setbox0\hbox{\vbox{\hbox{abc}}\vbox{x}}
-- \directlua{
-- require("viznodelist")
-- viznodelist.nodelist_visualize(0,"mybox.gv")
-- }
--
-- \bye
-- and then open "mybox.gv" with graphviz
--
-- nodelist_visualize takes three arguments:
-- 1: the number of the box or the box itself (when called from Lua)
-- 2: the filename of the dot-file to create
-- 3: the options table (optional). Known keywords:
-- - showdisc = <boolean> (defaults to false)
--
--
-- Newest file is at https://gist.github.com/pgundlach/556247
local io,string,table = io,string,table
local assert,tostring,type = assert,tostring,type
local tex,texio,node,unicode,font,status=tex,texio,node,unicode,font,status
local pairs = pairs
local print = print
module(...)
local factor = 2^16
-- Derive the graphviz node name for a node.
-- tostring(a_node) looks like "<node nil < 172 > nil : hlist 2>"; the number
-- in the middle (172 here) serves as a unique id, so that node is named
-- "n172" (the returned string includes the surrounding double quotes).
-- If tostring(n) does not have that shape, the whole string is used in place
-- of the number.
local function get_nodename(n)
    local description = tostring(n)
    -- only the first gsub result (the string) is kept, the match count is dropped
    local id = string.gsub(description, "^<node%s+%S+%s+<%s+(%d+).*", "%1")
    return string.format("\"n%s\"", id)
end
-- Build the dot edge that goes from port `label` of the node called
-- `nodename` to the title port of node `n`.
-- Returns nothing when `n` is nil, when `n` is a "temp" or "nested_list"
-- node, or when a "prev" edge is requested for a node without a successor.
local function link_to( n,nodename,label )
    if not n then return end
    local target_type = node.type(n.id)
    if target_type == "temp" or target_type == "nested_list" then return end
    local target = get_nodename(n)
    if label == "prev" then
        -- ignore nodes where node.prev.next does not exist.
        -- TODO: this should be more clever: ignore prev pointers of the first nodes in a list.
        if not n.next then return end
        -- prev edges leave their port on the west side
        return string.format("%s:%s:w -> %s:title\n", nodename, label, target)
    end
    -- every other port ("head", "next", "pre", ...) uses the plain edge form
    return string.format("%s:%s -> %s:title\n", nodename, label, target)
end
-- Human readable subtype names, indexed first by node type name and then by
-- the numeric subtype. The "whatsit" entry is filled in on first use from
-- node.whatsits().
local subtype_names = {
    hlist = {
        [0] = "unknown origin",
        "created by linebreaking",
        "explicit box command",
        "parindent",
        "alignment column or row",
        "alignment cell",
    },
    glyph = {
        [0] = "character",
        "glyph",
        "ligature",
    },
    disc = {
        [0] = "\\discretionary",
        "\\-",
        "- (auto)",
        "h&j (simple)",
        "h&j (hard, first item)",
        "h&j (hard, second item)",
    },
    glue = {
        [0] = "skip",
        [1] = "lineskip",
        [2] = "baselineskip",
        [3] = "parskip",
        [4] = "abovedisplayskip",
        [5] = "belowdisplayskip",
        [6] = "abovedisplayshortskip",
        [7] = "belowdisplayshortskip",
        [8] = "leftskip",
        [9] = "rightskip",
        [10] = "topskip",
        [11] = "splittopskip",
        [12] = "tabskip",
        [13] = "spaceskip",
        [14] = "xspaceskip",
        [15] = "parfillskip",
        [16] = "thinmuskip",
        [17] = "medmuskip",
        [18] = "thickmuskip",
        [100] = "leaders",
        [101] = "cleaders",
        [102] = "xleaders",
        [103] = "gleaders"
    },
    rule = {
        [2] = "image"
    }
}

-- Return a printable subtype for node `n`: the name from the table above if
-- one is known for this node type and subtype, otherwise the numeric subtype
-- converted with tostring.
local function get_subtype( n )
    -- `typ` must be local; a bare assignment would leak it out of this function
    local typ = node.type(n.id)
    if not subtype_names.whatsit then
        subtype_names.whatsit = node.whatsits()
    end
    local names = subtype_names[typ]
    return names and names[n.subtype] or tostring(n.subtype)
end
-- Build the dot statement that declares node `n` as a record.
-- The record always carries the ports <title>, <sub>, <prev> and <next>.
-- `tab` (optional) is a list of { port, text } entries that are appended as
-- additional fields; entries whose first element is nil/false are skipped.
local function label(n,tab )
    local node_type = node.type(n.id)
    local quoted_name = get_nodename(n)
    local sub = get_subtype(n)
    -- the id shown inside the label is the node name without its quotes
    local bare_name = string.gsub(quoted_name, "\"", "")
    local pieces = {
        string.format("%s [ label = \"<title> name: %s (id %s) | <sub> type: %s | { <prev> prev |<next> next }",
            quoted_name or "??", node_type or "??", bare_name or "??", sub or "?"),
    }
    if tab then
        for idx = 1, #tab do
            local entry = tab[idx]
            if entry[1] then
                pieces[#pieces + 1] = string.format("|<%s> %s", entry[1], entry[2])
            end
        end
    end
    -- close the label string and the attribute list
    pieces[#pieces + 1] = "\"]\n"
    return table.concat(pieces)
end
-- Emit the dot code for a single node: its record declaration followed by
-- the edges to its next and prev neighbours (where link_to produces one).
-- `tab` (optional) is a list of { port, text } fields; one field per
-- attribute and per node property of `n` is appended to it before drawing.
local function draw_node( n,tab )
    tab = tab or {}
    local nodename = get_nodename(n)

    -- walk the attribute list, starting at the node after its first element
    local attribute = n.attr
    if attribute then
        attribute = attribute.next
    end
    while attribute do
        tab[#tab + 1] = { "", string.format("attr%d=%d",attribute.number, attribute.value) }
        attribute = attribute.next
    end

    local props = node.getproperty(n)
    if props then
        for key,value in pairs(props) do
            tab[#tab + 1] = { "", string.format("%s=%s\\l",key, value) }
        end
    end

    -- link_to returns nothing when no edge should be drawn
    local record = label(n,tab)
    local next_edge = link_to(n.next,nodename,"next") or ""
    local prev_edge = link_to(n.prev,nodename,"prev") or ""
    return record .. next_edge .. prev_edge
end
-- Turn the code point `num` into UTF-8 text that is safe to place inside a
-- double-quoted graphviz record label.
-- Values that are not Unicode code points (below 0 or above U+10FFFF; such
-- values were reported for glyphs set with the HarfBuzz renderer) make
-- unicode.utf8.char fail with "invalid value", so they are displayed as
-- U+FFFD REPLACEMENT CHARACTER instead.
local function sanitize(num)
    if num < 0 or num > 0x10FFFF then num = 0xFFFD end
    local c = unicode.utf8.char(num)
    -- A backslash is needed in front of the double quote (it would end the dot
    -- string) and in front of the characters that structure a record label:
    -- backslash, braces, vertical bar and angle brackets.
    -- Assigning to a single local keeps only the string, not the match count.
    local ret = c:gsub("[\\\"{}|<>]", "\\%0")
    return ret
end
-- Emit the dot record for the action node `n` (drawn for pdf_start_link
-- whatsits). The record has a <title> port plus one field each for
-- action_type, action_id, named_id, file, new_window, data and ref_count.
local function draw_action( n )
    local pieces = {
        string.format("%s [ label = \"<title> name: %s ", get_nodename(n), "action"),
    }
    local function add_field( port, text )
        pieces[#pieces + 1] = string.format("|<%s> %s", port, text)
    end
    add_field("action_type", string.format("action_type: %s", tostring(n.action_type)))
    add_field("action_id", string.format("action_id: %s",tostring(n.action_id)))
    add_field("named_id", string.format("named_id: %s",tostring(n.named_id)))
    add_field("file", string.format("file: %s",tostring(n.file)))
    add_field("new_window", string.format("new_window: %s",tostring(n.new_window)))
    -- angle brackets delimit ports in record labels, so they are escaped in the data
    add_field("data", string.format("data: %s",tostring(n.data):gsub(">","\\>"):gsub("<","\\<")))
    add_field("refcount", string.format("ref_count: %s",tostring(n.ref_count)))
    pieces[#pieces + 1] = "\"]\n"
    return table.concat(pieces)
end
-- Collect the non-zero dimensions and glue settings of an hlist/vlist node as
-- { port, text } entries for draw_node. The "head" port is always appended so
-- that the edge to the box contents has an anchor.
local function box_fields( box )
    local fields = {}
    if box.width ~= 0 then
        fields[#fields + 1] = {"width", string.format("width %gpt",box.width / factor)}
    end
    if box.height ~= 0 then
        fields[#fields + 1] = {"height", string.format("height %gpt",box.height / factor)}
    end
    if box.depth ~= 0 then
        fields[#fields + 1] = {"depth", string.format("depth %gpt",box.depth / factor)}
    end
    if box.glue_set ~= 0 then
        fields[#fields + 1] = {"glue_set", string.format("glue_set %g",box.glue_set)}
    end
    if box.glue_sign ~= 0 then
        fields[#fields + 1] = {"glue_sign", string.format("glue_sign %g",box.glue_sign)}
    end
    if box.glue_order ~= 0 then
        fields[#fields + 1] = {"glue_order", string.format("glue_order %d",box.glue_order)}
    end
    if box.shift ~= 0 then
        fields[#fields + 1] = {"shift", string.format("shift %gpt",box.shift / factor)}
    end
    fields[#fields + 1] = {"head", "head"}
    return fields
end

-- Format one stretch or shrink component. `sign` is "+" or "-". Order 0 is a
-- plain length in pt; order k > 0 is written as "fi" followed by k-1 "l"s.
local function glue_component( sign, amount, order )
    if order == 0 then
        return string.format(" %s %gpt", sign, amount / factor)
    end
    return string.format(" %s %g fi%s", sign, amount / factor, string.rep("l", order - 1))
end

-- Format width, stretch and shrink of `spec` (a glue spec, or the glue node
-- itself when it carries these fields) as "<width>pt + <stretch> - <shrink>".
-- Zero stretch/shrink components are left out.
local function glue_spec_string( spec )
    local text = string.format("%gpt", spec.width / factor)
    if spec.stretch ~= 0 then
        text = text .. glue_component("+", spec.stretch, spec.stretch_order)
    end
    if spec.shrink ~= 0 then
        text = text .. glue_component("-", spec.shrink, spec.shrink_order)
    end
    return text
end

-- Format one rule dimension. The value -1073741824 (-2^30) is the marker that
-- is reported as "flexible" instead of a length.
local function rule_dimension( name, value )
    if value == -1073741824 then
        return name .. ": flexible"
    end
    return string.format("%s: %gpt", name, value / factor)
end

-- Escape angle brackets, which would otherwise start/end a port inside a
-- record label. Returns only the string (the gsub match count is dropped).
local function escape_angle_brackets( value )
    return (tostring(value):gsub("<","\\<"):gsub(">","\\>"))
end

-- Walk the node list starting at `head` (following the next pointers) and
-- return the dot code for all its nodes as one string. Nested lists (box
-- contents, leaders and - with options.showdisc - discretionary parts and
-- glyph components) are handled recursively.
-- `options` must be a table; the only key read here is `showdisc`.
local function dot_analyze_nodelist( head, options )
    local ret = {}
    local typ,nodename
    while head do
        typ = node.type(head.id)
        nodename = get_nodename(head)
        if typ == "hlist" or typ == "vlist" then
            -- both box types are drawn identically
            ret[#ret + 1] = draw_node(head, box_fields(head))
            if head.head then
                ret[#ret + 1] = link_to(head.head,nodename,"head")
                ret[#ret + 1] = dot_analyze_nodelist(head.head,options)
            end
        elseif typ == "glue" then
            local subtype = get_subtype(head)
            -- depending on the LuaTeX version the glue values live in a
            -- separate spec node or directly in the glue node
            local spec
            if node.has_field(head,"spec") then
                spec = head.spec
            else
                spec = head
            end
            local spec_string = glue_spec_string(spec)
            if head.leader then
                ret[#ret + 1] = draw_node(head,{ {"subtype", subtype},{"spec",spec_string},{"leaders","leaders"} })
                ret[#ret + 1] = dot_analyze_nodelist(head.leader,options)
                ret[#ret + 1] = link_to(head.leader,nodename,"leaders")
            else
                ret[#ret + 1] = draw_node(head,{ {"subtype", subtype},{"spec",spec_string} })
            end
        elseif typ == "kern" then
            ret[#ret + 1] = draw_node(head,{ {"kern", string.format("kern: %gpt",head.kern / factor) } })
        elseif typ == "rule" then
            local wd = rule_dimension("width", head.width)
            local ht = rule_dimension("height", head.height)
            local dp = rule_dimension("depth", head.depth)
            ret[#ret + 1] = draw_node(head,{ {"wd", wd },{"ht", ht },{"dp", dp } })
        elseif typ == "penalty" then
            ret[#ret + 1] = draw_node(head,{ {"penalty", string.format("%d",head.penalty) } })
        elseif typ == "disc" then
            if options.showdisc then
                ret[#ret + 1] = draw_node(head, { {"pre","pre"},{"post","post"},{"replace","replace"} })
                if head.pre then
                    ret[#ret + 1] = dot_analyze_nodelist(head.pre,options)
                    ret[#ret + 1] = link_to(head.pre,nodename,"pre")
                end
                if head.post then
                    ret[#ret + 1] = dot_analyze_nodelist(head.post,options)
                    ret[#ret + 1] = link_to(head.post,nodename,"post")
                end
                if head.replace then
                    ret[#ret + 1] = dot_analyze_nodelist(head.replace,options)
                    ret[#ret + 1] = link_to(head.replace,nodename,"replace")
                end
            else
                ret[#ret + 1] = draw_node(head, { } )
            end
        elseif typ == "glyph" then
            local ch = string.format("char: '%s'",sanitize(head.char))
            local lng = string.format("lang: %d",head.lang)
            local fnt = string.format("font: %d",head.font)
            local wd = string.format("width: %gpt", head.width / factor)
            local ht = string.format("height: %gpt", head.height / factor)
            local dp = string.format("depth: %gpt", head.depth / factor)
            -- an empty entry has no port name and is skipped when the label is built
            local comp
            if options.showdisc then
                comp = {"comp","components"}
            else
                comp = {}
            end
            ret[#ret + 1] = draw_node(head,{ {"char", ch} ,{"lang",lng },{"font",fnt},{"width", wd},{"height", ht},{"depth", dp}, comp })
            if head.components and options.showdisc then
                ret[#ret + 1] = dot_analyze_nodelist(head.components,options)
                ret[#ret + 1] = link_to(head.components,nodename,"comp")
            end
        elseif typ == "math" then
            -- the field list must be a list of { port, text } pairs; a flat
            -- { "math", ... } table results in no field being drawn at all
            ret[#ret + 1] = draw_node(head, { { "math", head.subtype == 0 and "on" or "off" } })
        elseif typ == "whatsit" then
            local st = get_subtype(head)
            if st == "dir" then
                ret[#ret + 1] = draw_node(head, { { "dir", head.dir } })
            elseif st == "pdf_start_link" then
                local wd = string.format("width (pt): %gpt", head.width / factor)
                local ht = string.format("height: %gpt", head.height / factor)
                local dp = string.format("depth %gpt", head.depth / factor)
                local objnum = string.format("objnum %d",head.objnum)
                -- draw_action reads fields of the action, so skip it when there is none
                if head.action then
                    ret[#ret + 1] = draw_action(head.action)
                end
                ret[#ret + 1] = link_to(head.action,nodename,"action")
                ret[#ret + 1] = draw_node(head, {{ "subtype", "pdf_start_link"}, {"width", wd},{"widthraw",head.width}, {"height" , ht}, {"depth",dp}, {"objnum", objnum}, {"action", "action"}})
            elseif st == "pdf_end_link" then
                ret[#ret + 1] = draw_node(head, {{ "subtype", "pdf_end_link"}})
            elseif st == "pdf_literal" then
                -- show the literal's own data (an undefined variable was used here before)
                local data = string.format("data: %s", escape_angle_brackets(head.data))
                ret[#ret + 1] = draw_node(head,{ {"subtype", "literal"},{"data",data} })
            elseif st == "pdf_refximage" then
                local wd = string.format("width (pt): %gpt", head.width / factor)
                local ht = string.format("height: %gpt", head.height / factor)
                local dp = string.format("depth %gpt", head.depth / factor)
                local objnum = string.format("objnum %d",head.objnum or 0)
                ret[#ret + 1] = draw_node(head,{ {"subtype", "image"},{"width", wd}, {"height" , ht}, {"depth",dp}, {"objnum", objnum} })
            elseif st == "pdf_colorstack" then
                local stack,cmd,data
                stack = string.format("stack: %d",head.stack)
                -- the field is read as "cmd" before LuaTeX version 79 and as "command" from then on
                if status.luatex_version < 79 then
                    cmd = string.format("cmd: %d", head.cmd)
                else
                    cmd = string.format("cmd: %d", head.command)
                end
                data = string.format("data: %s", head.data)
                ret[#ret + 1] = draw_node(head,{ {"subtype", "colorstack"},{"stack",stack},{"cmd",cmd},{"data",data} })
            elseif st == "user_defined" then
                local uid,t, val
                uid = string.format("user_id= %s",tostring(head.user_id))
                t = string.format("type = %s",tostring(head.type))
                val = string.format("value = %s", tostring(head.value))
                ret[#ret + 1] = draw_node(head,{ {"subtype", "user_defined"},{"userid",uid},{"type",t},{"value",val} })
            elseif st == "local_par" then
                ret[#ret + 1] = draw_node(head, {{ "subtype","local_par"}})
            elseif st == "pdf_dest" then
                local namedid = string.format("named_id=%s",tostring(head.named_id))
                local destid = string.format("dest_id=%s",tostring(head.dest_id))
                ret[#ret + 1] = draw_node(head, { {"named_id",namedid},{"dest_id",destid} })
            elseif st == "pdf_annot" then
                local wd = string.format("width (pt): %gpt", head.width / factor)
                local ht = string.format("height: %gpt", head.height / factor)
                local dp = string.format("depth %gpt", head.depth / factor)
                local objnum = string.format("objnum %d",head.objnum)
                local data = string.format("data: %s", escape_angle_brackets(head.data))
                ret[#ret + 1] = draw_node(head, {{ "subtype","pdf_annot"},{"width", wd}, {"height" , ht}, {"depth",dp}, {"objnum",objnum},{"data",data}})
            elseif st == "pdf_save" then
                ret[#ret + 1] = draw_node(head, {{ "subtype","pdf_save" }})
            elseif st == "pdf_restore" then
                ret[#ret + 1] = draw_node(head, {{ "subtype","pdf_restore" }})
            else
                ret[#ret + 1] = draw_node(head, {{ "subtype",st }})
                texio.write_nl(string.format("whatsit type %s not handled",st))
            end
        else
            -- texio.write_nl(string.format("not handled id %d",head.id))
            ret[#ret + 1] = draw_node(head, { })
        end
        head = head.next
    end
    return table.concat(ret)
end
-- Write a graphviz (dot) file that visualizes a box and its contents.
--   box:      the number of a box register (looked up via tex.box) or the
--             box node itself
--   filename: name of the dot file to create (overwritten if it exists)
--   options:  optional table; the node list walker reads the key `showdisc`
-- Raises an error when `box` or `filename` is missing or when the output
-- file cannot be opened for writing.
function nodelist_visualize( box,filename,options )
    assert(box,"No box given")
    assert(filename,"No filename given")
    local box_to_analyze
    if type(box)=="number" then
        box_to_analyze = tex.box[box]
    else
        box_to_analyze = box
    end
    local gv = dot_analyze_nodelist(box_to_analyze,options or {})
    -- io.open returns nil plus a message on failure; assert turns that into
    -- an error carrying the message instead of a later nil-index error
    local outfile = assert(io.open(filename,"wb"))
    outfile:write([[
digraph g {
graph [
rankdir = "LR"
];
node [ shape = "record"]
]])
    outfile:write(gv)
    outfile:write("}\n")
    outfile:close()
end
@hennigs
Copy link

hennigs commented Oct 8, 2012

An issue with hlist and vlist nodes. Edges departing from the field labelled list always head south, while in most cases they should head east, like, e.g., the edges departing from next fields do.

@hennigs
Copy link

hennigs commented Oct 8, 2012

An issue with hlist and vlist nodes. Field list should be renamed to head. To quote from the LuaTeX manual:

Note: the new field name head was introduced in 0.65 to replace the old name list. Use of the name list is now deprecated, but it will stay available until at least version 0.80.

@pgundlach
Copy link
Author

I missed your comment, sorry. I will rename head/list and have a look at the edges departing from list/head field. Thank you very much for your comment!

@hennigs
Copy link

hennigs commented Nov 10, 2012

An RFE this time: In user-defined whatsit nodes, would it be possible to show fields user_id, type and value?

@pgundlach
Copy link
Author

Thanks @hennigs, I've finally implemented your suggestions.

@neapel
Copy link

neapel commented May 24, 2013

To limit output for quickly checking a node's neighbours max_prev, max_next and max_head options would be useful, i.e. called on node n with max_head=1 would draw n and n.head but not n.head.head, but instead a dangling arrow.

@Josef-Friedrich
Copy link

Josef-Friedrich commented Jul 7, 2016

I have also written a package which visualizes the node lists. My package nodetree tries to solve the challenge to visualize the node structure in a different approach. https://www.ctan.org/pkg/nodetree

@pgundlach
Copy link
Author

Here is an example of the output of the file:

sample preview of the output

@u-fischer
Copy link

I tried the code on the AtBeginShipoutBox with luatex 1.07 from texlive 2018 pretest:

\documentclass{article}
\usepackage{atbegshi,lipsum}
\AtBeginShipout {%
 \directlua{require("viznodelist")
  viznodelist.nodelist_visualize(tex.box["AtBeginShipoutBox"],"mybox.gv")}}
  \pagestyle{empty}
\begin{document}
abc

\end{document}

When I try to convert this to a pdf with dot.exe I get a warning

Warning: node n80, port title unrecognized
Warning: node n80, port title unrecognized

and there is a mysterious n80 node in the middle:

n80

@rolfn
Copy link

rolfn commented Apr 12, 2018

I can confirm this (Linux, TeXLive 2017, luatex 1.0.4). I get

Warning: node n78, port title unrecognized

...Rolf

@rolfn
Copy link

rolfn commented Jan 7, 2019

Another issue with the current TeX Live (2018, LuaTeX 1.09.0): if I compile Ulrike's (u-fischer) example above, I see the following error:

./viznodelist.lua:124: attempt to index a nil value (field 'spec')
stack traceback:
        ./viznodelist.lua:124: in upvalue 'dot_analyze_nodelist'
        ./viznodelist.lua:120: in upvalue 'dot_analyze_nodelist'
        ./viznodelist.lua:152: in function 'viznodelist.nodelist_visualize'
        [\directlua]:1: in main chunk.
<argument> ...list_visualize("AtBeginShipoutBox","mybox.gv")}

l.10 \end{document}

It would be nice to see a graphic of the ShipoutBox. Thanks in advance.

@pgundlach
Copy link
Author

@u-fischer I cannot reproduce this problem with my installation of tl2018.
@rolfn I don't have LuaTeX 1.09 at the moment (& I don't know how to upgrade)

Both: sorry for the late reply, I somehow don't get a notice when there are new comments here.

@u-fischer
Copy link

@pgundlach: I still get the node message:

\dot.exe -Tpdf mybox.gv -o dot.pdf
Warning: node n80, port title unrecognized
Warning: node n80, port title unrecognized

Regarding the luatex1.09 error: As it uses lua5.3 you should probably replace glue_set %d by glue_set %.0f (twice).

@poetaman
Copy link

poetaman commented Sep 23, 2020

@pgundlach: Fails with a lua error when HarfBuzz renderer is used instead of Node renderer for the following short test-case (Noto Sans Devanagari is available from google fonts GitHub here, though I guess any Devanagari font should do). I encountered this same error in my code while playing with char fields in glyph nodes, someone posted a solution (which I don't really understand) on stack exchange. Please have a look: HarfBuzz UTF-8 issue as it causes visnodelist.lua to produce error. Btw, thanks for the script, graphical visualization helps debug otherwise inhumane tex debug experience. @Josef-Friedrich I filed a bug on your package nodetree too, it seems we all are affected by the same issue :)

\documentclass{article}
\usepackage[lmargin=0.5in,tmargin=0.5in,rmargin=0.5in,bmargin=0.5in]{geometry}
\usepackage{fontspec}

%\newfontscript{Devanagari}{deva,dev2}
\newfontfamily{\devanagarifam}{Noto Sans Devanagari}[Script=Devanagari, Scale=1, Renderer=HarfBuzz]

\begin{document}


\setbox0=\hbox{Hey \devanagarifam एक गांव -- में मोहन} % The error happens on compound glyph: 'में'. As a slightly longer test case, replace contents of box 0 with: Příliš žluťoučký \textit{kůň} úpěl \hbox{ďábelské} ódy difference diffierence. \devanagarifam एक गांव -- में मोहन नाम का लड़का रहता था। उसके पिताजी एक मामूली मजदूर थे।

\directlua{
	require("viznodelist")
	viznodelist.nodelist_visualize(0,"mybox.gv")
}

\box0

\end{document}

Error:

./viznodelist.lua:332: bad argument #1 to 'char' (invalid value)
stack traceback:
	[C]: in field 'char'
	./viznodelist.lua:332: in upvalue 'dot_analyze_nodelist'
	./viznodelist.lua:226: in upvalue 'dot_analyze_nodelist'
	./viznodelist.lua:421: in function 'viznodelist.nodelist_visualize'
	[\directlua]:1: in main chunk.
l.16 }

@pgundlach
Copy link
Author

I have also written a package which visualizes the node lists. My package nodetree tries to solve the challenge to visualize the node structure in a different approach. https://www.ctan.org/pkg/nodetree

@Josef-Friedrich I really like and use nodetree a lot! Thank you for sharing this (I know this is an old message)

@pgundlach
Copy link
Author

pgundlach commented Sep 23, 2020

@reportaman Thank you for the bug report. I now present everything I can't display as U+FFFD REPLACEMENT CHARACTER (�). Not sure if this is the best way to do, but better than an error.

@poetaman
Copy link

@pgundlach: It's not the best way to do it, as the characters used in the test case do actually have Unicode code points. They could be hidden in the way LuaHBTeX uses the 'char' field of glyph nodes. I think they are using it as a 'string' now? I have dissected the problem further in my comment on the same bug filed against the nodetree package here. Please review, and let us know what you think.

@pgundlach
Copy link
Author

@reportaman I'll have a look if I can use get_glyph_info. Since viznodelist is used outside LaTeX/Luaotfload, I am not sure if I implement this.

@poetaman
Copy link

poetaman commented Sep 25, 2020

@pgundlach: Thanks! You might also be interested in this question posted on TeX.SE, given you have a tool (Speedata publisher) with LuaTeX backbone: LuaTeX glyph reordering issue.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment