Skip to content

Instantly share code, notes, and snippets.

@phi-gamma
Last active December 24, 2015 04:49
Show Gist options
  • Save phi-gamma/6745931 to your computer and use it in GitHub Desktop.
Save phi-gamma/6745931 to your computer and use it in GitHub Desktop.
Collecting info about tokens with Luatex (http://tex.stackexchange.com/q/135319/14066)
current file: \printcurrentfile, line \printcurrentpos \par
This token is going to be listed: \collecttoken \jobname %% collect “\jobname”
#!/usr/bin/env texlua
-----------------------------------------------------------------------
-- FILE: tokenposition.lua
-- USAGE: \directlua {dofile "tokenposition.lua"}
-- DESCRIPTION: collect info about tokens
-- REQUIREMENTS: luatex, luatexbase
-- AUTHOR: Philipp Gesang (Phg), <[email protected]>
-- CREATED: 2013-09-28 19:25:46+0200
-----------------------------------------------------------------------
--
--- global table "packagedata": reused if it already exists, otherwise
--- created here (deliberately global so that TeX code can reach it
--- through \directlua)
packagedata = packagedata or { }
--- module table; the functions exported by this file are stored in it
local tokenpos = { }
--- publish the module as packagedata.tokenpos
packagedata.tokenpos = tokenpos
local tex = tex
local csnamename = token.csname_name
local ioopen = io.open
local mathfloor = math.floor
local nexttoken = token.get_next
local tableconcat = table.concat
local texnest = tex.nest
local texsprint = tex.sprint
local unpack = unpack or table.unpack
local add_to_callback = luatexbase.add_to_callback
local remove_from_callback = luatexbase.remove_from_callback
-----------------------------------------------------------------------
--- part 1: collecting file and line info
-----------------------------------------------------------------------
--- we track open files in a table of (filename * lineno) pairs
--- entry for the main document: { filename, line counter }
local main_info = { tex.jobname .. ".tex", 0 }
--- stack of open files; it starts out holding only the main document
local file_stack = { main_info }

--- callback 1 (registered for process_input_buffer by enable()):
--- advance the line counter of the main file. Lines are only counted
--- while no other file has been pushed onto the stack.
--- @param line the input line handed over by the callback (unused)
--- @return nil, always
local function count_lines (line)
  local only_main_open = (#file_stack == 1)
  if only_main_open then
    main_info [2] = 1 + main_info [2]
  end
  return nil
end
--- callback 2: track position in tex files that are read
--- no-op serving as both methods of the placeholder file object
local function dummy () end
--- placeholder handed back whenever there is nothing to read
local empty_file = { reader = dummy, close = dummy }

--- Open ``filename`` and return a table with a ``reader`` and a
--- ``close`` function. While the file is open, an entry
--- { filename, line counter } for it sits on top of ``file_stack``.
--- @param filename name of the file to open
--- @return table { reader = function, close = function }; the shared
---         ``empty_file`` if the name is missing/empty or the file
---         cannot be opened
local function read_file (filename)
  --- nil / false unless a non-empty name was given and io.open succeeded
  local chan = filename and filename ~= "" and ioopen (filename, "r")
  if not chan then
    return empty_file
  end

  --- file successfully opened: push its info
  local entry = { filename, 0 }
  file_stack [#file_stack + 1] = entry

  return {
    --- read one line and advance this file's line counter
    reader = function ()
      entry [2] = entry [2] + 1
      return chan:read "*l"
    end,
    --- release the descriptor and pop the topmost stack entry
    close = function ()
      chan:close ()
      local top = #file_stack
      file_stack [top] = nil
    end,
  }
end
--- convenience functions for easy activation / deactivation of both
--- line counters
--- true while both line counting callbacks are registered
local counting = false

--- Start line counting.
--- @param n initial value for the line counter of the main file;
---          anything ``tonumber`` cannot convert makes the call a no-op
local function enable (n)
  n = tonumber (n)
  if counting or n == nil then
    return --- already running, or no usable start value
  end
  main_info [2] = mathfloor (n)
  add_to_callback ("process_input_buffer", count_lines, "count lines in main file")
  add_to_callback ("open_read_file", read_file, "count input lines")
  counting = true
end

--- Stop line counting; does nothing unless enable() succeeded before.
local function disable ()
  if not counting then
    return
  end
  remove_from_callback ("process_input_buffer", "count lines in main file")
  remove_from_callback ("open_read_file", "count input lines")
  counting = false
end

tokenpos.enable = enable
tokenpos.disable = disable
--- simple function for retrieving the current position state; this
--- function serves as basis for various user level macros
--- Retrieve the current position state.
--- @return filename of the entry on top of ``file_stack``
--- @return its line counter, converted to a string
--- If the stack is empty, the placeholders "nil" and "0" are returned.
--- The fallback deliberately yields two values as well: callers unpack
--- the result as (file, position), and a single "nil:0" string would
--- leave the position nil and hand that nil on to tex.sprint.
local current = function ()
  local file_info = file_stack [#file_stack]
  if file_info then
    return file_info [1], tostring (file_info [2])
  end
  return "nil", "0"
end
--- user functions based on the above
--- print the name of the file currently on top of the stack
tokenpos.currentfile = function ()
  local name = current ()
  texsprint (0, name)
end

--- print the line counter of the file currently on top of the stack
tokenpos.currentpos = function ()
  local _, lineno = current ()
  texsprint (0, lineno)
end

--- print both, separated by a colon
tokenpos.currentfilepos = function ()
  local name, lineno = current ()
  texsprint (0, name, ":", lineno)
end
-----------------------------------------------------------------------
--- part 2: collecting tokens
-----------------------------------------------------------------------
--- Calling token.get_next() by itself also removes the token from the
--- input list so it won’t be processed. We work around this by calling
--- the function from inside the token_filter and immediately
--- de-registering the callback.
--- list of records, one per collected token (token plus file:line info)
local tokendata = { }
--- once set, further tokens are dropped until collecting is restarted
local ignore = false

--- Store a record describing ``tok`` at the end of ``tokendata``.
--- The record holds the file and line on top of ``file_stack``, the
--- control sequence name (if any), the token itself, the current group
--- level and the math state of the current nesting level.
--- @param tok token as returned by token.get_next
local function processtoken (tok)
  local csname = nil
  if tok [3] ~= 0 then
    csname = csnamename (tok)
  end

  if csname == "stopcollecttokens" then
    ignore = true --- the rest is of no interest
  end
  if ignore then
    return --- storing more would only bloat the collection
  end

  local top = file_stack [#file_stack]
  local nest = texnest [texnest.ptr]
  --- false outside math, otherwise the mode of the nesting level
  local math_state = nest.mathstyle >= 0 and nest.mode

  local record = {
    file = top [1],
    position = top [2],
    csname = csname,
    token = tok,
    grouplevel = tex.currentgrouplevel,
    mathmode = math_state,
  }
  tokendata [#tokendata + 1] = record
end
--- this is the fun part
--- One-shot token filter: fetch the next token, record it, take this
--- very callback out of "token_filter" again, and pass the token on
--- unchanged.
--- @return the token that was fetched
local function grabtoken ()
  local fetched = nexttoken ()
  processtoken (fetched)
  remove_from_callback ("token_filter", "grab the next token")
  return fetched
end
--- user level wrapper; the callback is automatically deregistered
--- as soon as one token has been processed
--- User level wrapper: clear the ``ignore`` flag and install the
--- one-shot filter, which deregisters itself after a single token.
local function collecttoken ()
  ignore = false
  add_to_callback ("token_filter", grabtoken, "grab the next token")
end

tokenpos.collect = collecttoken
--- same as the above but must be manually removed
--- Persistent token filter: fetch the next token, record it and pass
--- it on unchanged. Unlike the one-shot variant it never removes
--- itself from the callback.
--- @return the token that was fetched
local function grabtokens ()
  local fetched = nexttoken ()
  processtoken (fetched)
  return fetched
end
--- state of the token grabber: true while the persistent filter is
--- registered
local collecting = false

--- Install the persistent token filter; no-op if it is already active.
local function startcollecting ()
  if collecting then
    return
  end
  add_to_callback ("token_filter", grabtokens, "grap a bunch of tokens")
  collecting = true
  ignore = false --- just in case
end

--- Remove the persistent token filter; no-op if it is not active.
local function stopcollecting ()
  if not collecting then
    return
  end
  remove_from_callback ("token_filter", "grap a bunch of tokens")
  collecting = false
end

tokenpos.startcollecting = startcollecting
tokenpos.stopcollecting = stopcollecting
--- user level function that outputs the token info as a table
--- NOTE(review): these two constants are not referenced anywhere in the
--- visible code; presumably mode values for display / inline math --
--- TODO confirm
local displaymath_t = 253
local inlinemath_t = -253

--- User level function: typeset the collected token records as a table
--- with the columns file, line, cs, math and group level. Nothing is
--- printed when no token has been collected.
local function dumptokens ()
  if next (tokendata) == nil then
    return
  end

  --- header rows
  local rows = {
    [[\settabs 5\columns]],
    [[\+ file & line & cs & math & group level \cr ]],
    [[\+ \hrulefill & \hrulefill & \hrulefill & \hrulefill& \hrulefill & \hrulefill & \cr ]],
    "\n",
  }
  local function emit (item)
    rows [#rows + 1] = item
  end

  for idx = 1, #tokendata do
    local entry = tokendata [idx]

    --- control sequence name, or a marker for other tokens
    local cs = "<nil>"
    if entry.csname then
      cs = [[\char`\\]] .. entry.csname
    end

    --- ``mathmode`` is either false or a mode number
    local math = "nope"
    local mode = entry.mathmode
    if mode then
      if mode > 0 then
        math = "display"
      else
        math = "inline"
      end
    end

    emit [[\+]]
    emit (entry.file)
    emit "&"
    emit (entry.position)
    emit "&"
    emit (cs)
    emit "&"
    emit (math)
    emit "&"
    emit (entry.grouplevel)
    emit [[\cr]]
    emit "\n"
  end

  texsprint (tableconcat (rows, "\n"))
end

tokenpos.dump = dumptokens
%% luatexbase supplies the add_to_callback / remove_from_callback
%% functions that tokenposition.lua relies on
\input luatexbase.sty
\input luaotfload.sty
\font \mainfont = "file:Iwona-Regular.otf:mode=base" at 10pt
\mainfont
%% run the Lua module; it publishes its functions as packagedata.tokenpos
\directlua {dofile "tokenposition.lua"}
%% generic dispatcher: the argument is Lua source that gets appended to
%% "packagedata.tokenpos." and executed
\def \tokenposcmd #1{\directlua {packagedata.tokenpos.#1}}
%% start line counting; the bracketed argument is handed to enable() as
%% a Lua long string and becomes the initial line count of the main file
\def \enabletokenpos [#1]{\tokenposcmd {enable [==[\detokenize {#1}]==]}}
%% stop line counting
\def \disabletokenpos {\tokenposcmd {disable ()}}
%% print the current file name / line counter / "file:line"
\def \printcurrentfile {\tokenposcmd {currentfile ()}}
\def \printcurrentpos {\tokenposcmd {currentpos ()}}
\def \printcurrentfilepos {\tokenposcmd {currentfilepos ()}}
%% record the single token that follows
\def \collecttoken {\tokenposcmd {collect ()}}
%% typeset the table of all tokens recorded so far
\def \collectedtokens {\tokenposcmd {dump ()}}
%% record every token between \startcollecttokens and \stopcollecttokens
\def \startcollecttokens {\tokenposcmd {startcollecting ()}}
\def \stopcollecttokens {\tokenposcmd {stopcollecting ()}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% demo document: exercises the macros defined above
%% the bracketed number seeds the line counter of the main file
\enabletokenpos [23] Starting at line no. \printcurrentpos
foo \collecttoken bar %% collect “b”
file:line “\printcurrentfilepos” \par
\begingroup
\begingroup
\collecttoken %% collect “\input”
\input knuth \par %% read other file
\printcurrentfilepos \par %% output line number in main file
\endgroup
\endgroup
current file: \printcurrentfile, line \printcurrentpos \par
\input someinputfile
xyzzy $F\collecttoken=ma$ whatever
Now collect multiple tokens at once:
\startcollecttokens a \endgraf $b$ c \par \stopcollecttokens
\collectedtokens %% pretty-print list of collected tokens
\bye
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment