Last active
December 24, 2015 04:49
-
-
Save phi-gamma/6745931 to your computer and use it in GitHub Desktop.
Collecting info about tokens with LuaTeX (http://tex.stackexchange.com/q/135319/14066)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
current file: \printcurrentfile, line \printcurrentpos \par | |
This token is going to be listed: \collecttoken \jobname %% collect “\jobname” | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env texlua | |
----------------------------------------------------------------------- | |
-- FILE: tokenposition.lua | |
-- USAGE: \directlua {dofile "tokenposition.lua"} | |
-- DESCRIPTION: collect info about tokens | |
-- REQUIREMENTS: luatex, luatexbase | |
-- AUTHOR: Philipp Gesang (Phg), <[email protected]> | |
-- CREATED: 2013-09-28 19:25:46+0200 | |
----------------------------------------------------------------------- | |
-- | |
packagedata = packagedata or { } | |
local tokenpos = { } | |
packagedata.tokenpos = tokenpos | |
local tex = tex | |
local csnamename = token.csname_name | |
local ioopen = io.open | |
local mathfloor = math.floor | |
local nexttoken = token.get_next | |
local tableconcat = table.concat | |
local texnest = tex.nest | |
local texsprint = tex.sprint | |
local unpack = unpack or table.unpack | |
local add_to_callback = luatexbase.add_to_callback | |
local remove_from_callback = luatexbase.remove_from_callback | |
----------------------------------------------------------------------- | |
--- part 1: collecting file and line info | |
----------------------------------------------------------------------- | |
--- Open files are tracked as a stack of { filename, lineno } pairs.
--- The bottom entry describes the main file; its name is built from the
--- job name and its counter starts at zero.
local main_file  = tex.jobname .. ".tex"
local main_info  = { main_file, 0 }
local file_stack = { main_info } --- bottom of the stack: main tex file
--- callback 1: count input lines that belong to the main file.
---
--- Registered for `process_input_buffer` by enable(). A line is credited
--- to the main file only while no other file sits on top of the stack.
--- Always returns nil; the line itself is never altered here.
local function count_lines (_line)
  local only_main_open = (#file_stack == 1)
  if only_main_open then
    main_info [2] = main_info [2] + 1
  end
  return nil
end
--- callback 2: track position in tex files that are read

--- Stand-in handle returned when there is nothing to read; both members
--- are no-ops, so it can be used like a real handle.
local dummy      = function () end
local empty_file = { reader = dummy, close = dummy }

--- Open `filename` and register it on the file stack.
---
--- Registered for `open_read_file` by enable().
---
--- @param filename  string or nil; nil, "" or an unopenable path yield
---                  the no-op `empty_file` handle
--- @return table    { reader = function, close = function }
local read_file = function (filename)
  if not filename or filename == "" then
    return empty_file
  end
  local chan = ioopen (filename, "r")
  if not chan then
    return empty_file
  end

  --- file successfully opened; push file info
  local file_info = { filename, 0 }
  file_stack [#file_stack + 1] = file_info
  local closed = false

  --- Read one line; the counter only advances when a line was actually
  --- delivered, so the end-of-file read does not inflate the count.
  local reader = function ()
    local line = chan:read "*l"
    if line ~= nil then
      file_info [2] = file_info [2] + 1
    end
    return line
  end

  --- Free the descriptor and drop *this* file's entry from the stack.
  --- The entry is located by identity rather than assumed to be on top,
  --- so closing files out of order cannot discard another file's info,
  --- and a repeated close() is a no-op instead of popping twice.
  local close = function ()
    if closed then
      return
    end
    closed = true
    chan:close ()
    for i = #file_stack, 1, -1 do
      if file_stack [i] == file_info then
        table.remove (file_stack, i)
        break
      end
    end
  end

  return { reader = reader, close = close }
end
--- Switch both line counters on or off together.
local active = false --- true while the two callbacks are registered

--- Start counting; the main file counter is set to floor(n).
--- Does nothing when counting is already active or when `n` cannot be
--- converted to a number.
local function enable (n)
  local start = tonumber (n)
  if active or start == nil then
    return
  end
  main_info [2] = mathfloor (start)
  add_to_callback ("process_input_buffer", count_lines, "count lines in main file")
  add_to_callback ("open_read_file", read_file, "count input lines")
  active = true
end

--- Stop counting; does nothing unless counting is active.
local function disable ()
  if not active then
    return
  end
  remove_from_callback ("process_input_buffer", "count lines in main file")
  remove_from_callback ("open_read_file", "count input lines")
  active = false
end

tokenpos.enable  = enable
tokenpos.disable = disable
--- Retrieve the current position state; basis for the user level
--- functions.
---
--- @return string  name of the file on top of the file stack
--- @return string  its line counter, converted with tostring()
---
--- With an empty stack the placeholder pair "nil", "0" is returned.
--- Always handing back two values keeps callers that destructure
--- `file, pos` from passing nil on to tex.sprint.
local current = function ()
  local file_info = file_stack [#file_stack]
  if not file_info then
    return "nil", "0"
  end
  return file_info [1], tostring (file_info [2])
end
--- User functions built on current(); each one writes its result into
--- the TeX input via tex.sprint, passing 0 as the first argument.

--- Print the name of the current file.
function tokenpos.currentfile ()
  local name = current ()
  texsprint (0, name)
end

--- Print the line counter of the current file.
function tokenpos.currentpos ()
  local _, lineno = current ()
  texsprint (0, lineno)
end

--- Print "<file>:<line>" for the current file.
function tokenpos.currentfilepos ()
  local name, lineno = current ()
  texsprint (0, name, ":", lineno)
end
----------------------------------------------------------------------- | |
--- part 2: collecting tokens | |
----------------------------------------------------------------------- | |
--- Calling token.get_next() by itself also removes the token from the | |
--- input list so it won’t be processed. We work around this by calling | |
--- the function from inside the token_filter and immediately | |
--- de-registering the callback. | |
local tokendata = { }   --- collected records: token plus file:line info
local ignore    = false --- set once "stopcollecttokens" has been seen

--- Record one token together with the state it was seen in.
---
--- A control sequence name is looked up only when the third slot of the
--- token table is nonzero (NOTE(review): presumably that slot marks
--- control sequences -- confirm against the LuaTeX token documentation).
--- Seeing the name "stopcollecttokens" raises the `ignore` flag; while it
--- is set nothing is recorded, so the tokens of the stop macro itself do
--- not bloat the collection.
---
--- @param tok  token as returned by token.get_next()
local function processtoken (tok)
  local csname = nil
  if tok [3] ~= 0 then
    csname = csnamename (tok)
  end

  if csname == "stopcollecttokens" then
    ignore = true --- don’t need the rest
  end
  if ignore then
    return --- would blow up the collection needlessly otherwise
  end

  local top   = file_stack [#file_stack]
  local nest  = texnest [texnest.ptr]

  --- false outside math; otherwise the mode value of the current nest
  local mathmode = false
  if nest.mathstyle >= 0 then
    mathmode = nest.mode
  end

  local record = {
    file       = top [1],
    position   = top [2],
    csname     = csname,
    token      = tok,
    grouplevel = tex.currentgrouplevel,
    mathmode   = mathmode,
  }
  tokendata [#tokendata + 1] = record
end
--- One-shot handler for `token_filter`: fetch the next token, record
--- it, unhook the handler again and hand the token back so that it is
--- still processed.
local function grabtoken ()
  local tok = nexttoken ()
  processtoken (tok)
  remove_from_callback ("token_filter", "grab the next token")
  return tok
end

--- User level wrapper: arm the one-shot handler. The callback is
--- deregistered by grabtoken() as soon as one token has been processed.
local function collecttoken ()
  ignore = false
  add_to_callback ("token_filter", grabtoken, "grab the next token")
end

tokenpos.collect = collecttoken
--- Persistent handler for `token_filter`: like the one-shot variant, but
--- it stays registered until stopcollecting() removes it.
local function grabtokens ()
  local tok = nexttoken ()
  processtoken (tok)
  return tok
end

local active = false --- state of token grabber

--- Register the persistent grabber; no-op when it is already running.
local function startcollecting ()
  if active then
    return
  end
  add_to_callback ("token_filter", grabtokens, "grap a bunch of tokens")
  active = true
  ignore = false --- just in case
end

--- Remove the persistent grabber; no-op when it is not running.
local function stopcollecting ()
  if not active then
    return
  end
  remove_from_callback ("token_filter", "grap a bunch of tokens")
  active = false
end

tokenpos.startcollecting = startcollecting
tokenpos.stopcollecting  = stopcollecting
--- NOTE(review): these two constants are not referenced anywhere in the
--- visible code; presumably they were meant as mode values for display
--- and inline math -- confirm before relying on them.
local displaymath_t = 253
local inlinemath_t = -253

--- User level function that outputs the collected token info as a
--- plain TeX `\settabs` table with the columns
--- file, line, cs, math, group level.
---
--- Nothing is printed when no tokens have been collected. The records
--- in `tokendata` are left in place.
local dumptokens = function ()
  if not next (tokendata) then
    return
  end
  local acc = {
    [[\settabs 5\columns]],
    [[\+ file & line & cs & math & group level \cr ]],
    --- one rule per declared column; each cell is closed by `&`
    [[\+ \hrulefill & \hrulefill & \hrulefill & \hrulefill & \hrulefill & \cr ]],
    "\n",
  }
  for i = 1, #tokendata do
    --- named `entry` so the file-level current() is not shadowed
    local entry = tokendata [i]
    acc [#acc + 1] = [[\+]]
    acc [#acc + 1] = entry.file
    acc [#acc + 1] = "&"
    acc [#acc + 1] = entry.position
    acc [#acc + 1] = "&"
    acc [#acc + 1] = entry.csname and [[\char`\\]] .. entry.csname or "<nil>"
    acc [#acc + 1] = "&"
    --- mathmode is false outside math, otherwise a mode number whose
    --- sign selects the label
    local mathmode = entry.mathmode
    if mathmode then
      acc [#acc + 1] = mathmode > 0 and "display" or "inline"
    else
      acc [#acc + 1] = "nope"
    end
    acc [#acc + 1] = "&"
    acc [#acc + 1] = entry.grouplevel
    acc [#acc + 1] = [[\cr]]
    acc [#acc + 1] = "\n"
  end
  texsprint (tableconcat (acc, "\n"))
  --inspect (acc)
end
tokenpos.dump = dumptokens
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\input luatexbase.sty | |
\input luaotfload.sty | |
\font \mainfont = "file:Iwona-Regular.otf:mode=base" at 10pt | |
\mainfont | |
\directlua {dofile "tokenposition.lua"} | |
\def\tokenposcmd#1{\directlua{packagedata.tokenpos.#1}} %% run one call on the tokenpos table
\def\enabletokenpos[#1]{\tokenposcmd{enable [==[\detokenize{#1}]==]}} %% start counting at line #1
\def\disabletokenpos{\tokenposcmd{disable ()}} %% stop counting lines
\def\printcurrentfile{\tokenposcmd{currentfile ()}} %% typeset the current file name
\def\printcurrentpos{\tokenposcmd{currentpos ()}} %% typeset the current line number
\def\printcurrentfilepos{\tokenposcmd{currentfilepos ()}} %% typeset file:line
\def\collecttoken{\tokenposcmd{collect ()}} %% record the next token only
\def\collectedtokens{\tokenposcmd{dump ()}} %% print the table of recorded tokens
\def\startcollecttokens{\tokenposcmd{startcollecting ()}} %% record every following token
\def\stopcollecttokens{\tokenposcmd{stopcollecting ()}} %% end of the recorded range
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
\enabletokenpos [23] Starting at line no. \printcurrentpos | |
foo \collecttoken bar %% collect “b” | |
file:line “\printcurrentfilepos” \par | |
\begingroup | |
\begingroup | |
\collecttoken %% collect “\input” | |
\input knuth \par %% read other file | |
\printcurrentfilepos \par %% output line number in main file | |
\endgroup | |
\endgroup | |
current file: \printcurrentfile, line \printcurrentpos \par | |
\input someinputfile | |
xyzzy $F\collecttoken=ma$ whatever | |
Now collect multiple tokens at once: | |
\startcollecttokens a \endgraf $b$ c \par \stopcollecttokens | |
\collectedtokens %% pretty-print list of collected tokens | |
\bye |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment