|
#! @type "%~dpnx0" ^| icont -o "%~n0.exe" -u -v 0 - ^& "%~n0.exe" %* ^& del "%~n0.exe" |
|
############################################################################ |
|
# |
|
# File: annucode.icn |
|
# |
|
# Subject: annucode.icn - A tool to annotate Icon and Unicon ucode files |
|
# |
|
# Author: Arthur Eschenlauer (https://orcid.org/0000-0002-2882-0508) |
|
# |
|
# Date: 21 January, 2025 |
|
# |
|
# URL: https://gist.github.com/eschen42/a223f6aeee93797a720c559a666ec069 |
|
# |
|
############################################################################ |
|
# |
|
# This file is in the public domain. Art Eschenlauer has waived all |
|
# copyright and related or neighboring rights to: |
|
# annucode.icn - A tool to annotate Icon and Unicon ucode files |
|
# For details, see: |
|
# https://creativecommons.org/publicdomain/zero/1.0/ |
|
# |
|
# If you require a specific license and public domain status is not |
|
# sufficient for your needs, please substitute the MIT license, bearing |
|
# in mind that the copyright "claim" is solely to meet your requirements |
|
# and does not imply any restriction on use or copying by the author: |
|
# |
|
# Copyright (c) 2020-2025, Arthur Eschenlauer |
|
# |
|
# Permission is hereby granted, free of charge, to any person obtaining |
|
# a copy of this software and associated documentation files (the |
|
# "Software"), to deal in the Software without restriction, including |
|
# without limitation the rights to use, copy, modify, merge, publish, |
|
# distribute, sublicense, and/or sell copies of the Software, and to |
|
# permit persons to whom the Software is furnished to do so, subject |
|
# to the following conditions: |
|
# |
|
# The above copyright notice and this permission notice shall be |
|
# included in all copies or substantial portions of the Software. |
|
# |
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
# SOFTWARE. |
|
# |
|
############################################################################ |
|
# |
|
# This program annotates Icon or Unicon "ucode" files to make them somewhat |
|
# easier to interpret. Although ucode is not intended to be read by humans, |
|
# sometimes I must resort to reading it when I am trying to puzzle out what |
|
# the translator thinks that I am trying to say with my code. If you are |
|
# studying language implementation, you may find this helpful to contrive |
|
# your own examples and look at the resulting ucode. |
|
# |
|
# Appendices B and C of *The Implementation of Icon and Unicon*
|
# http://unicon.org/book/ib.pdf |
|
# cover virtual machine code and instructions, which will in turn point you |
|
# to the relevant other sections of the book. These document "icode", |
|
# which is produced by the linker function built into the "icont" program, |
|
# but I have yet to see a great deal of difference between actual ucode |
|
# and the documentation in the book. I think of ucode sort of like |
|
# assembly language for the virtual machine. |
|
# |
|
# This program requires arguments as follows: |
|
# - either paths to one or more ucode files |
|
# - or a single dash, signifying that the program should read from ucode |
|
# streaming in on the standard input. |
|
# Output is streamed to the standard output. |
|
# |
|
# How do you get ucode from your icon source file? |
|
# Pass the "-c" option to "icont" or "unicon". For example: |
|
# icont -c foo.icn |
|
# annucode foo.u1 |
|
# cat foo.u1 foo.u2 | annucode - > foo.annotation.txt |
|
# |
|
############################################################################ |
|
# |
|
# Requires: co-expressions |
|
# |
|
############################################################################ |
|
# |
|
# Links: escapesq, strings |
|
# |
|
############################################################################ |
|
# |
|
# Annotate a ucode file for Icon (.u1) or Unicon (.u). |
|
# On Microsoft Windows, for example, you can use this pipeline for foo.icn: |
|
# icont -c -u foo& type foo.u2 foo.u1 | annucode.cmd - > foo.ann.txt |
|
# |
|
# Library functions used in this file: |
|
# |
|
# escape(s1) |
|
# Returns string s1 with escape sequences (as in Icon string |
|
# representation) converted. |
|
# ref: https://www2.cs.arizona.edu/icon/library/procs/escapesq.htm |
|
link escapesq |
|
# |
|
# replace(s1, s2, s3) |
|
# Replaces all occurrences of s2 in s1 by s3; fails if s2 is null. |
|
# ref: https://www2.cs.arizona.edu/icon/library/procs/strings.htm
|
link strings |
|
|
|
# Indentation unit: indent() writes it once per nesting level.  Despite the
# name, the value is a single tab character.
$define SPACES "\t"
|
|
|
# State handed from main() to indent() on every call.
#   indi : indentation level for mark/unmark blocks
record context(indi)
|
|
|
# Print the command-line synopsis on &errout and terminate the program.
#   msg : optional text for the final error line; when it is null the
#         line reads "<program name> aborted" instead
# Never returns: stop() ends execution.
procedure usage(msg)
  write(&errout, "usage: ", &progname, " {-|ucode_file(s)}")
  # supply the default closing message only when the caller gave none
  /msg := &progname || " aborted"
  stop(msg)
end
|
|
|
# Generate, in order, every line of every file named in L_of_s.
#   L_of_s : list of strings, each the path of a file to read
# Suspends one line at a time and fails once all files are exhausted.
# A file that cannot be opened is reported on &errout and skipped (it used
# to be skipped without any notice); each file is closed as soon as its
# last line has been produced.
procedure allFilesAllLines(L_of_s)
  local path    # path of the file currently being read
  local f       # open handle for that file

  every path := !L_of_s do {
    if f := open(path, "r") then {
      # repeated alternation keeps calling read() until end of file
      suspend |read(f)
      close(f)
    }
    else
      write(&errout, &progname, ": cannot open ucode file: ", path)
  }
end
|
|
|
# Write one output line built from the strings in L.
#   C : context record; a negative C.indi is reset to zero here
#   L : the pieces of the line, written in order
# When the first piece begins with "lab " the line is a label: only the
# text after that prefix is written, followed by ":", with no indentation
# and the remaining pieces ignored.  Otherwise the pieces are written
# after C.indi copies of SPACES.  Nothing is written when L is empty.
# Always returns &null.
procedure indent(C, L[])
  local label   # text after "lab " when the first piece is a label

  # clamp: assigns 0 only when the current level is below 0
  C.indi <:= 0
  if *L = 0 then return &null
  if L[1] ? (="lab ", label := tab(0)) then
    write(label, ":")
  else
    write ! ([repl(SPACES, C.indi)] ||| L)
  return &null
end
|
|
|
# Annotate ucode read from the files named in args, or from standard input
# when the first argument is "-", and write the result to standard output.
#
# Every input line is matched against a series of patterns by string
# scanning inside a case expression: each selector either reproduces the
# whole subject (and is therefore selected) or fails.  Selectors capture
# fields with reversible assignment so that a failed match leaves the
# scratch variables untouched.
#
# State carried from line to line:
#   Tvar, Tcon : annotation text for variables / constants, keyed by the
#                identifier string from the declaration
#   failStck   : labels named by mark/mark0, used to caption the
#                instructions that remove or redirect an expression frame
#   Cs, sline  : source-file reader and the most recent source line, used
#                to echo source text next to line/colm directives
#
# Calls usage() (which stops the program) when no argument is given, and
# stops when a source file named by a filen directive cannot be opened.
# Returns 0.
procedure main(args)
  local C         # context passed to helper functions
  local Cs        # co-expression to produce lines of source (.icn) file
  local Cu        # co-expression to produce lines of ucode (.u1, .u2, or
                  #   .u) file(s) or stdin
  local Tcon      # table to lookup constant by integer ID
  local Tvar      # table to lookup variable by integer ID
  local dcl       # identifier for variable or constant
  local fld3      # field 3 from the declaration of a variable or constant
  local fld4      # field 4 from the declaration of a global variable
  local sfile     # name of the source file
  local srcf      # open handle on the source file currently being echoed
  local slast     # number of the last line captured from the source file
  local sline     # line read freshly from the source file
  local uline     # line read from ucode file by Cu
  local n, s, x   # scratch variables
  local inRcrd    # true when in record definition
  local inGlbl    # true when in global definition
  local inDecl    # true when in procedure local/constant declaration
  local procName  # name of procedure currently being defined
  local failStck  # stack for failure-IPC specified by mark and mark0
  local failLbl   # failure-IPC label

  # Initialize structures and variables
  Tcon := table("")
  Tvar := table("")
  slast := 0
  C := context(0)
  failStck := []

  # Validate and respond to input arguments
  if *args = 0 then usage("no input files specified")
  if args[1] == "-"
  then Cu := create !&input
  else Cu := create allFilesAllLines(args)

  # Scan input ucode and emit annotated ucode
  while uline := @Cu do uline ? {
    x := &null  # this is defensive and ordinarily unnecessary
    case &subject of {

      # .u2-specific
      # ------------------------------------------------------------

      # version
      #   Annotate version number
      ="version\t" || (x <- tab(0)): {
        C.indi := max(C.indi - 1, 0)
        indent(C, "version\t", x,
               " --- Icon Programming Language major version")
      }

      # impl\tlocal
      #   Annotate implicit local variables (no -u option when invoking
      #   icont)
      ="impl\t" || (x <- tab(0)): {
        C.indi := max(C.indi - 1, 0)
        indent(C, "impl\t", x, " --- undeclared variables are ",
               if x == "local" then "implicitly local" else "an error")
      }

      # global\t5
      #   Annotate count of global variables
      ="global\t" || (x <- tab(0)): {
        C.indi := max(C.indi - 1, 0)
        indent(C, "global\t", x,
               " --- count of global variables declared in source file")
        inGlbl := 1
        inDecl := inRcrd := &null
      }

      # \t0,000001,glb_i,0
      # \t1,000011,rec_r,3
      # \t3,000005,main,1
      # \t4,000005,foo,-2
      #   Annotate global-variable details; a negative field 4 is shown
      #   as its absolute value followed by "+"
      ="\t" ||
      (dcl <- tab(many(&digits))) || ="," ||
      (x <- tab(many(&digits))) || ="," ||
      (fld3 <- tab(upto(','))) || ="," ||
      (fld4 <- tab(0)): {
        Tvar[dcl] := case x of {
            "000011": "record"
            "000005": "procedure"
            "000001": "global"
            default:  x
          } ||
          ":" || fld3 ||
          "[" || if fld4 < 0 then abs(fld4) || "+]" else fld4 || "]"
        indent(C, &subject, " --- ", Tvar[dcl])
      }

      # record\tr,5
      #   Annotate record declaration (name, then number of fields)
      ="record\t" ||
      (x <- tab(upto(','))) || move(1) || (n <- tab(many(&digits))): {
        C.indi := max(C.indi - 1, 0)
        indent(C, "record\t", x, ",", n, " --- record ", x, " has ",
               n, " fields")
        C.indi +:= 1
        inRcrd := 1
        inDecl := inGlbl := &null
      }

      # \t0,field1
      #   Annotate record-field declaration (only inside a record)
      (\inRcrd, ="\t" || (n := tab(many(&digits))) || ="," || (s := tab(0))):
        indent(C, n, ",", s, " --- record field ", s)

      # invocable\tall
      # invocable\tfoo
      #   Annotate a string-invocable declaration.  The "all" form used
      #   to be compared against "mark0", so it was never recognized.
      ="invocable\t" || (x <- tab(0)): {
        C.indi := max(C.indi - 1, 0)
        # NOTE(review): "0" is accepted as well because the translator
        # may encode `invocable all` that way -- TODO confirm
        if x == ("all" | "0")
        then indent(C, "invocable\t", x,
                    " --- all defined functions are string-invocable")
        else indent(C, "invocable\t", x,
                    " --- this function is string-invocable")
      }

      # \tlocal\t3,000000,what
      #   Annotate a local-variable declaration
      ="\tlocal\t" ||
      (dcl := tab(many(&digits))) || ="," ||
      (fld3 := tab(many(&digits))) || ="," ||
      (x <- tab(0)): {
        Tvar[dcl] := case fld3 of {
            "001000": "arg"
            "000020": "local"
            "000040": "static"
            "000000": "global"
            default:  fld3
          } || ":" || x
        indent(C, &subject, " --- ", Tvar[dcl])
      }

      # \tcon\t0,010000,5,150,145,154,154,157
      #   Annotate a string-constant declaration; the comma-separated
      #   character codes are turned into backslash escapes and decoded
      ="\tcon\t" ||
      (dcl := tab(many(&digits))) || ="," ||
      ="010000," ||
      (fld3 := tab(many(&digits))) ||
      (x <- tab(0)):
        ( x <- image(escape(replace(x, ",", "\\")))
        , Tcon[dcl] := x
        , indent(C, &subject, " --- s:", Tcon[dcl])
        )

      # \tcon\t3,020000,5,141,145,151,157,165
      #   Annotate a cset-constant declaration
      ="\tcon\t" ||
      (dcl := tab(many(&digits))) || ="," ||
      ="020000," ||
      (fld3 := tab(many(&digits))) ||
      (x <- tab(0)):
        ( x <- image(escape(replace(x, ",", "\\")))[2:-1]
        , Tcon[dcl] := x
        , indent(C, &subject, " --- c:'", Tcon[dcl], "'")
        )

      # \tcon\t2,002000,1,2
      #   Annotate an integer-constant declaration
      ="\tcon\t" ||
      (dcl := tab(many(&digits))) || ="," ||
      ="002000," ||
      # field 3 is integer-kind
      (fld3 := tab(many(&digits))) || ="," ||
      (x <- tab(0)):
        ( Tcon[dcl] := x
        , indent(C, &subject, " --- i:", Tcon[dcl])
        )

      # \tcon\t<id>,004000,<value>
      #   Annotate a real-constant declaration
      ="\tcon\t" ||
      (dcl := tab(many(&digits))) || ="," ||
      ="004000," ||
      (x <- tab(0)):
        ( Tcon[dcl] := x
        , indent(C, &subject, " --- r:", Tcon[dcl])
        )

      # \tfilen\tdec.icn
      #   Create co-expression producing lines of source file so that
      #   they may be included in the output
      ="\tfilen\t" || (sfile := tab(0)): {
        # release the handle left over from an earlier filen directive
        close(\srcf)
        srcf := open(sfile, "r") |
          stop(&progname, ": cannot open .icn file: ", sfile)
        Cs := create |read(srcf)  # TODO put lines into a list instead
        indent(C, "\tfilen\t", sfile, " --- source code file: ", sfile)
      }

      # .u1-specific
      # ------------------------------------------------------------

      # proc foo
      #   Annotate procedure declaration
      ="proc " || (x <- tab(0)): {
        C.indi := max(C.indi - 1, 0)
        indent(C, x, ":\t--- declare procedure ", x)
        inRcrd := inGlbl := &null
        inDecl := 1
        procName := x
      }

      # \tdeclend
      #   Annotate end of the declarations of a procedure
      ="\tdeclend": {
        C.indi := max(C.indi - 1, 0)
        indent(C, "\tdeclend",
               " --- end declaration of procedures and constants for proc ",
               procName)
        inDecl := inGlbl := inRcrd := &null
      }

      # \tend
      #   Annotate end of the code of a procedure
      ="\tend": {
        C.indi := max(C.indi - 1, 0)
        indent(C, "\tend",
               " --- end of code for proc ", procName)
        procName := &null
      }

      # \tvar\t13
      #   Annotate a variable usage
      ="\tvar\t" || (x <- tab(0)):
        indent(C, "\tvar\t", x, " --- ", Tvar[x])

      # \tint\t21
      #   Annotate an integer-constant usage
      ="\tint\t" || (x <- tab(0)):
        indent(C, "\tint\t", x, " --- i:", Tcon[x])

      # \treal\t33
      #   Annotate a real-constant usage
      ="\treal\t" || (x <- tab(0)):
        indent(C, "\treal\t", x, " --- r:", Tcon[x])

      # \tstr\t22
      #   Annotate a string-constant usage
      ="\tstr\t" || (x <- tab(0)):
        indent(C, "\tstr\t", x, " --- s:", Tcon[x])

      # lab L30
      #   Rewrite goto label so that it carries the procedure name
      ="lab L" || (x <- tab(0)): {
        x := "lab " || procName || "_L" || x
        indent(C, x)
      }

      # \tinit\tL2
      #   Annotate beginning of initial expression
      ="\tinit\t" || (x <- tab(0)): {
        x := procName || "_" || x
        indent(C, "\tinit\t", x, " --- except initial invocation, goto ", x)
      }

      # \tmark\tL2
      #   Annotate beginning of an expression frame
      ="\tmark\t" || (x <- tab(0)): {
        x := procName || "_" || x
        indent(C, "\tmark\t", x, " --- on expression failure goto ", x)
        push(failStck, x)
        C.indi +:= 1
      }

      # \tgoto\tL2
      #   Annotate unconditional goto
      ="\tgoto\t" || (x <- tab(0)): {
        x := procName || "_" || x
        indent(C, "\tgoto\t", x, " --- go to label ", x)
      }

      # \tcreate\tL2
      #   Annotate co-expression create operation
      ="\tcreate\t" || (x <- tab(0)): {
        x := procName || "_" || x
        indent(C, "\tcreate\t", x, " --- create co-expression at label ", x)
        C.indi -:= 1
      }

      # \tchfail\tL2
      #   Annotate redirection of an expression failure
      ="\tchfail\t" || (x <- tab(0)): {
        x := procName || "_" || x
        # With no tracked frame this assignment simply fails; it must not
        # prevent the line from being written.
        failStck[1] := x
        indent(C, "\tchfail\t", x,
               " --- redirect expression failure; failure_ipc <- ", x)
      }

      # \tpsusp
      #   Annotate suspension from procedure with creation of generator
      #   frame
      ="\tpsusp": {
        C.indi +:= 1
        indent(C, "\tpsusp",
               " --- suspend from procedure ", procName)
      }

      # \tmark0
      #   Annotate beginning of an expression frame
      ="\tmark0": {
        indent(C, "\tmark0",
               " --- on expression failure goto next line after unmark")
        push(failStck, "mark0")
        C.indi +:= 1
      }

      # \tunmark
      #   Annotate end of an expression frame
      ="\tunmark": {
        C.indi -:= 1
        failLbl := popFailLbl(failStck)
        indent(C, "\tunmark", " --- remove expression frame for ", failLbl)
      }

      # \tesusp
      #   Annotate creation of generator frame
      ="\tesusp": {
        indent(C, "\tesusp",
               " --- create generator frame with portion of data stack")
        C.indi -:= 1
      }

      # \tefail
      #   Annotate expression failure; every "mark0" frame on top of the
      #   stack is popped and captioned on a line of its own
      ="\tefail": {
        indent(C, "\tefail",
               " --- resume generator or remove expression frame for ",
               topFailLbl(failStck))
        while failStck[1] == "mark0" do {
          C.indi -:= 1
          failLbl := pop(failStck)
          indent(C, "\t\t",
                 " --- resume generator or remove expression frame for ",
                 failLbl)
        }
      }

      # \teret
      #   Annotate removal of expression frame for case expression
      ="\teret": {
        failLbl := popFailLbl(failStck)
        indent(C, "\teret",
               " --- remove expression frame (for case expression) for ",
               failLbl)
        C.indi -:= 1
      }

      # \tpret
      #   Annotate removal of expression frame for procedure return
      ="\tpret": {
        failLbl := popFailLbl(failStck)
        indent(C, "\tpret",
               " --- remove expression frame (for procedure return) for ",
               failLbl)
        C.indi -:= 1
      }

      # \tpfail
      #   Annotate failure of a procedure
      ="\tpfail":
        indent(C, "\tpfail", " --- remove procedure frame, then fail")

      # \tlimit
      #   Annotate creation of generator-limitation counter
      ="\tlimit":
        indent(C, "\tlimit", " --- create generator-limitation counter")

      # \tlsusp
      #   Annotate suspension subject to the generator-limitation counter
      ="\tlsusp": {
        x := popFailLbl(failStck)
        indent(C, "\tlsusp",
               " --- fail unless counter allows suspension; failure_ipc <- ",
               x)
      }

      # \tline\t11
      #   Capture the text for the current line.  Nothing is captured
      #   when the line number does not advance past the last one seen.
      ="\tline\t" || (x <- tab(0)): {
        if not (integer(x) <= integer(slast)) then {
          # echo the source lines skipped since the reader's position
          while *Cs < integer(x) - 1 do
            indent(C, "\t--- " || @Cs || " --- line " || *Cs) |
              stop(&progname, ": unexpected: no more input")
          # when the source is exhausted this assignment fails and both
          # sline and slast keep their previous values
          sline := "\t--- " || @Cs || " --- line " || (slast := x)
        }
        indent(C, uline)
      }

      # \tcolm\t11
      #   Show the column and the current line
      ="\tcolm\t" || (x <- tab(0)): {
        indent(C, sline, "\n", repl(SPACES, C.indi),
               "\t---", repl(" ", integer(x)), "^ column ", x)
        indent(C, uline)
      }

      default:
        indent(C, &subject)
    }
  }

  close(\srcf)
  return 0
end

# Pop and return the innermost tracked failure label.  Unlike a bare
# pop(), this never fails: when no frame is being tracked it returns a
# placeholder, so the caller still writes its annotation line.
procedure popFailLbl(failStck)
  return pop(failStck) | "(untracked)"
end

# Return the innermost tracked failure label without removing it, or a
# placeholder when no frame is being tracked.  Never fails.
procedure topFailLbl(failStck)
  return failStck[1] | "(untracked)"
end
|
|
|
# vim: sw=2 ts=2 et ai syntax=icon nu : |
This program annotates Icon or Unicon "ucode" files to make them somewhat easier to interpret. Although ucode is not intended to be read by humans (at least not under ordinary circumstances), sometimes I must resort to reading it when I am trying to puzzle out what the translator thinks that I am trying to say with my code. If you are studying language implementation, you may find this helpful to contrive your own examples and look at the resulting ucode.
Note well that `icont` cannot read the annotated file!