Skip to content

Instantly share code, notes, and snippets.

@aminophen
Last active October 15, 2022 23:58
Show Gist options
  • Save aminophen/2feaa7fd70de0dd7278c5f8720d66bd8 to your computer and use it in GitHub Desktop.
Save aminophen/2feaa7fd70de0dd7278c5f8720d66bd8 to your computer and use it in GitHub Desktop.
[pTeX] Interface to specify JIS-encoded or UCS-encoded TFM
Index: eptexdir/eptex.defines
===================================================================
--- eptexdir/eptex.defines (revision 64704)
+++ eptexdir/eptex.defines (working copy)
@@ -34,6 +34,7 @@
@define function fromKUTEN ();
@define function UCStoUTF8 ();
@define function fromUCS ();
+@define function toJIS ();
@define function toUCS ();
@define function notkanjicharseq ();
Index: euptexdir/euptex.defines
===================================================================
--- euptexdir/euptex.defines (revision 64704)
+++ euptexdir/euptex.defines (working copy)
@@ -38,6 +38,7 @@
@define function fromKUTEN ();
@define function fromUCS ();
@define function UCStoUTF8 ();
+@define function toJIS ();
@define function toUCS ();
@define function notkanjicharseq ();
Index: ptexdir/ptex-base.ch
===================================================================
--- ptexdir/ptex-base.ch (revision 64704)
+++ ptexdir/ptex-base.ch (working copy)
@@ -3085,8 +3085,11 @@
@!font_info: ^memory_word; {pTeX: use halfword for |char_type| table.}
@!font_dir: ^eight_bits;
{pTeX: direction of fonts, 0 is default, 1 is Yoko, 2 is Tate}
+@!font_enc: ^eight_bits;
+ {pTeX: encoding of fonts, 0 is default, 1 is JIS, 2 is Unicode}
@!font_num_ext: ^integer;
{pTeX: number of the |char_type| table.}
+@!jfm_enc: eight_bits; {pTeX: holds scanned result of encoding: 0 is default, 1 is JIS, 2 is Unicode}
@z
@x [30.550] l.11270 - pTeX:
@@ -3099,6 +3102,13 @@
{pTeX: base addresses for KANJI character type parameters}
@z
+@x
+@ @<Set init...@>=
+@y
+@ @<Set init...@>=
+jfm_enc:=0;
+@z
+
@x [30.554] l.11373 - pTeX:
@d orig_char_info_end(#)==#].qqqq
@d orig_char_info(#)==font_info[char_base[#]+orig_char_info_end
@@ -3238,6 +3248,7 @@
@<Apologize for not loading the font, |goto done|@>;
f:=font_ptr+1;
font_dir[f]:=jfm_flag;
+font_enc[f]:=jfm_enc;
font_num_ext[f]:=nt;
ctype_base[f]:=fmem_ptr;
char_base[f]:=ctype_base[f]+nt-bc;
@@ -3260,8 +3271,15 @@
if jfm_flag<>dir_default then
for k:=ctype_base[f] to ctype_base[f]+nt-1 do
begin
- fget; read_twentyfourx(cx); font_info[k].hh.rh:=tokanji(cx); {|kchar_code|}
- fget; cx:=fbyte; font_info[k].hh.lhfield:=tonum(cx); {|kchar_type|}
+ fget; read_twentyfourx(cx);
+ if jfm_enc=2 then {Unicode TFM}
+ font_info[k].hh.rh:=toDVI(fromUCS(cx))
+ else if jfm_enc=1 then {JIS-encoded TFM}
+ font_info[k].hh.rh:=toDVI(fromJIS(cx))
+ else
+ font_info[k].hh.rh:=tokanji(cx); {|kchar_code|}
+ fget; cx:=fbyte;
+ font_info[k].hh.lhfield:=tonum(cx); {|kchar_type|}
end;
for k:=char_base[f]+bc to width_base[f]-1 do
begin store_four_quarters(font_info[k].qqqq);
@@ -3492,7 +3510,13 @@
synch_h;
end;
p:=link(p);
- jc:=toDVI(KANJI(info(p)));
+ jc:=KANJI(info(p));
+ if font_enc[f]=2 then {Unicode TFM}
+ jc:=toUCS(jc)
+ else if font_enc[f]=1 then {JIS-encoded TFM}
+ jc:=toJIS(jc)
+ else
+ jc:=toDVI(jc);
dvi_out(set2); dvi_out(Hi(jc)); dvi_out(Lo(jc));
cur_h:=cur_h+char_width(f)(orig_char_info(f)(c)); {not |jc|}
end;
@@ -6369,10 +6393,13 @@
@z
@x [49.1211] l.23397 - pTeX: prefixed_command
+@t\4@>@<Declare subprocedures for |prefixed_command|@>@t@>@;@/
procedure prefixed_command;
label done,exit;
var a:small_number; {accumulated prefix codes so far}
@y
+@t\4@>@<Declare the function called |scan_keyword_noexpand|@>
+@<Declare subprocedures for |prefixed_command|@>@t@>@;@/
procedure prefixed_command;
label done,exit;
var a:small_number; {accumulated prefix codes so far}
@@ -6548,6 +6575,13 @@
def_tfont,def_jfont,def_font: new_font(a);
@z
+@x [49.????] pTeX: new_font
+get_r_token; u:=cur_cs;
+@y
+@<Scan the font encoding specification@>;
+get_r_token; u:=cur_cs;
+@z
+
@x [49.1292] l.24451 - pTeX: shift_case
@<Change the case of the token in |p|, if a change is appropriate@>=
t:=info(p);
@@ -6663,6 +6697,7 @@
@ @<Dump the array info for internal font number |k|@>=
begin
dump_things(font_dir[null_font], font_ptr+1-null_font);
+dump_things(font_enc[null_font], font_ptr+1-null_font);
dump_things(font_num_ext[null_font], font_ptr+1-null_font);
dump_things(font_check[null_font], font_ptr+1-null_font);
@z
@@ -6681,6 +6716,7 @@
@<Undump the array info for internal font number |k|@>=
begin {Allocate the font arrays}
font_dir:=xmalloc_array(eight_bits, font_max);
+font_enc:=xmalloc_array(eight_bits, font_max);
font_num_ext:=xmalloc_array(integer, font_max);
@z
@@ -6695,6 +6731,7 @@
undump_things(font_check[null_font], font_ptr+1-null_font);
@y
undump_things(font_dir[null_font], font_ptr+1-null_font);
+undump_things(font_enc[null_font], font_ptr+1-null_font);
undump_things(font_num_ext[null_font], font_ptr+1-null_font);
undump_things(font_check[null_font], font_ptr+1-null_font);
@z
@@ -6723,6 +6760,7 @@
font_check:=xmalloc_array(four_quarters, font_max);
@y
font_dir:=xmalloc_array(eight_bits, font_max);
+ font_enc:=xmalloc_array(eight_bits, font_max);
font_num_ext:=xmalloc_array(integer, font_max);
font_check:=xmalloc_array(four_quarters, font_max);
@z
@@ -6739,6 +6777,7 @@
@y
font_ptr:=null_font; fmem_ptr:=7;
font_dir[null_font]:=dir_default;
+ font_enc[null_font]:=0;
font_num_ext[null_font]:=0;
@z
@@ -6890,7 +6929,9 @@
jc:=toDVI(kcode);
sp:=1; { start position }
ep:=font_num_ext[f]-1; { end position }
-if (ep>=1)and(kchar_code(f)(sp)<=jc)and(jc<=kchar_code(f)(ep)) then
+if (ep>=1) then { nt is larger than 1; |char_type| is non-empty }
+if font_enc[f]=0 then { |kchar_code| are ordered; faster search }
+begin if (kchar_code(f)(sp)<=jc)and(jc<=kchar_code(f)(ep)) then
begin while (sp <= ep) do
begin mp:=sp+((ep-sp) div 2);
if jc<kchar_code(f)(mp) then ep:=mp-1
@@ -6900,9 +6941,56 @@
end;
end;
end;
+end
+else { TFM-DVI encoding conversion; whole search }
+ begin while (sp <= ep) do
+ if jc=kchar_code(f)(sp) then
+ begin get_jfm_pos:=kchar_type(f)(sp); return;
+ end
+ else incr(sp);
+ end;
get_jfm_pos:=kchar_type(f)(0);
end;
+@ The function |scan_keyword_noexpand| is used to scan a keyword
+preceding possibly undefined control sequence.
+It is used while scanning \.{\\font} with JFM encoding specification.
+
+@<Declare the function called |scan_keyword_noexpand|@>=
+function scan_keyword_noexpand(@!s:str_number):boolean;
+label exit;
+var p:pointer; {tail of the backup list}
+@!q:pointer; {new node being added to the token list via |store_new_token|}
+@!k:pool_pointer; {index into |str_pool|}
+begin p:=backup_head; link(p):=null; k:=str_start[s]; {empty backup list; |k| at first character of |s|}
+while k<str_start[s+1] do
+ begin get_token; {fetch the next token with no expansion}
+ if (cur_cs=0)and@|
+ ((cur_chr=so(str_pool[k]))or(cur_chr=so(str_pool[k])-"a"+"A")) then
+ begin store_new_token(cur_tok); incr(k); {character matched, possibly in uppercase; save it and advance}
+ end
+ else if (cur_cmd<>spacer)or(p<>backup_head) then {spaces before the first matched character are skipped}
+ begin back_input; {mismatch: return the offending token}
+ if p<>backup_head then back_list(link(backup_head)); {and the tokens matched so far}
+ scan_keyword_noexpand:=false; return;
+ end;
+ end;
+flush_list(link(backup_head)); scan_keyword_noexpand:=true; {whole keyword matched; discard saved tokens}
+exit:end;
+
+@ @<Scan the font encoding specification@>=
+begin jfm_enc:=0; {default encoding unless an \.{in} keyword follows}
+if scan_keyword_noexpand("in") then
+ if scan_keyword_noexpand("jis") then jfm_enc:=1 {JIS-encoded TFM}
+ else if scan_keyword_noexpand("ucs") then jfm_enc:=2 {Unicode TFM}
+ else begin
+ print_err("Unknown TFM encoding");
+@.Unknown TFM encoding@>
+ help1("TFM encoding specification is ignored.");@/
+ error; {|jfm_enc| stays 0, so the specification is ignored}
+ end;
+end
+
@ Following codes are used to calculate a KANJI width and height.
@<Local variables for dimension calculations@>=
Index: ptexdir/ptex.defines
===================================================================
--- ptexdir/ptex.defines (revision 64704)
+++ ptexdir/ptex.defines (working copy)
@@ -27,6 +27,7 @@
@define function fromSJIS ();
@define function fromKUTEN ();
@define function fromUCS ();
+@define function toJIS ();
@define function toUCS ();
@define function notkanjicharseq ();
Index: uptexdir/uptex-m.ch
===================================================================
--- uptexdir/uptex-m.ch (revision 64704)
+++ uptexdir/uptex-m.ch (working copy)
@@ -757,10 +757,14 @@
@z
@x
- jc:=toDVI(KANJI(info(p)));
+ jc:=KANJI(info(p));
+@y
+ jc:=KANJI(info(p)) mod max_cjk_val;
+@z
+
+@x
dvi_out(set2); dvi_out(Hi(jc)); dvi_out(Lo(jc));
@y
- jc:=toDVI(KANJI(info(p)) mod max_cjk_val);
if (jc<@"10000) then begin
dvi_out(set2);
end else begin
Index: uptexdir/uptex.defines
===================================================================
--- uptexdir/uptex.defines (revision 64704)
+++ uptexdir/uptex.defines (working copy)
@@ -36,6 +36,7 @@
@define function fromSJIS ();
@define function fromKUTEN ();
@define function fromUCS ();
+@define function toJIS ();
@define function toUCS ();
@define function notkanjicharseq ();
%#!etex, eptex, euptex
\nonstopmode
\font\xa=cmr10 \xa.\relax\showthe\fontcharwd\xa`.
\font\xb=cmr10 at 5pt \xb.\relax\showthe\fontcharwd\xb`.
\font\xc cmr10 \xc.\relax\showthe\fontcharwd\xc`.
\font\xd cmr10 at 5pt \xd.\relax\showthe\fontcharwd\xd`.
\font\xe\relax cmr10 \xe.\relax\showthe\fontcharwd\xe`.
\font\xf\relax cmr10 at 5pt \xf.\relax\showthe\fontcharwd\xf`.
%\font\xg\relax=cmr10 \xg.\relax\showthe\fontcharwd\xg`. % invalid
%\font\xh\relax=cmr10 at 5pt \xh.\relax\showthe\fontcharwd\xh`. % invalid
\ifx\jfont\undefined \expandafter\end \fi
\ifnum\jis"2121="3000 \relax
\font\oa=umin10 \oa」\relax\showthe\fontcharwd\oa`」
\font\ob=umin10 at 5pt \ob」\relax\showthe\fontcharwd\ob`」
\font\oc umin10 \oc」\relax\showthe\fontcharwd\oc`」
\font\od umin10 at 5pt \od」\relax\showthe\fontcharwd\od`」
\font\oe\relax umin10 \oe」\relax\showthe\fontcharwd\oe`」
\font\of\relax umin10 at 5pt \of」\relax\showthe\fontcharwd\of`」
%\font\og\relax=umin10 \og」\relax\showthe\fontcharwd\og`」 % invalid
%\font\oh\relax=umin10 at 5pt \oh」\relax\showthe\fontcharwd\oh`」 % invalid
\else
\font\oa=min10 \oa」\relax\showthe\fontcharwd\oa`」
\font\ob=min10 at 5pt \ob」\relax\showthe\fontcharwd\ob`」
\font\oc min10 \oc」\relax\showthe\fontcharwd\oc`」
\font\od min10 at 5pt \od」\relax\showthe\fontcharwd\od`」
\font\oe\relax min10 \oe」\relax\showthe\fontcharwd\oe`」
\font\of\relax min10 at 5pt \of」\relax\showthe\fontcharwd\of`」
%\font\og\relax=min10 \og」\relax\showthe\fontcharwd\og`」 % invalid
%\font\oh\relax=min10 at 5pt \oh」\relax\showthe\fontcharwd\oh`」 % invalid
\fi
%% for development version
\font in jis \ja=min9 \ja」\relax\showthe\fontcharwd\ja`」
\font in jis \jb=min9 at 5pt \jb」\relax\showthe\fontcharwd\jb`」
\font in jis \jc nmin9 \jc」\relax\showthe\fontcharwd\jc`」
\font in jis \jd nmin9 at 5pt \jd」\relax\showthe\fontcharwd\jd`」
\font in jis \je\relax min9 \je」\relax\showthe\fontcharwd\je`」
\font in jis \jf\relax min9 at 5pt \jf」\relax\showthe\fontcharwd\jf`」
%\font in jis \jg\relax=nmin9 \jg」\relax\showthe\fontcharwd\jg`」 % invalid
%\font in jis \jh\relax=nmin9 at 5pt \jh」\relax\showthe\fontcharwd\jh`」 % invalid
\font in jis\ji=min8 \ji」\relax\showthe\fontcharwd\ji`」
\font in jis\jj=min8 at 5pt \jj」\relax\showthe\fontcharwd\jj`」
\font in jis\jk nmin8 \jk」\relax\showthe\fontcharwd\jk`」
\font in jis\jl nmin8 at 5pt \jl」\relax\showthe\fontcharwd\jl`」
\font in jis\jm\relax min8 \jm」\relax\showthe\fontcharwd\jm`」
\font in jis\jn\relax min8 at 5pt \jn」\relax\showthe\fontcharwd\jn`」
%\font in jis\jo\relax=nmin8 \jo」\relax\showthe\fontcharwd\jo`」 % invalid
%\font in jis\jp\relax=nmin8 at 5pt \jp」\relax\showthe\fontcharwd\jp`」 % invalid
\font injis \kb=nmin7 at 5pt \kb」\relax\showthe\fontcharwd\kb`」
\font injis \ka=nmin7 \ka」\relax\showthe\fontcharwd\ka`」
\font injis \kd min7 at 5pt \kd」\relax\showthe\fontcharwd\kd`」
\font injis \kc min7 \kc」\relax\showthe\fontcharwd\kc`」
\font injis \kf\relax nmin7 at 5pt \kf」\relax\showthe\fontcharwd\kf`」
\font injis \ke\relax nmin7 \ke」\relax\showthe\fontcharwd\ke`」
%\font injis \kh\relax=min7 at 5pt \kh」\relax\showthe\fontcharwd\kh`」 % invalid
%\font injis \kg\relax=min7 \kg」\relax\showthe\fontcharwd\kg`」 % invalid
\font injis\kj=nmin6 at 5pt \kj」\relax\showthe\fontcharwd\kj`」
\font injis\ki=nmin6 \ki」\relax\showthe\fontcharwd\ki`」
\font injis\kl min6 at 5pt \kl」\relax\showthe\fontcharwd\kl`」
\font injis\kk min6 \kk」\relax\showthe\fontcharwd\kk`」
\font injis\kn\relax nmin6 at 5pt \kn」\relax\showthe\fontcharwd\kn`」
\font injis\km\relax nmin6 \km」\relax\showthe\fontcharwd\km`」
%\font injis\kp\relax=min6 at 5pt \kp」\relax\showthe\fontcharwd\kp`」 % invalid
%\font injis\ko\relax=min6 \ko」\relax\showthe\fontcharwd\ko`」 % invalid
\font in ucs \ua=umin6 \ua」\relax\showthe\fontcharwd\ua`」
\font in ucs \ub=umin6 at 5pt \ub」\relax\showthe\fontcharwd\ub`」
\font in ucs \uc umin6 \uc」\relax\showthe\fontcharwd\uc`」
\font in ucs \ud umin6 at 5pt \ud」\relax\showthe\fontcharwd\ud`」
\font in ucs \ue\relax umin6 \ue」\relax\showthe\fontcharwd\ue`」
\font in ucs \uf\relax umin6 at 5pt \uf」\relax\showthe\fontcharwd\uf`」
%\font in ucs \ug\relax=umin6 \ug」\relax\showthe\fontcharwd\ug`」 % invalid
%\font in ucs \uh\relax=umin6 at 5pt \uh」\relax\showthe\fontcharwd\uh`」 % invalid
\font in ucs\ui=umin7 \ui」\relax\showthe\fontcharwd\ui`」
\font in ucs\uj=umin7 at 5pt \uj」\relax\showthe\fontcharwd\uj`」
\font in ucs\uk umin7 \uk」\relax\showthe\fontcharwd\uk`」
\font in ucs\ul umin7 at 5pt \ul」\relax\showthe\fontcharwd\ul`」
\font in ucs\um\relax umin7 \um」\relax\showthe\fontcharwd\um`」
\font in ucs\un\relax umin7 at 5pt \un」\relax\showthe\fontcharwd\un`」
%\font in ucs\uo\relax=umin7 \uo」\relax\showthe\fontcharwd\uo`」 % invalid
%\font in ucs\up\relax=umin7 at 5pt \up」\relax\showthe\fontcharwd\up`」 % invalid
\font inucs \vb=umin8 at 5pt \vb」\relax\showthe\fontcharwd\vb`」
\font inucs \va=umin8 \va」\relax\showthe\fontcharwd\va`」
\font inucs \vd umin8 at 5pt \vd」\relax\showthe\fontcharwd\vd`」
\font inucs \vc umin8 \vc」\relax\showthe\fontcharwd\vc`」
\font inucs \vf\relax umin8 at 5pt \vf」\relax\showthe\fontcharwd\vf`」
\font inucs \ve\relax umin8 \ve」\relax\showthe\fontcharwd\ve`」
%\font inucs \vh\relax=umin8 at 5pt \vh」\relax\showthe\fontcharwd\vh`」 % invalid
%\font inucs \vg\relax=umin8 \vg」\relax\showthe\fontcharwd\vg`」 % invalid
\font inucs\vj=umin9 at 5pt \vj」\relax\showthe\fontcharwd\vj`」
\font inucs\vi=umin9 \vi」\relax\showthe\fontcharwd\vi`」
\font inucs\vl umin9 at 5pt \vl」\relax\showthe\fontcharwd\vl`」
\font inucs\vk umin9 \vk」\relax\showthe\fontcharwd\vk`」
\font inucs\vn\relax umin9 at 5pt \vn」\relax\showthe\fontcharwd\vn`」
\font inucs\vm\relax umin9 \vm」\relax\showthe\fontcharwd\vm`」
%\font inucs\vp\relax=umin9 at 5pt \vp」\relax\showthe\fontcharwd\vp`」 % invalid
%\font inucs\vo\relax=umin9 \vo」\relax\showthe\fontcharwd\vo`」 % invalid
\end
%#!ptex, uptex, eptex, euptex
\nonstopmode
% allowed encoding: "jis" "ucs"
\font in jis\x=jis \x
あ」\inhibitglue い「\the\lastpenalty
。\ifx\lastnodechar\undefined\else\the\lastnodechar\fi
\font in ucs\y=upjisr-h \y
あ」\inhibitglue い「\the\lastpenalty
。\ifx\lastnodechar\undefined\else\the\lastnodechar\fi
% uppercase also allowed
\font IN JIS\z=jisg \z
あ」\inhibitglue い「\the\lastpenalty
。\ifx\lastnodechar\undefined\else\the\lastnodechar\fi
\font IN UCS\w=upjisg-h \w
あ」\inhibitglue い「\the\lastpenalty
。\ifx\lastnodechar\undefined\else\the\lastnodechar\fi
% Latin fonts -> ignored
\font\xA=ec-lmr10 \xA a?
\font in jis \xB=ec-lmss10 \xB a?
% no expansion in scanning
\def\inX{in X}
\font \inX=unknownX \inX a?
% unknown encoding -> error
\font in Y \inY=unknownY \inY a?
% multiple loading -> first encoding
% correct:
\font in jis \xD=jisn \xD う)\inhibitglue?
\font in ucs \xE=jisn \xE う)\inhibitglue?
% wrong:
\font in ucs \xF=jisgn \xF う)\inhibitglue?
\font in jis \xH=jisgn \xH う)\inhibitglue?
\end
@aminophen
Copy link
Author

texjporg/tex-jp-build#149 の実装を始めようとしているところ。

  • upTeX でも JIS-encoded TFM を読みたい。
  • どうせなら pTeX でも UCS-encoded TFM を読めてもよい?

まだ TeX 側のインタフェースを作っている段階。内部での文字コード変換処理はまだ。

@aminophen
Copy link
Author

内部での文字コード変換処理も完了。

% default
\font\xA=jis           \xA% DVI: JIS 0x2422
% explicit JIS
\font in jis \xB=jisn  \xB% DVI: JIS 0x2422
% wrong!
\font in ucs \xC=jisg  \xC% DVI: U+3042 but JIS-encoded font 0x3042
\bye

20221015-jfmenc-aaan

なんか奇跡が起きている。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment