zr-tex8r · February 16, 2023 14:09
diff --git a/test.tex b/test.tex
 \ExplSyntaxOn
 %---------------------------------------
 %% \token_if_jachar:N <token>
 % Tests if <token> is a jachar token or not.
 % The predicate (_p) and the T, F and TF forms are all provided.
 % On pTeX/upTeX the test inspects the meaning string of <token>;
 % on every other engine the conditional is always false.
 \bool_lazy_or:nnTF
  { \sys_if_engine_ptex_p: }
  { \sys_if_engine_uptex_p: }
  {
    % The definitions are built inside \use:x so that \tl_to_str:n { kanji }
    % is already turned into its string form when they are made.  Tokens
    % preceded by \exp_not:N are kept as they are, and the doubled hashes
    % become the parameters of the functions being defined.
    \use:x
      {
        \prg_new_conditional:Npnn \exp_not:N \token_if_jachar:N ##1
          { p , T ,  F , TF }
          {
            % A token is a jachar token if and only if
            % its meaning string starts with "kanji ".
            % The helper returns whatever precedes the first "kanji " in
            % its input, so an empty result means that the meaning string
            % begins with "kanji ".
            \exp_not:N \str_if_eq:eeTF
              {
                \exp_not:N \exp_after:wN
                \exp_not:N  \__token_delimit_by_kanji:w
                \exp_not:N \token_to_meaning:N ##1
                % "?" and a sentinel "kanji " are appended, so the helper
                % always finds its delimiter; when there is no earlier
                % match the result ends in "?" and is thus not empty.
                ? \tl_to_str:n { kanji } ~ \s__token_stop
              }
              { }
              { \exp_not:N \prg_return_true: }
              { \exp_not:N \prg_return_false: }
          }
        % \__token_delimit_by_kanji:w <text> \s__token_stop
        % Leaves the part of <text> before the first "kanji " and drops
        % everything after it up to \s__token_stop.
        \cs_new:Npn \exp_not:N \__token_delimit_by_kanji:w ##1
          \tl_to_str:n { kanji } ~ ##2 \s__token_stop
          {##1}
      }
  }
  {
    % Neither pTeX nor upTeX: no token is ever reported as a jachar.
    \prg_new_conditional:Npnn \token_if_jachar:N #1
      { p , T ,  F , TF }
      { \prg_return_false: }
  }

 %---------------------------------------
 %% Fix for the case-change functions in l3text.
 % On pTeX/upTeX, keep the meaning that \__text_change_case_char_auxii:nnnN
 % has at this point under the name \__text_change_case_char_jachar:nnnN.
 % This copy has to be taken before \__text_change_case_char_auxii:nnnN
 % is given a new definition further down in this file.
 \bool_if:nT
  { \sys_if_engine_ptex_p: || \sys_if_engine_uptex_p: }
  {
    \cs_gset_eq:NN
      \__text_change_case_char_jachar:nnnN
      \__text_change_case_char_auxii:nnnN
  }
 % The current implementation for pdftex should be employed
 % for all non-Unicode engines, i.e. for everything that is neither
 % LuaTeX nor XeTeX.
 \bool_if:nF
  { \sys_if_engine_luatex_p: || \sys_if_engine_xetex_p: }
  {
    % #1, #2, #3 are handed on unchanged; #4 is the character token whose
    % code selects the handler: above "EF the one taking four N-type
    % tokens, otherwise the one taking three.
    \cs_gset:Nn \__text_change_case_char_auxii:nnnN
      {
        \int_compare:nNnTF { `#4 } > { "EF }
          { \__text_change_case_char_UTFviii:nnnNNNN }
          { \__text_change_case_char_UTFviii:nnnNNN }
            {#1} {#2} {#3} #4
      }
  }
 % pTeX/upTeX only: dispatch on the character token #4, treating jachar
 % tokens separately from the byte-wise UTF-8 handling.
 \bool_if:nT
  { \sys_if_engine_ptex_p: || \sys_if_engine_uptex_p: }
  {
    \cs_gset:Nn \__text_change_case_char:nnnN
      {
        % Codes up to and including "80 take the plain path.  (The
        % original author asked whether the cut-off should be "7F.)
        \int_compare:nNnTF { `#4 } < { "81 }
          { \__text_change_case_char_auxi:nnnN }
          {
            % Check if the given character token is a jachar.
            \token_if_jachar:NTF #4
              { \__text_change_case_char_jachar:nnnN }
              {
                % Not a jachar: above "DF the choice between the longer
                % handlers is left to ..._auxii:nnnN, otherwise the
                % handler taking two N-type tokens is used.
                \int_compare:nNnTF { `#4 } > { "DF }
                  { \__text_change_case_char_auxii:nnnN }
                  { \__text_change_case_char_UTFviii:nnnNN }
              }
          }
            % The function selected above is applied to these arguments.
            {#1} {#2} {#3} #4
      }
  }

 %---------------------------------------
 %% Fix for the case-change functions in l3str.
 % The current implementation for pdftex should be employed
 % for all non-Unicode engines, i.e. for everything that is neither
 % LuaTeX nor XeTeX.
 \bool_if:nF
  { \sys_if_engine_luatex_p: || \sys_if_engine_xetex_p: }
  {
    % Both functions gather the N-type tokens that follow #1 into one
    % braced argument and pass it on to ..._UTFviii:nn.
    \cs_gset:Nn \__str_change_case_char_UTFviii:nNNN
      { \__str_change_case_char_UTFviii:nn {#1} { #2 #3 #4 } }
    \cs_gset:Nn \__str_change_case_char_UTFviii:nNNNN
      { \__str_change_case_char_UTFviii:nn {#1} { #2 #3 #4 #5 } }
  }
 % pTeX/upTeX only: per-character step of the l3str case changer, with
 % jachar tokens passed through unchanged.
 \bool_if:nT
  { \sys_if_engine_ptex_p: || \sys_if_engine_uptex_p: }
  {
    % #1 is the case name used to build the char_str_...case:N function,
    % #2 the character token being processed.
    \cs_gset:Nn \__str_change_case_char:nN
      {
        \__str_if_recursion_tail_stop_do:Nn #2
          { \__str_change_case_end:wn }
        \int_compare:nNnTF { `#2 } < { "81 }
          {
            % Codes up to and including "80: convert the single character
            % and carry on with the loop.
            \__str_change_case_output:fw
              { \use:c { char_str_ #1 case:N } #2 }
            \__str_change_case_loop:nw {#1}
          }
          {
            % Check if the given character token is a jachar.
            \token_if_jachar:NTF #2
              { \__str_change_case_char_jachar:nN }
              {
                % Not a jachar: the code of #2 selects the handler taking
                % two, three or four N-type tokens.
                \int_compare:nNnTF { `#2 } > { "DF }
                  {
                    \int_compare:nNnTF { `#2 } > { "EF }
                      { \__str_change_case_char_UTFviii:nNNNN }
                      { \__str_change_case_char_UTFviii:nNNN }
                  }
                  { \__str_change_case_char_UTFviii:nNN }
              }
            % The function selected above is applied to these arguments.
            {#1} #2
          }
      }
    % A jachar is output as it is, then the loop continues.
    \cs_new:Nn \__str_change_case_char_jachar:nN
      {
        \__str_change_case_output:nw {#2}
        \__str_change_case_loop:nw {#1}
      }
  }
 %=======================================
 % MATERIALS FOR TESTING
 %---------------------------------------
 % Scratch variables of the jachar conversion.
 \str_new:N \l_my_tmpa_str  % result string being assembled
 \str_new:N \l_my_raw_str   % bytes collected for the sequence in progress
 \int_new:N \l_my_uc_int    % value accumulated from the bytes seen so far
 \int_new:N \l_my_run_int   % bytes still awaited for the current sequence
 % \my_prefer_jachar:N <str var>
 % Converts all non-ASCII characters in <str var> to jachars, so that
 % the display of the string will be readable (on (u)pTeX).
 % On any other engine <str var> is left untouched: the conversion uses
 % the \ucs primitive of (u)pTeX and \__my_jachar:N, which this file
 % defines on those engines only.
 % The function performs assignments, hence it is defined as protected.
 \cs_new_protected:Nn \my_prefer_jachar:N
  {
    \bool_lazy_or:nnT
      { \sys_if_engine_ptex_p: }
      { \sys_if_engine_uptex_p: }
      {
        % Start from a clean state, so that an unfinished sequence left
        % over from an earlier call cannot leak into this one.
        \str_clear:N \l_my_tmpa_str
        \str_clear:N \l_my_raw_str
        \int_zero:N \l_my_uc_int
        \int_zero:N \l_my_run_int
        \str_map_inline:Nn #1
          {
            % The first true test wins.  A token matching none of them
            % (code "80 to "BF) is handled as a continuation byte.
            \bool_case_true:nF
              {
                % Already a jachar: copy it.
                { \token_if_jachar_p:N ##1 }
                  { \str_put_right:Nn \l_my_tmpa_str {##1} }
                % ASCII: copy it.
                { \int_compare_p:nNn { `##1 } < { "80 } }
                  { \str_put_right:Nn \l_my_tmpa_str {##1} }
                % Lead byte; three more bytes are awaited.
                { \int_compare_p:nNn { `##1 } > { "EF } }
                  { \__my_prefer_jachar:Nnn ##1 { "F0 } { 3 } }
                % Lead byte; two more bytes are awaited.
                { \int_compare_p:nNn { `##1 } > { "DF } }
                  { \__my_prefer_jachar:Nnn ##1 { "E0 } { 2 } }
                % Lead byte; one more byte is awaited.
                { \int_compare_p:nNn { `##1 } > { "BF } }
                  { \__my_prefer_jachar:Nnn ##1 { "C0 } { 1 } }
              }
              { \__my_prefer_jachar:N ##1 }
          }
        \str_set_eq:NN #1 \l_my_tmpa_str
      }
  }
 % \__my_prefer_jachar:N <byte>
 % Handles a continuation byte of the sequence in progress.  The byte is
 % recorded in \l_my_raw_str and its low six bits are appended to
 % \l_my_uc_int.  Once no further byte is awaited, the character is added
 % to \l_my_tmpa_str: as a jachar if \ucs gives a non-negative value for
 % it, as the recorded bytes otherwise.
 % A byte arriving while no sequence is in progress is copied unchanged
 % instead of being dropped (and instead of driving \l_my_run_int below
 % zero).
 % The function performs assignments, hence it is defined as protected.
 \cs_new_protected:Nn \__my_prefer_jachar:N
  {
    \int_compare:nNnTF { \l_my_run_int } > { 0 }
      {
        \str_put_right:Nn \l_my_raw_str {#1}
        \int_decr:N \l_my_run_int
        \int_set:Nn \l_my_uc_int { \l_my_uc_int * 64 + `#1 - "80 }
        \int_compare:nNnT { \l_my_run_int } = { 0 }
          {
            \int_compare:nNnTF { \ucs \l_my_uc_int } < { 0 }
              { \str_put_right:NV \l_my_tmpa_str \l_my_raw_str }
              { \str_put_right:Nx \l_my_tmpa_str { \__my_jachar:N \l_my_uc_int } }
          }
      }
      { \str_put_right:Nn \l_my_tmpa_str {#1} }
  }
 % \__my_prefer_jachar:Nnn <lead byte> <offset> <count>
 % Starts a new multi-byte sequence: \l_my_uc_int is set to the code of
 % <lead byte> minus <offset>, and \l_my_run_int to <count>, the number
 % of bytes still awaited.
 % The function performs assignments, hence it is defined as protected.
 \cs_new_protected:Nn \__my_prefer_jachar:Nnn
  {
    \str_clear:N \l_my_raw_str
    % Record the lead byte too.  \l_my_raw_str is what is output when the
    % character cannot be turned into a jachar; without the lead byte
    % that output would be an incomplete byte sequence.
    \str_put_right:Nn \l_my_raw_str {#1}
    \int_set:Nn \l_my_uc_int { `#1 - #2 }
    \int_set:Nn \l_my_run_int {#3}
  }
 % \__my_jachar:N <int var>
 % Expands to the character whose code is held in <int var>.  It is used
 % inside an x-type argument and therefore has to stay expandable.
 % The trailing \exp_stop_f: ends the number that precedes it.
 % pTeX: the value is passed through \ucs before \Uchar is applied.
 \sys_if_engine_ptex:T
  {
    \cs_new:Npn \__my_jachar:N #1
      { \Uchar \ucs #1 \exp_stop_f: }
  }
 % upTeX: \Ucharcat is used with 18 as its second number
 % (NOTE(review): presumably the kcatcode for "other" -- confirm).
 \sys_if_engine_uptex:T
  {
    \cs_new:Npn \__my_jachar:N #1
      { \Ucharcat #1 18 \exp_stop_f: }
  }
 %---------------------------------------
 \msg_new:nnn { my } { test } { <#1> }
 \str_new:N \l_my_test_str
 % Document-level test commands.  Each one takes a macro holding the
 % sample text and passes it on together with the function to apply:
 % \use:n (no change), \str_uppercase:n or \str_lowercase:n.
 \NewDocumentCommand \cTestStrRaw { m }
  { \my_test_change_case:Nn \use:n {#1} }
 \NewDocumentCommand \cTestStrUppercase { m }
  { \my_test_change_case:Nn \str_uppercase:n {#1} }
 \NewDocumentCommand \cTestStrLowercase { m }
  { \my_test_change_case:Nn \str_lowercase:n {#1} }
 % \my_test_change_case:Nn <function> <tokens>
 % Expands <tokens> once, applies <function> to the result, stores the
 % outcome as a string, makes it readable with \my_prefer_jachar:N and
 % writes it to the terminal.
 % The function performs assignments and output, hence it is defined
 % as protected.
 \cs_new_protected:Nn \my_test_change_case:Nn
  {
    \str_set:Nx \l_my_test_str { \exp_args:No #1 {#2} }
    \my_prefer_jachar:N \l_my_test_str
    \iow_term:x { \l_my_test_str }
  }
 \ExplSyntaxOff
 % Test document: typesets sample text through \MakeUppercase and
 % \MakeLowercase, and writes the results of the l3str case functions
 % to the terminal.
 \documentclass{article}
 % iftex supplies the \ifuptex switch used below.
 \usepackage{iftex}
 \usepackage[b4paper,scale=0.9]{geometry}
 \usepackage[LGR,T2A,T1]{fontenc}
 % Only on upTeX: pick which characters are treated as jachars.
 % Exactly one of the pxcjkcat lines below is meant to be active.
 \ifuptex
  % symbols are jachars, alphabetic letters are non-jachars
  \usepackage[ccv3,japanesevar]{pxcjkcat}
  % all but obvious CJK letters are non-jachars
  %\usepackage[ccv3,prefernoncjk]{pxcjkcat}
  % all but ASCII characters are jachars
  %\usepackage[ccv3,forcecjk]{pxcjkcat}
 \fi
 % \cGrek / \cCyrl typeset their argument in the LGR / T2A font encoding.
 \NewDocumentCommand \cGrek {m} {{\fontencoding{LGR}\selectfont#1}}
 \NewDocumentCommand \cCyrl {m} {{\fontencoding{T2A}\selectfont#1}}
 \makeatletter
 % deliberately invalidates the fallback
 % (\@uclclist is made empty)
 \let\@uclclist\@empty
 \makeatother
 \begin{document}
 % Tests for \MakeUppercase/\MakeLowercase.
 % The sample is typeset three times: as is, uppercased and lowercased.
 \newcommand*\SampleT{%
  ABCxyz+?%
  £§±¶½ÅÆÇÊÏÐÑÒ×ØÙÝÞß%
  ãæçéìðô÷øûþÿ%
  ĂĘŁŊŐąěğŋşůż%
  \cGrek{ΑΒΓΩαβγω}%
  \cCyrl{АБВЉЇЯабвґџя}%
  ‰}
 \SampleT\par
 \MakeUppercase{\SampleT}\par
 \MakeLowercase{\SampleT}\par
 % Tests for the l3str functions.
 % Same characters as above but without the font-encoding wrappers;
 % the three results are written to the terminal.
 \newcommand*\SampleS{%
  ABCxyz+?%
  £§±¶½ÅÆÇÊÏÐÑÒ×ØÙÝÞß%
  ãæçéìðô÷øûþÿ%
  ĂĘŁŊŐąěğŋşůż%
  ΑΒΓΩαβγω%
  АБВЉЇЯабвґџя%
  ‰}
 \cTestStrRaw{\SampleS}
 \cTestStrUppercase{\SampleS}
 \cTestStrLowercase{\SampleS}
 % done
 \end{document}
	\ExplSyntaxOn
	%---------------------------------------
	%% \token_if_jachar:N <token>
	% Tests if <token> is a jachar token or not.
	% The predicate (_p) and the T, F and TF forms are all provided.
	% On pTeX/upTeX the test inspects the meaning string of <token>;
	% on every other engine the conditional is always false.
	\bool_lazy_or:nnTF
	{ \sys_if_engine_ptex_p: }
	{ \sys_if_engine_uptex_p: }
	{
	% The definitions are built inside \use:x so that \tl_to_str:n { kanji }
	% is already turned into its string form when they are made.  Tokens
	% preceded by \exp_not:N are kept as they are, and the doubled hashes
	% become the parameters of the functions being defined.
	\use:x
	{
	\prg_new_conditional:Npnn \exp_not:N \token_if_jachar:N ##1
	{ p , T , F , TF }
	{
	% A token is a jachar token if and only if
	% its meaning string starts with "kanji ".
	% The helper returns whatever precedes the first "kanji " in
	% its input, so an empty result means that the meaning string
	% begins with "kanji ".
	\exp_not:N \str_if_eq:eeTF
	{
	\exp_not:N \exp_after:wN
	\exp_not:N \__token_delimit_by_kanji:w
	\exp_not:N \token_to_meaning:N ##1
	% "?" and a sentinel "kanji " are appended, so the helper
	% always finds its delimiter; when there is no earlier
	% match the result ends in "?" and is thus not empty.
	? \tl_to_str:n { kanji } ~ \s__token_stop
	}
	{ }
	{ \exp_not:N \prg_return_true: }
	{ \exp_not:N \prg_return_false: }
	}
	% \__token_delimit_by_kanji:w <text> \s__token_stop
	% Leaves the part of <text> before the first "kanji " and drops
	% everything after it up to \s__token_stop.
	\cs_new:Npn \exp_not:N \__token_delimit_by_kanji:w ##1
	\tl_to_str:n { kanji } ~ ##2 \s__token_stop
	{##1}
	}
	}
	{
	% Neither pTeX nor upTeX: no token is ever reported as a jachar.
	\prg_new_conditional:Npnn \token_if_jachar:N #1
	{ p , T , F , TF }
	{ \prg_return_false: }
	}

	%---------------------------------------
	%% Fix for the case-change functions in l3text.
	% On pTeX/upTeX, keep the meaning that \__text_change_case_char_auxii:nnnN
	% has at this point under the name \__text_change_case_char_jachar:nnnN.
	% This copy has to be taken before \__text_change_case_char_auxii:nnnN
	% is given a new definition further down in this file.
	\bool_if:nT
	  { \sys_if_engine_ptex_p: || \sys_if_engine_uptex_p: }
	  {
	    \cs_gset_eq:NN
	      \__text_change_case_char_jachar:nnnN
	      \__text_change_case_char_auxii:nnnN
	  }
	% The current implementation for pdftex should be employed
	% for all non-Unicode engines, i.e. for everything that is neither
	% LuaTeX nor XeTeX.
	\bool_if:nF
	  { \sys_if_engine_luatex_p: || \sys_if_engine_xetex_p: }
	  {
	    % #1, #2, #3 are handed on unchanged; #4 is the character token whose
	    % code selects the handler: above "EF the one taking four N-type
	    % tokens, otherwise the one taking three.
	    \cs_gset:Nn \__text_change_case_char_auxii:nnnN
	      {
	        \int_compare:nNnTF { `#4 } > { "EF }
	          { \__text_change_case_char_UTFviii:nnnNNNN }
	          { \__text_change_case_char_UTFviii:nnnNNN }
	            {#1} {#2} {#3} #4
	      }
	  }
	% pTeX/upTeX only: dispatch on the character token #4, treating jachar
	% tokens separately from the byte-wise UTF-8 handling.
	\bool_if:nT
	  { \sys_if_engine_ptex_p: || \sys_if_engine_uptex_p: }
	  {
	    \cs_gset:Nn \__text_change_case_char:nnnN
	      {
	        % Codes up to and including "80 take the plain path.  (The
	        % original author asked whether the cut-off should be "7F.)
	        \int_compare:nNnTF { `#4 } < { "81 }
	          { \__text_change_case_char_auxi:nnnN }
	          {
	            % Check if the given character token is a jachar.
	            \token_if_jachar:NTF #4
	              { \__text_change_case_char_jachar:nnnN }
	              {
	                % Not a jachar: above "DF the choice between the longer
	                % handlers is left to ..._auxii:nnnN, otherwise the
	                % handler taking two N-type tokens is used.
	                \int_compare:nNnTF { `#4 } > { "DF }
	                  { \__text_change_case_char_auxii:nnnN }
	                  { \__text_change_case_char_UTFviii:nnnNN }
	              }
	          }
	            % The function selected above is applied to these arguments.
	            {#1} {#2} {#3} #4
	      }
	  }

	%---------------------------------------
	%% Fix for the case-change functions in l3str.
	% The current implementation for pdftex should be employed
	% for all non-Unicode engines, i.e. for everything that is neither
	% LuaTeX nor XeTeX.
	\bool_if:nF
	  { \sys_if_engine_luatex_p: || \sys_if_engine_xetex_p: }
	  {
	    % Both functions gather the N-type tokens that follow #1 into one
	    % braced argument and pass it on to ..._UTFviii:nn.
	    \cs_gset:Nn \__str_change_case_char_UTFviii:nNNN
	      { \__str_change_case_char_UTFviii:nn {#1} { #2 #3 #4 } }
	    \cs_gset:Nn \__str_change_case_char_UTFviii:nNNNN
	      { \__str_change_case_char_UTFviii:nn {#1} { #2 #3 #4 #5 } }
	  }
	% pTeX/upTeX only: per-character step of the l3str case changer, with
	% jachar tokens passed through unchanged.
	\bool_if:nT
	  { \sys_if_engine_ptex_p: || \sys_if_engine_uptex_p: }
	  {
	    % #1 is the case name used to build the char_str_...case:N function,
	    % #2 the character token being processed.
	    \cs_gset:Nn \__str_change_case_char:nN
	      {
	        \__str_if_recursion_tail_stop_do:Nn #2
	          { \__str_change_case_end:wn }
	        \int_compare:nNnTF { `#2 } < { "81 }
	          {
	            % Codes up to and including "80: convert the single character
	            % and carry on with the loop.
	            \__str_change_case_output:fw
	              { \use:c { char_str_ #1 case:N } #2 }
	            \__str_change_case_loop:nw {#1}
	          }
	          {
	            % Check if the given character token is a jachar.
	            \token_if_jachar:NTF #2
	              { \__str_change_case_char_jachar:nN }
	              {
	                % Not a jachar: the code of #2 selects the handler taking
	                % two, three or four N-type tokens.
	                \int_compare:nNnTF { `#2 } > { "DF }
	                  {
	                    \int_compare:nNnTF { `#2 } > { "EF }
	                      { \__str_change_case_char_UTFviii:nNNNN }
	                      { \__str_change_case_char_UTFviii:nNNN }
	                  }
	                  { \__str_change_case_char_UTFviii:nNN }
	              }
	            % The function selected above is applied to these arguments.
	            {#1} #2
	          }
	      }
	    % A jachar is output as it is, then the loop continues.
	    \cs_new:Nn \__str_change_case_char_jachar:nN
	      {
	        \__str_change_case_output:nw {#2}
	        \__str_change_case_loop:nw {#1}
	      }
	  }
	%=======================================
	% MATERIALS FOR TESTING
	%---------------------------------------
	% Scratch variables of the jachar conversion.
	\str_new:N \l_my_tmpa_str  % result string being assembled
	\str_new:N \l_my_raw_str   % bytes collected for the sequence in progress
	\int_new:N \l_my_uc_int    % value accumulated from the bytes seen so far
	\int_new:N \l_my_run_int   % bytes still awaited for the current sequence
	% \my_prefer_jachar:N <str var>
	% Converts all non-ASCII characters in <str var> to jachars, so that
	% the display of the string will be readable (on (u)pTeX).
	% On any other engine <str var> is left untouched: the conversion uses
	% the \ucs primitive of (u)pTeX and \__my_jachar:N, which this file
	% defines on those engines only.
	% The function performs assignments, hence it is defined as protected.
	\cs_new_protected:Nn \my_prefer_jachar:N
	  {
	    \bool_lazy_or:nnT
	      { \sys_if_engine_ptex_p: }
	      { \sys_if_engine_uptex_p: }
	      {
	        % Start from a clean state, so that an unfinished sequence left
	        % over from an earlier call cannot leak into this one.
	        \str_clear:N \l_my_tmpa_str
	        \str_clear:N \l_my_raw_str
	        \int_zero:N \l_my_uc_int
	        \int_zero:N \l_my_run_int
	        \str_map_inline:Nn #1
	          {
	            % The first true test wins.  A token matching none of them
	            % (code "80 to "BF) is handled as a continuation byte.
	            \bool_case_true:nF
	              {
	                % Already a jachar: copy it.
	                { \token_if_jachar_p:N ##1 }
	                  { \str_put_right:Nn \l_my_tmpa_str {##1} }
	                % ASCII: copy it.
	                { \int_compare_p:nNn { `##1 } < { "80 } }
	                  { \str_put_right:Nn \l_my_tmpa_str {##1} }
	                % Lead byte; three more bytes are awaited.
	                { \int_compare_p:nNn { `##1 } > { "EF } }
	                  { \__my_prefer_jachar:Nnn ##1 { "F0 } { 3 } }
	                % Lead byte; two more bytes are awaited.
	                { \int_compare_p:nNn { `##1 } > { "DF } }
	                  { \__my_prefer_jachar:Nnn ##1 { "E0 } { 2 } }
	                % Lead byte; one more byte is awaited.
	                { \int_compare_p:nNn { `##1 } > { "BF } }
	                  { \__my_prefer_jachar:Nnn ##1 { "C0 } { 1 } }
	              }
	              { \__my_prefer_jachar:N ##1 }
	          }
	        \str_set_eq:NN #1 \l_my_tmpa_str
	      }
	  }
	% \__my_prefer_jachar:N <byte>
	% Handles a continuation byte of the sequence in progress.  The byte is
	% recorded in \l_my_raw_str and its low six bits are appended to
	% \l_my_uc_int.  Once no further byte is awaited, the character is added
	% to \l_my_tmpa_str: as a jachar if \ucs gives a non-negative value for
	% it, as the recorded bytes otherwise.
	% A byte arriving while no sequence is in progress is copied unchanged
	% instead of being dropped (and instead of driving \l_my_run_int below
	% zero).
	% The function performs assignments, hence it is defined as protected.
	\cs_new_protected:Nn \__my_prefer_jachar:N
	  {
	    \int_compare:nNnTF { \l_my_run_int } > { 0 }
	      {
	        \str_put_right:Nn \l_my_raw_str {#1}
	        \int_decr:N \l_my_run_int
	        \int_set:Nn \l_my_uc_int { \l_my_uc_int * 64 + `#1 - "80 }
	        \int_compare:nNnT { \l_my_run_int } = { 0 }
	          {
	            \int_compare:nNnTF { \ucs \l_my_uc_int } < { 0 }
	              { \str_put_right:NV \l_my_tmpa_str \l_my_raw_str }
	              { \str_put_right:Nx \l_my_tmpa_str { \__my_jachar:N \l_my_uc_int } }
	          }
	      }
	      { \str_put_right:Nn \l_my_tmpa_str {#1} }
	  }
	% \__my_prefer_jachar:Nnn <lead byte> <offset> <count>
	% Starts a new multi-byte sequence: \l_my_uc_int is set to the code of
	% <lead byte> minus <offset>, and \l_my_run_int to <count>, the number
	% of bytes still awaited.
	% The function performs assignments, hence it is defined as protected.
	\cs_new_protected:Nn \__my_prefer_jachar:Nnn
	  {
	    \str_clear:N \l_my_raw_str
	    % Record the lead byte too.  \l_my_raw_str is what is output when the
	    % character cannot be turned into a jachar; without the lead byte
	    % that output would be an incomplete byte sequence.
	    \str_put_right:Nn \l_my_raw_str {#1}
	    \int_set:Nn \l_my_uc_int { `#1 - #2 }
	    \int_set:Nn \l_my_run_int {#3}
	  }
	% \__my_jachar:N <int var>
	% Expands to the character whose code is held in <int var>.  It is used
	% inside an x-type argument and therefore has to stay expandable.
	% The trailing \exp_stop_f: ends the number that precedes it.
	% pTeX: the value is passed through \ucs before \Uchar is applied.
	\sys_if_engine_ptex:T
	  {
	    \cs_new:Npn \__my_jachar:N #1
	      { \Uchar \ucs #1 \exp_stop_f: }
	  }
	% upTeX: \Ucharcat is used with 18 as its second number
	% (NOTE(review): presumably the kcatcode for "other" -- confirm).
	\sys_if_engine_uptex:T
	  {
	    \cs_new:Npn \__my_jachar:N #1
	      { \Ucharcat #1 18 \exp_stop_f: }
	  }
	%---------------------------------------
	\msg_new:nnn { my } { test } { <#1> }
	\str_new:N \l_my_test_str
	% Document-level test commands.  Each one takes a macro holding the
	% sample text and passes it on together with the function to apply:
	% \use:n (no change), \str_uppercase:n or \str_lowercase:n.
	\NewDocumentCommand \cTestStrRaw { m }
	  { \my_test_change_case:Nn \use:n {#1} }
	\NewDocumentCommand \cTestStrUppercase { m }
	  { \my_test_change_case:Nn \str_uppercase:n {#1} }
	\NewDocumentCommand \cTestStrLowercase { m }
	  { \my_test_change_case:Nn \str_lowercase:n {#1} }
	% \my_test_change_case:Nn <function> <tokens>
	% Expands <tokens> once, applies <function> to the result, stores the
	% outcome as a string, makes it readable with \my_prefer_jachar:N and
	% writes it to the terminal.
	% The function performs assignments and output, hence it is defined
	% as protected.
	\cs_new_protected:Nn \my_test_change_case:Nn
	  {
	    \str_set:Nx \l_my_test_str { \exp_args:No #1 {#2} }
	    \my_prefer_jachar:N \l_my_test_str
	    \iow_term:x { \l_my_test_str }
	  }
	\ExplSyntaxOff
	% Test document: typesets sample text through \MakeUppercase and
	% \MakeLowercase, and writes the results of the l3str case functions
	% to the terminal.
	\documentclass{article}
	% iftex supplies the \ifuptex switch used below.
	\usepackage{iftex}
	\usepackage[b4paper,scale=0.9]{geometry}
	\usepackage[LGR,T2A,T1]{fontenc}
	% Only on upTeX: pick which characters are treated as jachars.
	% Exactly one of the pxcjkcat lines below is meant to be active.
	\ifuptex
	% symbols are jachars, alphabetic letters are non-jachars
	\usepackage[ccv3,japanesevar]{pxcjkcat}
	% all but obvious CJK letters are non-jachars
	%\usepackage[ccv3,prefernoncjk]{pxcjkcat}
	% all but ASCII characters are jachars
	%\usepackage[ccv3,forcecjk]{pxcjkcat}
	\fi
	% \cGrek / \cCyrl typeset their argument in the LGR / T2A font encoding.
	\NewDocumentCommand \cGrek {m} {{\fontencoding{LGR}\selectfont#1}}
	\NewDocumentCommand \cCyrl {m} {{\fontencoding{T2A}\selectfont#1}}
	\makeatletter
	% deliberately invalidates the fallback
	% (\@uclclist is made empty)
	\let\@uclclist\@empty
	\makeatother
	\begin{document}
	% Tests for \MakeUppercase/\MakeLowercase.
	% The sample is typeset three times: as is, uppercased and lowercased.
	\newcommand*\SampleT{%
	ABCxyz+?%
	£§±¶½ÅÆÇÊÏÐÑÒ×ØÙÝÞß%
	ãæçéìðô÷øûþÿ%
	ĂĘŁŊŐąěğŋşůż%
	\cGrek{ΑΒΓΩαβγω}%
	\cCyrl{АБВЉЇЯабвґџя}%
	‰}
	\SampleT\par
	\MakeUppercase{\SampleT}\par
	\MakeLowercase{\SampleT}\par
	% Tests for the l3str functions.
	% Same characters as above but without the font-encoding wrappers;
	% the three results are written to the terminal.
	\newcommand*\SampleS{%
	ABCxyz+?%
	£§±¶½ÅÆÇÊÏÐÑÒ×ØÙÝÞß%
	ãæçéìðô÷øûþÿ%
	ĂĘŁŊŐąěğŋşůż%
	ΑΒΓΩαβγω%
	АБВЉЇЯабвґџя%
	‰}
	\cTestStrRaw{\SampleS}
	\cTestStrUppercase{\SampleS}
	\cTestStrLowercase{\SampleS}
	% done
	\end{document}