Last active
February 18, 2019 12:18
-
-
Save zr-tex8r/9f94493ecab4ae4e1ce6 to your computer and use it in GitHub Desktop.
To make CMap files from OpenType font files using texlua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- zrmakecmap.lua | |
-- Program identification; used by the usage text and as the log prefix.
prog_name, version, mod_date = "zrmakecmap", "0.3", "2015/10/09"
-- Run-time switches with their defaults; read_option() may change them.
verbose, sort, fwid = false, false, true
inencoding = "utf16"
---------------------------------------- preparations
-- Values filled in later (command line / cid2code data); nil until then.
filename = nil
fontname = nil
outname = nil
jis_map = nil
-- Short aliases for the table helpers used throughout the script.
tinsert = table.insert
tconcat = table.concat
tunpack = unpack or table.unpack
-- Load the engine-provided libraries; `texlua` records whether that worked
-- (main() refuses to run when it is false).
local function load_tex_libraries()
  fontloader = require "fontloader"
  kpse = require "kpse"
  kpse.set_program_name("dvipdfmx")
end
texlua = pcall(load_tex_libraries)
---------------------------------------- create CMap text | |
do | |
-- Largest number of entries add_partition() puts into one
-- "begin<name> ... end<name>" section.
local blocksize = 100
-- string.format template for the head of the CMap file. It takes eleven
-- values: name, name, registry, ordering, supplement, version, registry,
-- ordering, supplement, name, version. A literal "%" is written as "%%".
-- The template lines must stay free of trailing decoration, because every
-- byte between the long brackets is copied into the generated file.
local prologue = [[
%%!PS-Adobe-3.0 Resource-CMap
%%%%DocumentNeededResources: ProcSet (CIDInit)
%%%%IncludeResource: ProcSet (CIDInit)
%%%%BeginResource: CMap (%s)
%%%%Title: (%s %s %s %s)
%%%%Version: %s
%%%%EndComments
%% NOTICE:
%% It is intended that this file is used only with dvipdfmx.
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo 3 dict dup begin
/Registry (%s) def
/Ordering (%s) def
/Supplement %s def
end def
/CMapName /%s def
/CMapVersion %s def
/CMapType 1 def
/WMode 0 def]]
-- string.format template for the tail of the CMap file (takes no values;
-- it goes through format() only so that "%%" collapses to "%").
local epilogue = [[
endcmap
CMapName currentdict /CMap defineresource pop
end
end
%%%%EndResource
%%%%EOF
]]
-- Format code point `uc` as lowercase hex digits of its UTF-16 encoding:
-- four digits below 0x10000, otherwise the eight digits of the surrogate
-- pair.
local function hex_utf16(uc)
  if uc < 0x10000 then
    return string.format("%04x", uc)
  end
  -- lead = 0xD800 + ((uc - 0x10000) >> 10), which equals 0xD7C0 + (uc >> 10);
  -- trail = 0xDC00 + the low ten bits.
  local lead = 0xD7C0 + math.floor(uc / 0x400)
  local trail = 0xDC00 + uc % 0x400
  return string.format("%04x%04x", lead, trail)
end
-- Format `uc` as at least four lowercase hex digits (a two-byte code).
local function hex_2(uc)
  return string.format("%04x", uc)
end
-- Format `uc` as at least eight lowercase hex digits (a four-byte code).
local function hex_4(uc)
  return string.format("%08x", uc)
end
-- Scan map[minuc..maxuc] and collapse it into runs.
-- Returns a list of { first_code, last_code, first_value } triples. A run
-- continues only while the mapped value grows by exactly one per code and
-- the code is not a multiple of 256, so no run crosses a 256-code boundary.
local function rearrange(map, minuc, maxuc)
  local ranges = {}
  local run_start, run_value  -- first code / first value of the open run
  local prev_value            -- value at the previous code (nil = unmapped)
  for uc = minuc, maxuc do
    local value = map[uc]
    local continues = prev_value and prev_value + 1 == value
      and uc % 256 > 0
    if not continues then
      -- Close the run that ended at the previous code, if there was one.
      if prev_value then
        ranges[#ranges + 1] = { run_start, uc - 1, run_value }
      end
      -- Open a new run when this code is mapped.
      if value then
        run_start, run_value = uc, value
      end
    end
    prev_value = value
  end
  -- A run still open at the end of the scan is closed at maxuc.
  if prev_value then
    ranges[#ranges + 1] = { run_start, maxuc, run_value }
  end
  return ranges
end
-- Build the run list for a Unicode-keyed map.
-- `maxuc` defaults to 0x10FFFF. When codes 0..31 all share one mapped
-- value, that value is returned as `notdef` and the scan starts at 32;
-- otherwise `notdef` is nil and the scan starts at 0.
-- Returns: ranges (see rearrange), notdef.
local function rearrange_unicode(map, maxuc)
  local limit = maxuc or 0x10FFFF
  local notdef = map[0]
  for uc = 1, 31 do
    if map[uc] ~= notdef then
      -- One mismatch is enough; nothing later can restore the value.
      notdef = nil
      break
    end
  end
  local first = 0
  if notdef then first = 32 end
  return rearrange(map, first, limit), notdef
end
-- Build the run list for JIS input codes.
-- For every entry of the global `jis_map` (JIS code -> list of candidate
-- keys into `map`), the first candidate that has a value in `map` decides
-- the value for that JIS code. The scan covers 0x2100..0x7FFF.
-- Returns: ranges (see rearrange), nil.
local function rearrange_jis(map)
  local jismap = {}
  for jc, ucary in pairs(jis_map) do
    local value
    local i, n = 1, #ucary
    while i <= n and not value do
      value = map[ucary[i]]
      i = i + 1
    end
    jismap[jc] = value
  end
  return rearrange(jismap, 0x2100, 0x7FFF), nil
end
-- Append `lines` to `flines` as one or more sections of the form
--   <count> begin<name> / ...entries... / end<name>
-- each preceded by an empty line and holding at most `blocksize` entries.
-- When the global `sort` flag is set, `lines` is sorted in place first.
-- Nothing is appended when `lines` is empty.
local function add_partition (flines, name, lines)
  if sort then table.sort(lines) end
  local total = #lines
  for first = 1, total, blocksize do
    local last = math.min(first + blocksize - 1, total)
    tinsert(flines, '')
    tinsert(flines, ("%s begin%s"):format(last - first + 1, name))
    for idx = first, last do
      tinsert(flines, lines[idx])
    end
    tinsert(flines, ("end%s"):format(name))
  end
end
-- Build the complete text of a CMap resource.
--   name    : CMap name (header comments and /CMapName)
--   version : version string (%%Version and /CMapVersion)
--   ros     : sequence { registry, ordering, supplement }
--   map     : table indexed by Unicode code point; its values are written
--             as the CIDs of the cidchar/cidrange entries
--   maxuc   : highest code point to scan (used by the Unicode rearranger;
--             the JIS rearranger ignores it)
--   inenc   : input encoding: "utf32", "utf16" or "jis"
-- Returns the CMap text as one string. Any other `inenc` aborts through
-- sure(nil, 9).
function cmap_text(name, version, ros, map, maxuc, inenc)
  local out = {}
  local rr, ro, rs = tunpack(ros)
  tinsert(out, prologue:format(
      name, name, rr, ro, rs, version, rr, ro, rs,
      name, version))
  --
  -- hex   : formatter for input codes
  -- rearr : function turning `map` into a run list (kept local so that it
  --         does not leak into the global table)
  -- csrls : codespacerange lines for the chosen encoding
  local hex, rearr, csrls
  local ndrls, ccls, crls = {}, {}, {}
  if inenc == "utf32" then
    hex, rearr = hex_4, rearrange_unicode
    csrls = { " <00000000> <0010FFFF>" }
  elseif inenc == "utf16" then
    hex, rearr = hex_utf16, rearrange_unicode
    csrls = { " <0000> <D7FF>",
              " <D800DC00> <DBFFDFFF>",
              " <E000> <FFFF>" }
  elseif inenc == "jis" then
    hex, rearr = hex_2, rearrange_jis
    csrls = { " <2121> <7E7E>" }
  else sure(nil, 9)
  end
  local ranges, notdef = rearr(map, maxuc)
  if notdef then
    -- Codes 0..31 share one value: emit them as a single notdef range.
    ndrls[1] = ("<%s> <%s> %s"):format(hex(0), hex(31), notdef)
  end
  for i = 1, #ranges do
    local suc, euc, scc = tunpack(ranges[i])
    if suc == euc then
      -- Single code: cidchar entry.
      tinsert(ccls, ("<%s> %s"):format(hex(suc), scc))
    else
      -- Several consecutive codes: cidrange entry.
      tinsert(crls,
          ("<%s> <%s> %s"):format(hex(suc), hex(euc), scc))
    end
  end
  add_partition(out, "codespacerange", csrls)
  add_partition(out, "notdefrange", ndrls)
  add_partition(out, "cidchar", ccls)
  add_partition(out, "cidrange", crls)
  tinsert(out, '')
  --
  tinsert(out, epilogue:format())
  return tconcat(out, "\n")
end
end | |
---------------------------------------- process one font | |
do | |
-- Reduce a font version string to a plain number string.
-- Underscores between two digits are dropped first; then the leading
-- "digits.digits" (or, failing that, the leading digits) is kept. "0" is
-- used when the string does not start with a digit or is nil. A note is
-- logged whenever the result differs from the input.
local function ver_number(version)
  local cleaned = (version or ""):gsub("(%d)_(%d)", "%1%2")
  local ver = cleaned:match("^%d+%.%d+")
    or cleaned:match("^%d+")
    or "0"
  if ver ~= version then
    info("version is not a number", version)
  end
  info("resolved version", ver)
  return ver
end
-- Resolve the Registry-Ordering-Supplement triple of a font.
-- `cidinfo` may be nil; missing fields fall back to "Adobe", "Identity"
-- and 0. Returns the sequence { registry, ordering, supplement }.
local function ros_info(cidinfo)
  local ci = cidinfo or {}
  if not ci.registry then
    info("cidinfo is missing")
  end
  local registry = ci.registry or "Adobe"
  local ordering = ci.ordering or "Identity"
  local supplement = ci.supplement or 0
  local res = { registry, ordering, supplement }
  info("resolved ROS", tconcat(res, "-"))
  return res
end
-- Pull the code-to-glyph table out of a font's map object.
-- Aborts (via sure) when `mapobj.map` is absent.
-- Returns: mapobj.map, and mapobj.encmax or 0x10FFFF when that is unset.
local function map_info(mapobj)
  local obj = mapobj or {}
  sure(obj.map, "cannot find map data")
  info("obtained map data")
  local encmax = obj.encmax
  info("*max codepoint", encmax or "(unknown)")
  return obj.map, encmax or 0x10FFFF
end
-- Open a font with fontloader and collect what cmap_text() needs.
--   file : path of the font file
--   name : font name inside the file, or nil/false to open the file
--          without naming a font
-- Returns: version string, ROS triple, code map, maximum code point.
-- Aborts (via sure) when the font cannot be opened; the message names the
-- offending file.
function extract_map(file, name)
  info("open font file", file)
  local font, diag
  if name then
    font, diag = fontloader.open(file, name)
  else
    font, diag = fontloader.open(file)
  end
  if diag then
    -- Show at most 5 diagnostics when the font loaded, 50 when it failed.
    local max = math.min(#diag, font and 5 or 50)
    info(("-------- diagnoses from fontloader (%s of %s)")
        :format(max, #diag))
    for i = 1, max do info(diag[i]) end
    info("-------- end")
  end
  -- Report the `file` parameter (the original passed an undefined name).
  sure(font, "failure in extracing map data", file)
  local version = ver_number(font.version)
  local ros = ros_info(font.cidinfo)
  local map, maxuc = map_info(font.map)
  return version, ros, map, maxuc
end
end | |
---------------------------------------- resolve parameters | |
do | |
-- Compose the CMap name for name core `core` according to the global
-- `inencoding`: "Uni<core>-UTF32-H", "Uni<core>-UTF16-H" or "Jis<core>-H".
-- Any other encoding aborts through sure(nil, 8).
local function out_name(core)
  local affixes = {
    utf32 = { "Uni", "-UTF32-H" },
    utf16 = { "Uni", "-UTF16-H" },
    jis = { "Jis", "-H" },
  }
  local affix = affixes[inencoding]
  if not affix then
    sure(nil, 8)
    return
  end
  return affix[1]..core..affix[2]
end
-- Strip every non-alphanumeric character from `name` (nil counts as "").
-- Returns exactly one value: the outer parentheses discard the
-- substitution count that gsub returns as its second result.
local function trim(name)
  return ((name or ""):gsub("%W", ""))
end
-- Increment the tally kept for `name` in table `sum` (starting from 1).
local function count(sum, name)
  local seen = sum[name]
  if seen then
    sum[name] = seen + 1
  else
    sum[name] = 1
  end
end
-- Work out which fonts of the input file to process and how to name the
-- resulting CMaps. Reads the globals `filename`, `fontname` (a number
-- index, a name string, or nil) and `outname`.
-- Returns a sequence of { name = <font name to open, or false>,
-- out = <CMap name> }, one per font to process. Aborts on an invalid font
-- file, an unknown or ambiguous font name, an out-of-range index, or an
-- output name given while processing all fonts.
function resolve_param()
  local finfo, msg = fontloader.info(filename)
  local ttc, sel = false, fontname
  if not finfo then
    info("-------- diagnosis from fontloader")
    info(msg)
    abort("not a valid font file", filename)
  elseif finfo[1] then
    -- An array of info records means a collection.
    ttc = true; info("TTC file", filename)
  elseif finfo.fontname then
    -- A single record: treat it as a one-element collection, index 0.
    info("not TTC file", filename)
    sel = sel or 0; finfo = { finfo }
  else sure(nil, 5)
  end
  if type(sel) == "string" then
    -- Resolve a font name to its 0-based index. Each font is tested once,
    -- so a font whose fontname and fullname are both equal to the request
    -- counts as one match, not as two.
    local t = {}
    for i = 1, #finfo do
      local fi = finfo[i]
      if fi.fontname == sel or fi.fullname == sel then
        tinsert(t, i - 1)
      end
    end
    sure(#t <= 1, "font name is ambiguous", sel)
    sure(#t >= 1, "no such font in font file", sel)
    sel = t[1]
  end
  info("font count", #finfo)
  info("font index", sel or "(all)")
  sure(not sel or (0 <= sel and sel < #finfo),
      "bad font index number", sel)
  sure(not outname or sel,
      "cannot specify output file name in processing all fonts")
  if sel then
    local fi = finfo[sel + 1]
    return { {
      name = ttc and fi.fullname,
      out = outname or out_name(trim(fi.familyname))
    } }
  else
    -- All fonts: derive a name core from each family name, then make the
    -- cores unique by appending the weight and, if still needed, the index.
    local res, sum = {}, {}
    for i = 1, #finfo do
      local fi = finfo[i]
      res[i] = { name = fi.fullname, out = trim(fi.familyname) }
      info("font name", i, res[i].name)
      info("out-core", i, res[i].out)
      count(sum, res[i].out)
    end
    for i = 1, #finfo do
      if sum[res[i].out] > 1 then
        res[i].out = res[i].out.."_"..trim(finfo[i].weight)
        info("new out-core", i, res[i].out)
        count(sum, res[i].out)
      end
    end
    for i = 1, #finfo do
      if sum[res[i].out] > 1 then
        res[i].out = res[i].out.."_"..tostring(i)
        info("new out-core", i, res[i].out)
      end
      res[i].out = out_name(res[i].out)
      info("output", i, res[i].out)
    end
    return res
  end
end
end | |
---------------------------------------- create JIS-UCS map | |
do | |
-- Column titles looked up in the cid2code table: the JIS column and the
-- Unicode column.
local cnjis, cnucs = "H", "UniJIS-UCS2"
local ccname = "Adobe-Japan1"
-- Parse one table cell into a list of numbers.
-- A cell of exactly four hex digits gives a one-element list; anything
-- else is split at commas and every piece that parses as hex is kept.
-- Returns a (possibly empty) sequence of numbers.
local function cvalue(expr)
  if #expr == 4 then
    local single = tonumber(expr, 16)
    if single then return { single } end
  end
  local set = {}
  for _, piece in ipairs(expr:explode(",")) do
    local cv = tonumber(piece, 16)
    if cv then set[#set + 1] = cv end
  end
  return set
end
-- Parse an already opened cid2code table.
-- Returns: map (JIS code -> list of Unicode values) and a flag telling
-- that something unusual was seen; returns nothing when the file does not
-- look like the expected table.
local function parse_cid2code(file)
  -- Skip the leading comment/blank lines; the table is accepted only if
  -- one of those comment lines mentions the Unicode column name.
  local ok, line = false, nil
  while true do
    line = file:read("*l")
    if not (line and (line == "" or line:sub(1, 1) == "#")) then
      break
    elseif line:find(cnucs, 1, true) then
      ok = true
    end
  end
  if not ok or not line then return end
  -- `line` is now the title row; it must begin with the "CID" column.
  local fs = line:explode("\t")
  if fs[1] ~= "CID" then return end
  local cjis, cucs
  for k, cn in ipairs(fs) do
    if cn == cnjis then cjis = k end
    if cn == cnucs then cucs = k end
  end
  if not (cjis and cucs) then return end
  local map, alrt = {}, false
  for row in file:lines() do
    -- Comment and blank lines may also occur after the title row (for
    -- example at the very end of the file); they carry no data.
    if row ~= "" and row:sub(1, 1) ~= "#" then
      local cols = row:explode("\t")
      local jis, ucs = cols[cjis], cols[cucs]
      -- Rows too short to hold both columns are skipped.
      if jis and ucs and jis ~= "*" then
        if ucs == "*" then
          alrt = true
        else
          local cvjis, cvucs = cvalue(jis), cvalue(ucs)
          if #cvjis > 0 and #cvucs > 0 then
            if #cvjis > 1 then alrt = true end
            map[cvjis[1]] = cvucs
          end
        end
      end
    end
  end
  return map, alrt
end
-- Read the cid2code table at `path`.
-- Raises an error when the file cannot be opened. Returns map, alert flag
-- (see parse_cid2code), or nil when the file is not a suitable table. The
-- file handle is closed on every path.
local function one_file(path)
  local file = assert(io.open(path, "rb"))
  local ok, map, alrt = pcall(parse_cid2code, file)
  file:close()
  if not ok then error(map, 0) end
  return map, alrt
end
-- Locate cid2code.txt through kpse and build the global `jis_map` from the
-- first candidate file that parses. Candidates are tried in the order in
-- which the lookup returned them. Aborts when none is suitable.
function make_jis_map()
  info("make JIS-UCS map from cid2code data...")
  local c2cs = { kpse.lookup("cid2code.txt", {
    format = "cmap files", all = true, mustexist = true
  }) }
  -- Both results stay local; neither may leak into the global table.
  local res, alrt
  for i = 1, #c2cs do
    local path = c2cs[i]
    info("try", path)
    res, alrt = one_file(path)
    if res then
      info("success")
      if alrt then info("(but strange cid2code data)") end
      break
    else info("failure (not AJ1 data)")
    end
  end
  sure(res, "suitable cid2code.txt is not found")
  jis_map = res
end
end | |
---------------------------------------- logging | |
do | |
-- Metatable that makes a table print as "{a,b,c}".
local stt_meta = {}
function stt_meta.__tostring(self)
  return "{"..concat(self, ",").."}"
end
-- Attach that printing behaviour to `tbl`; returns `tbl` itself.
function stt(tbl)
  return setmetatable(tbl, stt_meta)
end
-- Join the tostring() of every element of the sequence `tbl`; extra
-- arguments go to table.concat (separator, range). One trailing newline of
-- the result is removed.
-- Returns exactly one value: the outer parentheses discard the
-- substitution count that gsub returns as its second result.
function concat(tbl, ...)
  local t = {}
  for i = 1, #tbl do t[i] = tostring(tbl[i]) end
  return (table.concat(t, ...):gsub("\n$", ""))
end
-- Write a log line "<prog_name>: arg1: arg2: ..." to stderr.
-- Does nothing unless the global `verbose` flag is set.
function info(...)
  if verbose then
    local line = concat({ prog_name, ... }, ": ")
    io.stderr:write(line.."\n")
  end
end
-- Log the given message parts, then terminate with exit status -1.
function abort(...)
  -- Switch logging on so the message is shown regardless of the options.
  verbose = true
  info(...)
  os.exit(-1)
end
-- Assertion helper: hands back `val` when it is truthy; otherwise aborts
-- with the remaining arguments as the message. A numeric `a1` is shown as
-- "error(<number>)".
function sure(val, a1, ...)
  if not val then
    local head = a1
    if type(head) == "number" then
      head = "error("..head..")"
    end
    abort(head, ...)
    return
  end
  return val
end
end | |
---------------------------------------- main | |
do | |
-- Print the usage text to stdout and exit with status 0.
-- The text lines must stay free of trailing decoration, because every byte
-- between the long brackets is printed.
local function show_usage()
  io.stdout:write(([[
This is %s v%s <%s> by 'ZR'
Usage: %s[.lua] [<option>...] <font_file>
-h/--help show help
-v/--verbose be verbose
-o/--out <name> specify CMap (file) name to output
-i/--index <num> choose from TTC by font index (0-origin)
-n/--name <name> choose from TTC by font name
-j/--jis use JIS as CMap input codespace
--utf32 use UTF32 as CMap input codespace
--utf16 use UTF16 as CMap input codespace (default)
<font_file> name of the input font file, which can be
of TTF, OTF or TTC format.
]]):format(prog_name, version, mod_date, prog_name))
  os.exit(0)
end
-- Resolve a font file name through kpse, trying the "opentype fonts"
-- format first and "truetype fonts" second. Falls back to `name` itself
-- when neither lookup finds anything.
function find_file(name)
  for _, format in ipairs({ "opentype fonts", "truetype fonts" }) do
    local found = kpse.find_file(name, format, true)
    if found then return found end
  end
  return name
end
-- Parse the command line held in the global `arg`.
-- Options set the globals verbose, outname, fontname, fwid, inencoding and
-- sort; exactly one non-option argument must follow and is resolved into
-- the global `filename`. Shows the usage text when there are no arguments;
-- aborts on an unknown option, a missing option value or a wrong number of
-- arguments.
function read_option()
  if #arg == 0 then show_usage() end
  local idx = 1
  -- Step to the word after the current option and return it (may be nil).
  local function next_value()
    idx = idx + 1
    return arg[idx]
  end
  -- Option spelling -> action.
  local handlers = {}
  local function on(names, action)
    for _, spelling in ipairs(names) do handlers[spelling] = action end
  end
  on({ "-h", "--help" }, function() show_usage() end)
  on({ "-v", "--verbose" }, function() verbose = true end)
  on({ "-o", "--out" }, function()
    outname = sure(next_value(), "output name is missing")
  end)
  on({ "-i", "--index" }, function()
    fontname = tonumber(next_value())
    sure(fontname, "index is not a number")
  end)
  on({ "-n", "--name" }, function()
    fontname = sure(next_value(), "font name is missing")
  end)
  on({ "-f", "--fwid" }, function() fwid = true end)
  on({ "-F", "--no-fwid" }, function() fwid = false end)
  on({ "-j", "--jis" }, function() inencoding = "jis" end)
  on({ "--utf32" }, function() inencoding = "utf32" end)
  on({ "--utf16" }, function() inencoding = "utf16" end)
  on({ "--sort" }, function() sort = true end)
  while idx <= #arg do
    local opt = arg[idx]
    -- The first word not starting with "-" ends option processing.
    if opt:sub(1, 1) ~= "-" then break end
    local handler = handlers[opt]
    if handler then
      handler()
    else
      abort("invalid option", opt)
    end
    idx = idx + 1
  end
  sure(#arg == idx, "wrong number of arguments")
  filename = find_file(arg[idx])
end
-- Write `data` to the file named `file` (binary mode, replacing it).
-- Aborts when the file cannot be opened, or when writing or closing fails.
-- The close result is checked as well, because buffered data may only be
-- flushed, and therefore only fail, at that point.
local function write_whole(file, data)
  local ofile = io.open(file, "wb")
  sure(ofile, "cannot open for output", file)
  local written = ofile:write(data)
  local closed = ofile:close()
  sure(written and closed, "output failed", file)
end
-- Program driver: check the engine, read the options, build the JIS table
-- when JIS input was requested, then write one CMap file per selected font.
function main()
  sure(texlua, "this script requrires TeXlua")
  read_option()
  if inencoding == "jis" then make_jis_map() end
  for _, job in ipairs(resolve_param()) do
    -- `job.name` may be false (font opened without a name); log it as nil.
    local shown = job.name or nil
    info("******** PROCESS", filename, shown)
    local version, ros, map, maxuc = extract_map(filename, job.name)
    info("create cmap file", job.out)
    write_whole(job.out,
        cmap_text(job.out, version, ros, map, maxuc, inencoding))
    info("DONE", filename, shown)
  end
  info("all done")
end
end | |
---------------------------------------- done | |
-- Entry point: the script runs main() as soon as it is loaded.
main()
-- EOF |
Can I use this for TrueType fonts? How?
私の環境では cid2code.txt の最後にコメント行があって、そのままだとエラーになってしまうためパッチを作ってみました。
https://gist.github.com/trueroad/830f59df102e27392ea4d0ad54208121/revisions#diff-b817b49d8db36cca5a0788e014edd713
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script requires the LuaTeX engine, which is run as an extended Lua interpreter.