Last active
January 22, 2023 06:10
-
-
Save trueroad/e14ff8ca64f6765391071c0ca6f60a34 to your computer and use it in GitHub Desktop.
Experimental PDF Font Embedder without Ghostscript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Experimental PDF Font Embedder without Ghostscript | |
// https://gist.github.com/trueroad/e14ff8ca64f6765391071c0ca6f60a34 | |
// | |
// Copyright (C) 2023 Masamichi Hosoda. All rights reserved. | |
// | |
// Redistribution and use in source and binary forms, with or without | |
// modification, are permitted provided that the following conditions | |
// are met: | |
// | |
// * Redistributions of source code must retain the above copyright notice, | |
// this list of conditions and the following disclaimer. | |
// | |
// * Redistributions in binary form must reproduce the above copyright notice, | |
// this list of conditions and the following disclaimer in the documentation | |
// and/or other materials provided with the distribution. | |
// | |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
// ARE DISCLAIMED. | |
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
// SUCH DAMAGE. | |
// | |
// | |
// Required: | |
// libqpdf (https://qpdf.sourceforge.io/) | |
// cmdlineparse.hh (https://github.com/trueroad/cmdlineparse) | |
// | |
// Build: | |
// g++ -o pdf-font-embedder pdf-font-embedder.cc -lqpdf | |
// | |
// Usage: | |
// $ ./pdf-font-embedder TABLE.txt INPUT.pdf OUTPUT.pdf | |
// | |
// ```TABLE.txt | |
// # original_fontname new_fontname font_filename | |
// /Ryumin-Light /HaranoAjiMincho-Light /dir/HaranoAjiMincho-Light.otf | |
// /GothicBBB-Medium /HaranoAjiGothic-Regular /dir/HaranoAjiGothic-Regular.otf | |
// ``` | |
// | |
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFWriter.hh>
// Package metadata macros.
// NOTE(review): presumably read by cmdlineparse.hh (included right after
// these definitions) to build its version/help text -- confirm against
// that header.  They must stay preprocessor macros defined before it.
#define PACKAGE_STRING "Experimental PDF Font Embedder without Ghostscript"
#define PACKAGE_COPYRIGHT "Copyright (C) 2023 Masamichi Hosoda"
#define PACKAGE_LICENSE "License: BSD-2-Clause"
#define PACKAGE_URL "https://gist.github.com/trueroad/e14ff8ca64f6765391071c0ca6f60a34"
#include "cmdlineparse.hh" | |
class pdf_font_embedder | |
{ | |
public: | |
void set_linearize (bool l) | |
{ | |
linearize_ = l; | |
} | |
void set_object_streams (qpdf_object_stream_e os) | |
{ | |
object_streams_ = os; | |
} | |
void set_newline_before_endstream (bool n) | |
{ | |
newline_before_endstream_ = n; | |
} | |
void set_qdf (bool q) | |
{ | |
qdf_ = q; | |
} | |
void load_pdf (const std::string &filename); | |
void process_pdf (); | |
void save_pdf (const std::string &filename); | |
void load_table (const std::string &filename); | |
private: | |
void process_obj (QPDFObjectHandle oh); | |
void process_font (QPDFObjectHandle oh); | |
void process_font_type0 (QPDFObjectHandle oh); | |
std::string process_font_type0_descendant (QPDFObjectHandle oh); | |
bool process_font_other (QPDFObjectHandle oh); | |
bool process_fontdescriptor (QPDFObjectHandle oh); | |
QPDF qpdf_; | |
std::map<std::string, std::pair<std::string, std::string>> table_; | |
std::map<std::string, QPDFObjectHandle> font_obj_map_; | |
std::set<int> processed_obj_id_; | |
bool linearize_ = false; | |
qpdf_object_stream_e object_streams_ = qpdf_o_preserve; | |
bool newline_before_endstream_ = false; | |
bool qdf_ = false; | |
}; | |
void pdf_font_embedder::load_pdf (const std::string &filename) | |
{ | |
qpdf_.processFile (filename.c_str ()); | |
} | |
void pdf_font_embedder::process_pdf () | |
{ | |
auto objs {qpdf_.getAllObjects ()}; | |
for (auto o: objs) | |
process_obj (o); | |
} | |
void pdf_font_embedder::save_pdf (const std::string &filename) | |
{ | |
QPDFWriter w (qpdf_, filename.c_str ()); | |
w.setLinearization (linearize_); | |
w.setObjectStreamMode (object_streams_); | |
w.setNewlineBeforeEndstream (newline_before_endstream_); | |
w.setQDFMode (qdf_); | |
w.setMinimumPDFVersion ("1.6"); | |
w.write (); | |
} | |
void pdf_font_embedder::load_table (const std::string &filename) | |
{ | |
std::ifstream ifs {filename}; | |
std::string line; | |
while (std::getline (ifs, line)) | |
{ | |
if (line[0] == '#') | |
continue; | |
std::istringstream iss {line}; | |
std::string org_fontname; | |
std::string new_fontname; | |
std::string font_filename; | |
iss >> org_fontname >> new_fontname >> font_filename; | |
table_[org_fontname] = std::make_pair (new_fontname, | |
font_filename); | |
} | |
} | |
// Forwards OH to process_font () when it is a dictionary whose /Type
// entry is the name /Font; every other object is ignored.
void
pdf_font_embedder::process_obj (QPDFObjectHandle oh)
{
  if (!oh.isDictionary ())
    return;
  if (!oh.hasKey ("/Type"))
    return;

  QPDFObjectHandle type_entry = oh.getKey ("/Type");
  if (type_entry.isName () && type_entry.getName () == "/Font")
    process_font (oh);
}
// Handles one font dictionary OH.
//
// Prints the object ID, /Subtype and /BaseFont, then dispatches to
// process_font_type0 () for /Type0 fonts and to process_font_other () for
// every other subtype.  Fonts whose object ID is already recorded in
// processed_obj_id_ are skipped.
//
// /Subtype and /BaseFont are checked with isName () before getName () is
// called on them, so a font without a usable entry (for example a Type3
// font, which has no /BaseFont) is skipped with a message instead of being
// read with the wrong type.
void
pdf_font_embedder::process_font (QPDFObjectHandle oh)
{
  const int obj_id = oh.getObjectID ();
  std::cout << "Font: Object ID " << obj_id
            << ", Generation " << oh.getGeneration ()
            << std::endl;

  if (processed_obj_id_.count (obj_id) != 0)
    {
      std::cout << " Already processed. Skipping..."
                << std::endl;
      return;
    }

  auto subtype = oh.getKey ("/Subtype");
  if (!subtype.isName ())
    {
      std::cout << " Error: /Subtype is missing or is not a name."
                << " Skipping..."
                << std::endl;
      return;
    }
  const std::string subtype_name = subtype.getName ();
  std::cout << " Subtype is "
            << subtype_name
            << "."
            << std::endl;

  auto fontname = oh.getKey ("/BaseFont");
  if (!fontname.isName ())
    {
      std::cout << " /BaseFont is missing or is not a name. Skipping..."
                << std::endl;
      return;
    }
  std::cout << " BaseFont is "
            << fontname.getName ()
            << "."
            << std::endl;

  if (subtype_name == "/Type0")
    process_font_type0 (oh);
  else
    process_font_other (oh);
}
// Handles the Type0 font dictionary OH.
//
// The single descendant font is handled first by
// process_font_type0_descendant ().  If that succeeds, /BaseFont of the
// Type0 font is replaced by the descendant's new name.  When the original
// /BaseFont ends with "-" followed by the /Encoding name (e.g. "-H" for
// /Encoding /H), the same suffix is appended to the new name.
//
// /BaseFont is validated before the descendant is touched so that nothing
// is modified when the Type0 font itself cannot be renamed.  /Encoding is
// only used for the suffix when it is a name; for a Type0 font it may also
// be a CMap stream, in which case no suffix is appended.
void
pdf_font_embedder::process_font_type0 (QPDFObjectHandle oh)
{
  auto descendantfonts = oh.getKey ("/DescendantFonts");
  if (!descendantfonts.isArray ()
      || descendantfonts.getArrayNItems () != 1)
    {
      std::cout << " Error: DescendantFonts is not a one-element array."
                << std::endl
                << " It is not PDF32000-1:2008 compliant."
                << std::endl;
      return;
    }

  auto fontname = oh.getKey ("/BaseFont");
  if (!fontname.isName ())
    {
      std::cout << " Error: /BaseFont is missing or is not a name."
                << " Skipping..."
                << std::endl;
      return;
    }
  const std::string org_fontname = fontname.getName ();

  std::string new_fontname
    = process_font_type0_descendant (descendantfonts.getArrayItem (0));
  if (new_fontname.empty ())
    {
      std::cout << " Skipping..."
                << std::endl;
      return;
    }

  auto encoding_obj = oh.getKey ("/Encoding");
  if (encoding_obj.isName ())
    {
      const std::string encoding = encoding_obj.getName ();
      if (!encoding.empty () && org_fontname.size () > encoding.size ())
        {
          // "/H" -> "-H": same length as the encoding name, so the
          // substr () start below cannot underflow.
          const std::string encoding_hyphen = "-" + encoding.substr (1);
          const std::string tail
            = org_fontname.substr (org_fontname.size ()
                                   - encoding_hyphen.size ());
          if (tail == encoding_hyphen)
            new_fontname += encoding_hyphen;
        }
    }

  if (org_fontname != new_fontname)
    {
      std::cout << " Replacing /BaseFont of Type0: "
                << org_fontname
                << " -> "
                << new_fontname
                << std::endl;
      oh.replaceKey ("/BaseFont", QPDFObjectHandle::newName (new_fontname));
    }

  processed_obj_id_.insert (oh.getObjectID ());
}
std::string | |
pdf_font_embedder::process_font_type0_descendant (QPDFObjectHandle oh) | |
{ | |
std::cout << " --- Font (descendant): Object ID " << oh.getObjectID () | |
<< ", Generation " << oh.getGeneration () | |
<< " ---" | |
<< std::endl; | |
if (processed_obj_id_.find (oh.getObjectID ()) != | |
processed_obj_id_.end ()) | |
{ | |
std::cout << " Already processed." | |
<< std::endl | |
<< " -- Font (descendant): Skipping... ---" | |
<< std::endl; | |
return oh.getKey ("/BaseFont").getName (); | |
} | |
auto subtype {oh.getKey ("/Subtype")}; | |
std::cout << " Subtype is " | |
<< subtype.getName () | |
<< "." | |
<< std::endl; | |
auto fontname {oh.getKey ("/BaseFont")}; | |
const std::string org_fontname {fontname.getName ()}; | |
std::cout << " BaseFont is " | |
<< org_fontname | |
<< "." | |
<< std::endl; | |
if (!process_font_other (oh)) | |
{ | |
std::cout << " --- Font (descendant): Skipping... ---" | |
<< std::endl; | |
return ""; | |
} | |
std::cout << " --- Font (descendant): Complete. ---" | |
<< std::endl; | |
return oh.getKey ("/BaseFont").getName (); | |
} | |
// Handles a font dictionary OH that carries its own /FontDescriptor.
//
// The font is processed only when its /BaseFont is a key of table_ and
// process_fontdescriptor () succeeds for its descriptor.  On success
// /BaseFont is replaced by the new name from the table (when different),
// the object ID is recorded in processed_obj_id_ and true is returned.
// Otherwise a message is printed and false is returned.
bool
pdf_font_embedder::process_font_other (QPDFObjectHandle oh)
{
  if (!oh.hasKey ("/FontDescriptor"))
    {
      std::cout << " Error: It does not have /FontDescriptor."
                << std::endl;
      return false;
    }

  const std::string current_name = oh.getKey ("/BaseFont").getName ();
  const auto entry = table_.find (current_name);
  if (entry == table_.end ())
    {
      std::cout << " It is not in the table. Skipping..."
                << std::endl;
      return false;
    }

  const bool descriptor_ok
    = process_fontdescriptor (oh.getKey ("/FontDescriptor"));
  if (!descriptor_ok)
    {
      std::cout << " Skipping..."
                << std::endl;
      return false;
    }

  const std::string &replacement = entry->second.first;
  if (current_name != replacement)
    {
      std::cout << " Replacing /BaseFont: " << current_name
                << " -> " << replacement
                << std::endl;
      oh.replaceKey ("/BaseFont", QPDFObjectHandle::newName (replacement));
    }

  processed_obj_id_.insert (oh.getObjectID ());
  return true;
}
// Reads the whole file PATH into DATA.
// The file is opened in binary mode so that the font data is not altered
// on platforms that translate line endings in text mode.
// Returns false when the file cannot be opened or read, or is empty.
static bool
read_font_file_contents (const std::string &path, std::string &data)
{
  std::ifstream ifs (path, std::ios::in | std::ios::binary);
  if (!ifs)
    return false;
  data.assign (std::istreambuf_iterator<char> (ifs),
               std::istreambuf_iterator<char> ());
  return !ifs.bad () && !data.empty ();
}

// Attaches a font file to the font descriptor dictionary OH.
//
// Returns true when OH now refers to an embedded font file (or had already
// been processed), false when it was left alone: it is not a
// /FontDescriptor dictionary, has no /FontName name, already has a
// /FontFile, /FontFile2 or /FontFile3 entry, its /FontName is not a key of
// table_, or the font file cannot be read.
//
// On success /FontName is replaced by the new name from the table (when
// different) and /FontFile3 is set to a stream with /Subtype /OpenType
// holding the font file.  Streams are cached in font_obj_map_ by new font
// name, so each font file is read and embedded only once.  The font file
// is read before OH is modified, so a missing or unreadable file leaves
// the descriptor untouched.
bool
pdf_font_embedder::process_fontdescriptor (QPDFObjectHandle oh)
{
  // NOTE(review): per the qpdf API, direct (inline) objects are understood
  // to all report object ID 0 -- confirm.  Only indirect objects are
  // therefore tracked by ID; otherwise one direct descriptor would make
  // every later direct descriptor look "already processed".
  const bool trackable = oh.isIndirect ();
  if (trackable && processed_obj_id_.count (oh.getObjectID ()) != 0)
    return true;

  std::cout << " FontDescriptor: Object ID " << oh.getObjectID ()
            << ", Generation " << oh.getGeneration ()
            << std::endl;

  if (!(oh.isDictionary () && oh.hasKey ("/Type")))
    return false;
  auto type = oh.getKey ("/Type");
  if (!type.isName () || type.getName () != "/FontDescriptor")
    return false;

  auto fontname = oh.getKey ("/FontName");
  if (!fontname.isName ())
    {
      std::cout << " Error: /FontName is missing or is not a name."
                << std::endl;
      return false;
    }
  const std::string org_fontname = fontname.getName ();
  std::cout << " FontName is "
            << org_fontname
            << "."
            << std::endl;

  // A descriptor that already has a font file is left untouched.
  for (const char *key : {"/FontFile", "/FontFile2", "/FontFile3"})
    {
      if (oh.hasKey (key))
        {
          std::cout << " It has " << key << ". Skipping..."
                    << std::endl;
          return false;
        }
    }

  const auto entry = table_.find (org_fontname);
  if (entry == table_.end ())
    return false;
  const std::string &new_fontname = entry->second.first;
  const std::string &font_filename = entry->second.second;

  // Load the font file (unless it is already cached) before changing
  // anything in the descriptor.
  auto cached = font_obj_map_.find (new_fontname);
  std::string font_data;
  if (cached == font_obj_map_.end ()
      && !read_font_file_contents (font_filename, font_data))
    {
      std::cout << " Error: Cannot read font file "
                << font_filename
                << std::endl;
      return false;
    }

  if (org_fontname != new_fontname)
    {
      std::cout << " Replacing /FontName: "
                << org_fontname
                << " -> "
                << new_fontname
                << std::endl;
      oh.replaceKey ("/FontName", QPDFObjectHandle::newName (new_fontname));
    }

  if (cached == font_obj_map_.end ())
    {
      std::cout << " Embedding "
                << font_filename
                << std::endl;
      auto fontfile = QPDFObjectHandle::newStream (&qpdf_, font_data);
      auto fontfile_dict = fontfile.getDict ();
      fontfile_dict.replaceKey ("/Subtype",
                                QPDFObjectHandle::newName ("/OpenType"));
      cached = font_obj_map_.emplace (new_fontname, fontfile).first;
    }

  std::cout << " Adding /FontFile3" << std::endl;
  oh.replaceKey ("/FontFile3", cached->second);

  if (trackable)
    processed_obj_id_.insert (oh.getObjectID ());
  return true;
}
int main(int argc, char *argv[]) | |
{ | |
cmdlineparse::parser cmd; | |
cmd.set_usage_unamed_opts ("TABLE.txt INPUT.pdf OUTPUT.pdf"); | |
cmd.add_default (); | |
bool linearize; | |
std::string object_streams; | |
bool newline_before_endstream; | |
bool qdf; | |
cmd.add_flag (0, "linearize", &linearize, | |
" Output linearized (web-optimized) PDF", | |
"Output PDF settings (QPDF)"); | |
cmd.add_string (0, "object-streams", &object_streams, "preserve", | |
" Settings for object streams", | |
"[preserve|disable|generate]", | |
"Output PDF settings (QPDF)"); | |
cmd.add_flag (0, "newline-before-endstream", &newline_before_endstream, | |
" Output newline before endstream", | |
"Output PDF settings (QPDF)"); | |
cmd.add_flag (0, "qdf", &qdf, | |
" Output QDF", | |
"Output PDF settings (QPDF)"); | |
if (!cmd.parse (argc, argv)) | |
return 1; | |
auto uargs {cmd.get_unamed_args ()}; | |
if (uargs.size () != 3) | |
{ | |
std::cout << cmd.build_help (); | |
return 1; | |
} | |
std::cout << cmd.get_version_string () << std::endl; | |
pdf_font_embedder pfe; | |
pfe.set_linearize (linearize); | |
if (object_streams == "preserve") | |
pfe.set_object_streams (qpdf_o_preserve); | |
else if (object_streams == "generate") | |
pfe.set_object_streams (qpdf_o_generate); | |
else if (object_streams == "disable") | |
pfe.set_object_streams (qpdf_o_disable); | |
else | |
{ | |
std::cerr << "unknwon --object-streams mode" << std::endl; | |
return 1; | |
} | |
pfe.set_newline_before_endstream (newline_before_endstream); | |
pfe.set_qdf (qdf); | |
pfe.load_table (uargs[0]); | |
pfe.load_pdf (uargs[1]); | |
pfe.process_pdf (); | |
pfe.save_pdf (uargs[2]); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment