Created
September 20, 2017 11:07
-
-
Save retrography/f5cb4e84d53150ea896cff73a9782e35 to your computer and use it in GitHub Desktop.
Transforms jbig2-encoded images into PDF - A Ruby port of https://github.com/agl/jbig2enc/blob/master/pdf.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Copyright 2017 Mahmood S. Zargar
# Author: [email protected] (Mahmood S. Zargar)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a Ruby port of "pdf.py" published under the same license
# Copyright 2006 Google Inc.
# Author: [email protected] (Adam Langley)
# JBIG2 Encoder
# https://github.com/agl/jbig2enc
# Fallback resolution, in dots per inch, for page images whose header reports
# a resolution of 0.
# NOTE(review): this is a top-level local variable, and Ruby `def` bodies do
# not see top-level locals, so a method such as jbig2pdf cannot read it by
# this name -- confirm how the fallback is meant to reach that method.
dpi = 72
# Minimal PDF dictionary: an insertion-ordered set of key/value pairs that
# serialises itself in PDF "<< /Key value ... >>" syntax.
class Dict
  # The underlying Hash of entries; callers may read or modify it directly.
  attr_accessor :d

  # values - Hash of initial entries. Its pairs are copied, so later changes
  #          to this Dict do not affect the Hash that was passed in.
  def initialize(values = {})
    @d = {}
    @d.update(values)
  end

  # Returns the dictionary as PDF source text. Each key is written as a PDF
  # name ("/Key") followed by the value's #to_s; nothing is escaped.
  def to_s
    entries = @d.map { |key, value| "/#{key} #{value}\n" }
    "<< #{entries.join}>>\n"
  end
end
# Next PDF object number to hand out. Obj#initialize takes the current value
# as the new object's id and then increments it, so numbering starts at 1.
$global_next_id = 1
# A numbered PDF object: a dictionary plus an optional stream body. Each new
# instance takes the next number from $global_next_id.
class Obj
  # Object number assigned at construction time.
  attr_accessor :id
  # The Dict holding this object's dictionary entries.
  attr_accessor :d

  # d      - Hash of dictionary entries. It is copied; the Hash passed in is
  #          left unmodified.
  # stream - optional String holding the stream body. When given, a 'Length'
  #          entry with its size in bytes is added to the dictionary.
  def initialize(d = {}, stream = nil)
    @d = Dict.new(d)
    # Length must count bytes, not characters, for non-ASCII stream data.
    @d.d['Length'] = stream.bytesize.to_s unless stream.nil?
    @stream = stream
    @id = $global_next_id
    $global_next_id += 1
  end

  # Returns the dictionary, the stream section (only when a stream was given)
  # and the closing "endobj" line. The "N 0 obj" header line is not included.
  def to_s
    parts = [@d.to_s]
    parts << "stream\n" << @stream << "\nendstream\n" unless @stream.nil?
    parts << "endobj\n"
    parts.join
  end
end
# An ordered collection of PDF objects that serialises to a complete PDF file:
# header, objects, cross-reference table and trailer.
class Doc
  def initialize
    @objs = []
    @pages = []
  end

  # Appends o to the document and returns it.
  def add_object(o)
    @objs << o
    o
  end

  # Records o as a page, appends it to the document and returns it.
  def add_page(o)
    @pages << o
    add_object(o)
  end

  # Returns the whole PDF file as a String.
  #
  # NOTE: cross-reference entries are written in insertion order without
  # consulting each object's id, and the trailer hard-codes "/Root 1 0 R".
  def to_s
    lines = []
    offsets = []
    position = 0
    # Appends a line and advances the running byte offset. The +1 accounts
    # for the "\n" that the final join places after every line.
    emit = lambda do |text|
      lines << text
      position += text.bytesize + 1
    end

    emit.call('%PDF-1.4')
    @objs.each do |obj|
      offsets << position
      emit.call('%i 0 obj' % obj.id)
      emit.call(obj.to_s)
    end

    xref_start = position
    lines << 'xref'
    lines << '0 %i' % (offsets.size + 1)
    lines << '0000000000 65535 f '
    offsets.each { |offset| lines << '%010i 00000 n ' % offset }
    lines << ''
    lines << 'trailer'
    lines << "<< /Size %i\n/Root 1 0 R >>" % (offsets.size + 1)
    lines << 'startxref'
    lines << xref_start.to_s
    lines << '%%EOF'
    lines.join("\n")
  end
end
# Returns the PDF indirect-reference string ("N 0 R") for object number x.
def ref(x)
  format('%i 0 R', x)
end
# Builds a PDF from a JBIG2 symbol table file and a set of JBIG2 page files
# and returns the PDF as a String.
#
# symboltable - path of the symbol table file; embedded as a stream that
#               every page image references through /JBIG2Globals.
# pagefiles   - Array of page file paths, processed in sorted order. The
#               array that is passed in is not modified.
# dpi         - resolution used for a page whose header reports 0.
#
# A page file that cannot be read, or that is too short to hold the header
# fields, is reported on $stderr and skipped. Errors reading the symbol table
# are not rescued.
def jbig2pdf(symboltable='output.sym', pagefiles=Dir['output.[0-9]*'], dpi: 72)
  doc = Doc.new
  catalog = doc.add_object(Obj.new({'Type' => '/Catalog'}))
  outlines = doc.add_object(Obj.new({'Type' => '/Outlines', 'Count' => '0'}))
  pages = doc.add_object(Obj.new({'Type' => '/Pages'}))
  # Link by the ids the objects actually received instead of assuming 2 and 3.
  catalog.d.d['Outlines'] = ref(outlines.id)
  catalog.d.d['Pages'] = ref(pages.id)
  symd = doc.add_object(Obj.new({}, File.read(symboltable, mode: 'rb')))
  page_objs = []

  pagefiles.sort.each do |path|
    begin
      data = File.read(path, mode: 'rb')
    rescue IOError, SystemCallError
      # File.read reports missing/unreadable files as Errno::* errors, which
      # descend from SystemCallError rather than IOError.
      $stderr.puts("error reading page file %s\n" % path)
      next
    end

    # Bytes 11..26 are read as four big-endian 32-bit integers: width,
    # height, x resolution and y resolution.
    header = data[11..26]
    if header.nil? || header.bytesize < 16
      $stderr.puts("page file %s is too short to contain a page header\n" % path)
      next
    end
    width, height, xres, yres = header.unpack('NNNN')
    xres = dpi if xres.zero?
    yres = dpi if yres.zero?

    # Page size in PDF units (72 per inch).
    page_width = (width.to_f * 72) / xres
    page_height = (height.to_f * 72) / yres

    xobj = Obj.new({'Type' => '/XObject', 'Subtype' => '/Image',
                    'Width' => width.to_s, 'Height' => height.to_s,
                    'ColorSpace' => '/DeviceGray', 'BitsPerComponent' => '1',
                    'Filter' => '/JBIG2Decode',
                    'DecodeParms' => ' << /JBIG2Globals %i 0 R >>' % symd.id},
                   data)
    contents = Obj.new({}, 'q %f 0 0 %f 0 0 cm /Im1 Do Q' % [page_width, page_height])
    resources = Obj.new({'ProcSet' => '[/PDF /ImageB]',
                         'XObject' => '<< /Im1 %i 0 R >>' % xobj.id})
    page = Obj.new({'Type' => '/Page', 'Parent' => ref(pages.id),
                    'MediaBox' => '[ 0 0 %f %f ]' % [page_width, page_height],
                    'Contents' => ref(contents.id),
                    'Resources' => ref(resources.id)})
    [xobj, contents, resources, page].each { |obj| doc.add_object(obj) }
    page_objs << page
  end

  # Filled in once every page is known; also present when no page was usable.
  pages.d.d['Count'] = page_objs.size.to_s
  pages.d.d['Kids'] = '[' + page_objs.map { |page| ref(page.id) }.join(' ') + ']'
  doc.to_s
end
# Writes msg and a one-line usage summary to $stderr, then terminates the
# process with a failure exit status. Does not return.
def usage(msg)
  $stderr.puts("pdf.rb: #{msg}\n")
  $stderr.puts("Usage: pdf.rb [file_basename] > out.pdf\n")
  exit(false)
end
########## Main ##########
if __FILE__ == $0
  # An optional single argument is the basename shared by "<base>.sym" and
  # the "<base>.<digits>" page files; without it the basename is "output".
  case ARGV.size
  when 0 then base = 'output'
  when 1 then base = ARGV[0]
  else usage("Wrong number of arguments!")
  end
  sym = base + '.sym'
  pages = Dir[base + '.[0-9]*']

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  if !File.exist?(sym)
    usage("Symbol table %s not found!" % sym)
  elsif pages.empty?
    usage("No pages found!")
  end

  # The PDF embeds raw binary streams, so stdout must not translate bytes.
  $stdout.binmode
  print(jbig2pdf(sym, pages))
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment