|
#! /usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
# taken from https://github.com/baruchel/txt2pdf |
|
import argparse |
|
import reportlab.lib.pagesizes |
|
from reportlab.pdfgen.canvas import Canvas |
|
from reportlab.lib import units |
|
from reportlab.pdfbase import pdfmetrics |
|
from reportlab.pdfbase.ttfonts import TTFont |
|
import re |
|
import sys |
|
import os |
|
|
|
|
|
class Margins(object): |
|
def __init__(self, right, left, top, bottom): |
|
self._right = right |
|
self._left = left |
|
self._top = top |
|
self._bottom = bottom |
|
|
|
@property |
|
def right(self): |
|
return self._right * units.cm |
|
|
|
@property |
|
def left(self): |
|
return self._left * units.cm |
|
|
|
@property |
|
def top(self): |
|
return self._top * units.cm |
|
|
|
@property |
|
def bottom(self): |
|
return self._bottom * units.cm |
|
|
|
def adjustLeft(self, width): |
|
self._left -= width / units.cm |
|
|
|
|
|
class PDFCreator(object): |
|
appName = "txt2pdf (version 1.0)" |
|
|
|
def __init__(self, args, margins): |
|
pageWidth, pageHeight = reportlab.lib.pagesizes.__dict__[args.media] |
|
if args.landscape: |
|
pageWidth, pageHeight = reportlab.lib.pagesizes.landscape( |
|
(pageWidth, pageHeight)) |
|
self.author = args.author |
|
self.title = args.title |
|
self.keywords = args.keywords |
|
self.subject = args.subject |
|
self.canvas = Canvas(args.output, pagesize=(pageWidth, pageHeight)) |
|
self.canvas.setCreator(self.appName) |
|
if len(args.author) > 0: |
|
self.canvas.setAuthor(args.author) |
|
if len(args.title) > 0: |
|
self.canvas.setTitle(args.title) |
|
if len(args.subject) > 0: |
|
self.canvas.setSubject(args.subject) |
|
if len(args.keywords) > 0: |
|
self.canvas.setKeywords(args.keywords) |
|
self.fontSize = args.font_size |
|
if args.font not in ('Courier'): |
|
self.font = 'myFont' |
|
pdfmetrics.registerFont(TTFont('myFont', args.font)) |
|
else: |
|
self.font = args.font |
|
self.kerning = args.kerning |
|
self.margins = margins |
|
self.leading = (args.extra_vertical_space + 1.2) * self.fontSize |
|
self.linesPerPage = int( |
|
(self.leading + pageHeight |
|
- margins.top - margins.bottom - self.fontSize) / self.leading) |
|
self.lppLen = len(str(self.linesPerPage)) |
|
fontWidth = self.canvas.stringWidth( |
|
".", fontName=self.font, fontSize=self.fontSize) |
|
self.lineNumbering = args.line_numbers |
|
if self.lineNumbering: |
|
margins.adjustLeft(fontWidth * (self.lppLen + 2)) |
|
contentWidth = pageWidth - margins.left - margins.right |
|
self.charsPerLine = int( |
|
(contentWidth + self.kerning) / (fontWidth + self.kerning)) |
|
self.top = pageHeight - margins.top - self.fontSize |
|
self.filename = args.filename |
|
self.verbose = not args.quiet |
|
self.breakOnBlanks = args.break_on_blanks |
|
self.encoding = args.encoding |
|
self.pageNumbering = args.page_numbers |
|
if self.pageNumbering: |
|
self.pageNumberPlacement = \ |
|
(pageWidth / 2, margins.bottom / 2) |
|
|
|
def _process(self, data): |
|
flen = os.fstat(data.fileno()).st_size |
|
lineno = 0 |
|
read = 0 |
|
for line in data: |
|
lineno += 1 |
|
if sys.version_info.major == 2: |
|
read += len(line) |
|
yield flen == \ |
|
read, lineno, line.decode(self.encoding).rstrip('\r\n') |
|
else: |
|
read += len(line.encode(self.encoding)) |
|
yield flen == read, lineno, line.rstrip('\r\n') |
|
|
|
def _readDocument(self): |
|
with open(self.filename, 'r') as data: |
|
for done, lineno, line in self._process(data): |
|
if len(line) > self.charsPerLine: |
|
self._scribble( |
|
"Warning: wrapping line %d in %s" % |
|
(lineno + 1, self.filename)) |
|
while len(line) > self.charsPerLine: |
|
yield done, line[:self.charsPerLine] |
|
line = line[self.charsPerLine:] |
|
yield done, line |
|
|
|
def _newpage(self): |
|
textobject = self.canvas.beginText() |
|
textobject.setFont(self.font, self.fontSize, leading=self.leading) |
|
textobject.setTextOrigin(self.margins.left, self.top) |
|
textobject.setCharSpace(self.kerning) |
|
if self.pageNumbering: |
|
self.canvas.drawString( |
|
self.pageNumberPlacement[0], |
|
self.pageNumberPlacement[1], |
|
str(self.canvas.getPageNumber())) |
|
return textobject |
|
|
|
def _scribble(self, text): |
|
if self.verbose: |
|
sys.stderr.write(text + os.linesep) |
|
|
|
def generate(self): |
|
self._scribble( |
|
"Writing '%s' with %d characters per " |
|
"line and %d lines per page..." % |
|
(self.filename, self.charsPerLine, self.linesPerPage) |
|
) |
|
if self.breakOnBlanks: |
|
pageno = self._generateBob(self._readDocument()) |
|
else: |
|
pageno = self._generatePlain(self._readDocument()) |
|
self._scribble("PDF document: %d pages" % pageno) |
|
|
|
def _generatePlain(self, data): |
|
pageno = 1 |
|
lineno = 0 |
|
page = self._newpage() |
|
for _, line in data: |
|
lineno += 1 |
|
|
|
# Handle form feed characters. |
|
(line, pageBreakCount) = re.subn(r'\f', r'', line) |
|
if pageBreakCount > 0 and lineno >= args.minimum_page_length: |
|
for _ in range(pageBreakCount): |
|
self.canvas.drawText(page) |
|
self.canvas.showPage() |
|
lineno = 0 |
|
pageno += 1 |
|
page = self._newpage() |
|
if args.minimum_page_length > 0: |
|
break |
|
|
|
page.textLine(line) |
|
|
|
if lineno == self.linesPerPage: |
|
self.canvas.drawText(page) |
|
self.canvas.showPage() |
|
lineno = 0 |
|
pageno += 1 |
|
page = self._newpage() |
|
if lineno > 0: |
|
self.canvas.drawText(page) |
|
else: |
|
pageno -= 1 |
|
self.canvas.save() |
|
return pageno |
|
|
|
def _writeChunk(self, page, chunk, lineno): |
|
if self.lineNumbering: |
|
formatstr = '%%%dd: %%s' % self.lppLen |
|
for index, line in enumerate(chunk): |
|
page.textLine( |
|
formatstr % (lineno - len(chunk) + index + 1, line)) |
|
else: |
|
for line in chunk: |
|
page.textLine(line) |
|
|
|
def _generateBob(self, data): |
|
pageno = 1 |
|
lineno = 0 |
|
page = self._newpage() |
|
chunk = list() |
|
for last, line in data: |
|
if lineno == self.linesPerPage: |
|
self.canvas.drawText(page) |
|
self.canvas.showPage() |
|
lineno = len(chunk) |
|
pageno += 1 |
|
page = self._newpage() |
|
lineno += 1 |
|
chunk.append(line) |
|
if last or len(line.strip()) == 0: |
|
self._writeChunk(page, chunk, lineno) |
|
chunk = list() |
|
if lineno > 0: |
|
self.canvas.drawText(page) |
|
self.canvas.showPage() |
|
else: |
|
pageno -= 1 |
|
if len(chunk) > 0: |
|
page = self._newpage() |
|
self.canvas.drawText(page) |
|
self.canvas.showPage() |
|
pageno += 1 |
|
self.canvas.save() |
|
return pageno |
|
|
|
|
|
parser = argparse.ArgumentParser() |
|
parser.add_argument('filename') |
|
parser.add_argument( |
|
'--font', |
|
'-f', |
|
default='Courier', |
|
help='Select a font (True Type format) by its full path') |
|
parser.add_argument( |
|
'--font-size', |
|
'-s', |
|
type=float, |
|
default=10.0, |
|
help='Size of the font') |
|
parser.add_argument( |
|
'--extra-vertical-space', |
|
'-v', |
|
type=float, |
|
default=0.0, |
|
help='Extra vertical space between lines') |
|
parser.add_argument( |
|
'--kerning', |
|
'-k', |
|
type=float, |
|
default=0.0, |
|
help='Extra horizontal space between characters') |
|
parser.add_argument( |
|
'--media', |
|
'-m', |
|
default='A4', |
|
help='Select the size of the page (A4, A3, etc.)') |
|
parser.add_argument( |
|
'--minimum-page-length', |
|
'-M', |
|
type=int, |
|
default=10, |
|
help='The minimum number of lines before a form feed character will change the page') |
|
parser.add_argument( |
|
'--landscape', |
|
'-l', |
|
action="store_true", |
|
default=False, |
|
help='Select landscape mode') |
|
parser.add_argument( |
|
'--margin-left', |
|
'-L', |
|
type=float, |
|
default=2.0, |
|
help='Left margin (in cm unit)') |
|
parser.add_argument( |
|
'--margin-right', |
|
'-R', |
|
type=float, |
|
default=2.0, |
|
help='Right margin (in cm unit)') |
|
parser.add_argument( |
|
'--margin-top', |
|
'-T', |
|
type=float, |
|
default=2.0, |
|
help='Top margin (in cm unit)') |
|
parser.add_argument( |
|
'--margin-bottom', |
|
'-B', |
|
type=float, |
|
default=2.0, |
|
help='Bottom margin (in cm unit)') |
|
parser.add_argument( |
|
'--output', |
|
'-o', |
|
default='output.pdf', |
|
help='Output file') |
|
parser.add_argument( |
|
'--author', |
|
default='', |
|
help='Author of the PDF document') |
|
parser.add_argument( |
|
'--title', |
|
default='', |
|
help='Title of the PDF document') |
|
parser.add_argument( |
|
'--quiet', |
|
'-q', |
|
action='store_true', |
|
default=False, |
|
help='Hide detailed information') |
|
parser.add_argument('--subject', default='', |
|
help='Subject of the PDF document') |
|
parser.add_argument('--keywords', default='', |
|
help='Keywords of the PDF document') |
|
parser.add_argument( |
|
'--break-on-blanks', |
|
'-b', |
|
action='store_true', |
|
default=False, |
|
help='Only break page on blank lines') |
|
parser.add_argument( |
|
'--encoding', |
|
'-e', |
|
type=str, |
|
default='utf8', |
|
help='Input encoding') |
|
parser.add_argument( |
|
'--page-numbers', |
|
'-n', |
|
action='store_true', |
|
help='Add page numbers') |
|
parser.add_argument( |
|
'--line-numbers', |
|
action='store_true', |
|
help='Add line numbers') |
|
|
|
args = parser.parse_args() |
|
|
|
PDFCreator(args, Margins( |
|
args.margin_right, |
|
args.margin_left, |
|
args.margin_top, |
|
args.margin_bottom)).generate() |