Created
January 8, 2019 12:20
-
-
Save OrkoHunter/e3200adeba42e14fa0dd49ca4fe8f809 to your computer and use it in GitHub Desktop.
text to pdf creator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import argparse | |
import reportlab.lib.pagesizes | |
from reportlab.pdfgen.canvas import Canvas | |
from reportlab.lib import units | |
from reportlab.lib.styles import ParagraphStyle | |
from reportlab.platypus import Paragraph, SimpleDocTemplate, BaseDocTemplate, XPreformatted | |
from reportlab.lib.colors import red, black, navy, white, green | |
from reportlab.rl_config import defaultPageSize | |
(PAGE_WIDTH, PAGE_HEIGHT) = defaultPageSize | |
from reportlab.lib.units import inch | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
import re | |
import sys | |
import os | |
class Margins(object):
    """Page margins stored in centimetres and read back scaled by ``units.cm``.

    The constructor takes the four margins in centimetres, in the order
    ``right, left, top, bottom``.  Each public property returns the stored
    value multiplied by reportlab's ``units.cm`` factor.
    """

    def __init__(self, right, left, top, bottom):
        # Raw values are kept in centimetres; conversion happens on read.
        self._right = right
        self._left = left
        self._top = top
        self._bottom = bottom

    @property
    def right(self):
        """Right margin, scaled by ``units.cm``."""
        return self._right * units.cm

    @property
    def left(self):
        """Left margin, scaled by ``units.cm``."""
        return self._left * units.cm

    @property
    def top(self):
        """Top margin, scaled by ``units.cm``."""
        return self._top * units.cm

    @property
    def bottom(self):
        """Bottom margin, scaled by ``units.cm``."""
        return self._bottom * units.cm

    def adjustLeft(self, width):
        """Reduce the left margin by ``width``.

        ``width`` is given in the same unit the properties return, so it is
        divided by ``units.cm`` before being subtracted from the stored
        centimetre value.
        """
        self._left -= width / units.cm
class PDFCreator(object):
    """Convert a text file containing printer escape codes into a PDF.

    The input is read as text, printer control sequences are translated to
    reportlab paragraph markup, and every input line becomes one
    ``XPreformatted`` flowable.  Lines that are too long for the page are
    rendered in a smaller font so that they fit the content width.

    Args:
        args: parsed command-line namespace (see the argparse setup in this
            file for the attributes that are read).
        margins: a ``Margins`` instance; its left margin is adjusted in
            place when line numbering is requested.
    """

    appName = "txt2pdf (version 1.0)"

    # Inline style tags produced by _update_styling_characters.
    _STYLE_TAG = re.compile(r"<(/?)(strong|b|u|i)>")
    # Same tags captured whole, so re.split keeps them as separate items.
    _STYLE_TAG_SPLIT = re.compile(r"(</?(?:strong|b|u|i)>)")
    # <font ...> / </font> wrappers produced by _process_chr_14.
    _FONT_TAG = re.compile(r"</?font[^>]*>")

    def __init__(self, args, margins):
        pageWidth, pageHeight = reportlab.lib.pagesizes.__dict__[args.media]
        if args.landscape:
            pageWidth, pageHeight = reportlab.lib.pagesizes.landscape(
                (pageWidth, pageHeight))
        # Remembered so the document template is built on the same page
        # size the width calculations below are based on.
        self.pageSize = (pageWidth, pageHeight)

        self.author = args.author
        self.title = args.title
        self.keywords = args.keywords
        self.subject = args.subject
        self.output = args.output

        self.canvas = Canvas(args.output, pagesize=(pageWidth, pageHeight))
        self.canvas.setCreator(self.appName)
        if args.author:
            self.canvas.setAuthor(args.author)
        if args.title:
            self.canvas.setTitle(args.title)
        if args.subject:
            self.canvas.setSubject(args.subject)
        if args.keywords:
            self.canvas.setKeywords(args.keywords)

        self.fontSize = args.font_size
        # The original test was `args.font not in ('Courier')`, which is a
        # substring check against the string 'Courier' (the parentheses do
        # not make a tuple).  Anything other than the built-in default is
        # treated as a path to a TrueType font file.
        if args.font != 'Courier':
            self.font = 'myFont'
            pdfmetrics.registerFont(TTFont('myFont', args.font))
        else:
            self.font = args.font

        self.kerning = args.kerning
        self.margins = margins
        self.leading = (args.extra_vertical_space + 1.2) * self.fontSize
        self.linesPerPage = int(
            (self.leading + pageHeight
             - margins.top - margins.bottom - self.fontSize) / self.leading)
        self.lppLen = len(str(self.linesPerPage))

        fontWidth = self.canvas.stringWidth(
            ".", fontName=self.font, fontSize=self.fontSize)
        # Width of a "." at every integer font size from 1 to 599; used to
        # pick a size at which an over-long line still fits the page.
        self.font_size_vs_width = {
            size: self.canvas.stringWidth(
                ".", fontName=self.font, fontSize=size)
            for size in range(1, 600)
        }
        self.max_chars_in_line = 90

        self.lineNumbering = args.line_numbers
        if self.lineNumbering:
            margins.adjustLeft(fontWidth * (self.lppLen + 2))
        self.contentWidth = pageWidth - margins.left - margins.right

        self.top = pageHeight - margins.top - self.fontSize
        self.filename = args.filename
        self.verbose = not args.quiet
        self.breakOnBlanks = args.break_on_blanks
        self.encoding = args.encoding
        self.pageNumbering = args.page_numbers
        if self.pageNumbering:
            self.pageNumberPlacement = \
                (pageWidth / 2, margins.bottom / 2)

    def _update_styling_characters(self, data):
        """Replace printer control sequences in ``data`` with markup tags.

        Styling sequences become ``<strong>``/``<b>``/``<i>``/``<u>`` tags;
        pitch and miscellaneous control codes are removed.  chr(14) is
        deliberately left in place for ``_process_chr_14``.

        NOTE(review): literal ``<`` or ``&`` characters in the input are not
        escaped here; confirm whether real inputs can contain them.
        """
        chars = {
            "\x1bG": "<strong>",  # Dodstrike
            "\x1bH": "</strong>",  # Candstrike
            "\x1bE": "<b>",  # Doemph
            "\x1bF": "</b>",  # Canemph
            "\x1b4": "<i>",  # Doital
            "\x1b5": "</i>",  # Canital
            "\x1b-1": "<u>",  # Douline
            "\x1b-0": "</u>",  # Canuline
            "\x1bP": "",  # P10cpi (characters per inch)
            "\x1bM": "",  # P12cpi
            "\x1bg": "",  # P15cpi
            # chr(14) is not replaced here; it is handled later by
            # _process_chr_14, which doubles the font size.
            "\x0f": "",  # chr(15): font size /= 2
            "\x12": "",  # chr(18): cancel chr(14)
            "\x1a": "",  # chr(26)
        }
        for sequence, markup in chars.items():
            data = data.replace(sequence, markup)
        return data

    def _get_line_length(self, line):
        """Return the number of characters in ``line`` excluding markup.

        Both the inline style tags and the ``<font>`` wrappers added for
        chr(14) are ignored, so only visible text is counted.
        """
        line = self._STYLE_TAG.sub("", line)
        line = self._FONT_TAG.sub("", line)
        return len(line)

    def _close_tags(self, lines):
        """Make the style markup of every line self-contained and nested.

        Each line is later parsed as its own paragraph, so it must not leave
        a tag open and its tags must be properly nested.  For every line:

        * tags still open at the end of the previous line are re-opened at
          the start of this one;
        * a closing tag that closes an outer tag while inner tags are still
          open closes the inner tags first and re-opens them afterwards;
        * a closing tag with no matching opener is dropped;
        * tags still open at the end of the line are closed innermost first
          and carried over to the next line.

        Returns a new list with one balanced string per input line.
        """
        carried = []  # tag names left open by the previous line, outermost first
        balanced = []
        for line in lines:
            open_tags = list(carried)
            pieces = ["<%s>" % name for name in open_tags]
            pos = 0
            for match in self._STYLE_TAG.finditer(line):
                pieces.append(line[pos:match.start()])
                pos = match.end()
                closing, name = match.group(1), match.group(2)
                if not closing:
                    open_tags.append(name)
                    pieces.append(match.group(0))
                elif name in open_tags:
                    # Index of the most recently opened tag with this name.
                    idx = len(open_tags) - 1 - open_tags[::-1].index(name)
                    inner = open_tags[idx + 1:]
                    pieces.extend("</%s>" % t for t in reversed(inner))
                    pieces.append(match.group(0))
                    pieces.extend("<%s>" % t for t in inner)
                    del open_tags[idx]
                # else: stray closing tag without an opener -- drop it.
            pieces.append(line[pos:])
            pieces.extend("</%s>" % t for t in reversed(open_tags))
            carried = open_tags
            balanced.append("".join(pieces))
        return balanced

    def _process_chr_14(self, lines):
        """Render the text after chr(14) at twice the base font size.

        Everything from the first chr(14) to the end of the line is wrapped
        in ``<font size=...>`` elements.  Each run of text between style
        tags gets its own wrapper so the font element never straddles a
        style tag, and every opening tag has exactly one closing tag.
        Additional chr(14) characters on the same line are removed.
        """
        open_font = f"<font size={self.fontSize*2}>"
        close_font = "</font>"
        new_lines = []
        for line in lines:
            head, marker, tail = line.partition("\x0e")
            if marker:
                tail = tail.replace("\x0e", "")
                rebuilt = [head]
                # Odd positions of the split result are the style tags.
                for i, segment in enumerate(self._STYLE_TAG_SPLIT.split(tail)):
                    if i % 2 or not segment:
                        rebuilt.append(segment)
                    else:
                        rebuilt.append(open_font + segment + close_font)
                line = "".join(rebuilt)
            new_lines.append(line)
        return new_lines

    def _process(self, data):
        """Turn the raw file content into a list of markup lines.

        Every returned line carries balanced opening and closing tags,
        because each line is converted to its own paragraph and an
        unbalanced tag makes the paragraph parser raise an error.
        """
        data = self._update_styling_characters(data)
        # Strip line endings before tags are appended, so a stray '\r' can
        # not end up in front of an inserted closing tag.
        raw_lines = [line.rstrip('\r\n') for line in data.split("\n")]
        lines = self._close_tags(raw_lines)
        return self._process_chr_14(lines)

    def _readDocument(self):
        """Read ``self.filename`` using ``self.encoding`` and process it."""
        with open(self.filename, 'r', encoding=self.encoding) as f:
            data = f.read()
        return self._process(data)

    def _newpage(self):
        """Create a text object positioned at the top-left of a new page.

        Also draws the page number on ``self.canvas`` when page numbering
        is enabled.  Not called by ``_generateCustom``.
        """
        textobject = self.canvas.beginText()
        textobject.setFont(self.font, self.fontSize, leading=self.leading)
        textobject.setTextOrigin(self.margins.left, self.top)
        textobject.setCharSpace(self.kerning)
        if self.pageNumbering:
            self.canvas.drawString(
                self.pageNumberPlacement[0],
                self.pageNumberPlacement[1],
                str(self.canvas.getPageNumber()))
        return textobject

    def _scribble(self, text):
        """Write a progress message to stderr unless running quietly."""
        if self.verbose:
            sys.stderr.write(text + os.linesep)

    def generate(self):
        """Read the input file, write the PDF and report the page count."""
        self._scribble(
            "Writing '%s' with %d max characters per "
            "line and %d lines per page..." %
            (self.filename, self.max_chars_in_line, self.linesPerPage)
        )
        pageno = self._generateCustom(self._readDocument())
        self._scribble("PDF document: %d pages" % pageno)

    def _fit_font_size(self, line_length):
        """Return a font size at which ``line_length`` characters fit.

        The result is the largest tabulated integer size whose line width
        stays below ``self.contentWidth``, never larger than the base font
        size (long lines are only ever shrunk) and never smaller than 1.
        """
        best = 0
        for size in sorted(self.font_size_vs_width):
            if self.font_size_vs_width[size] * line_length < self.contentWidth:
                best = size
            else:
                break
        return max(1, min(best, self.fontSize))

    def _generateCustom(self, data):
        """Build the PDF from ``data`` with one paragraph per line.

        Args:
            data: list of markup lines as returned by ``_readDocument``.

        Returns:
            The number of pages in the generated document.

        NOTE(review): page numbers and line numbers are not drawn on this
        code path even when requested on the command line.
        """
        # Use the configured font, not a hard-coded one, so that --font has
        # an effect and matches the font the width table was measured with.
        base_style = ParagraphStyle(
            "parent", fontName=self.font, fontSize=self.fontSize)

        flowables = []
        for line in data:
            length = self._get_line_length(line)
            if length < self.max_chars_in_line:
                style = base_style
            else:
                style = ParagraphStyle(
                    'new', parent=base_style,
                    fontSize=self._fit_font_size(length))
            flowables.append(XPreformatted(line, style=style))

        # Document metadata is handed to the template, because the canvas
        # created in __init__ is not the one the template writes with.
        metadata = {"creator": self.appName}
        for key in ("title", "author", "subject", "keywords"):
            value = getattr(self, key)
            if value:
                metadata[key] = value

        template = SimpleDocTemplate(
            self.output,
            pagesize=self.pageSize,
            leftMargin=self.margins.left,
            rightMargin=self.margins.right,
            topMargin=self.margins.top,
            bottomMargin=self.margins.bottom,
            **metadata)
        template.build(flowables)
        # After build() the template's page counter holds the last page
        # number, i.e. the number of pages written.
        return template.page
# def _generatePlain(self, data): | |
# pageno = 1 | |
# lineno = 0 | |
# page = self._newpage() | |
# for _, line in data: | |
# lineno += 1 | |
# # Handle form feed characters. | |
# (line, pageBreakCount) = re.subn(r'\f', r'', line) | |
# if pageBreakCount > 0 and lineno >= args.minimum_page_length: | |
# for _ in range(pageBreakCount): | |
# self.canvas.drawText(page) | |
# self.canvas.showPage() | |
# lineno = 0 | |
# pageno += 1 | |
# page = self._newpage() | |
# if args.minimum_page_length > 0: | |
# break | |
# page.textLine(line) | |
# if lineno == self.linesPerPage: | |
# self.canvas.drawText(page) | |
# self.canvas.showPage() | |
# lineno = 0 | |
# pageno += 1 | |
# page = self._newpage() | |
# self.canvas.drawText(page) | |
# if lineno > 0: | |
# self.canvas.drawText(page) | |
# else: | |
# pageno -= 1 | |
# self.canvas.save() | |
# return pageno | |
# Command-line interface.  The parser is built at module level so it can be
# inspected or reused; arguments are only parsed, and the PDF only written,
# when the file is executed as a script.
parser = argparse.ArgumentParser()
parser.add_argument('filename')
parser.add_argument(
    '--font',
    '-f',
    default='Courier',
    help='Select a font (True Type format) by its full path')
parser.add_argument(
    '--font-size',
    '-s',
    type=float,
    default=9.0,
    help='Size of the font')
parser.add_argument(
    '--extra-vertical-space',
    '-v',
    type=float,
    default=0.0,
    help='Extra vertical space between lines')
parser.add_argument(
    '--kerning',
    '-k',
    type=float,
    default=0.0,
    help='Extra horizontal space between characters')
parser.add_argument(
    '--media',
    '-m',
    default='A4',
    help='Select the size of the page (A4, A3, etc.)')
parser.add_argument(
    '--minimum-page-length',
    '-M',
    type=int,
    default=10,
    help='The minimum number of lines before a form feed character will change the page')
parser.add_argument(
    '--landscape',
    '-l',
    action="store_true",
    default=False,
    help='Select landscape mode')
parser.add_argument(
    '--margin-left',
    '-L',
    type=float,
    default=0.5,
    help='Left margin (in cm unit)')
parser.add_argument(
    '--margin-right',
    '-R',
    type=float,
    default=0.5,
    help='Right margin (in cm unit)')
parser.add_argument(
    '--margin-top',
    '-T',
    type=float,
    default=2.0,
    help='Top margin (in cm unit)')
parser.add_argument(
    '--margin-bottom',
    '-B',
    type=float,
    default=2.0,
    help='Bottom margin (in cm unit)')
parser.add_argument(
    '--output',
    '-o',
    default='output.pdf',
    help='Output file')
parser.add_argument(
    '--author',
    default='',
    help='Author of the PDF document')
parser.add_argument(
    '--title',
    default='',
    help='Title of the PDF document')
parser.add_argument(
    '--quiet',
    '-q',
    action='store_true',
    default=False,
    help='Hide detailed information')
parser.add_argument('--subject', default='', help='Subject of the PDF document')
parser.add_argument('--keywords', default='', help='Keywords of the PDF document')
parser.add_argument(
    '--break-on-blanks',
    '-b',
    action='store_true',
    default=False,
    help='Only break page on blank lines')
parser.add_argument(
    '--encoding',
    '-e',
    type=str,
    default='utf8',
    help='Input encoding')
parser.add_argument(
    '--page-numbers',
    '-n',
    action='store_true',
    help='Add page numbers')
parser.add_argument(
    '--line-numbers',
    action='store_true',
    help='Add line numbers')

if __name__ == "__main__":
    # `args` stays a module-level name when run as a script, exactly as
    # before; generate() returns nothing, so its result is not kept.
    args = parser.parse_args()
    PDFCreator(args, Margins(
        args.margin_right,
        args.margin_left,
        args.margin_top,
        args.margin_bottom)).generate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment