Last active
June 21, 2018 08:04
-
-
Save lelit/9c1cba52fd6dd9f1123fe82ce4b788db to your computer and use it in GitHub Desktop.
Injecting Pyphen into RL Paragraph
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyphen import Pyphen
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.pdfbase.pdfmetrics import stringWidth, getFont, getAscentDescent
from reportlab.platypus import ImageAndFlowables, SimpleDocTemplate, Image
from reportlab.platypus.paragraph import (Paragraph, split, strip, _handleBulletWidth,
                                          _processed_frags, _SplitText, _getFragWords,
                                          _FK_BREAK, _FK_IMG, _FK_TEXT, _HSWord, sameFrag,
                                          _SplitList, FragLine, ParaLines,
                                          _splitWord, _splitFragWord, imgVRange, imgNormV)
# Sample stylesheet from ReportLab; the demo below only looks up 'Normal'.
styles = getSampleStyleSheet()

# Output document: A4 page with 144pt left/right margins.
# NOTE(review): the wide side margins presumably exist to get a narrow column
# with many line breaks, so that hyphenation is actually exercised - confirm.
doc = SimpleDocTemplate("sample1.pdf", pagesize=A4,
                        rightMargin=144, leftMargin=144,
                        topMargin=72, bottomMargin=18)

# Plain sample paragraph (Italian) without any inline markup.
text = """\
A differenza del primo caso, dove la libreria implementa uno standard eticamente superiore, in questo l'adozione senza un forte motivo non serve a compiere alcun obiettivo particolare, motivo per il quale non c'è alcun motivo valido per evitare il copyleft. Ma, se chiedete agli utenti della vostra libreria di rilasciare i propri programmi sotto copyleft, questi passeranno semplicemente ad una delle alternative disponibili, senza avanzare la nostra causa. La Lesser GPL è stata creata proprio per fare da ponte tra questi casi, permettendo agli sviluppatori di software proprietario di usare la libreria senza però privare gli utenti delle libertà relative al codice stesso della libreria.
"""

# Same paragraph with a single word wrapped in <b>...</b>.  It is derived from
# `text` instead of being pasted a second time so the two samples cannot drift.
text2 = text.replace("passeranno", "<b>passeranno</b>")
class PyphenParagraph(Paragraph):
    """A ``Paragraph`` that hyphenates overflowing words with a Pyphen dictionary.

    ``breakLines()`` is a copy of the parent's line-breaking routine with two
    additions: before a word that does not fit is moved to the next line (or
    split as a "long word"), the paragraph asks ``self.hyphenator`` for a
    hyphenation point whose head still fits on the current line.

    When ``hyphenator`` is ``None`` no hyphenation is attempted.
    """

    def __init__(self, *args, hyphenator=None, **kwargs):
        """Create the paragraph.

        :param hyphenator: object exposing ``iterate(word)`` that yields
            ``(head, tail)`` pairs (e.g. a ``pyphen.Pyphen`` instance), or
            ``None`` to disable hyphenation.

        All other positional/keyword arguments are forwarded to ``Paragraph``.
        """
        # Set before calling the parent constructor so the attribute exists
        # whatever the parent does during its own set-up.
        self.hyphenator = hyphenator
        super().__init__(*args, **kwargs)

    def hyphenateWord(self, word, availWidth, fontName, fontSize):
        """Split ``word`` so that its hyphenated head fits in ``availWidth``.

        Candidates are taken from ``self.hyphenator.iterate(word)`` in the
        order it yields them (Pyphen documents this as longest head first) and
        the first one whose head, with a trailing ``'-'``, is not wider than
        ``availWidth`` wins.

        :returns: ``(head, tail)`` where ``head`` is a ``_SplitText`` ending in
            ``'-'`` and ``tail`` is the remaining plain string, or ``None``
            when there is no hyphenator or no candidate fits.
        """
        if self.hyphenator is None or availWidth < 0:
            # Nothing can fit in a negative width; skip the dictionary lookup.
            return None
        for head, tail in self.hyphenator.iterate(word):
            head += '-'
            if stringWidth(head, fontName, fontSize, self.encoding) <= availWidth:
                return _SplitText(head), tail
        return None

    def _hyphenateFragWord(self, w, availWidth, fontName, fontSize):
        """Hyphenate the frag word ``w`` into a head and a tail frag word.

        ``w`` has the shape ``[width, (frag, text), ...]``.  Only words made of
        a single ``(frag, text)`` piece are handled: rebuilding a word from its
        first piece alone would silently drop the remaining pieces.

        :returns: ``[headWord, tailWord]`` ready to be pushed back on the word
            queue, or ``None`` when the word cannot be hyphenated to fit.
        """
        if self.hyphenator is None or len(w) != 2:
            return None
        frag, fragText = w[1]
        if not fragText:
            return None
        pair = self.hyphenateWord(fragText, availWidth, fontName, fontSize)
        if pair is None:
            return None
        head, tail = pair
        # Each new word gets its own clone of the originating fragment.
        headWord = [stringWidth(head, fontName, fontSize), (frag.clone(), head)]
        tailWord = [stringWidth(tail, fontName, fontSize), (frag.clone(), tail)]
        if isinstance(w, _HSWord):
            # The space that followed the original word now follows its tail.
            tailWord = _HSWord(tailWord)
        return [headWord, tailWord]

    def breakLines(self, width):
        """Return a broken line structure.

        There are two cases.

        A) For the simple case of a single formatting input fragment the
           output is a fragment specifier with

           - kind = 0
           - fontName, fontSize, leading, textColor
           - lines = a list of lines; each line has two items:

             1. unused width in points
             2. word list

        B) When there is more than one input formatting fragment the output
           is a fragment specifier with

           - kind = 1
           - lines = a list of fragments each having fields

             - extraspace (needed for justified)
             - fontSize
             - words = word list; each word is itself a fragment with
               various settings

           In addition ``self.frags`` becomes a frag word list.

        This structure can be used to easily draw paragraphs with the various
        alignments.  You can supply either a single width or a list of widths;
        the latter will have its last item repeated until necessary.  A
        2-element list is useful when there is a different first line indent;
        a longer list could be created to facilitate custom wraps around
        irregular objects.

        Compared with the parent implementation, a word that overflows the
        current line is first offered to the hyphenator (see
        ``hyphenateWord``); if a fitting split exists the two halves are put
        back at the front of the word queue and processed again.
        """
        self._width_max = 0
        if not isinstance(width, (tuple, list)):
            maxWidths = [width]
        else:
            maxWidths = width
        lines = []
        self.height = lineno = 0
        maxlineno = len(maxWidths) - 1
        style = self.style
        spaceShrinkage = style.spaceShrinkage
        splitLongWords = style.splitLongWords
        self._splitLongWordCount = 0

        # for bullets, work out width and ensure we wrap the right amount onto line one
        _handleBulletWidth(self.bulletText, style, maxWidths)

        maxWidth = maxWidths[0]

        autoLeading = getattr(self, 'autoLeading', getattr(style, 'autoLeading', ''))
        calcBounds = autoLeading not in ('', 'off')
        frags = self.frags
        nFrags = len(frags)
        if (nFrags == 1
                and not (style.endDots or hasattr(frags[0], 'cbDefn') or hasattr(frags[0], 'backColor')
                         or _processed_frags(frags))):
            # Case A: a single plain fragment, words are plain strings.
            f = frags[0]
            fontSize = f.fontSize
            fontName = f.fontName
            ascent, descent = getAscentDescent(fontName, fontSize)
            if hasattr(f, 'text'):
                text = strip(f.text)
                if not text:
                    return f.clone(kind=0, lines=[], ascent=ascent, descent=descent, fontSize=fontSize)
                else:
                    words = split(text)
            else:
                words = f.words[:]
                for w in words:
                    if strip(w):
                        break
                else:
                    # every word is blank: nothing to lay out
                    return f.clone(kind=0, lines=[], ascent=ascent, descent=descent, fontSize=fontSize)
            spaceWidth = stringWidth(' ', fontName, fontSize, self.encoding)
            cLine = []
            currentWidth = -spaceWidth   # hack to get around extra space for word 1
            while words:
                word = words.pop(0)
                wordWidth = stringWidth(word, fontName, fontSize, self.encoding)
                newWidth = currentWidth + spaceWidth + wordWidth
                if newWidth > maxWidth:
                    # Try to keep a hyphenated head on the current line.  Pieces
                    # that are already _SplitText are never split again.
                    if self.hyphenator is not None and not isinstance(word, _SplitText):
                        pair = self.hyphenateWord(word, maxWidth - spaceWidth - currentWidth,
                                                  fontName, fontSize)
                        if pair is not None:
                            # re-queue head and tail; the head fits by construction
                            words[0:0] = pair
                            continue
                    nmw = min(lineno, maxlineno)
                    if wordWidth > max(maxWidths[nmw:nmw+1]) and not isinstance(word, _SplitText) and splitLongWords:
                        # a long word
                        words[0:0] = _splitWord(word, maxWidth-spaceWidth-currentWidth, maxWidths,
                                                lineno, fontName, fontSize, self.encoding)
                        self._splitLongWordCount += 1
                        continue
                if newWidth <= maxWidth or not len(cLine):
                    # fit one more on this line
                    cLine.append(word)
                    currentWidth = newWidth
                else:
                    if currentWidth > self._width_max:
                        self._width_max = currentWidth
                    # end of line
                    lines.append((maxWidth - currentWidth, cLine))
                    cLine = [word]
                    currentWidth = wordWidth
                    lineno += 1
                    maxWidth = maxWidths[min(maxlineno, lineno)]

            # deal with any leftovers on the final line
            if cLine != []:
                if currentWidth > self._width_max:
                    self._width_max = currentWidth
                lines.append((maxWidth - currentWidth, cLine))

            return f.clone(kind=0, lines=lines, ascent=ascent, descent=descent, fontSize=fontSize)
        elif nFrags <= 0:
            return ParaLines(kind=0, fontSize=style.fontSize, fontName=style.fontName,
                             textColor=style.textColor, ascent=style.fontSize, descent=-0.2*style.fontSize,
                             lines=[])
        else:
            # Case B: several fragments, words are [width, (frag, text), ...] lists.
            njlbv = not style.justifyBreaks
            words = []
            FW = []
            aFW = FW.append
            _words = _getFragWords(frags, maxWidth)
            sFW = 0
            while _words:
                w = _words.pop(0)
                aFW(w)
                f = w[-1][0]
                fontName = f.fontName
                fontSize = f.fontSize

                if not words:
                    # starting a fresh line: reset the per-line accumulators
                    n = space = spaceWidth = currentWidth = 0
                    maxSize = fontSize
                    maxAscent, minDescent = getAscentDescent(fontName, fontSize)

                wordWidth = w[0]
                f = w[1][0]
                if wordWidth > 0:
                    newWidth = currentWidth + spaceWidth + wordWidth
                else:
                    newWidth = currentWidth

                # test to see if this frag is a line break. If it is we will only act on it
                # if the current width is non-negative or the previous thing was a deliberate lineBreak
                lineBreak = f._fkind == _FK_BREAK
                if (not lineBreak and newWidth > (maxWidth+space*spaceShrinkage)
                        and not isinstance(w, _SplitList) and splitLongWords):
                    # Note: in this branch hyphenation is only reached when
                    # style.splitLongWords is true, unlike case A above.
                    pieces = self._hyphenateFragWord(w, maxWidth - spaceWidth - currentWidth,
                                                     fontName, fontSize)
                    if pieces is not None:
                        _words[0:0] = pieces
                        FW.pop(-1)  # remove this as we are doing this one again
                        continue
                    nmw = min(lineno, maxlineno)
                    if wordWidth > max(maxWidths[nmw:nmw+1]):
                        # a long word
                        _words[0:0] = _splitFragWord(w, maxWidth-spaceWidth-currentWidth, maxWidths, lineno)
                        FW.pop(-1)  # remove this as we are doing this one again
                        self._splitLongWordCount += 1
                        continue
                endLine = (newWidth > (maxWidth+space*spaceShrinkage) and n > 0) or lineBreak
                if not endLine:
                    if lineBreak:
                        continue      # throw it away
                    nText = w[1][1]
                    if nText:
                        n += 1
                    fontSize = f.fontSize
                    if calcBounds:
                        if f._fkind == _FK_IMG:
                            descent, ascent = imgVRange(imgNormV(f.cbDefn.height, fontSize),
                                                        f.cbDefn.valign, fontSize)
                        else:
                            ascent, descent = getAscentDescent(f.fontName, fontSize)
                    else:
                        ascent, descent = getAscentDescent(f.fontName, fontSize)
                    maxSize = max(maxSize, fontSize)
                    maxAscent = max(maxAscent, ascent)
                    minDescent = min(minDescent, descent)
                    if not words:
                        g = f.clone()
                        words = [g]
                        g.text = nText
                    elif not sameFrag(g, f):
                        if spaceWidth:
                            # put the pending space on the last text fragment of the line
                            i = len(words) - 1
                            while i >= 0:
                                wi = words[i]
                                i -= 1
                                if wi._fkind == _FK_TEXT:
                                    if not wi.text.endswith(' '):
                                        wi.text += ' '
                                        space += spaceWidth
                                    break
                        g = f.clone()
                        words.append(g)
                        g.text = nText
                    elif spaceWidth:
                        if not g.text.endswith(' '):
                            g.text += ' ' + nText
                            space += spaceWidth
                        else:
                            g.text += nText
                    else:
                        g.text += nText

                    # width of the space following this word
                    spaceWidth = stringWidth(' ', fontName, fontSize) if isinstance(w, _HSWord) else 0

                    ni = 0
                    for i in w[2:]:
                        g = i[0].clone()
                        g.text = i[1]
                        if g.text:
                            ni = 1
                        words.append(g)
                        fontSize = g.fontSize
                        if calcBounds:
                            if g._fkind == _FK_IMG:
                                descent, ascent = imgVRange(imgNormV(g.cbDefn.height, fontSize),
                                                            g.cbDefn.valign, fontSize)
                            else:
                                ascent, descent = getAscentDescent(g.fontName, fontSize)
                        else:
                            ascent, descent = getAscentDescent(g.fontName, fontSize)
                        maxSize = max(maxSize, fontSize)
                        maxAscent = max(maxAscent, ascent)
                        minDescent = min(minDescent, descent)
                    if not nText and ni:
                        # one bit at least of the word was real
                        n += 1

                    currentWidth = newWidth
                else:  # either it won't fit, or it's a lineBreak tag
                    if lineBreak:
                        g = f.clone()
                        words.append(g)

                    if currentWidth > self._width_max:
                        self._width_max = currentWidth
                    # end of line
                    lines.append(FragLine(extraSpace=maxWidth-currentWidth, wordCount=n,
                                          lineBreak=lineBreak and njlbv, words=words, fontSize=maxSize,
                                          ascent=maxAscent, descent=minDescent, maxWidth=maxWidth,
                                          sFW=sFW))
                    sFW = len(FW) - 1

                    # start new line
                    lineno += 1
                    maxWidth = maxWidths[min(maxlineno, lineno)]

                    if lineBreak:
                        words = []
                        continue

                    # width of the space following this word
                    spaceWidth = stringWidth(' ', fontName, fontSize) if isinstance(w, _HSWord) else 0
                    currentWidth = wordWidth
                    n = 1
                    space = 0
                    g = f.clone()
                    maxSize = g.fontSize
                    if calcBounds:
                        if g._fkind == _FK_IMG:
                            # NOTE(review): this assigns descent/ascent, not
                            # maxAscent/minDescent, so an image opening a line
                            # leaves the previous line's bounds in place.  Kept
                            # as in the code this method was copied from -
                            # confirm whether that is intended.
                            descent, ascent = imgVRange(imgNormV(g.cbDefn.height, fontSize),
                                                        g.cbDefn.valign, fontSize)
                        else:
                            maxAscent, minDescent = getAscentDescent(g.fontName, maxSize)
                    else:
                        maxAscent, minDescent = getAscentDescent(g.fontName, maxSize)
                    words = [g]
                    g.text = w[1][1]

                    for i in w[2:]:
                        g = i[0].clone()
                        g.text = i[1]
                        words.append(g)
                        fontSize = g.fontSize
                        if calcBounds:
                            if g._fkind == _FK_IMG:
                                descent, ascent = imgVRange(imgNormV(g.cbDefn.height, fontSize),
                                                            g.cbDefn.valign, fontSize)
                            else:
                                ascent, descent = getAscentDescent(g.fontName, fontSize)
                        else:
                            ascent, descent = getAscentDescent(g.fontName, fontSize)
                        maxSize = max(maxSize, fontSize)
                        maxAscent = max(maxAscent, ascent)
                        minDescent = min(minDescent, descent)

            # deal with any leftovers on the final line
            if words:
                if currentWidth > self._width_max:
                    self._width_max = currentWidth
                lines.append(ParaLines(extraSpace=(maxWidth - currentWidth), wordCount=n, lineBreak=False,
                                       words=words, fontSize=maxSize, ascent=maxAscent, descent=minDescent,
                                       maxWidth=maxWidth, sFW=sFW))
            self.frags = FW
            return ParaLines(kind=1, lines=lines)

    def split(self, availWidth, availHeight):
        """Split the paragraph and hand the hyphenator to every resulting piece.

        The parent's ``split()`` creates the pieces with
        ``self.__class__(foo, bar, spam=eggs)``, so they would otherwise be
        built without a hyphenator.

        :returns: whatever the parent's ``split()`` returns, with the
            ``hyphenator`` attribute set on each element.
        """
        pieces = super().split(availWidth, availHeight)
        # Iterate instead of indexing [0] and [1]: the result is not guaranteed
        # to hold exactly two paragraphs (it may be empty or a single item).
        for piece in pieces:
            piece.hyphenator = self.hyphenator
        return pieces
# Italian hyphenation dictionary shared by all the sample paragraphs.
hyphenator = Pyphen(lang='it_IT')

# The plain-text paragraph; the same list is used both as the content flowed
# next to the image and, again, as a stand-alone paragraph after it.
elts = [PyphenParagraph(text, styles['Normal'], hyphenator=hyphenator)]

# Story: image with the paragraph flowing beside it, the paragraph on its own,
# then the variant containing inline <b> markup.
doc.build([ImageAndFlowables(Image('pythonpowered.gif', 20, 20), elts, imageSide='left')]
          + elts
          + [PyphenParagraph(text2, styles['Normal'], hyphenator=hyphenator)])
print("Created sample1.pdf")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment