Skip to content

Instantly share code, notes, and snippets.

@lelit
Last active June 21, 2018 08:04
Show Gist options
  • Save lelit/9c1cba52fd6dd9f1123fe82ce4b788db to your computer and use it in GitHub Desktop.
Save lelit/9c1cba52fd6dd9f1123fe82ce4b788db to your computer and use it in GitHub Desktop.
Injecting Pyphen hyphenation into a ReportLab (RL) Paragraph
from pyphen import Pyphen
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.pdfbase.pdfmetrics import stringWidth, getFont, getAscentDescent
from reportlab.platypus import ImageAndFlowables, SimpleDocTemplate, Image
from reportlab.platypus.paragraph import (Paragraph, split, strip, _handleBulletWidth,
                                          _processed_frags, _SplitText, _getFragWords,
                                          _FK_BREAK, _FK_IMG, _FK_TEXT, _HSWord, sameFrag,
                                          _SplitList, FragLine, ParaLines,
                                          _splitWord, _splitFragWord, imgNormV, imgVRange)
# Stock ReportLab style sheet; the demo below only looks up its 'Normal' entry.
styles = getSampleStyleSheet()

# Target document: an A4 page with 144pt side margins, a 72pt top margin and
# an 18pt bottom margin, written to "sample1.pdf".
doc = SimpleDocTemplate(
    "sample1.pdf",
    pagesize=A4,
    rightMargin=144,
    leftMargin=144,
    topMargin=72,
    bottomMargin=18,
)

# Sample Italian paragraph without any inline markup.
text = """\
A differenza del primo caso, dove la libreria implementa uno standard eticamente superiore, in questo l'adozione senza un forte motivo non serve a compiere alcun obiettivo particolare, motivo per il quale non c'è alcun motivo valido per evitare il copyleft. Ma, se chiedete agli utenti della vostra libreria di rilasciare i propri programmi sotto copyleft, questi passeranno semplicemente ad una delle alternative disponibili, senza avanzare la nostra causa. La Lesser GPL è stata creata proprio per fare da ponte tra questi casi, permettendo agli sviluppatori di software proprietario di usare la libreria senza però privare gli utenti delle libertà relative al codice stesso della libreria.
"""

# The same paragraph with one word wrapped in <b>...</b> markup.
# NOTE(review): presumably meant to exercise the multi-fragment branch of
# PyphenParagraph.breakLines -- confirm against ReportLab's markup parser.
text2 = """\
A differenza del primo caso, dove la libreria implementa uno standard eticamente superiore, in questo l'adozione senza un forte motivo non serve a compiere alcun obiettivo particolare, motivo per il quale non c'è alcun motivo valido per evitare il copyleft. Ma, se chiedete agli utenti della vostra libreria di rilasciare i propri programmi sotto copyleft, questi <b>passeranno</b> semplicemente ad una delle alternative disponibili, senza avanzare la nostra causa. La Lesser GPL è stata creata proprio per fare da ponte tra questi casi, permettendo agli sviluppatori di software proprietario di usare la libreria senza però privare gli utenti delle libertà relative al codice stesso della libreria.
"""
class PyphenParagraph(Paragraph):
    """A ``Paragraph`` that tries to hyphenate a word before wrapping it.

    ``breakLines()`` is a copy of the parent implementation with one extra
    step: when the next word does not fit on the current line and a
    hyphenator is available, the word is split at a hyphenation point so that
    its head (plus a trailing ``-``) fills the remaining space and its tail is
    carried over as a separate word.

    :param hyphenator: keyword-only; an object exposing ``iterate(word)``
        that yields ``(head, tail)`` pairs (e.g. a ``pyphen.Pyphen``
        instance), or ``None`` to disable hyphenation.

    All other positional and keyword arguments are passed to ``Paragraph``.
    """

    def __init__(self, *args, hyphenator=None, **kwargs):
        self.hyphenator = hyphenator
        super().__init__(*args, **kwargs)

    def hyphenateWord(self, word, availWidth, fontName, fontSize):
        """Split ``word`` so that its head plus a hyphen fits in ``availWidth``.

        The candidates produced by ``self.hyphenator.iterate(word)`` are tried
        in order and the first one that fits is used (Pyphen documents
        ``iterate()`` as yielding the longest head first).

        :returns: ``(head, tail)`` where ``head`` is a ``_SplitText`` ending
            with ``-`` and ``tail`` is the rest of the word, or ``None`` when
            no candidate fits.
        """
        for head, tail in self.hyphenator.iterate(word):
            head += '-'
            width = stringWidth(head, fontName, fontSize, self.encoding)
            if width <= availWidth:
                # _SplitText marks the head so breakLines() leaves it alone.
                return _SplitText(head), tail
        return None

    def breakLines(self, width):
        """
        Returns a broken line structure. There are two cases

        A) For the simple case of a single formatting input fragment the output is
            A fragment specifier with
                - kind = 0
                - fontName, fontSize, leading, textColor
                - lines=  A list of lines

                        Each line has two items.

                        1. unused width in points
                        2. word list

        B) When there is more than one input formatting fragment the output is
            A fragment specifier with
               - kind = 1
               - lines=  A list of fragments each having fields
                            - extraspace (needed for justified)
                            - fontSize
                            - words=word list
                                each word is itself a fragment with
                                various settings
            in addition frags becomes a frag word list

        This structure can be used to easily draw paragraphs with the various alignments.
        You can supply either a single width or a list of widths; the latter will have its
        last item repeated until necessary. A 2-element list is useful when there is a
        different first line indent; a longer list could be created to facilitate custom wraps
        around irregular objects.

        In both cases, when ``self.hyphenator`` is not ``None``, a word that
        overflows the current line is first offered to ``hyphenateWord()``;
        in case B this happens only for words made of a single fragment and
        only when ``style.splitLongWords`` is set.
        """
        self._width_max = 0
        if not isinstance(width,(tuple,list)): maxWidths = [width]
        else: maxWidths = width
        lines = []
        self.height = lineno = 0
        maxlineno = len(maxWidths)-1
        style = self.style
        spaceShrinkage = style.spaceShrinkage
        splitLongWords = style.splitLongWords
        self._splitLongWordCount = 0

        #for bullets, work out width and ensure we wrap the right amount onto line one
        _handleBulletWidth(self.bulletText,style,maxWidths)

        maxWidth = maxWidths[0]

        autoLeading = getattr(self,'autoLeading',getattr(style,'autoLeading',''))
        calcBounds = autoLeading not in ('','off')
        frags = self.frags
        nFrags= len(frags)
        if (nFrags==1
                and not (style.endDots or hasattr(frags[0],'cbDefn') or hasattr(frags[0],'backColor')
                         or _processed_frags(frags))):
            # Case A: a single plain fragment, words are simple strings.
            f = frags[0]
            fontSize = f.fontSize
            fontName = f.fontName
            ascent, descent = getAscentDescent(fontName,fontSize)
            if hasattr(f,'text'):
                text = strip(f.text)
                if not text:
                    return f.clone(kind=0, lines=[],ascent=ascent,descent=descent,fontSize=fontSize)
                else:
                    words = split(text)
            else:
                words = f.words[:]
                for w in words:
                    if strip(w): break
                else:
                    return f.clone(kind=0, lines=[],ascent=ascent,descent=descent,fontSize=fontSize)
            spaceWidth = stringWidth(' ', fontName, fontSize, self.encoding)
            cLine = []
            currentWidth = -spaceWidth   # hack to get around extra space for word 1
            while words:
                word = words.pop(0)
                #this underscores my feeling that Unicode throughout would be easier!
                wordWidth = stringWidth(word, fontName, fontSize, self.encoding)
                newWidth = currentWidth + spaceWidth + wordWidth
                if newWidth>maxWidth:
                    # The word overflows: try to hyphenate it into the space
                    # left on this line. Heads produced by a previous split
                    # are _SplitText and are never split again.
                    if self.hyphenator is not None and not isinstance(word, _SplitText):
                        pair = self.hyphenateWord(word, maxWidth - spaceWidth - currentWidth,
                                                  fontName, fontSize)
                        if pair is not None:
                            # Push head and tail back and process them as
                            # two ordinary words.
                            words[0:0] = pair
                            continue
                    nmw = min(lineno,maxlineno)
                    if wordWidth>max(maxWidths[nmw:nmw+1]) and not isinstance(word,_SplitText) and splitLongWords:
                        #a long word
                        words[0:0] = _splitWord(word,maxWidth-spaceWidth-currentWidth,maxWidths,lineno,fontName,fontSize,self.encoding)
                        self._splitLongWordCount += 1
                        continue
                if newWidth <= maxWidth or not len(cLine):
                    # fit one more on this line
                    cLine.append(word)
                    currentWidth = newWidth
                else:
                    if currentWidth > self._width_max: self._width_max = currentWidth
                    #end of line
                    lines.append((maxWidth - currentWidth, cLine))
                    cLine = [word]
                    currentWidth = wordWidth
                    lineno += 1
                    maxWidth = maxWidths[min(maxlineno,lineno)]

            #deal with any leftovers on the final line
            if cLine!=[]:
                if currentWidth>self._width_max: self._width_max = currentWidth
                lines.append((maxWidth - currentWidth, cLine))

            return f.clone(kind=0, lines=lines,ascent=ascent,descent=descent,fontSize=fontSize)
        elif nFrags<=0:
            return ParaLines(kind=0, fontSize=style.fontSize, fontName=style.fontName,
                             textColor=style.textColor, ascent=style.fontSize,descent=-0.2*style.fontSize,
                             lines=[])
        else:
            # Case B: several fragments. Each entry of _words is a list
            # [width, (frag, text), (frag, text), ...].
            njlbv = not style.justifyBreaks
            words = []
            FW = []
            aFW = FW.append
            _words = _getFragWords(frags,maxWidth)
            sFW = 0
            while _words:
                w = _words.pop(0)
                aFW(w)
                f = w[-1][0]
                fontName = f.fontName
                fontSize = f.fontSize

                if not words:
                    n = space = spaceWidth = currentWidth = 0
                    maxSize = fontSize
                    maxAscent, minDescent = getAscentDescent(fontName,fontSize)

                wordWidth = w[0]
                f = w[1][0]
                if wordWidth>0:
                    newWidth = currentWidth + spaceWidth + wordWidth
                else:
                    newWidth = currentWidth

                #test to see if this frag is a line break. If it is we will only act on it
                #if the current width is non-negative or the previous thing was a deliberate lineBreak
                lineBreak = f._fkind==_FK_BREAK
                if not lineBreak and newWidth>(maxWidth+space*spaceShrinkage) and not isinstance(w,_SplitList) and splitLongWords:
                    # Hyphenate only words made of exactly one (frag, text)
                    # pair: the replacement words below are built from w[1]
                    # alone, so any further pieces in w[2:] would be dropped.
                    if self.hyphenator is not None and len(w) == 2:
                        pair = self.hyphenateWord(w[1][1], maxWidth - spaceWidth - currentWidth,
                                                  fontName, fontSize)
                        if pair is not None:
                            head, tail = pair
                            # Head and tail each get their own clone of the
                            # original fragment.
                            hpara = w[1][0].clone()
                            h = [stringWidth(head, fontName, fontSize), (hpara, head)]
                            tpara = w[1][0].clone()
                            t = [stringWidth(tail, fontName, fontSize), (tpara, tail)]
                            # The tail takes over the original word's
                            # _HSWord status; the head never gets one.
                            _words[0:0] = [h, _HSWord(t) if isinstance(w, _HSWord) else t]
                            FW.pop(-1)  # remove this as we are doing this one again
                            continue
                    nmw = min(lineno,maxlineno)
                    if wordWidth>max(maxWidths[nmw:nmw+1]):
                        #a long word
                        _words[0:0] = _splitFragWord(w,maxWidth-spaceWidth-currentWidth,maxWidths,lineno)
                        FW.pop(-1)  #remove this as we are doing this one again
                        self._splitLongWordCount += 1
                        continue
                endLine = (newWidth>(maxWidth+space*spaceShrinkage) and n>0) or lineBreak
                if not endLine:
                    if lineBreak: continue      #throw it away
                    nText = w[1][1]
                    if nText: n += 1
                    fontSize = f.fontSize
                    if calcBounds:
                        if f._fkind==_FK_IMG:
                            descent,ascent = imgVRange(imgNormV(f.cbDefn.height,fontSize),f.cbDefn.valign,fontSize)
                        else:
                            ascent, descent = getAscentDescent(f.fontName,fontSize)
                    else:
                        ascent, descent = getAscentDescent(f.fontName,fontSize)
                    maxSize = max(maxSize,fontSize)
                    maxAscent = max(maxAscent,ascent)
                    minDescent = min(minDescent,descent)
                    if not words:
                        g = f.clone()
                        words = [g]
                        g.text = nText
                    elif not sameFrag(g,f):
                        if spaceWidth:
                            i = len(words)-1
                            while i>=0:
                                wi = words[i]
                                i -= 1
                                if wi._fkind==_FK_TEXT:
                                    if not wi.text.endswith(' '):
                                        wi.text += ' '
                                        space += spaceWidth
                                    break
                        g = f.clone()
                        words.append(g)
                        g.text = nText
                    elif spaceWidth:
                        if not g.text.endswith(' '):
                            g.text += ' ' + nText
                            space += spaceWidth
                        else:
                            g.text += nText
                    else:
                        g.text += nText

                    spaceWidth = stringWidth(' ',fontName,fontSize) if isinstance(w,_HSWord) else 0 #of the space following this word

                    ni = 0
                    for i in w[2:]:
                        g = i[0].clone()
                        g.text=i[1]
                        if g.text: ni = 1
                        words.append(g)
                        fontSize = g.fontSize
                        if calcBounds:
                            if g._fkind==_FK_IMG:
                                descent,ascent = imgVRange(imgNormV(g.cbDefn.height,fontSize),g.cbDefn.valign,fontSize)
                            else:
                                ascent, descent = getAscentDescent(g.fontName,fontSize)
                        else:
                            ascent, descent = getAscentDescent(g.fontName,fontSize)
                        maxSize = max(maxSize,fontSize)
                        maxAscent = max(maxAscent,ascent)
                        minDescent = min(minDescent,descent)
                    if not nText and ni:
                        #one bit at least of the word was real
                        n+=1

                    currentWidth = newWidth
                else:  #either it won't fit, or it's a lineBreak tag
                    if lineBreak:
                        g = f.clone()
                        #del g.lineBreak
                        words.append(g)

                    if currentWidth>self._width_max: self._width_max = currentWidth
                    #end of line
                    lines.append(FragLine(extraSpace=maxWidth-currentWidth, wordCount=n,
                                          lineBreak=lineBreak and njlbv, words=words, fontSize=maxSize, ascent=maxAscent, descent=minDescent, maxWidth=maxWidth,
                                          sFW=sFW))
                    sFW = len(FW)-1

                    #start new line
                    lineno += 1
                    maxWidth = maxWidths[min(maxlineno,lineno)]

                    if lineBreak:
                        words = []
                        continue

                    spaceWidth = stringWidth(' ',fontName,fontSize) if isinstance(w,_HSWord) else 0 #of the space following this word
                    currentWidth = wordWidth
                    n = 1
                    space = 0
                    g = f.clone()
                    maxSize = g.fontSize
                    if calcBounds:
                        if g._fkind==_FK_IMG:
                            descent,ascent = imgVRange(imgNormV(g.cbDefn.height,fontSize),g.cbDefn.valign,fontSize)
                        else:
                            maxAscent, minDescent = getAscentDescent(g.fontName,maxSize)
                    else:
                        maxAscent, minDescent = getAscentDescent(g.fontName,maxSize)
                    words = [g]
                    g.text = w[1][1]

                    for i in w[2:]:
                        g = i[0].clone()
                        g.text=i[1]
                        words.append(g)
                        fontSize = g.fontSize
                        if calcBounds:
                            if g._fkind==_FK_IMG:
                                descent,ascent = imgVRange(imgNormV(g.cbDefn.height,fontSize),g.cbDefn.valign,fontSize)
                            else:
                                ascent, descent = getAscentDescent(g.fontName,fontSize)
                        else:
                            ascent, descent = getAscentDescent(g.fontName,fontSize)
                        maxSize = max(maxSize,fontSize)
                        maxAscent = max(maxAscent,ascent)
                        minDescent = min(minDescent,descent)

            #deal with any leftovers on the final line
            if words:
                if currentWidth>self._width_max: self._width_max = currentWidth
                lines.append(ParaLines(extraSpace=(maxWidth - currentWidth),wordCount=n,lineBreak=False,
                                       words=words, fontSize=maxSize,ascent=maxAscent,descent=minDescent,maxWidth=maxWidth,sFW=sFW))
            self.frags = FW
            return ParaLines(kind=1, lines=lines)

    def split(self, availWidth, availHeight):
        """Split like the parent and hand the hyphenator to every piece.

        Propagate the hyphenator to the split paragraphs: parent's split() uses
        "self.__class__(foo, bar, spam=eggs)" to create them, so they start
        out with ``hyphenator=None``.

        :returns: whatever list the parent ``split()`` returned.
        """
        parts = super().split(availWidth, availHeight)
        # Do not assume exactly two pieces: the list may be empty or, per
        # ReportLab's Paragraph.split(), contain a single element.
        # NOTE(review): confirm the single-element case against the installed
        # ReportLab version.
        for part in parts:
            part.hyphenator = self.hyphenator
        return parts
# Hyphenation dictionary (language code 'it_IT') shared by all paragraphs.
hyphenator = Pyphen(lang='it_IT')

normal_style = styles['Normal']

# The markup-free paragraph; the same list object is handed both to the
# ImageAndFlowables wrapper and to the top-level story.
elts = [PyphenParagraph(text, normal_style, hyphenator=hyphenator)]

# Story, in order: paragraph beside an image, the same paragraph on its own,
# then the variant containing <b> markup.
story = [ImageAndFlowables(Image('pythonpowered.gif', 20, 20), elts, imageSide='left')]
story.extend(elts)
story.append(PyphenParagraph(text2, normal_style, hyphenator=hyphenator))

doc.build(story)
print("Created sample1.pdf")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment