Created
February 14, 2020 10:26
-
-
Save gforcada/089ba00537e88983d8e3ed1d5560a744 to your computer and use it in GitHub Desktop.
Normalize all fields of any content type being created (one has to hook that to an event handler)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from plone.app.textfield import RichText | |
from plone.app.textfield.value import RichTextValue | |
from plone.behavior.interfaces import IBehavior | |
from plone.dexterity.interfaces import IDexterityFTI | |
from zope.component import getUtility | |
from zope.schema import Text | |
from zope.schema import TextLine | |
import unicodedata | |
def text_fields(obj, event):
    """Bring every text value of ``obj`` into Unicode NFC form.

    Intended to be hooked up as an event handler.

    The W3C recommends Unicode normalization form C (NFC) on the web, see
    http://www.w3.org/TR/charmod-norm/#sec-UnicodeNormalized

    The font currently in use (TheAntiquaF) does not support, at least, the
    combining diaeresis, which causes rendering problems on some browsers:
    https://bugzilla.mozilla.org/show_bug.cgi?id=940944

    Title, description and subject are handled by dedicated helpers; every
    other ``TextLine``, ``Text`` and ``RichText`` field reported by
    ``_fields`` for the object's portal type is normalized as well.

    :param obj: the content object whose text values are normalized
    :param event: the event that triggered this handler (not used)
    """
    outcomes = [
        _normalize_title(obj),
        _normalize_description(obj),
        _normalize_subject(obj),
    ]

    for name, field in _fields(obj.portal_type):
        if isinstance(field, (TextLine, Text)):
            outcomes.append(_normalize_text_field(obj, name))
        elif isinstance(field, RichText):
            outcomes.append(_normalize_richtext_field(obj, name, field))

    # reindex only when at least one value was actually rewritten
    if any(outcomes):
        obj.reindexObject()
def _normalize_title(obj): | |
"""Title is an encoded string and has its own setter""" | |
changed = False | |
data = getattr(obj, 'title', '') | |
if data: | |
result = normalize(data) | |
if result != data: | |
changed = True | |
obj.setTitle(result.encode('utf-8')) | |
return changed | |
def _normalize_description(obj): | |
"""Description is an encoded string and has its own setter""" | |
changed = False | |
data = getattr(obj, 'description', '') | |
if data: | |
result = normalize(data) | |
if result != data: | |
changed = True | |
obj.setDescription(result.encode('utf-8')) | |
return changed | |
def _normalize_subject(obj): | |
"""Subject (tags) is a tuple of encoded strings and has its own setter""" | |
subjects_changed = False | |
data = getattr(obj, 'subject', ()) | |
if len(data) > 0: | |
subjects = [] | |
for subject in data: | |
result = normalize(subject) | |
if result != subject: | |
subjects_changed = True | |
subjects.append(result) | |
if subjects_changed: | |
obj.setSubject(tuple(subjects)) | |
changed = False | |
if subjects_changed: | |
changed = True | |
return changed | |
def _normalize_text_field(obj, attribute): | |
"""Normalize the given attribute of the given object""" | |
changed = False | |
data = getattr(obj, attribute, '') | |
if data: | |
result = normalize(data) | |
if result != data: | |
changed = True | |
setattr(obj, attribute, result) | |
return changed | |
def _normalize_richtext_field(obj, attribute, field_factory): | |
"""Normalize the given richtext attribute of the given object""" | |
changed = False | |
data = getattr(obj, attribute, '') | |
if data: | |
result = normalize(data.raw) | |
if result != data.raw: | |
changed = True | |
result = RichTextValue( | |
raw=result, | |
mimeType=field_factory.default_mime_type, | |
outputMimeType=field_factory.output_mime_type, | |
) | |
setattr(obj, attribute, result) | |
return changed | |
def _fields(portal_type):
    """Collect the fields of the given portal type.

    Fields come from the schema looked up on the type's Dexterity FTI
    (including inherited ones) and from the interface of every behavior
    listed on that FTI.

    The result of ``namesAndDescriptions`` is copied into a list first:
    on Python 3 it is a dict view, which cannot be extended in place.

    :param portal_type: name the ``IDexterityFTI`` utility is registered
        under
    :returns: a list of ``(name, description)`` pairs
    """
    fti = getUtility(IDexterityFTI, name=portal_type)
    schema = fti.lookupSchema()
    fields = list(schema.namesAndDescriptions(all=True))
    for behavior_name in fti.behaviors:
        registration = getUtility(IBehavior, behavior_name)
        fields.extend(registration.interface.namesAndDescriptions())
    return fields
def normalize(text):
    """Helper method to keep text normalized.

    Works on both Python 2 and Python 3: byte strings are decoded as
    UTF-8, text is used as is, and any other value is returned untouched.

    :param text: a string of text, either text or a UTF-8 encoded byte
        string
    :returns: the NFC normalized version of 'text' as a text (unicode)
        string with every character of a Unicode 'C*' category removed;
        non-string input is returned unchanged
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8
    """
    if isinstance(text, bytes):
        # ``bytes`` is an alias of ``str`` on Python 2
        text = text.decode('utf-8')
    elif not isinstance(text, type(u'')):
        # neither bytes nor text: nothing to normalize
        return text

    result = unicodedata.normalize('NFC', text)

    # remove control characters, on text fields they do more harm than good
    # see https://gitlab.com/der-freitag/zope/issues/2230
    # one liner from https://stackoverflow.com/questions/4324790
    # NOTE(review): category 'Cc' includes newlines and tabs, so these are
    # removed as well - confirm this is wanted for multi-line fields
    return u''.join(
        char for char in result
        if unicodedata.category(char)[0] != 'C'
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment