Skip to content

Instantly share code, notes, and snippets.

@pferreir
Last active October 30, 2018 15:05
Show Gist options
  • Save pferreir/8517286f8cb91545613e2ba8da75f0ef to your computer and use it in GitHub Desktop.
Save pferreir/8517286f8cb91545613e2ba8da75f0ef to your computer and use it in GitHub Desktop.
import bleach
import re
import HTMLParser
# Shared parser instance; the script only uses its unescape() method.
h = HTMLParser.HTMLParser()


def _html_title_query(model):
    """Return the query over ``model`` rows whose title may contain HTML.

    A row is selected when its title matches either LIKE pattern:
    ``</`` followed later by ``>`` (shaped like a closing tag), or
    ``&`` followed later by ``;`` (shaped like a character entity).

    NOTE(review): ``model`` is presumably a SQLAlchemy-style mapped class
    exposing ``query`` and a ``title`` column -- confirm against the shell
    environment this script is run in.
    """
    looks_like_closing_tag = model.title.like('%</%>%')
    looks_like_entity = model.title.like('%&%;%')
    return model.query.filter(looks_like_closing_tag | looks_like_entity)


# One query per model that carries a title, in the order they are processed.
queries = [
    _html_title_query(Contribution),
    _html_title_query(Session),
    _html_title_query(SessionBlock),
    _html_title_query(Break),
]
# Patterns are compiled once, outside the loops.  The IGNORECASE flag is given
# at compile time on purpose: the fourth positional argument of re.sub() is
# `count`, not `flags`, so passing re.IGNORECASE there silently limits the
# number of replacements to 2 and leaves the match case-sensitive.
#
# NOTE(review): matching '&rightarrow' case-insensitively also rewrites
# '&Rightarrow' (a double arrow in HTML5) to '&rarr' -- this follows the
# IGNORECASE intent already present in the script; confirm it is acceptable.
_RIGHTARROW_RE = re.compile(r'&rightarrow', re.IGNORECASE)
# One or more <br> tags (optionally self-closed, optional inner whitespace)
# followed only by whitespace up to the end of the string.
_TRAILING_BR_RE = re.compile(r'(?:<\s*br\s*/?>\s*)+$', re.IGNORECASE)
# Any single <br> tag elsewhere in the string.
_BR_RE = re.compile(r'<\s*br\s*/?>', re.IGNORECASE)

# Rewrite the title of every row returned by each query in `queries`:
# strip markup, decode entities and turn line breaks into ' - '.
# Only `c.title` is assigned here; no commit is issued by this block.
for query in queries:
    for c in query:
        # Drop every tag except <br>, <sub> and <sup> (strip=True removes the
        # disallowed tags instead of escaping them).
        bleached = bleach.clean(c.title, strip=True, tags=['br', 'sub', 'sup'])
        # unescape HTML entities; '&rightarrow' is first rewritten to '&rarr'
        # (presumably because the parser does not know the longer name).
        unescaped = h.unescape(_RIGHTARROW_RE.sub('&rarr', bleached))
        # Remove <br> tags dangling at the end so the title does not finish
        # with a separator, then convert the remaining <br> to ' - '.
        without_trailing_br = _TRAILING_BR_RE.sub('', unescaped)
        text = _BR_RE.sub(' - ', without_trailing_br).strip()
        c.title = text
        # Same output as `print c.id, text`, written so that the line is valid
        # syntax under both Python 2 and Python 3.
        print('%s %s' % (c.id, text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment