Skip to content

Instantly share code, notes, and snippets.

@eloyz
Created June 30, 2011 22:39
Show Gist options
  • Save eloyz/1057470 to your computer and use it in GitHub Desktop.
Save eloyz/1057470 to your computer and use it in GitHub Desktop.
Convert HTML Entity to Unicode in Django Pages Module
import re
import HTMLParser
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """
    Converts title and content html entities to unicode.

    Every ``Page`` is scanned for numeric character references, both
    decimal (``&#233;``) and hexadecimal (``&#xE9;``), in its ``title``
    and ``content`` fields. Each reference is replaced by the character
    it denotes. Named entities such as ``&amp;`` are left untouched.
    """

    help = "Converts numeric HTML entities in page titles and content to unicode."

    # Raw string so ``\d`` reaches the regex engine instead of being treated
    # as a (deprecated) string escape. IGNORECASE covers ``&#X..;`` as well
    # as upper-case hexadecimal digits.
    entity_pattern = re.compile(r'(&#(?:x[0-9a-f]+|\d+);)', re.IGNORECASE)

    # Entity decoder, resolved on first use by ``unescape``.
    _decode = None

    @staticmethod
    def _entity_decoder():
        """Return a callable that turns one entity string into its character."""
        try:
            # Available from Python 3.4; ``HTMLParser.unescape`` was removed
            # in Python 3.9, so prefer this whenever it can be imported.
            from html import unescape
        except ImportError:
            # Python 2: use the parser from the module-level import.
            return HTMLParser.HTMLParser().unescape
        return unescape

    def unescape(self, *args, **kwargs):
        """
        Replacement callback for ``re.sub``.

        ``args[0]`` is the regex match object; the entity text captured by
        group 1 is decoded and returned. Extra arguments are ignored.
        """
        match = args[0]
        if self._decode is None:
            # Resolved lazily so this method works even when it is used
            # without ``handle`` having run first.
            self._decode = self._entity_decoder()
        return self._decode(match.group(1))

    def _convert(self, text):
        """Return ``text`` with numeric entities decoded; ``None``/empty pass through."""
        if not text:
            return text
        return self.entity_pattern.sub(self.unescape, text)

    def handle(self, *args, **options):
        """Decode entities in every page and save the pages that changed."""
        # Imported here, as in the original, so the model is only loaded
        # when the command actually runs.
        from pages.models import Page

        for page in Page.objects.all():
            title = self._convert(page.title)
            content = self._convert(page.content)
            if title == page.title and content == page.content:
                # Nothing was decoded; skip the needless database write.
                continue
            page.title = title
            page.content = content
            page.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment