Created
June 30, 2011 22:39
-
-
Save eloyz/1057470 to your computer and use it in GitHub Desktop.
Convert HTML Entity to Unicode in Django Pages Module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import HTMLParser | |
from django.core.management.base import BaseCommand | |
class Command(BaseCommand):
    """
    Converts title and content html entities to unicode

    Walks every ``Page`` and rewrites decimal numeric character
    references (for example ``&#233;``) found in ``title`` and ``content``
    into the characters they stand for. Named references (``&amp;``) and
    hexadecimal references (``&#xE9;``) are not matched by the pattern
    below and are therefore left as they are.
    """

    # Raw string so the digit escape reaches the regex engine verbatim.
    # The pattern contains no letters, so no case-insensitive flag is
    # needed. The single group captures the whole entity text.
    _entity_pattern = re.compile(r'(&#\d+;)')

    def unescape(self, *args, **kwargs):
        """
        Replacement callback handed to the regex substitution.

        The first positional argument is the match object; its first group
        is the entity text, which is passed to ``self.h.unescape``. The
        return value of that call is used as the replacement.

        Relies on ``self.h``, which ``handle`` creates before any
        substitution takes place.
        """
        match = args[0]
        return self.h.unescape(match.group(1))

    def _convert(self, text):
        """
        Return ``text`` with every matched entity replaced.

        Falsy values (``None``, empty string) are returned unchanged so a
        page with a missing field does not abort the whole run.
        """
        if not text:
            return text
        return self._entity_pattern.sub(self.unescape, text)

    def handle(self, *args, **options):
        """
        Convert the ``title`` and ``content`` of every page.

        A page is saved only when at least one of the two fields actually
        changed, which avoids rewriting rows that contain no entities.
        """
        # NOTE(review): imported inside the method, as in the original --
        # presumably to defer model loading until the command runs; confirm.
        from pages.models import Page

        self.h = HTMLParser.HTMLParser()

        for page in Page.objects.all():
            title = self._convert(page.title)
            content = self._convert(page.content)

            # Nothing was replaced in either field: skip the write.
            if title == page.title and content == page.content:
                continue

            page.title = title
            page.content = content
            page.save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment