Created
March 12, 2013 04:07
-
-
Save davesque/5140256 to your computer and use it in GitHub Desktop.
Utilities for updating links in rich text content
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Utilities for dynamically updating links in rich text fields. | |
We had a project in which many models had rich text fields. These rich text | |
fields contained links to model instance detail pages. Since these links were | |
being entered by hand, we needed a way to update them in the event that the | |
instance for the detail page they were pointing to changed. | |
The routines in this file allow links in rich text content to be automatically | |
updated with a python html parsing library called `BeautifulSoup`. | |
""" | |
from bs4 import BeautifulSoup as bs | |
import re | |
from django.contrib import admin | |
from django.db.models import get_model | |
from ckeditor.fields import RichTextField | |
def get_update_fields(model_class):
    """
    Collects the names of the rich text fields declared on ``model_class``.

    Every entry of ``model_class._meta.fields`` that is a ``RichTextField``
    instance contributes its ``name``; these are the fields that get searched
    for URLs. Names are returned as a list, in ``_meta.fields`` order.
    """
    rich_text_names = []
    for field in model_class._meta.fields:
        if isinstance(field, RichTextField):
            rich_text_names.append(field.name)
    return rich_text_names
def get_url_regex(slug):
    """
    Returns a compiled, case-insensitive regex for finding URLs to update.

    The pattern matches text containing ``/products/``, followed by anything,
    followed by ``/<slug>`` at the end of the string (trailing slashes and
    whitespace are tolerated).

    ``slug`` is escaped so that it is always matched literally, even if it
    contains characters that are special in regular expressions.
    """
    # Raw strings keep ``\s`` from being treated as a (invalid) string escape.
    return re.compile(
        r'/products/.*/' + re.escape(slug) + r'[\s/]*$',
        re.IGNORECASE,
    )
def update_content_urls(content, regex, url):
    """
    Updates all anchor tag hrefs that match ``regex`` to ``url`` in the
    given ``content``.

    ``content`` is an HTML fragment (``None`` is treated as empty content).
    ``regex`` is a compiled pattern used to select ``<a>`` tags by their
    ``href`` attribute, and ``url`` is the replacement ``href`` value.

    Returns a ``(soup, updated)`` tuple: the parsed ``BeautifulSoup`` document
    with the replacements applied, and whether any link was changed.
    """
    # Name the parser explicitly.  Without it BeautifulSoup picks whichever
    # parser happens to be installed, and lxml/html5lib wrap fragments in
    # <html><body> tags, which would then be written back into the field.
    # 'html.parser' leaves fragments as fragments.
    soup = bs(content or '', 'html.parser')
    links = soup.find_all('a', href=regex)
    for link in links:
        link['href'] = url
    # Return soup and whether or not anything was updated
    return soup, bool(links)
def update_instance_urls(instance, field_names, regex, url):
    """
    Updates all anchor tag hrefs that match ``regex`` to ``url`` in the
    fields specified by ``field_names`` on the given ``instance``.

    Only fields in which at least one link was replaced are reassigned, and
    they are reassigned as text rather than as a parsed document. The
    instance is not saved here; that is left to the caller.

    Returns whether or not ``instance`` was updated.
    """
    instance_updated = False
    for field_name in field_names:
        content = getattr(instance, field_name)
        # Empty or missing content cannot contain links (and ``None`` cannot
        # be parsed), so there is nothing to do for this field.
        if not content:
            continue
        # Replace urls in field content
        soup, field_updated = update_content_urls(
            content=content,
            regex=regex,
            url=url,
        )
        # Update field content
        if field_updated:
            # Store the serialized markup, not the BeautifulSoup object, so
            # the attribute keeps holding text like any other field value.
            # ``decode()`` returns unicode text.
            setattr(instance, field_name, soup.decode())
            instance_updated = True
    return instance_updated
class UpdateAbsoluteUrls(object):
    """
    Mixin that rewrites hand-entered links in rich text content so they point
    at the current absolute url of the instance it is mixed into.

    The host class is expected to provide ``slug`` and ``get_absolute_url()``.
    """
    # Pairs handed to ``get_model`` as its two arguments; the rich text
    # fields of every resulting model are scanned with beautiful soup.
    URL_UPDATE_MODELS = (
        ('comparisongroup', 'comparisongroup'),
        ('products', 'category'),
        ('products', 'product'),
    )

    def update_absolute_urls(self, slug=None):
        """
        Looks through the rich text fields of every model listed in
        ``URL_UPDATE_MODELS`` for links ending with ``slug`` (defaulting to
        this instance's own ``slug``) and points them at this instance's
        current absolute url. Instances whose content changed are saved.
        """
        pattern = get_url_regex(slug or self.slug)
        target_url = self.get_absolute_url()

        # Resolve every model class up front, before touching any content
        model_classes = [get_model(*pair) for pair in self.URL_UPDATE_MODELS]

        for model_class in model_classes:
            rich_text_fields = get_update_fields(model_class)

            for candidate in model_class.objects.all():
                changed = update_instance_urls(
                    instance=candidate,
                    field_names=rich_text_fields,
                    regex=pattern,
                    url=target_url,
                )
                # Only persist instances that actually had a link rewritten
                if changed:
                    candidate.save()
class UpdateAbsoluteUrlsAdmin(admin.ModelAdmin):
    """
    ModelAdmin that automatically updates urls for models that inherit from the
    UpdateAbsoluteUrls class. This was the best way I found to get this
    functionality. Adding it into UpdateAbsoluteUrls.save or subclassing
    forms.ModelForm would not work since many-to-many relationships are used in
    Product.get_absolute_url and those are not updated by the admin until after
    the model form or instance is saved.

    The pre-edit url and slug are remembered on the request object rather than
    on the admin instance: the admin instance is shared between requests, so
    state stored on it could be overwritten by a concurrent edit.
    """
    def response_change(self, request, obj):
        """
        After a successful change, rewrites links to ``obj`` if its absolute
        url differs from the one recorded by ``change_view``.
        """
        old_state = getattr(request, '_update_urls_old_state', None)
        # Nothing recorded means change_view did not find the object (or was
        # bypassed); in that case there is no old url to compare against.
        if old_state is not None:
            old_url, old_slug = old_state
            # Update urls if changed
            if old_url != obj.get_absolute_url():
                obj.update_absolute_urls(old_slug)
        return super(UpdateAbsoluteUrlsAdmin, self).response_change(request, obj)

    def change_view(self, request, object_id, **kwargs):
        """
        Records the object's url and slug as they are before the edit is
        applied, then defers to the standard change view.
        """
        old_instance = self.get_object(request, object_id)
        # get_object returns None for an unknown object id; leave it to the
        # parent change_view to produce the proper "not found" response.
        if old_instance is not None:
            request._update_urls_old_state = (
                old_instance.get_absolute_url(),
                old_instance.slug,
            )
        return super(UpdateAbsoluteUrlsAdmin, self).change_view(request, object_id, **kwargs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment