Skip to content

Instantly share code, notes, and snippets.

@davesque
Created March 12, 2013 04:07
Show Gist options
  • Save davesque/5140256 to your computer and use it in GitHub Desktop.
Save davesque/5140256 to your computer and use it in GitHub Desktop.
Utilities for updating links in rich text content
"""
Utilities for dynamically updating links in rich text fields.
We had a project in which many models had rich text fields. These rich text
fields contained links to model instance detail pages. Since these links were
being entered by hand, we needed a way to update them in the event that the
instance for the detail page they were pointing to changed.
The routines in this file allow links in rich text content to be automatically
updated with a python html parsing library called `BeautifulSoup`.
"""
from bs4 import BeautifulSoup as bs
import re
from django.contrib import admin
from django.db.models import get_model
from ckeditor.fields import RichTextField
def get_update_fields(model_class):
    """
    Collects the names of the rich text fields on ``model_class``.

    Every entry of ``model_class._meta.fields`` that is a ``RichTextField``
    instance contributes its ``name`` to the returned list, in the order
    ``_meta.fields`` yields them. These are the fields that get searched
    for URLs.
    """
    rich_text_names = []
    for field in model_class._meta.fields:
        if isinstance(field, RichTextField):
            rich_text_names.append(field.name)
    return rich_text_names
def get_url_regex(slug):
    """
    Returns a compiled, case-insensitive regex for finding URLs to update.

    The pattern matches text containing ``/products/``, then any run of
    characters, then ``/`` immediately followed by ``slug``, with nothing
    after the slug except optional whitespace and/or slashes up to the end
    of the string.

    ``slug`` is matched literally: it is passed through ``re.escape`` so
    that characters with a special meaning in regular expressions (``.``,
    ``+``, ``(`` ...) neither widen the match nor make compilation fail.
    """
    pattern = r'/products/.*/' + re.escape(slug) + r'[\s/]*$'
    return re.compile(pattern, re.IGNORECASE)
def update_content_urls(content, regex, url):
    """
    Updates all anchor tag hrefs that match ``regex`` to ``url`` in the
    given ``content``.

    ``content`` is the HTML markup to scan; a missing (``None``) or empty
    value is parsed as an empty document. ``regex`` is used as the ``href``
    filter for ``find_all``, and every matching ``<a>`` tag gets ``url`` as
    its new ``href``.

    Returns a ``(soup, updated)`` tuple: the parsed (and possibly modified)
    ``BeautifulSoup`` document, and whether any link matched.
    """
    # Name the parser explicitly. Without one BeautifulSoup picks whichever
    # parser happens to be installed, so the same markup can be parsed (and
    # later serialized) differently from one environment to the next.
    # ``html.parser`` ships with Python.
    soup = bs(content or '', 'html.parser')
    matching_links = soup.find_all('a', href=regex)
    for link in matching_links:
        link.attrs['href'] = url
    # Return soup and whether or not anything was updated
    return soup, bool(matching_links)
def update_instance_urls(instance, field_names, regex, url):
    """
    Rewrites matching anchor hrefs in several fields of one ``instance``.

    Each attribute named in ``field_names`` is read from ``instance`` and
    handed to ``update_content_urls`` together with ``regex`` and ``url``.
    When that call reports a change, the returned soup is assigned back to
    the same attribute. The instance is not saved here.

    Returns ``True`` if at least one field was reassigned, else ``False``.
    """
    changed = False
    for name in field_names:
        current_content = getattr(instance, name)
        soup, touched = update_content_urls(
            content=current_content,
            regex=regex,
            url=url,
        )
        if not touched:
            # Nothing matched in this field; leave the attribute as it is
            continue
        setattr(instance, name, soup)
        changed = True
    return changed
class UpdateAbsoluteUrls(object):
    """
    Mixin class for automatically updating absolute urls in field content.

    The class it is mixed into must provide a ``slug`` attribute and a
    ``get_absolute_url()`` method; both are read by ``update_absolute_urls``.
    """

    # ``(app label, model name)`` pairs handed to ``get_model``. Fields on
    # these models are scanned with beautiful soup to update URLs.
    URL_UPDATE_MODELS = (
        ('comparisongroup', 'comparisongroup'),
        ('products', 'category'),
        ('products', 'product'),
    )

    def update_absolute_urls(self, slug=None):
        """
        Searches through fields found on the models specified in
        ``URL_UPDATE_MODELS`` for urls ending with ``slug`` and replaces them
        with the current URL for this model instance.

        ``slug`` defaults to ``self.slug`` when omitted or falsy. If no
        usable slug is available nothing is done. Every instance whose
        content was changed is saved. Returns ``None``.
        """
        slug = slug or self.slug
        if not slug:
            # An empty slug would yield a pattern that matches every
            # ``/products/.../`` URL ending in a slash, rewriting links that
            # have nothing to do with this instance.
            return

        regex = get_url_regex(slug)
        url = self.get_absolute_url()

        # Resolve every model up front so a bad entry fails before any
        # instance has been modified.
        models = [get_model(app, cls) for app, cls in self.URL_UPDATE_MODELS]

        # Replace url in rich text content on instances of each model
        for model in models:
            field_names = get_update_fields(model)
            if not field_names:
                # No rich text fields to scan: skip loading every row
                continue

            for instance in model.objects.all():
                # Replace urls in field content on instance
                instance_updated = update_instance_urls(
                    instance=instance,
                    field_names=field_names,
                    regex=regex,
                    url=url,
                )
                if instance_updated:
                    instance.save()
class UpdateAbsoluteUrlsAdmin(admin.ModelAdmin):
    """
    ModelAdmin that automatically updates urls for models that inherit from the
    UpdateAbsoluteUrls class. This was the best way I found to get this
    functionality. Adding it into UpdateAbsoluteUrls.save or subclassing
    forms.ModelForm would not work since many-to-many relationships are used in
    Product.get_absolute_url and those are not updated by the admin until after
    the model form or instance is saved.

    ``change_view`` records the object's URL and slug as they were before the
    edit; ``response_change`` compares that URL with the current one and, if
    it differs, calls ``obj.update_absolute_urls`` with the old slug.

    The recorded values are kept on the ``request`` object rather than on
    ``self``: a ModelAdmin instance is shared by every request, so state
    stored on it could be overwritten by a concurrent edit of another object.
    """

    def response_change(self, request, obj):
        """
        Updates links pointing at ``obj`` if its URL changed, then defers to
        the parent implementation for the actual response.
        """
        # Absent when ``change_view`` recorded nothing for this request
        previous = getattr(request, '_url_update_state', None)
        if previous is not None:
            old_url, old_slug = previous
            # Update urls if changed
            if old_url != obj.get_absolute_url():
                obj.update_absolute_urls(old_slug)
        return super(UpdateAbsoluteUrlsAdmin, self).response_change(request, obj)

    def change_view(self, request, object_id, **kwargs):
        """
        Records the pre-edit URL and slug of the object on ``request`` and
        then runs the parent change view.
        """
        old_instance = self.get_object(request, object_id)
        # ``get_object`` can return ``None`` when there is no matching
        # object; in that case record nothing and let the parent view
        # produce its usual response instead of failing here.
        if old_instance is not None:
            request._url_update_state = (
                old_instance.get_absolute_url(),
                old_instance.slug,
            )
        return super(UpdateAbsoluteUrlsAdmin, self).change_view(request, object_id, **kwargs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment