Created
August 20, 2015 18:44
-
-
Save shacker/6a8c23acab97f22f61bf to your computer and use it in GitHub Desktop.
Django management command to import flat Drupal page sets into a wagtail tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
import json | |
import requests | |
import sys | |
from bs4 import BeautifulSoup | |
from django.core.management.base import BaseCommand | |
from django.contrib.auth.models import User | |
from people.models import Profile | |
from cms.models import StandardPage, StandardIndexPage | |
'''
Import content from Drupal JSON URL.
Be sure to tweak import URL first.

Assumes that we'll be deriving a hierarchy of pages from existing
Drupal URLs in the feed, e.g. Titles such as:

    <a href="/academics/community-arts/visiting-artists">Visiting Artists</a>
    <a href="/academics/graduate/social-practice/chair">Meet the Chair</a>
    <a href="/academics/graduate/social-practice/apply">Apply</a>
    <a href="/academics/graduate/social-practice/curriculum">Curriculum</a>
    <a href="/academics/illustration/chair">Meet the Chair</a>
    <a href="/academics/interaction-design/internships/tips">Tips for Success</a>
    <a href="/academics/interaction-design/internships">Internships</a>
    <a href="/academics/interior-design/internships/guidelines">Guidelines</a>
    <a href="/academics/graphic-design/internships/guidelines">Guidelines</a>
    <a href="/academics/industrial-design/internships/guidelines">Guidelines</a>

Should end up generating a hierarchy like this, where the branches will be
StandardIndexPage and the leaves StandardPage.

    Academics
        Community Arts
            ...
        Graduate
            Social Practice
                Meet the Chair
                Apply
                Curriculum
        ...

If satisfied with the import, use wagtail's management command to move the tree to a permanent location:

    ./manage.py move_pages 78 11  (move_pages from_id to_id)

where from and to are the parent IDs.
'''
# URL of the Drupal JSON export feed; tweak before running the import.
json_url = 'http://exports-cca.gotpantheon.com/export-test-json'

# Slug of the existing Wagtail StandardIndexPage under which imported pages
# are created (and whose children are deleted at the start of each run).
import_slug = 'imported-content'
class Command(BaseCommand):
    """Rebuild the imported page tree from the Drupal JSON feed.

    Every feed entry becomes a ``StandardPage`` leaf. The segments of its
    Drupal URL path that precede the leaf become ``StandardIndexPage``
    branches, created on demand beneath the page whose slug is
    ``import_slug``.
    """

    help = "Import content from Drupal JSON URL."

    # Layout of the 'Post date' / 'Updated date' strings in the feed.
    date_format = '%A, %B %d, %Y - %I:%M%p'

    def handle(self, *args, **options):
        """Confirm with the operator, wipe the previous import, then re-import.

        Raises:
            requests.HTTPError: if the feed URL answers with an error status.
        """
        print("PREVIOUSLY IMPORTED CONTENT WILL BE DELETED (IF YOU HAVEN'T MOVED IT ELSEWHERE)!!!")
        print("JSON URL is '{u}'".format(u=json_url))
        print("New content will be imported under Wagtail parent page with slug '{p}'\n".format(p=import_slug))

        answer = input("Is this OK? (y/n) ")
        if answer != "y":
            # Return rather than sys.exit() so that a programmatic caller
            # of this command is not terminated along with it.
            return

        # Start each run by deleting old imported content.
        StandardIndexPage.objects.get(slug=import_slug).get_children().delete()

        # Pull JSON from Drupal. The timeout keeps a dead server from hanging
        # the command; the status check stops an error page reaching the
        # JSON parser.
        response = requests.get(json_url, timeout=60)
        response.raise_for_status()

        for node in response.json():
            self._import_node(node)
            print("\n===\n")

    def _import_node(self, node):
        """Create the leaf page for one feed entry, plus any missing ancestors."""
        # Title comes through as an HTML link, e.g.
        # <a href="/academics/graphic-design/internships/guidelines">Guidelines</a>
        link = BeautifulSoup(node['Title'], "html5lib").find('a')
        title = link.get_text()  # "Guidelines", even if the anchor has nested markup
        # Strip slashes at both ends so neither a leading nor a trailing
        # slash produces an empty path segment.
        urlpath = link['href'].strip("/")  # academics/graphic-design/internships/guidelines
        parts = urlpath.split("/")  # List of segments; they dictate the wagtail hierarchy
        last = parts[-1]  # "guidelines" - this becomes the final leaf node
        body = node['Body']

        # Compose python datetime objects from string-formatted dates
        post_datetime = datetime.strptime(node['Post date'], self.date_format)
        revised_datetime = datetime.strptime(node['Updated date'], self.date_format)

        # Get or create corresponding Django user
        user, created = User.objects.get_or_create(username=node['Name'])
        if created:
            Profile.objects.create(user=user)  # Also create a linked Profile

        # The import root is fetched afresh for every node (as the original
        # code did) instead of reusing one in-memory instance across the run.
        parent = StandardIndexPage.objects.get(slug=import_slug)
        print("urlpath is ", urlpath)
        print("parent is ", parent)
        print("parts is ", parts)

        # Walk every segment except the final one, descending one level each
        # time. The leaf is picked by position, not by comparing slug values,
        # so a path whose last segment also occurs earlier (a/b/a) is handled.
        for slug in parts[:-1]:
            print("path part is ", slug)
            print("last is ", last, " slug is ", slug)
            print("Getting or creating parent", slug)
            parent = self._get_or_create_index(parent, slug)

        # NOTE(review): if an earlier node already created this slug as an
        # index page (e.g. .../internships/tips imported before
        # .../internships), a second child with the same slug is added here,
        # exactly as before - confirm how that case should be resolved.
        print("path part is ", last)
        print("last is ", last, " slug is ", last)
        print("Last element, creating final page")
        page = StandardPage()
        page.title = title
        page.slug = last
        page.body = body
        page.owner = user
        page.post_datetime = post_datetime
        page.revised_datetime = revised_datetime
        page.show_in_menus = True
        parent.add_child(instance=page)
        page.save_revision().publish()

    def _get_or_create_index(self, parent, slug):
        """Return the child of ``parent`` with this slug, creating an index page if absent."""
        existing = parent.get_children().filter(slug=slug).first()
        if existing is not None:
            print("Page with this slug already exists within parent")
            print("New parent is ", existing)
            return existing

        print("Page does not already exist within parent, creating")
        print("Parent is ", parent, ", slug is ", slug)
        page = StandardIndexPage()
        page.title = slug.title().replace("-", " ")  # Convert slug into usable page title
        page.slug = slug
        page.show_in_menus = True
        parent.add_child(instance=page)
        page.save_revision().publish()
        return page
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment