Last active
December 5, 2018 15:00
-
-
Save mattbierner/91d90806fc6d3b414498 to your computer and use it in GitHub Desktop.
Automatic migration of ghost images to Jekyll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{% comment %}
Jekyll include that renders one image as a linked <figure>.

Parameters (passed on the include tag):
  file        - image file name; resolved to
                /content/<page.path without the leading '_posts/' and
                without everything from the first '.' onward>/<file>
  url         - used as the image source when `file` is not given
  description - used as the alt text and, when present, as the figcaption
{% endcomment %}
<figure class="image">
{% capture image_src %}{% if include.file %}/content/{{ page.path | remove_first:'_posts/' | split:'.' | first }}/{{ include.file }}{% else %}{{ include.url }}{% endif %}{% endcapture %}
<a href="{{ image_src }}">
<img src="{{ image_src }}" alt="{{ include.description }}" />
</a>
{% if include.description %}
<figcaption>{{ include.description }}</figcaption>
{% endif %}
</figure>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Second step of the Ghost to Jekyll conversion. | |
The instructions here: http://import.jekyllrb.com/docs/ghost/
cover how to port post content from Ghost to Jekyll, but they do not cover
migrating image assets uploaded to Ghost. This script handles downloading all
such images into Jekyll and converting the post image links to use the new,
local image files.
Example usage: | |
cd my_jekyll_blog | |
python migrate_ghost_images.py _posts --site http://my-super-blog.com | |
Writes all the images under a folder called `content`, with each post having its | |
own directory in `content`: | |
/content/2016-01-12-my-post/image1.png | |
The current ghost blog must be running for the image download part of this script to work. | |
`replace_image_tags_with_includes` rewrites all the markdown image tags to use the `image.html` include template, which renders the images themselves.
""" | |
import argparse | |
import os | |
import re | |
import urllib | |
from distutils.dir_util import mkpath | |
# Matches any markdown image tag of the form `![description](target)`.
# Group 1 is the description (may be empty); group 2 is the image target.
ALL_IMAGE_RE = re.compile(r"\!\[([^\]]*)\]\(([^\)]+)\)")
# Same shape as ALL_IMAGE_RE, but only matches targets that start with '/'.
LOCAL_IMAGE_RE = re.compile(r"\!\[([^\]]*)\]\((/[^\)]+)\)")
def download_image(postname, path, output_dir):
    """Download the image at URL `path` into `output_dir`.

    The local file name is the text after the last '/' in `path`.

    Args:
        postname: Name of the post the image belongs to. Not used by the
            download itself; kept so existing callers keep working.
        path: Full URL of the image to fetch.
        output_dir: Existing directory that receives the downloaded file.
    """
    # `urlretrieve` lives in `urllib.request` on Python 3 and directly in
    # `urllib` on Python 2; import it locally so the script runs on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    output_file = path.rsplit('/', 1)[-1]
    output_path = os.path.join(output_dir, output_file)
    urlretrieve(path, output_path)
def replace_image_tags_with_includes(match):
    """Turn one matched markdown image tag into an `image.html` include tag.

    Args:
        match: Regex match whose group 1 is the image description and whose
            group 2 is the image path or URL.

    Returns:
        The Liquid `include` tag that replaces the matched markdown.
    """
    description, location = match.group(1, 2)
    # Only the file name is kept: the text after the last '/' in the target.
    file_name = location.rpartition('/')[2]
    template = '{{% include image.html file="{1}" description="{0}" %}}'
    return template.format(description, file_name)
def process_post(site, path, postname, post_content, local_only=False):
    """Download the images a post references and rewrite its image tags.

    Args:
        site: Base URL of the running blog. It is prefixed to image targets
            that start with '/'.
        path: Path of the post file. Not used here; kept for callers.
        postname: Post name; images are stored under `content/<postname>`
            and it is passed on to `download_image`.
        post_content: Markdown text of the post.
        local_only: When true, only image targets starting with '/' are
            downloaded and rewritten; otherwise every markdown image is.

    Returns:
        `post_content` with each handled image tag replaced by an
        `image.html` include, or the unchanged text if no image matched.

    Raises:
        ValueError: If a target starting with '/' is found but `site` is
            empty, since the download URL cannot be built.
    """
    image_re = LOCAL_IMAGE_RE if local_only else ALL_IMAGE_RE
    images = list(image_re.finditer(post_content))
    if not images:
        return post_content

    # Create the per-post directory only once we know there is something to
    # download. `os.makedirs` is guarded so an existing directory is fine.
    output_dir = os.path.join('content', postname)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for image in images:
        image_path = image.group(2)
        if image_path.startswith('/'):
            if not site:
                raise ValueError(
                    "--site is required to download site relative image "
                    "'{0}'".format(image_path))
            # Avoid a double slash when `site` ends with '/'.
            image_path = site.rstrip('/') + image_path
        download_image(postname, image_path, output_dir)

    # Rewrite image includes
    return image_re.sub(replace_image_tags_with_includes, post_content)
def process_post_file(site, filename, path, local_only=False):
    """Rewrite a single file from `_posts` in place.

    Args:
        site: Base URL of the running blog, forwarded to `process_post`.
        filename: File name of the post; its extension is dropped to form
            the post name.
        path: Path used to read and then overwrite the post.
        local_only: Forwarded to `process_post`.
    """
    postname, _extension = os.path.splitext(filename)

    with open(path, 'r') as source:
        original = source.read()

    # The file is reopened for writing only after processing has finished,
    # so a failure in `process_post` leaves the post untouched.
    rewritten = process_post(site, path, postname, original,
                             local_only=local_only)

    with open(path, 'w') as target:
        target.write(rewritten)
# Command line entry point: process every post file in the given directory.
parser = argparse.ArgumentParser(description='Download images from ghost posts.')
parser.add_argument('path', help='path to `_posts` directory to process')
parser.add_argument('--site', dest='site',
                    help="Current url to running instance of blog. Used to grab site relative images.")
parser.add_argument('--local_only', dest='local_only', action='store_true',
                    default=False,
                    help="Only download images stored at an absolute url for the blog?")
args = parser.parse_args()

for filename in os.listdir(args.path):
    post_path = os.path.join(args.path, filename)
    # `os.listdir` also returns sub-directories; opening one as a post would
    # abort the whole run, so anything that is not a regular file is skipped.
    if not os.path.isfile(post_path):
        continue
    process_post_file(
        args.site,
        filename,
        post_path,
        local_only=args.local_only)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment