|
#!/usr/bin/env python3 |
|
|
|
import frontmatter |
|
import os |
|
import re |
|
import shlex |
|
import subprocess |
|
import urllib.request |
|
|
|
# Import blog posts (markdown files with front matter) from the current
# directory into Day One via the `dayone2` command-line tool. Linked images
# hosted on the old blog are downloaded into ./images and attached.

directory = os.getcwd()
outputdir = os.path.join(directory, 'images')
os.makedirs(outputdir, exist_ok=True)

# Matches a linked image of the form [![alt](thumbnail)](full-size-url) whose
# link target lives under the blog's wp-content/uploads/YYYY/MM/ tree.
# Group 1 is the full-size URL, group 2 is its bare file name.
imageRegex = re.compile(r'\[!\[[^\]]+\]\([^\)]+\)\]\((http://journal.dcbeatty.com/wp-content/uploads/\d{4}/\d{2}/([\w\-]+\.\w{2,4}))\)')
totalMatches = 0

# Browser-like User-Agent sent with every image request.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'

# urlretrieve() uses the globally installed opener, so it is enough to build
# and install it once instead of once per image.
_opener = urllib.request.build_opener()
_opener.addheaders = [('User-Agent', USER_AGENT)]
urllib.request.install_opener(_opener)


def download_image(url, image_path):
    """Download *url* to *image_path* on a best-effort basis.

    Args:
        url: Address of the image to fetch.
        image_path: Local file path the image is written to.

    Returns:
        True if the file was retrieved, False if the download failed (the
        failure is reported on stdout and otherwise ignored).
    """
    try:
        urllib.request.urlretrieve(url, image_path)
    except Exception as exc:
        # Deliberately broad: one bad image must not abort the whole import.
        # Unlike a bare `except:`, this still lets Ctrl-C / SystemExit through.
        print("Failed to download image from ", url, ":", exc)
        return False
    return True


def build_dayone_args(post, attachments):
    """Build the `dayone2` argument list for one post.

    Args:
        post: Mapping with a 'date' entry and optional 'tags' and
            'categories' lists (a loaded front-matter post, or a plain dict).
        attachments: Paths of the images that were downloaded successfully.

    Returns:
        The argv list to hand to subprocess.run(), ending in the "new"
        command.
    """
    # Format instead of concatenating: the front-matter date may have been
    # parsed into a date/datetime object rather than a string.
    # NOTE(review): assumes dayone2 accepts str(datetime) output - confirm.
    args = ["dayone2", f"--date={post['date']}"]

    if attachments:
        # The Day One CLI docs say it can handle 10 attachments, but it
        # throws an error when more than one is provided, so for now all
        # attachments past the first have to be added manually. Once that
        # works, every path in `attachments` should follow "-a" here.
        args += ["-a", attachments[0]]

    # `or []` covers keys that are present but empty in the front matter.
    tags = (post.get('tags') or []) + (post.get('categories') or [])

    if tags:
        args.append('-t')
        # The arguments are passed as a list with no shell involved, so the
        # tags must NOT be shell-quoted: the quotes would become part of the
        # tag. dict.fromkeys() de-duplicates while keeping a stable order.
        args.extend(dict.fromkeys(str(tag) for tag in tags))

    if attachments or tags:
        # "-a" and "-t" take a variable number of values; "--" ends the list
        # so that "new" is read as the command.
        args.append('--')

    args.append("new")
    return args


for filename in os.listdir(directory):
    print('.', end='', flush=True)
    inputfile = os.path.join(directory, filename)
    # NOTE(review): outputfile is not used in the visible part of the script.
    outputfile = os.path.join(outputdir, filename)
    attachments = []

    # Only regular files whose name starts with a digit are imported.
    if not (os.path.isfile(inputfile) and filename[0].isdigit()):
        continue

    post = frontmatter.load(inputfile)
    content = post.content

    for url, imageFilename in imageRegex.findall(content):
        totalMatches += 1
        imagePath = os.path.join(outputdir, imageFilename)
        # Record the image only once it is really on disk, so dayone2 is
        # never pointed at a file that does not exist.
        if download_image(url, imagePath):
            attachments.append(imagePath)

    if attachments:
        # At least one image was fetched: swap every linked image in the
        # post for Day One's attachment placeholder.
        content = imageRegex.sub('[{attachment}]', post.content)

    content = '# %s\n\n%s' % (post['title'], content)

    if len(attachments) > 1:
        print("\n\nFile ", filename, " has more than 1 attachment:")
        for attachment in attachments:
            print("- ", attachment)
        print("")

    args = build_dayone_args(post, attachments)
    result = subprocess.run(args, input=content.encode(), capture_output=True)

    if result.returncode != 0:
        print("Error while importing ", filename, "\n",
              result.stderr.decode(errors='replace'))