Created
February 28, 2013 14:48
-
-
Save satooshi/5057260 to your computer and use it in GitHub Desktop.
Convert Movable Type text exported from Hatena Diary to Markdown.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// decoder | |
/**
 * Decode the text of a single Movable Type export entry.
 *
 * While in metadata mode, "Key: value" lines become lower-cased keys of the
 * result; a key that occurs more than once is collected into a list of values.
 * A "Key:" line (nothing after the colon) opens a multi-line section and
 * leaves metadata mode; a "-----" line re-enters metadata mode. Only lines
 * collected while the current section is "body" are kept, joined with "\n"
 * under the 'body' key.
 *
 * @param string $data Text of one entry; LF and CRLF line endings are accepted.
 *
 * @return array Decoded entry. The 'body' key is always present (possibly '').
 */
function decodePost($data)
{
    $post = array();
    $body = array();
    $readMeta = true;
    $meta = null;

    // Split on CRLF as well as LF: a trailing "\r" would otherwise stop the
    // "KEY:" header pattern from matching and would leak into every value.
    foreach (preg_split('/\r?\n/', $data) as $line) {
        if ($line === '-----') {
            // Section separator: following lines may be metadata again.
            // The current section name is not cleared here, so non-metadata
            // lines after a separator are still collected while it is "body".
            $readMeta = true;
        } elseif ($readMeta && preg_match('/^(?P<meta>[\w\s]+?)\:$/', $line, $matches)) {
            // Section header such as "BODY:" - everything up to the next
            // separator belongs to this section.
            $readMeta = false;
            $meta = strtolower($matches['meta']);
        } elseif ($readMeta && preg_match('/^(?P<meta>[\w\s]+?)\: (?P<value>.*)$/', $line, $matches)) {
            $key = strtolower($matches['meta']);
            $value = $matches['value'];

            if (!array_key_exists($key, $post)) {
                $post[$key] = $value;
            } elseif (is_array($post[$key])) {
                $post[$key][] = $value;
            } else {
                // Second occurrence of a key: promote the scalar to a list.
                $post[$key] = array($post[$key], $value);
            }
        } elseif ($meta === 'body') {
            $body[] = $line;
        }
    }

    $post['body'] = implode("\n", $body);

    return $post;
}
/**
 * Read a Movable Type export file and decode every entry in it.
 *
 * Entries are separated by a line of eight dashes. Chunks for which
 * getPost() returns null are skipped.
 *
 * @param string $path Path of the export file.
 *
 * @return array List of decoded posts, in file order.
 *
 * @throws \RuntimeException When the file cannot be read.
 */
function getPosts($path)
{
    $contents = file_get_contents($path);

    // file_get_contents() signals failure with false; without this check an
    // unreadable file would silently produce an empty list of posts.
    if ($contents === false) {
        throw new \RuntimeException(sprintf('could not read "%s".', $path));
    }

    $posts = array();

    // Accept CRLF as well as LF around the entry separator.
    foreach (preg_split("/\r?\n--------\r?\n/", $contents) as $data) {
        $post = getPost($data);

        if ($post !== null) {
            $posts[] = $post;
        }
    }

    return $posts;
}
/**
 * Decode one entry and normalise the decoded fields.
 *
 * - 'category' is always a list of lower-cased strings (empty when absent).
 * - When a 'date' field exists, 'datetime' holds the result of parsing it
 *   with the format "m/d/Y h:i:s A" (e.g. "11/23/2009 00:09:52 PM").
 *
 * @param string $data Raw text of one entry.
 *
 * @return array|null Decoded post, or null when the trimmed text is empty.
 */
function getPost($data)
{
    $text = trim($data);

    if (empty($text)) {
        return null;
    }

    $entry = decodePost($text);

    // A single CATEGORY line decodes to a string, several to a list;
    // the cast turns both into a list.
    $categories = array_key_exists('category', $entry)
        ? (array)$entry['category']
        : array();
    $entry['category'] = categoryToLower($categories);

    if (isset($entry['date'])) {
        $entry['datetime'] = \DateTime::createFromFormat('m/d/Y h:i:s A', $entry['date']);
    }

    return $entry;
}
/**
 * Lower-case every category name.
 *
 * @param array $categories Category names.
 *
 * @return array Lower-cased names as a re-indexed list, in the same order.
 */
function categoryToLower(array $categories)
{
    // array_values() first: array_map() would otherwise preserve the input
    // keys, whereas the result is always a 0-based list.
    return array_map('strtolower', array_values($categories));
}
// converter | |
/**
 * Render a decoded post as a complete markdown document.
 *
 * The result is the front matter from toMetaMarkdown(), a newline, and the
 * body converted by toMarkdownBody(). The working directory "_tmp" is
 * created when it does not exist yet.
 *
 * @param array $post Decoded post; its 'body' entry is converted.
 *
 * @return string Markdown document.
 */
function toMarkdown(array $post)
{
    $frontMatter = toMetaMarkdown($post);

    $workDir = '_tmp';
    is_dir($workDir) || mkdir($workDir);

    return $frontMatter . "\n" . toMarkdownBody($workDir, $post['body']);
}
/**
 * Build the YAML front matter block for a post.
 *
 * The title and each category are emitted as double-quoted YAML scalars so
 * that characters such as ":", "#", "," or quotes in the exported text cannot
 * break or change the meaning of the front matter.
 *
 * @param array $post Decoded post. Reads 'title', 'category' (list of
 *                    strings) and 'datetime' (a date object).
 *
 * @return string Front matter, starting and ending with a "---" line.
 *
 * @throws \RuntimeException When 'datetime' is missing or is not a date object
 *                           (for example because the date failed to parse).
 */
function toMetaMarkdown(array $post)
{
    if (!isset($post['datetime']) || !($post['datetime'] instanceof \DateTimeInterface)) {
        throw new \RuntimeException('post has no valid datetime.');
    }

    // Double-quoted YAML scalar: only backslash and double quote need escaping
    // for single-line values.
    $quote = function ($value) {
        return '"' . strtr((string)$value, array('\\' => '\\\\', '"' => '\\"')) . '"';
    };

    $template = implode("\n", array(
        '---',
        'layout: post',
        'title: %s',
        'date: %s',
        'comments: false',
        'categories: %s',
        'published: false',
        '---',
        '',
    ));

    if (!empty($post['category'])) {
        $categories = sprintf('[%s]', implode(', ', array_map($quote, $post['category'])));
    } else {
        $categories = '';
    }

    $title = isset($post['title']) ? $post['title'] : '';

    return sprintf($template, $quote($title), $post['datetime']->format('Y-m-d H:i'), $categories);
}
/**
 * Convert an HTML body to markdown by running pandoc.
 *
 * The body is written to "body.html" inside $dir, pandoc converts it to
 * "body.markdown" in the same directory, and both files are removed again.
 *
 * @param string $dir  Existing, writable working directory.
 * @param string $body HTML to convert.
 *
 * @return string Markdown produced by pandoc.
 *
 * @throws \RuntimeException When a temporary file cannot be written or read,
 *                           or when pandoc exits non-zero or prints output.
 */
function toMarkdownBody($dir, $body)
{
    $bodyHtml = 'body.html';
    $filename = 'body.markdown';
    $bodyPath = $dir . '/' . $bodyHtml;
    $bodyMarkdownPath = $dir . '/' . $filename;

    if (file_put_contents($bodyPath, $body) === false) {
        throw new \RuntimeException(sprintf('could not write "%s".', $bodyPath));
    }

    // Every argument is shell-escaped, and "&&" keeps pandoc from running in
    // the wrong directory when the cd fails.
    $cmd = sprintf(
        'cd %s && pandoc -f html -t markdown %s -o %s',
        escapeshellarg($dir),
        escapeshellarg($bodyHtml),
        escapeshellarg($filename)
    );

    $output = array();
    exec($cmd, $output, $returnCode);
    unlink($bodyPath);

    // With -o nothing is expected on stdout, so any output counts as a failure.
    if ($returnCode !== 0 || !empty($output)) {
        // Do not leave a partial result behind.
        if (is_file($bodyMarkdownPath)) {
            unlink($bodyMarkdownPath);
        }

        throw new \RuntimeException('pandoc failure.');
    }

    $markdown = file_get_contents($bodyMarkdownPath);

    if ($markdown === false) {
        throw new \RuntimeException(sprintf('could not read "%s".', $bodyMarkdownPath));
    }

    unlink($bodyMarkdownPath);

    return $markdown;
}
// dumper | |
/**
 * Write one post as a markdown file into $dir.
 *
 * The file is named "<Y-m-d>-<YmdHis>.markdown", both parts taken from the
 * post's 'datetime'.
 *
 * @param string $dir  Output directory.
 * @param array  $post Decoded post.
 */
function dump($dir, array $post)
{
    $contents = toMarkdown($post);

    $stamp = $post['datetime'];
    $target = $dir . '/' . $stamp->format('Y-m-d') . '-' . $stamp->format('YmdHis') . '.markdown';

    file_put_contents($target, $contents);
}
/**
 * Write every post in $posts to $dir, one file per post, in list order.
 *
 * @param string $dir   Output directory.
 * @param array  $posts List of decoded posts.
 */
function dumpAll($dir, array $posts)
{
    array_walk($posts, function ($post) use ($dir) {
        dump($dir, $post);
    });
}
// run | |
// Input export file and output directory, both relative to the current
// working directory.
$dir = '_posts';
$file = 'movable_type.txt';

// Create the output directory on first run.
is_dir($dir) || mkdir($dir);

// Decode every entry of the export and write one markdown file per post.
$posts = getPosts($file);
dumpAll($dir, $posts);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment