Last active
August 29, 2015 14:07
-
-
Save magthe/744cdda3b59dc9abaa06 to your computer and use it in GitHub Desktop.
Bye bye WordPress
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env runhaskell | |
-- {{{1 imports | |
import Control.Arrow | |
import Data.List | |
import Data.Maybe | |
import Data.Time.Format | |
import Data.Time.LocalTime | |
import System.Directory | |
import System.Environment | |
import System.FilePath | |
import System.IO | |
import System.Locale | |
import Text.HTML.TagSoup | |
import Text.PrettyPrint.ANSI.Leijen hiding ((</>)) | |
-- {{{1 tagsoup parsing | |
getAllItems :: [Tag String] -> [[Tag String]] | |
getAllItems = partitions (~== TagOpen "item" []) | |
getElemText :: String -> [Tag String] -> String | |
getElemText n = fromTagText . (!! 1) . dropWhile (~/= TagOpen n []) | |
getPostTitle = getElemText "title" | |
getPostId = getElemText "wp:post_id" | |
getPostDate = getElemText "wp:post_date_gmt" | |
getPostName = getElemText "wp:post_name" | |
getPostContent = getElemText "content:encoded" | |
getPostTags :: [Tag String] -> [String] | |
getPostTags item = map (fromTagText . (!! 1)) tags | |
where | |
tags = partitions (~== TagOpen "category" [("domain", "post_tag")]) item | |
getPostComments :: [Tag String] -> [[Tag String]] | |
getPostComments = partitions (~== TagOpen "wp:comment" []) | |
getCommentId = getElemText "wp:comment_id" | |
getCommentAuthor = getElemText "wp:comment_author" | |
getCommentDate = getElemText "wp:comment_date_gmt" | |
getCommentContent = getElemText "wp:comment_content" | |
-- {{{1 types | |
data BlogPost = BlogPost | |
{ bpId :: String | |
, bpTitle :: String | |
, bpDate :: ZonedTime | |
, bpName :: String | |
, bpContent :: String | |
, bpTags :: [String] | |
} deriving (Show) | |
instance Pretty BlogPost where | |
pretty bp = vsep ls | |
where | |
ls = [ text "---" | |
, text "id: " <> text (bpId bp) | |
, text "title: " <> text (bpTitle bp) | |
, text "date: " <> text (formatTime defaultTimeLocale "%F" $ bpDate bp) | |
, text "name: " <> text (bpName bp) | |
, text "tags: " <> text (intercalate ", " $ bpTags bp) | |
, text "..." | |
, empty | |
, text (bpContent bp) | |
] | |
data BlogComment = BlogComment | |
{ bcId :: String | |
, bcAuthor :: String | |
, bcDate :: ZonedTime | |
, bcContent :: String | |
, bcBlogPost :: BlogPost | |
} deriving (Show) | |
instance Pretty BlogComment where | |
pretty bc = vsep ls | |
where | |
ls = [ text "---" | |
, text "id: " <> text (bcId bc) | |
, text "author: " <> text (bcAuthor bc) | |
, text "date: " <> text (formatTime defaultTimeLocale "%F" $ bcDate bc) | |
, text "..." | |
, empty | |
, text (bcContent bc) | |
] | |
parseSinglePost i = (post, comments) | |
where | |
post = parsePost i | |
comments = map (parseComment post) $ getPostComments i | |
parsePost ts = BlogPost | |
{ bpId = getPostId ts | |
, bpTitle = getPostTitle ts | |
, bpDate = fromJust $ parseTime defaultTimeLocale "%F %T" (getPostDate ts) | |
, bpName = getPostName ts | |
, bpContent = getPostContent ts | |
, bpTags = getPostTags ts | |
-- , bpComments = cs | |
} | |
parseComment p ts = BlogComment | |
{ bcId = getCommentId ts | |
, bcAuthor = getCommentAuthor ts | |
, bcDate = fromJust $ parseTime defaultTimeLocale "%F %T" (getCommentDate ts) | |
, bcContent = getCommentContent ts | |
, bcBlogPost = p | |
} | |
postFileName bp = "posts" </> postDate ++ "-" ++ (bpId bp) ++ "-" ++ (bpName bp) <.> "mkd" | |
where | |
postDate = formatTime defaultTimeLocale "%F" $ bpDate bp | |
commentFileName bc = "comments" </> (takeBaseName $ postFileName $ bcBlogPost bc) ++ "-c" ++ (bcId bc) <.> "mkd" | |
-- {{{1 IO functions | |
readExportFile :: FilePath -> IO ([BlogPost], [BlogComment]) | |
readExportFile fn = do | |
xml <- readFile fn | |
let xmlTags = parseTags xml | |
items = getAllItems xmlTags | |
postsNComments = second concat (unzip $ map parseSinglePost items) | |
return postsNComments | |
writePostsNComments (posts, comments) = do | |
createDirectoryIfMissing False "posts" | |
createDirectoryIfMissing False "comments" | |
mapM_ writeOnePost posts | |
mapM_ writeOneComment comments | |
where | |
writeOnePost p = do | |
h <- openFile (postFileName p) WriteMode | |
hPutDoc h $ pretty p | |
hClose h | |
writeOneComment c = do | |
h <- openFile (commentFileName c) WriteMode | |
hPutDoc h $ pretty c | |
hClose h | |
-- {{{1 main | |
main :: IO () | |
main = do | |
[fn] <- getArgs | |
readExportFile fn >>= writePostsNComments |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment