SaitoAtsushi · May 17, 2013 13:28
diff --git a/ypub.hs b/ypub.hs
 -- -*- mode: haskell; coding: utf-8 -*-
 -- Author: SAITO Atsushi

 import Network.HTTP
 import Data.Maybe
 import Text.Regex
 import Control.Monad
 import Data.Sequence (unfoldr)
 import Data.Foldable (toList)
 import Data.Word
 import Text.Regex.Posix.ByteString
 import Codec.Binary.UTF8.String
 import System.Locale
 import Data.Time.Clock
 import Data.Time.Format
 import System.Environment
 import Data.ByteString.Lazy.Char8 as BL (pack, hPutStr, fromChunks, unpack)
 import Data.ByteString.Lazy as BS (pack, unpack)
 import Data.ByteString (breakSubstring)
 import Codec.Archive.Zip
 import System.IO
 import Control.Monad.State

 data Item = Page { pageNum, subTitle, containts :: String }
          | Headline String

 data Novel = Novel { ncode,
                     title,
                     explanation,
                     author    :: String,
                     items     :: [Item] }
             deriving (Show)

 formattedCurrentTime =
    do t <- getCurrentTime
       return $ formatTime defaultTimeLocale "%FT%XZ" t

 ncx novel =
    "<?xml version='1.0' encoding='utf-8' ?>\n\
    \<!DOCTYPE ncx PUBLIC '-//NISO//DTD ncx 2005-1//EN'\
    \ 'http://www.daisy.org/z3986/2005/ncx-2005-1.dtd'>\n\
    \<ncx xmlns='http://www.daisy.org/z3986/2005/ncx/' xml:lang='en'\
    \ version='2005-1'>\n\
    \<head>\n\
    \<meta name='dtb:uid' content='" ++ ncode novel ++ "' />\n\
    \<meta name='dtb:depth' content='1' />\n\
    \<meta name='dtb:totalPageCount' content='0' />\n\
    \<meta name='dtb:maxPageNumber' content='0' />\n\
    \</head>\n\
    \<docTitle>\n\
    \<text>"++ title novel ++"</text>\n\
    \</docTitle>\n\
    \<navMap>\n"++novelToNavPoint novel++"</navMap>\n</ncx>"

 opf novel =
    do fct <- formattedCurrentTime
       return $
                  "<?xml version='1.0' encoding='utf-8' ?>\n\
                  \<package xmlns='http://www.idpf.org/2007/opf'\
                  \ unique-identifier='BookId' version='3.0'>\
                  \<metadata>\
                  \<dc:title xmlns:dc='http://purl.org/dc/elements/1.1/'>"
                  ++ title novel ++
                  "</dc:title>\
                  \<dc:creator xmlns:dc='http://purl.org/dc/elements/1.1/'>"
                  ++ author novel ++
                  "</dc:creator>\
                  \<dc:language xmlns:dc='http://purl.org/dc/elements/1.1/'>\
                  \ja</dc:language>\
                  \<dc:identifier\
                  \ xmlns:dc='http://purl.org/dc/elements/1.1/'\
                  \ id='BookId'>\
                  \ urn:"
                  ++ ncode novel ++
                  "</dc:identifier>\
                  \<dc:subject xmlns:dc='http://purl.org/dc/elements/1.1/'>\
                  \General Fiction\
                  \</dc:subject>\
                  \<dc:description\
                  \ xmlns:dc='http://purl.org/dc/elements/1.1/'>"
                  ++ explanation novel ++
                  "</dc:description>\
                  \<meta property='dcterms:modified'>"
                  ++ fct ++
                  "</meta></metadata>"
                  ++ manifest novel
                  ++ spine novel ++
                  "<guide>\
                  \<reference type='title-page' title='title'\
                  \ href='title.xhtml' />\
                  \<reference type='toc' title='Table of Contents'\
                  \ href='nav.xhtml' />\
                  \</guide>\
                  \</package>"

 manifest novel =
    "<manifest>\
    \<item id='toc' href='toc.ncx' media-type='application/x-dtbncx+xml'/>\
    \<item id='nav' href='nav.xhtml'\
    \ media-type='application/xhtml+xml' properties='nav'/>\
    \<item id='title' href='title.xhtml' media-type='application/xhtml+xml'/>\
    \<item id='style' href='style.css' media-type='text/css'/>"
    ++ (manifestItem $ items novel) ++
    "</manifest>"

 styleSheet =
    "html {\
    \ -epub-writing-mode: vertical-rl;\
    \ direction: ltr;\
    \ unicode-bidi:bidi-override;\
    \}\
    \ol {\
    \ list-style-type: none;\
    \ padding: 1em 0 0 1em;\
    \ margin: 0;\
    \}\
    \p {\
    \ margin: 0;\
    \ line-height: 150%\
    \}\
    \body {\
    \ margin: 0;\
    \ padding: 0;\
    \}\
    \dl {\
    \ border: solid thick black;\
    \}"
    
 spine novel=
    "<spine toc='toc' page-progression-direction='rtl'>\                  
    \<itemref idref='title'/>\
    \<itemref idref='nav'/>"
    ++ (manifestItemref $ items novel) ++
    "</spine>"

 manifestItem :: [Item]->String
 manifestItem (x@(Page num _ _): rest) =
    "<item id='id_"
    ++num++
    "' href='"
    ++num++
    ".xhtml' media-type='application/xhtml+xml'/>"
    ++manifestItem rest

 manifestItem [] = ""
 manifestItem ((Headline _): rest) = manifestItem rest

 manifestItemref :: [Item]->String
 manifestItemref ((Page num _ _): rest) =
    "<itemref idref='id_"++num++"' />"++manifestItemref rest

 manifestItemref [] = ""
 manifestItemref ((Headline _): rest) = manifestItemref rest

 novelToNavPoint novel = itemsToNavPoint 1 $ items novel

 itemsToNavPoint :: Int -> [Item] ->String
 itemsToNavPoint countNum (x@(Page _ _ _): rest) =
    pageToNavPoint x++itemsToNavPoint countNum rest

 itemsToNavPoint _ [] = ""
 itemsToNavPoint countNum ((Headline st): rest@((Page p _ _):_)) =
                "<navPoint id='id_chapter"++show countNum++"'>"
                ++ "<navLabel><text>"++st++"</text></navLabel>"
                ++ "<content src='"++p++".xhtml' />"
                ++ case span isPage rest of
                     (pages, nextChapter)->(concat $ map pageToNavPoint pages)
                                           ++ "</navPoint>\n"
                                           ++ itemsToNavPoint (countNum+1)nextChapter

 novelToToc :: Novel -> [Word8]
 novelToToc novel =
    encode "<?xml version='1.0' encoding='utf-8' ?>\n\
    \<!DOCTYPE html>\n\
    \<html xmlns='http://www.w3.org/1999/xhtml'\
    \ xmlns:epub='http://www.idpf.org/2007/ops' xml:lang='ja'>\n\
    \<head>\n\
    \<title>目次</title>\n\
    \<link rel='stylesheet' type='text/css' href='style.css' />\n\
    \</head>\n\
    \<body>\n\
    \<section epub:type='frontmatter toc'>\
    \<h1>目次</h1>\
    \<nav epub:type='toc' id='toc'>\
    \<ol>"
    ++ (BS.unpack $ BL.pack $ itemsToToc $ items novel) ++
    encode "</ol>\
    \</nav>\
    \</section>\
    \</body>\
    \</html>"

 itemsToToc :: [Item]->String
 itemsToToc (x@(Page _ _ _): rest) = pageToToc x++itemsToToc rest
 itemsToToc [] = ""
 itemsToToc ((Headline st): rest@((Page p _ _):_)) =
    "<li><span>"++st++"</span><ol>"
    ++ case span isPage rest of
         (pages, nextChapter)->(concat $ map pageToToc pages)
                               ++ "</ol></li>\n"
                               ++ itemsToToc nextChapter

 pageToToc (Page p t _) =
    "<li><a href='"++ p ++".xhtml'>"++t++"</a></li>\n"

 container =
    "<?xml version='1.0' ?>\n\
     \<container version='1.0'\
     \ xmlns='urn:oasis:names:tc:opendocument:xmlns:container'>\
     \<rootfiles>\
     \<rootfile full-path='OPS/content.opf'\
     \ media-type='application/oebps-package+xml'/>\
     \</rootfiles>\
     \</container>"

 pageToNavPoint (Page p t _) =
    "<navPoint id='id_"++p++"' playOrder='"++p++"'>"
    ++ "<navLabel><text>"++t++"</text></navLabel>"
    ++ "<content src='"++p++".xhtml' /></navPoint>"

 toHtml5Style str = subRegex (mkRegex "<rb>([^<]+)</rb>") str "\\1"

 splitBr = splitRegex (mkRegexWithOpts "<br />" False True)

 instance Show Item where
    show (Page p t c) = "<?xml version='1.0' encoding='utf-8' ?>\n"
             ++ "<!DOCTYPE html>\n"
             ++ "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='ja'>\n"
             ++ "<head>\n<title>"++t++"</title>\n"
             ++ "<link rel='stylesheet' type='text/css' href='style.css' />\n"
             ++ "</head>\n<body>\n<h2>"++t++"</h2>\n"
             ++ (brToParagraph $ toHtml5Style $ c)
             ++ "</body></html>"
    show (Headline t) = t

 brToParagraph = concat . map (\x -> "<p>"++x++"</p>") . splitBr

 download path = do
  url <- return ("http://ncode.syosetu.com" ++ path)
  response <- Network.HTTP.simpleHTTP (getRequest url)
  getResponseBody response

 getNovel ncode = do
  indexPage <- download ("/" ++ ncode)
  title <- return $ queryTitle indexPage
  explanation <- return $ queryExplanation indexPage
  author <- return $ queryAuthor indexPage
  novelBody <- getNovelBody $ toList $ querySubtitles indexPage
  return $ Novel ncode title explanation author novelBody

 titlePage novel =
    encode "<?xml version='1.0' encoding='utf-8'?>\
    \<!DOCTYPE html>\
    \<html xmlns='http://www.w3.org/1999/xhtml'\
    \ xmlns:epub='http://www.idpf.org/2007/ops' xml:lang='ja'>\
    \<head>\
    \<link rel='stylesheet' type='text/css' href='style.css'/>\
    \<title>" ++ (BS.unpack $ BL.pack $ title novel) ++
    encode "</title>\
    \</head>\
    \<body>\
    \<h1>" ++ (BS.unpack $ BL.pack $ title novel) ++ encode "</h1>\
    \<div style='text-align:right'>"
    ++ (BS.unpack $ BL.pack $ author novel)++
    encode "</div>\
    \<h2>あらすじ</h2>\
    \<p>"
    ++(BS.unpack $ BL.pack $ explanation novel)++
    encode "</p></body>\
    \</html>"
         
 getNovelBody urls = sequence $ map f urls
      where f ["", url, title] =
                do body <- (download url)
                   return $  Page (pathToNum url) title (queryNovelBody body)
            f [title, "", ""]  = return $ Headline title

 matchRegexOne reg str = matchRegex reg str >>= (return . head)

 matchRegexAll' reg = unfoldr $ scan
    where scan target =
              case matchRegexAll reg target of
                Just (_, _, a, xs) -> Just (xs, a)
                Nothing -> Nothing

 queryTitle x = fromJust $ matchRegexOne (mkRegex "<title>([^<]+)</title>") x

 queryAuthor x = re ((fromJust $ (matchRegex (mkRegexWithOpts "<div class=\"novel_writername\">([^>]+>)?([^<]+)</" False True) x)) !! 1)
    where
      re x = fromJust $ matchRegexOne (mkRegexWithOpts "^([^\r\n]+)" False True) x

 queryNovelBody str =
    fromJust (matchRegexOne (mkRegexWithOpts "<div class=\"novel_view\" id=\"novel_view\">(.+)" False True) str
    >>= (matchRegexAll $ mkRegex "</div>")
    >>= (\x -> case x of (before,_,_,_) -> Just before))

 querySubtitles = matchRegexAll' $ mkRegex "<tr><td class=\"chapter\" colspan=\"4\">([^<]+)</td></tr>|<td class=\"[^_]+_subtitle\"><a href=\"([^\"]+)\">([^<]+)</a></td>"

 pathToNum str = fromJust $ matchRegexOne (mkRegex "^/.+/(.+)/$") str

 queryExplanation x =
    fromJust $
    matchRegexOne
    (mkRegexWithOpts "<div class=\"novel_ex\">([^>]+)</div>" False True)
    x

 isPage :: Item -> Bool
 isPage (Page _ _ _)= True
 isPage _ = False

 onlyPage :: [Item] -> [Item]
 onlyPage x = filter isPage x

 code = decode . BS.unpack . BL.pack

 epubize ncode =
    do novel <- getNovel ncode
       h <- openFile ("["++(code $author novel)++"]"++(code $title novel)++".epub") WriteMode
       archive <- return $ foldr (\x arc->(addEntryToArchive (page2entry x) arc)) emptyArchive (onlyPage $ items novel)
       archive <- return $ addEntryToArchive (toEntry "OPS/toc.ncx" 0 $ BL.pack $ ncx novel) archive
       archive <- return $ addEntryToArchive (toEntry "OPS/style.css" 0 $ BL.pack styleSheet) archive
       nopf    <- opf novel
       archive <- return $ addEntryToArchive (toEntry "OPS/content.opf" 0 $ BL.pack nopf) archive
       archive <- return $ addEntryToArchive (toEntry "OPS/nav.xhtml" 0 $ BS.pack $ novelToToc novel) archive
       archive <- return $ addEntryToArchive (toEntry "META-INF/container.xml" 0 $ BL.pack container) archive
       archive <- return $ addEntryToArchive (toEntry "OPS/title.xhtml" 0 $ BS.pack $ titlePage novel) archive
       archive <- return $ addEntryToArchive (toEntry "mimetype" 0 $ BL.pack "application/epub+zip" ) archive
       BL.hPutStr h $ fromArchive archive
       hClose h
    where
      page2entry x=(toEntry ("OPS/"++pageNum x++".xhtml") 0 (BL.pack (show x)))

 usage = putStr "Usage: ypub ncode ...\n"

 main =
    do args <- getArgs
       if args==[]
       then usage
       else mapM_ epubize args
	-- -- mode: haskell; coding: utf-8 --
	-- Author: SAITO Atsushi

	import Network.HTTP
	import Data.Maybe
	import Text.Regex
	import Control.Monad
	import Data.Sequence (unfoldr)
	import Data.Foldable (toList)
	import Data.Word
	import Text.Regex.Posix.ByteString
	import Codec.Binary.UTF8.String
	import System.Locale
	import Data.Time.Clock
	import Data.Time.Format
	import System.Environment
	import Data.ByteString.Lazy.Char8 as BL (pack, hPutStr, fromChunks, unpack)
	import Data.ByteString.Lazy as BS (pack, unpack)
	import Data.ByteString (breakSubstring)
	import Codec.Archive.Zip
	import System.IO
	import Control.Monad.State

	data Item = Page { pageNum, subTitle, containts :: String }
	\| Headline String

	data Novel = Novel { ncode,
	title,
	explanation,
	author :: String,
	items :: [Item] }
	deriving (Show)

	formattedCurrentTime =
	do t <- getCurrentTime
	return $ formatTime defaultTimeLocale "%FT%XZ" t

	ncx novel =
	"<?xml version='1.0' encoding='utf-8' ?>\n\
	\<!DOCTYPE ncx PUBLIC '-//NISO//DTD ncx 2005-1//EN'\
	\ 'http://www.daisy.org/z3986/2005/ncx-2005-1.dtd'>\n\
	\<ncx xmlns='http://www.daisy.org/z3986/2005/ncx/' xml:lang='en'\
	\ version='2005-1'>\n\
	\<head>\n\
	\<meta name='dtb:uid' content='" ++ ncode novel ++ "' />\n\
	\<meta name='dtb:depth' content='1' />\n\
	\<meta name='dtb:totalPageCount' content='0' />\n\
	\<meta name='dtb:maxPageNumber' content='0' />\n\
	\</head>\n\
	\<docTitle>\n\
	\<text>"++ title novel ++"</text>\n\
	\</docTitle>\n\
	\<navMap>\n"++novelToNavPoint novel++"</navMap>\n</ncx>"

	opf novel =
	do fct <- formattedCurrentTime
	return $
	"<?xml version='1.0' encoding='utf-8' ?>\n\
	\<package xmlns='http://www.idpf.org/2007/opf'\
	\ unique-identifier='BookId' version='3.0'>\
	\<metadata>\
	\<dc:title xmlns:dc='http://purl.org/dc/elements/1.1/'>"
	++ title novel ++
	"</dc:title>\
	\<dc:creator xmlns:dc='http://purl.org/dc/elements/1.1/'>"
	++ author novel ++
	"</dc:creator>\
	\<dc:language xmlns:dc='http://purl.org/dc/elements/1.1/'>\
	\ja</dc:language>\
	\<dc:identifier\
	\ xmlns:dc='http://purl.org/dc/elements/1.1/'\
	\ id='BookId'>\
	\ urn:"
	++ ncode novel ++
	"</dc:identifier>\
	\<dc:subject xmlns:dc='http://purl.org/dc/elements/1.1/'>\
	\General Fiction\
	\</dc:subject>\
	\<dc:description\
	\ xmlns:dc='http://purl.org/dc/elements/1.1/'>"
	++ explanation novel ++
	"</dc:description>\
	\<meta property='dcterms:modified'>"
	++ fct ++
	"</meta></metadata>"
	++ manifest novel
	++ spine novel ++
	"<guide>\
	\<reference type='title-page' title='title'\
	\ href='title.xhtml' />\
	\<reference type='toc' title='Table of Contents'\
	\ href='nav.xhtml' />\
	\</guide>\
	\</package>"

	manifest novel =
	"<manifest>\
	\<item id='toc' href='toc.ncx' media-type='application/x-dtbncx+xml'/>\
	\<item id='nav' href='nav.xhtml'\
	\ media-type='application/xhtml+xml' properties='nav'/>\
	\<item id='title' href='title.xhtml' media-type='application/xhtml+xml'/>\
	\<item id='style' href='style.css' media-type='text/css'/>"
	++ (manifestItem $ items novel) ++
	"</manifest>"

	styleSheet =
	"html {\
	\ -epub-writing-mode: vertical-rl;\
	\ direction: ltr;\
	\ unicode-bidi:bidi-override;\
	\}\
	\ol {\
	\ list-style-type: none;\
	\ padding: 1em 0 0 1em;\
	\ margin: 0;\
	\}\
	\p {\
	\ margin: 0;\
	\ line-height: 150%\
	\}\
	\body {\
	\ margin: 0;\
	\ padding: 0;\
	\}\
	\dl {\
	\ border: solid thick black;\
	\}"

	spine novel=
	"<spine toc='toc' page-progression-direction='rtl'>\
	\<itemref idref='title'/>\
	\<itemref idref='nav'/>"
	++ (manifestItemref $ items novel) ++
	"</spine>"

	manifestItem :: [Item]->String
	manifestItem (x@(Page num _ _): rest) =
	"<item id='id_"
	++num++
	"' href='"
	++num++
	".xhtml' media-type='application/xhtml+xml'/>"
	++manifestItem rest

	manifestItem [] = ""
	manifestItem ((Headline _): rest) = manifestItem rest

	manifestItemref :: [Item]->String
	manifestItemref ((Page num _ _): rest) =
	"<itemref idref='id_"++num++"' />"++manifestItemref rest

	manifestItemref [] = ""
	manifestItemref ((Headline _): rest) = manifestItemref rest

	novelToNavPoint novel = itemsToNavPoint 1 $ items novel

	itemsToNavPoint :: Int -> [Item] ->String
	itemsToNavPoint countNum (x@(Page _ _ _): rest) =
	pageToNavPoint x++itemsToNavPoint countNum rest

	itemsToNavPoint _ [] = ""
	itemsToNavPoint countNum ((Headline st): rest@((Page p _ _):_)) =
	"<navPoint id='id_chapter"++show countNum++"'>"
	++ "<navLabel><text>"++st++"</text></navLabel>"
	++ "<content src='"++p++".xhtml' />"
	++ case span isPage rest of
	(pages, nextChapter)->(concat $ map pageToNavPoint pages)
	++ "</navPoint>\n"
	++ itemsToNavPoint (countNum+1)nextChapter

	novelToToc :: Novel -> [Word8]
	novelToToc novel =
	encode "<?xml version='1.0' encoding='utf-8' ?>\n\
	\<!DOCTYPE html>\n\
	\<html xmlns='http://www.w3.org/1999/xhtml'\
	\ xmlns:epub='http://www.idpf.org/2007/ops' xml:lang='ja'>\n\
	\<head>\n\
	\<title>目次</title>\n\
	\<link rel='stylesheet' type='text/css' href='style.css' />\n\
	\</head>\n\
	\<body>\n\
	\<section epub:type='frontmatter toc'>\
	\<h1>目次</h1>\
	\<nav epub:type='toc' id='toc'>\
	\<ol>"
	++ (BS.unpack $ BL.pack $ itemsToToc $ items novel) ++
	encode "</ol>\
	\</nav>\
	\</section>\
	\</body>\
	\</html>"

	itemsToToc :: [Item]->String
	itemsToToc (x@(Page _ _ _): rest) = pageToToc x++itemsToToc rest
	itemsToToc [] = ""
	itemsToToc ((Headline st): rest@((Page p _ _):_)) =
	"<li><span>"++st++"</span><ol>"
	++ case span isPage rest of
	(pages, nextChapter)->(concat $ map pageToToc pages)
	++ "</ol></li>\n"
	++ itemsToToc nextChapter

	pageToToc (Page p t _) =
	"<li><a href='"++ p ++".xhtml'>"++t++"</a></li>\n"

	container =
	"<?xml version='1.0' ?>\n\
	\<container version='1.0'\
	\ xmlns='urn:oasis:names:tc:opendocument:xmlns:container'>\
	\<rootfiles>\
	\<rootfile full-path='OPS/content.opf'\
	\ media-type='application/oebps-package+xml'/>\
	\</rootfiles>\
	\</container>"

	pageToNavPoint (Page p t _) =
	"<navPoint id='id_"++p++"' playOrder='"++p++"'>"
	++ "<navLabel><text>"++t++"</text></navLabel>"
	++ "<content src='"++p++".xhtml' /></navPoint>"

	toHtml5Style str = subRegex (mkRegex "<rb>([^<]+)</rb>") str "\\1"

	splitBr = splitRegex (mkRegexWithOpts "<br />" False True)

	instance Show Item where
	show (Page p t c) = "<?xml version='1.0' encoding='utf-8' ?>\n"
	++ "<!DOCTYPE html>\n"
	++ "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='ja'>\n"
	++ "<head>\n<title>"++t++"</title>\n"
	++ "<link rel='stylesheet' type='text/css' href='style.css' />\n"
	++ "</head>\n<body>\n<h2>"++t++"</h2>\n"
	++ (brToParagraph $ toHtml5Style $ c)
	++ "</body></html>"
	show (Headline t) = t

	brToParagraph = concat . map (\x -> "<p>"++x++"</p>") . splitBr

	download path = do
	url <- return ("http://ncode.syosetu.com" ++ path)
	response <- Network.HTTP.simpleHTTP (getRequest url)
	getResponseBody response

	getNovel ncode = do
	indexPage <- download ("/" ++ ncode)
	title <- return $ queryTitle indexPage
	explanation <- return $ queryExplanation indexPage
	author <- return $ queryAuthor indexPage
	novelBody <- getNovelBody $ toList $ querySubtitles indexPage
	return $ Novel ncode title explanation author novelBody

	titlePage novel =
	encode "<?xml version='1.0' encoding='utf-8'?>\
	\<!DOCTYPE html>\
	\<html xmlns='http://www.w3.org/1999/xhtml'\
	\ xmlns:epub='http://www.idpf.org/2007/ops' xml:lang='ja'>\
	\<head>\
	\<link rel='stylesheet' type='text/css' href='style.css'/>\
	\<title>" ++ (BS.unpack $ BL.pack $ title novel) ++
	encode "</title>\
	\</head>\
	\<body>\
	\<h1>" ++ (BS.unpack $ BL.pack $ title novel) ++ encode "</h1>\
	\<div style='text-align:right'>"
	++ (BS.unpack $ BL.pack $ author novel)++
	encode "</div>\
	\<h2>あらすじ</h2>\
	\<p>"
	++(BS.unpack $ BL.pack $ explanation novel)++
	encode "</p></body>\
	\</html>"

	getNovelBody urls = sequence $ map f urls
	where f ["", url, title] =
	do body <- (download url)
	return $ Page (pathToNum url) title (queryNovelBody body)
	f [title, "", ""] = return $ Headline title

	matchRegexOne reg str = matchRegex reg str >>= (return . head)

	matchRegexAll' reg = unfoldr $ scan
	where scan target =
	case matchRegexAll reg target of
	Just (_, _, a, xs) -> Just (xs, a)
	Nothing -> Nothing

	queryTitle x = fromJust $ matchRegexOne (mkRegex "<title>([^<]+)</title>") x

	queryAuthor x = re ((fromJust $ (matchRegex (mkRegexWithOpts "<div class=\"novel_writername\">([^>]+>)?([^<]+)</" False True) x)) !! 1)
	where
	re x = fromJust $ matchRegexOne (mkRegexWithOpts "^([^\r\n]+)" False True) x

	queryNovelBody str =
	fromJust (matchRegexOne (mkRegexWithOpts "<div class=\"novel_view\" id=\"novel_view\">(.+)" False True) str
	>>= (matchRegexAll $ mkRegex "</div>")
	>>= (\x -> case x of (before,_,_,_) -> Just before))

	querySubtitles = matchRegexAll' $ mkRegex "<tr><td class=\"chapter\" colspan=\"4\">([^<]+)</td></tr>\|<td class=\"[^_]+_subtitle\"><a href=\"([^\"]+)\">([^<]+)</a></td>"

	pathToNum str = fromJust $ matchRegexOne (mkRegex "^/.+/(.+)/$") str

	queryExplanation x =
	fromJust $
	matchRegexOne
	(mkRegexWithOpts "<div class=\"novel_ex\">([^>]+)</div>" False True)
	x

	isPage :: Item -> Bool
	isPage (Page _ _ _)= True
	isPage _ = False

	onlyPage :: [Item] -> [Item]
	onlyPage x = filter isPage x

	code = decode . BS.unpack . BL.pack

	epubize ncode =
	do novel <- getNovel ncode
	h <- openFile ("["++(code $author novel)++"]"++(code $title novel)++".epub") WriteMode
	archive <- return $ foldr (\x arc->(addEntryToArchive (page2entry x) arc)) emptyArchive (onlyPage $ items novel)
	archive <- return $ addEntryToArchive (toEntry "OPS/toc.ncx" 0 $ BL.pack $ ncx novel) archive
	archive <- return $ addEntryToArchive (toEntry "OPS/style.css" 0 $ BL.pack styleSheet) archive
	nopf <- opf novel
	archive <- return $ addEntryToArchive (toEntry "OPS/content.opf" 0 $ BL.pack nopf) archive
	archive <- return $ addEntryToArchive (toEntry "OPS/nav.xhtml" 0 $ BS.pack $ novelToToc novel) archive
	archive <- return $ addEntryToArchive (toEntry "META-INF/container.xml" 0 $ BL.pack container) archive
	archive <- return $ addEntryToArchive (toEntry "OPS/title.xhtml" 0 $ BS.pack $ titlePage novel) archive
	archive <- return $ addEntryToArchive (toEntry "mimetype" 0 $ BL.pack "application/epub+zip" ) archive
	BL.hPutStr h $ fromArchive archive
	hClose h
	where
	page2entry x=(toEntry ("OPS/"++pageNum x++".xhtml") 0 (BL.pack (show x)))

	usage = putStr "Usage: ypub ncode ...\n"

	main =
	do args <- getArgs
	if args==[]
	then usage
	else mapM_ epubize args