Created
January 5, 2012 11:45
-
-
Save master-q/1564930 to your computer and use it in GitHub Desktop.
GetAllPagePan_Fin.hs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env runhaskell | |
import qualified Text.Pandoc as P | |
import System.Process | |
import System.Exit | |
import Data.ByteString.Char8 () | |
import Codec.Binary.UTF8.String | |
{-- | |
http://groups.google.com/group/haskell-jp/browse_thread/thread/b15a016985d8c426 | |
によるとghc 7.0.Xまでのsystem関数はlocale encodingが通らない。変換が必要。 | |
--} | |
inlineToString :: [P.Inline] -> String | |
inlineToString inlines = foldr (++) "" $ fmap go inlines | |
where | |
go :: P.Inline -> String | |
go (P.Str s) = s | |
go P.Space = " " | |
go e = error "inlineToString can't understand: " ++ show e | |
findLink' :: [P.Inline] -> [(String, String)] | |
findLink' inlines = concat $ fmap go inlines | |
where | |
go :: P.Inline -> [(String, String)] | |
go (P.Link inline (url, _)) = [(url, inlineToString inline)] | |
go _ = [] | |
findLink :: [P.Block] -> [(String, String)] | |
findLink blocks = concat $ fmap go blocks | |
where | |
go :: P.Block -> [(String, String)] | |
go (P.Plain inlines) = findLink' inlines | |
go (P.Para inlines) = findLink' inlines | |
go (P.BulletList blockss) = concat $ fmap findLink blockss | |
go _ = [] | |
htmlToPandoc :: String -> P.Pandoc | |
htmlToPandoc = P.readHtml P.defaultParserState{ P.stateStandalone = True } | |
curlIt :: (String, String) -> IO ExitCode | |
curlIt ss = rawSystem' "curl" ["-d", | |
"p=" ++ postp, | |
"-d", | |
"c=e", | |
"http://www.sampou.org/cgi-bin/haskell.cgi", | |
"-o", | |
outfile] | |
where postp = tail $ dropWhile (/= '?') $ fst ss | |
outfile = (snd ss) ++ ".html" | |
rawSystem' cmd = rawSystem cmd . map encodeString | |
main :: IO () | |
main = do | |
con <- getContents | |
let P.Pandoc _ blocks = htmlToPandoc con | |
mapM_ curlIt $ findLink blocks |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment