Created
February 1, 2014 04:12
-
-
Save aavogt/8747831 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE DeriveDataTypeable, ViewPatterns, QuasiQuotes OverloadedStrings, ScopedTypeVariables #-} | |
import Data.Aeson | |
import Control.Applicative | |
import qualified Data.ByteString.Lazy as L | |
import qualified Data.HashMap.Strict as M | |
import qualified Data.Text.Lazy as T | |
import qualified Data.Text.Lazy.IO as T | |
import qualified Data.Text as TS | |
import HMQQ (q) | |
import qualified Data.Vector as V | |
import Data.Vector (Vector) | |
import Data.Monoid | |
import Data.Maybe | |
import Control.Monad | |
import System.Console.CmdArgs | |
import System.FilePath | |
import Data.Char | |
data LhsStyle = LhsStyle { codePrefix, outputPrefix :: T.Text, | |
codeDelims, outputDelims :: (T.Text,T.Text) } | |
lhsStyles = | |
[("bird", LhsStyle "> " "\n<< " ("","") ("","")), | |
("latex", LhsStyle "" "\n" ("\\begin{code}","\\end{code}") | |
("\\begin{verbatim}","\\end{verbatim}"))] | |
data Format = LHS_Markdown | IPYNB | |
deriving (Data,Typeable, Show, Eq) | |
data Convert = Convert { input :: String, output :: Maybe String, | |
from, to :: Maybe Format, | |
style :: String } | |
deriving (Data,Typeable,Show) | |
args0 = Convert { input = "Ihaskell.ipynb" &= typFile &= args, | |
output = Nothing &= typFile, | |
style = "bird" &= help "bird (default) or latex (not fully supported yet)", | |
from = Nothing &= help "guessed from file extension", | |
to = Nothing &= help "guessed from file extension" } | |
&= help "convert between ipynb and lhs" | |
&= details ["Examples all do the same thing, with increasing verbosity:", "", | |
"convertNotebook IHaskell.ipynb", | |
"convertNotebook -i IHaskell.ipynb", | |
"convertNotebook -i IHaskell.ipynb -o IHaskell.lhs", | |
"convertNotebook --input IHaskell.ipynb --output IHaskell.lhs", | |
"convertNotebook IHaskell.ipynb --from=IPYNB --to=LHS_Markdown"] | |
fromExt s = case map toLower (takeExtension s) of | |
".lhs" -> Just LHS_Markdown | |
".ipynb" -> Just IPYNB | |
_ -> Nothing | |
formatToExtension x = case x of | |
LHS_Markdown -> ".lhs" | |
IPYNB -> ".ipynb" | |
otherFmt x = case x of | |
LHS_Markdown -> IPYNB | |
IPYNB -> LHS_Markdown | |
main = do | |
args1 <- cmdArgs args0 | |
case args1 of | |
Convert i o | |
((`mplus` fromExt i) -> Just f) | |
(fromMaybe (otherFmt f) -> t) | |
style | |
| f /= t, | |
o <- replaceExtension i (formatToExtension t) `fromMaybe` o, | |
sty <- fromMaybe (snd (head lhsStyles)) (lookup style lhsStyles) -> | |
case (f,t) of | |
(IPYNB, LHS_Markdown) -> ipynbTolhs sty i o | |
(LHS_Markdown, IPYNB) -> lhsToIpynb sty i o | |
_ -> error $ "unimplemented: " ++ show f ++ " -> " ++ show t | |
lhsToIpynb :: LhsStyle -> FilePath -> FilePath -> IO () | |
lhsToIpynb sty from to = do | |
classed <- classifyLines sty . T.lines <$> T.readFile from | |
L.writeFile to . encode . encodeCells $ groupClassified classed | |
data CellLine a = CodeLine a | OutputLine a | MarkdownLine a | |
deriving Show | |
isCode (CodeLine _) = True | |
isCode _ = False | |
isOutput (OutputLine _) = True | |
isOutput _ = False | |
isMD (MarkdownLine _) = True | |
isMD _ = False | |
isEmptyMD (MarkdownLine a) = a == mempty | |
isEmptyMD _ = False | |
untag (CodeLine a) = a | |
untag (OutputLine a) = a | |
untag (MarkdownLine a) = a | |
data Cell a = Code a a | Markdown a | |
deriving (Show) | |
encodeCells :: [Cell [T.Text]] -> Value | |
encodeCells xs = object $ | |
[ "worksheets" .= Array (V.singleton (object | |
[ "cells" .= Array (V.fromList (map cellToVal xs)) ] )) | |
] ++ boilerplate | |
cellToVal :: Cell [T.Text] -> Value | |
cellToVal (Code i o) = object $ | |
[ "cell_type" .= String "code", | |
"collapsed" .= Bool False, | |
"language" .= String "python", -- is what it IPython gives us | |
"metadata" .= object [], | |
"input" .= arrayFromTxt i, | |
"outputs" .= Array | |
(V.fromList ( | |
[ object ["text" .= arrayFromTxt o, | |
"metadata" .= object [], | |
"output_type" .= String "display_data" ] | |
| _ <- take 1 o])) ] | |
cellToVal (Markdown txt) = object $ | |
[ "cell_type" .= String "markdown", | |
"metadata" .= object [], | |
"source" .= arrayFromTxt txt ] | |
arrayFromTxt i = Array (V.fromList (map (String . T.toStrict) i)) | |
boilerplate = | |
[ "metadata" .= object [ "language" .= String "haskell", "name" .= String ""], | |
"nbformat" .= Number 3, | |
"nbformat_minor" .= Number 0 ] | |
groupClassified :: [CellLine T.Text] -> [Cell [T.Text]] | |
groupClassified (CodeLine a : x) | |
| (c,x) <- span isCode x, | |
(_,x) <- span isEmptyMD x, | |
(o,x) <- span isOutput x = Code (a : map untag c) (map untag o) : groupClassified x | |
groupClassified (MarkdownLine a : x) | |
| (m,x) <- span isMD x = Markdown (a: map untag m) : groupClassified x | |
groupClassified (OutputLine a : x ) = Markdown [a] : groupClassified x | |
groupClassified [] = [] | |
classifyLines sty@(LhsStyle c o _ _) (l:ls) = case (sp c, sp o) of | |
(Just a, Nothing) -> CodeLine a : classifyLines sty ls | |
(Nothing, Just a) -> OutputLine a : classifyLines sty ls | |
(Nothing,Nothing) -> MarkdownLine l : classifyLines sty ls | |
where sp c = T.stripPrefix (T.dropWhile isSpace c) (T.dropWhile isSpace l) | |
classifyLines _ [] = [] | |
ipynbTolhs :: LhsStyle -> FilePath -> FilePath -> IO () | |
ipynbTolhs sty from to = do | |
Just (js :: Object) <- decode <$> L.readFile from | |
case js of | |
[q| worksheets : Array x |] | |
| [ Object [q| cells : Array x |] ] <- V.toList x -> | |
T.writeFile to $ T.unlines $ V.toList | |
$ V.map (\(Object y) -> convCell sty y) x | |
unString :: T.Text -> Vector Value -> Maybe T.Text | |
unString p arr = T.concat . map (p <>) . V.toList <$> V.mapM toStr arr | |
toStr (String x) = Just (T.fromStrict x) | |
toStr _ = Nothing | |
convOutputs sty arr = | |
(fst (outputDelims sty) <>) . | |
(<> snd (outputDelims sty)) . T.concat . V.toList | |
<$> V.mapM (getTexts (outputPrefix sty)) arr | |
getTexts p (Object [q| text : Array x |]) = unString p x | |
getTexts _ _ = Nothing | |
convCell :: LhsStyle -> Object -> T.Text | |
convCell sty | |
[q| cell_type : String "markdown", | |
source : Array xs |] | |
| ~ (Just s) <- unString "" xs = s | |
convCell sty | |
[q| cell_type : String "code", | |
input : Array i, | |
outputs : Array o | |
|] | |
| ~ (Just i) <- wrapDelims (codeDelims sty) <$> | |
unString (codePrefix sty) i, | |
o <- fromMaybe mempty (convOutputs sty o) = "\n" <> i <> "\n" <> o <> "\n" | |
convCell _ x = "unknown" | |
wrapDelims (a,b) x = a <> x <> b |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE ViewPatterns, TemplateHaskell #-} | |
module HMQQ where | |
import Language.Haskell.TH.Quote | |
import Language.Haskell.TH | |
import Language.Haskell.TH.Syntax | |
import Language.Haskell.Meta | |
import Data.Aeson | |
import Control.Monad | |
import qualified Data.HashMap.Strict as M | |
import qualified Data.Text as T | |
-- | so you can write | |
-- > f [p| "abc de" : v1, abc : v2 |] = g v1 v2 | |
fromK (VarP n) = lift (show n) | |
fromK (LitP x) = litE x | |
fromK x = error ("invalid key:"++show x) | |
-- | incomplete conversion of haskell literals into literals that stand for | |
-- aeson's Value | |
fixP s@(LitP (StringL {})) = conP 'String [return s] | |
fixP x = return x | |
extractJSONs :: Pat -> Maybe (ExpQ, PatQ) | |
extractJSONs (ListP ps) = Just $ | |
let (e,p) = unzip [ (\m -> [| M.lookup $(fromK k `sigE` [t| T.Text |]) $m |], | |
conP 'Just [fixP p]) | |
| ~ (UInfixP k cons p) <- ps, | |
~ True <- [cons == '(:)] ] | |
in ([| \m -> $(tupE (map ($ [| m |]) e)) |], tupP p) | |
extractJSONs _ = Nothing | |
q = QuasiQuoter | |
{ quotePat = \s -> case parsePat ("[" ++ s ++ "]") of | |
Right (extractJSONs -> Just (e,p)) -> viewP e p | |
x -> fail (show x), | |
quoteExp = error "q", | |
quoteType = error "q", | |
quoteDec = error "q" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment