scan · May 5, 2012 21:57 · michaelt · May 5, 2012 · scan · May 6, 2012
diff --git a/gistfile1.txt b/gistfile1.txt
 *Jade> parseTest tag "bind(tag=\"longname\")\n\t| Einstein\n\t| and Others\n"
 Element {elementTag = "bind", elementAttrs = [("tag","longname")], elementChildren = [TextNode "Einstein"]}
 *Jade> parseTest (many1 tag) "bind(tag=\"longname\")\n\t| Einstein\n\t| and Others\nkit(l=\"k\")"
 [Element {elementTag = "bind", elementAttrs = [("tag","longname")], elementChildren = [TextNode "Einstein"]}]
diff --git a/Jade.hs b/Jade.hs
 module Jade where

 import Text.Parsec
 import qualified Text.Parsec.Token as L
 import Text.Parsec.Language (emptyDef)
 import Text.Parsec.Text (Parser)
 import Data.Text (Text)
 import Control.Monad (mzero)
 import Control.Applicative ((<$>))
 import Data.Maybe (isJust, fromMaybe)
 import qualified Data.Text as T
 import qualified Text.XmlHtml as X

 lexer = L.makeTokenParser emptyDef {
                                   L.identStart = letter <|> oneOf "-_:",
                                   L.identLetter = alphaNum <|> oneOf "-_:"
                                   }

 whiteSpace = L.whiteSpace lexer
 lexeme = L.lexeme lexer
 symbol = L.symbol lexer
 natural = L.natural lexer
 parens = L.parens lexer
 semi = L.semi lexer
 squares = L.squares lexer
 stringLiteral = L.stringLiteral lexer
 identifier = L.identifier lexer
 reserved = L.reserved lexer
 reservedOp = L.reservedOp lexer
 commaSep1 = L.commaSep1 lexer

 notSameIndent p = (eof >> return []) <|> do
    pos <- getPosition
    if sourceColumn p == sourceColumn pos then mzero else return []

 block = do
    pos <- getPosition
    manyTill1 tag $ notSameIndent pos

 tag = do
    pos <- getPosition
    try $ do
        t <- lexeme $ textP <|> tagP
        ts <- (eol >> return []) <|> block
        case t of
            e@(X.Element _ _ c) -> return $ e { X.elementChildren = c ++ ts }
            n -> return n

 tagP = do
    t <- optionMaybe identifier
    i <- optionMaybe $ char '#' >> identifier
    c <- optionMaybe $ many1 $ char '.' >> identifier
    a <- option [] $ parens $ commaSep1 attribute
    if isJust t || isJust i || isJust c
        then return $ X.Element {
                       X.elementTag = fromMaybe "div" $ fmap T.pack t,
                       X.elementAttrs = a ++ (concat $ map (fromMaybe []) $ [fmap ((:[]) . ((,) "id") . T.pack) i, fmap ((:[]) . ((,) "class") . (T.intercalate " ") . map T.pack) c]),
                       X.elementChildren = []
                       }
        else mzero

 textP = do
    lexeme $ char '|'
    txt <- many $ noneOf "\n"
    return $ X.TextNode $ T.pack txt

 attribute = do
    k <- T.pack <$> identifier
    symbol "="
    cs <- T.pack <$> stringLiteral
    return (k, cs)

 manyTill1 p e = do
    ms <- manyTill p e
    if null ms then mzero else return ms

 isInline p = do
    p2 <- getPosition
    if sourceLine p == sourceLine p2 then return [] else mzero

 eol = newline <|> (eof >> return '\n')

 isSameIndent p1 p2 = sourceColumn p1 == sourceColumn p2
	*Jade> parseTest tag "bind(tag=\"longname\")\n\t\| Einstein\n\t\| and Others\n"
	Element {elementTag = "bind", elementAttrs = [("tag","longname")], elementChildren = [TextNode "Einstein"]}
	*Jade> parseTest (many1 tag) "bind(tag=\"longname\")\n\t\| Einstein\n\t\| and Others\nkit(l=\"k\")"
	[Element {elementTag = "bind", elementAttrs = [("tag","longname")], elementChildren = [TextNode "Einstein"]}]
	module Jade where

	import Text.Parsec
	import qualified Text.Parsec.Token as L
	import Text.Parsec.Language (emptyDef)
	import Text.Parsec.Text (Parser)
	import Data.Text (Text)
	import Control.Monad (mzero)
	import Control.Applicative ((<$>))
	import Data.Maybe (isJust, fromMaybe)
	import qualified Data.Text as T
	import qualified Text.XmlHtml as X

	lexer = L.makeTokenParser emptyDef {
	L.identStart = letter <\|> oneOf "-_:",
	L.identLetter = alphaNum <\|> oneOf "-_:"
	}

	whiteSpace = L.whiteSpace lexer
	lexeme = L.lexeme lexer
	symbol = L.symbol lexer
	natural = L.natural lexer
	parens = L.parens lexer
	semi = L.semi lexer
	squares = L.squares lexer
	stringLiteral = L.stringLiteral lexer
	identifier = L.identifier lexer
	reserved = L.reserved lexer
	reservedOp = L.reservedOp lexer
	commaSep1 = L.commaSep1 lexer

	notSameIndent p = (eof >> return []) <\|> do
	pos <- getPosition
	if sourceColumn p == sourceColumn pos then mzero else return []

	block = do
	pos <- getPosition
	manyTill1 tag $ notSameIndent pos

	tag = do
	pos <- getPosition
	try $ do
	t <- lexeme $ textP <\|> tagP
	ts <- (eol >> return []) <\|> block
	case t of
	e@(X.Element _ _ c) -> return $ e { X.elementChildren = c ++ ts }
	n -> return n

	tagP = do
	t <- optionMaybe identifier
	i <- optionMaybe $ char '#' >> identifier
	c <- optionMaybe $ many1 $ char '.' >> identifier
	a <- option [] $ parens $ commaSep1 attribute
	if isJust t \|\| isJust i \|\| isJust c
	then return $ X.Element {
	X.elementTag = fromMaybe "div" $ fmap T.pack t,
	X.elementAttrs = a ++ (concat $ map (fromMaybe []) $ [fmap ((:[]) . ((,) "id") . T.pack) i, fmap ((:[]) . ((,) "class") . (T.intercalate " ") . map T.pack) c]),
	X.elementChildren = []
	}
	else mzero

	textP = do
	lexeme $ char '\|'
	txt <- many $ noneOf "\n"
	return $ X.TextNode $ T.pack txt

	attribute = do
	k <- T.pack <$> identifier
	symbol "="
	cs <- T.pack <$> stringLiteral
	return (k, cs)

	manyTill1 p e = do
	ms <- manyTill p e
	if null ms then mzero else return ms

	isInline p = do
	p2 <- getPosition
	if sourceLine p == sourceLine p2 then return [] else mzero

	eol = newline <\|> (eof >> return '\n')

	isSameIndent p1 p2 = sourceColumn p1 == sourceColumn p2