Skip to content

Instantly share code, notes, and snippets.

@kayvank
Created July 29, 2025 17:52
Show Gist options
  • Select an option

  • Save kayvank/76139c0f15e88f8172c38fb811aa1c4b to your computer and use it in GitHub Desktop.

Select an option

Save kayvank/76139c0f15e88f8172c38fb811aa1c4b to your computer and use it in GitHub Desktop.
A simple, incomplete, Markdown to Html converter using megaparsec
#!/usr/bin/env nix-shell
#!nix-shell --pure -p "haskellPackages.ghcWithPackages(pkgs:[pkgs.text pkgs.megaparsec])" -i runghc
{- |
A megaparsec prototype to convert markdown to html.
Note: This is incomplete. There are a few more markdown `nodes` to convert to html
TODO
convert Paragrapsh, links, images
-}
{-# LANGUAGE OverloadedStrings #-}
-- import Control.Applicative
import Control.Monad (void)
import Data.Text (Text)
import Data.Text qualified as T
import qualified Data.Text.Internal.Search as T.Search
import Data.Void (Void)
import Text.Megaparsec (
MonadParsec (takeWhile1P, try, parseError, takeP),
Parsec,
choice,
optional,
parseMaybe,
some,
(<|>), lookAhead, ParseError (..), parse,
)
import Text.Megaparsec.Char (
char,
space,
string, newline,
)
import Data.Maybe (fromMaybe)
type Parser = Parsec Void Text
data MListItem = MListItem MarkDownElement [MarkDownElement]
deriving (Eq, Show)
-- | store the Markdown elements
data MarkDownElement
= MHeading Int MarkDownElement
| MBold MarkDownElement
| MItalic MarkDownElement
| MBoldItalic MarkDownElement
| MLine [MarkDownElement]
| MUnorderedList [MListItem]
| MOrderedList [MListItem]
| Only Text -- Bottom of all types
deriving (Eq, Show)
excludeCharacters :: [Char]
excludeCharacters =
[ '#'
, '!'
, '\n'
, '_'
, '*'
, '['
, ']'
]
parseOnly :: Parser MarkDownElement
parseOnly =
space >> takeWhile1P Nothing (`notElem` excludeCharacters) >>= pure . Only
toHTML :: [MarkDownElement] -> Text
toHTML = foldr (\c acc -> toHtmlHelper c <> acc) ""
toHtmlHelper :: MarkDownElement -> Text
toHtmlHelper (MLine md) = toHTML md
toHtmlHelper (MUnorderedList itemList) = "<ol>" <> itemToHTML itemList <> "</ol>"
toHtmlHelper (MOrderedList itemList) = "<ul>" <> itemToHTML itemList <> "</ul>"
toHtmlHelper (Only t) = t
toHtmlHelper (MBold x) = "<strong>" <> toHtmlHelper x <> "</strong>"
toHtmlHelper (MBoldItalic x) = "<strong><em>" <> toHtmlHelper x <> "</em></strong>"
toHtmlHelper (MItalic x) = "<em>" <> toHtmlHelper x <> "</em>"
toHtmlHelper (MHeading n md) =
let
tag = "h" <> (T.pack $ show n)
in "<" <> tag <> ">"
<> toHtmlHelper md
<> "</" <> tag <> ">"
itemToHTML :: [MListItem] -> Text
itemToHTML = foldr (\x acc -> "<li>" <> itemToHTML_ x <> "</li>" <> acc) ""
itemToHTML_ :: MListItem -> Text
itemToHTML_ (MListItem md mds) = toHtmlHelper md <> toHTML mds
-- | main parser
mainParser :: Parser MarkDownElement
mainParser = do
choice
[ try parseHeading
, try parseLine
, try parseBoldItalic
, try parseBold
, try parseItalic
, try parseOrderedList
, try parseUnorderedList
]
parseList :: [Parser MarkDownElement]
parseList =
[ try parseHeading
, try parseBoldItalic
, try parseBold
, try parseItalic
, try parseOrderedList
, try parseUnorderedList
, try parseOnly
]
-- | line parser
parseLine :: Parser MarkDownElement
parseLine = MLine <$> some (choice parseList)
parseEverythingTillStarOrUnderscore :: Parser MarkDownElement
parseEverythingTillStarOrUnderscore =
takeWhile1P
Nothing
( \x ->
(x /= '*')
&& (x /= '_')
&& (x /= '\n')
)
>>= pure . Only
parseListItem :: Parser a -> Parser MListItem
parseListItem numberingParser = do
space
void numberingParser
content <- takeWhile1P Nothing (/='\n')
subItems <- optional $ try (some (newline >> char ' ' >> char ' ' >> parseLine))
void $ optional newline
case parseMaybe parseLine content of
Nothing -> return $ MListItem (Only content) $ fromMaybe [] subItems
Just parsedContent -> return $ MListItem parsedContent $ fromMaybe [] subItems
-- | parse ordered list
parseOrderedList :: Parser MarkDownElement
parseOrderedList = MOrderedList <$> some (parseListItem (char '*'))
-- | parse un-ordered list
parseUnorderedList :: Parser MarkDownElement
parseUnorderedList = MUnorderedList <$> some (parseListItem (char '-'))
parseHeading :: Parser MarkDownElement
parseHeading = do
space
hashesCnt <- T.length <$> takeWhile1P Nothing (== '#')
MHeading hashesCnt <$> parseLine
parseBold :: Parser MarkDownElement
parseBold = space >> parseBold_ "**" <|> parseBold_ "__"
where
parseBold_ str = do
space
void $ string str
content <- lookAhead $ takeWhile1P Nothing (/='\n')
let res = T.Search.indices str content
case res of
[] -> parseError (TrivialError 1 Nothing mempty) -- if the ending ** is not found, parseBold will fail
(indexOfStarStar:_) -> do
void $ takeP Nothing indexOfStarStar
void $ string str
let contentWithoutStarStar = T.take indexOfStarStar content
case parseMaybe parseLineForBold contentWithoutStarStar of
Nothing -> return $ MBold (Only contentWithoutStarStar)
Just x -> return $ MBold x
parseItalic :: Parser MarkDownElement
parseItalic = space >> go '*' <|> go '_'
where
go ch = do
space
void $ char ch
content <- lookAhead $ takeWhile1P Nothing (/='\n')
let mRes = T.findIndex (==ch) content
case mRes of
Nothing -> parseError (TrivialError 1 Nothing mempty) -- if the ending ** is not found, parseBold will fail
Just indexOfStarStar -> do
void $ takeP Nothing indexOfStarStar
void $ char ch
let contentWithoutStarStar = T.take indexOfStarStar content
case parseMaybe parseLineForBold contentWithoutStarStar of
Nothing -> return $ MItalic (Only contentWithoutStarStar)
Just x -> return $ MItalic x
parseLineForItalic :: Parser MarkDownElement
parseLineForItalic = MLine <$> some (choice parserListForItalic)
parseLineForBold :: Parser MarkDownElement
parseLineForBold = MLine <$> some (choice parserListForBold)
parserListForItalic :: [Parser MarkDownElement]
parserListForItalic = [
try parseBold
, try parseUnorderedList
, try parseOrderedList
, try parseOnly
, try parseEverythingTillStarOrUnderscore
]
parserListForBold :: [Parser MarkDownElement]
parserListForBold = [
try parseItalic
, try parseUnorderedList
, try parseOrderedList
, try parseOnly
, try parseEverythingTillStarOrUnderscore
]
parseBoldItalic :: Parser MarkDownElement
parseBoldItalic = space >> parseBold_ "***" <|> parseBold_ "___"
where
parseBold_ str = do
space
void $ string str
content <- lookAhead $ takeWhile1P Nothing (/='\n')
let res = T.Search.indices str content
case res of
[] -> parseError (TrivialError 1 Nothing mempty) -- if the ending ** is not found, parseBold will fail
(indexOfStarStar:_) -> do
void $ takeP Nothing indexOfStarStar
void $ string str
let contentWithoutStarStar = T.take indexOfStarStar content
case parseMaybe parseLineForBold contentWithoutStarStar of
Nothing -> return $ MBoldItalic (Only contentWithoutStarStar)
Just x -> return $ MBoldItalic x
main :: IO ()
main
= do
case parse mainParser "" "# Heading-1\nDescription\n# Heading-2\nMore descriptions\n" of
Left _ -> putStrLn "error in parsing"
Right x -> print $ toHTML [x]
@kayvank
Copy link
Author

kayvank commented Jul 29, 2025

usage:

chmod 755 markdownToHtml.hs
./markdownToHtml.hs

Should print

"<h1>Heading-1Description<h1>Heading-2More descriptions</h1></h1>"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment