Created
July 29, 2025 17:52
-
-
Save kayvank/76139c0f15e88f8172c38fb811aa1c4b to your computer and use it in GitHub Desktop.
A simple, incomplete, Markdown to Html converter using megaparsec
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env nix-shell | |
| #!nix-shell --pure -p "haskellPackages.ghcWithPackages(pkgs:[pkgs.text pkgs.megaparsec])" -i runghc | |
| {- | | |
| A megaparsec prototype to convert markdown to html. | |
| Note: This is incomplete. There are a few more markdown `nodes` to convert to html | |
| TODO | |
| convert Paragrapsh, links, images | |
| -} | |
| {-# LANGUAGE OverloadedStrings #-} | |
| -- import Control.Applicative | |
| import Control.Monad (void) | |
| import Data.Text (Text) | |
| import Data.Text qualified as T | |
| import qualified Data.Text.Internal.Search as T.Search | |
| import Data.Void (Void) | |
| import Text.Megaparsec ( | |
| MonadParsec (takeWhile1P, try, parseError, takeP), | |
| Parsec, | |
| choice, | |
| optional, | |
| parseMaybe, | |
| some, | |
| (<|>), lookAhead, ParseError (..), parse, | |
| ) | |
| import Text.Megaparsec.Char ( | |
| char, | |
| space, | |
| string, newline, | |
| ) | |
| import Data.Maybe (fromMaybe) | |
| type Parser = Parsec Void Text | |
| data MListItem = MListItem MarkDownElement [MarkDownElement] | |
| deriving (Eq, Show) | |
| -- | store the Markdown elements | |
| data MarkDownElement | |
| = MHeading Int MarkDownElement | |
| | MBold MarkDownElement | |
| | MItalic MarkDownElement | |
| | MBoldItalic MarkDownElement | |
| | MLine [MarkDownElement] | |
| | MUnorderedList [MListItem] | |
| | MOrderedList [MListItem] | |
| | Only Text -- Bottom of all types | |
| deriving (Eq, Show) | |
| excludeCharacters :: [Char] | |
| excludeCharacters = | |
| [ '#' | |
| , '!' | |
| , '\n' | |
| , '_' | |
| , '*' | |
| , '[' | |
| , ']' | |
| ] | |
| parseOnly :: Parser MarkDownElement | |
| parseOnly = | |
| space >> takeWhile1P Nothing (`notElem` excludeCharacters) >>= pure . Only | |
| toHTML :: [MarkDownElement] -> Text | |
| toHTML = foldr (\c acc -> toHtmlHelper c <> acc) "" | |
| toHtmlHelper :: MarkDownElement -> Text | |
| toHtmlHelper (MLine md) = toHTML md | |
| toHtmlHelper (MUnorderedList itemList) = "<ol>" <> itemToHTML itemList <> "</ol>" | |
| toHtmlHelper (MOrderedList itemList) = "<ul>" <> itemToHTML itemList <> "</ul>" | |
| toHtmlHelper (Only t) = t | |
| toHtmlHelper (MBold x) = "<strong>" <> toHtmlHelper x <> "</strong>" | |
| toHtmlHelper (MBoldItalic x) = "<strong><em>" <> toHtmlHelper x <> "</em></strong>" | |
| toHtmlHelper (MItalic x) = "<em>" <> toHtmlHelper x <> "</em>" | |
| toHtmlHelper (MHeading n md) = | |
| let | |
| tag = "h" <> (T.pack $ show n) | |
| in "<" <> tag <> ">" | |
| <> toHtmlHelper md | |
| <> "</" <> tag <> ">" | |
| itemToHTML :: [MListItem] -> Text | |
| itemToHTML = foldr (\x acc -> "<li>" <> itemToHTML_ x <> "</li>" <> acc) "" | |
| itemToHTML_ :: MListItem -> Text | |
| itemToHTML_ (MListItem md mds) = toHtmlHelper md <> toHTML mds | |
| -- | main parser | |
| mainParser :: Parser MarkDownElement | |
| mainParser = do | |
| choice | |
| [ try parseHeading | |
| , try parseLine | |
| , try parseBoldItalic | |
| , try parseBold | |
| , try parseItalic | |
| , try parseOrderedList | |
| , try parseUnorderedList | |
| ] | |
| parseList :: [Parser MarkDownElement] | |
| parseList = | |
| [ try parseHeading | |
| , try parseBoldItalic | |
| , try parseBold | |
| , try parseItalic | |
| , try parseOrderedList | |
| , try parseUnorderedList | |
| , try parseOnly | |
| ] | |
| -- | line parser | |
| parseLine :: Parser MarkDownElement | |
| parseLine = MLine <$> some (choice parseList) | |
| parseEverythingTillStarOrUnderscore :: Parser MarkDownElement | |
| parseEverythingTillStarOrUnderscore = | |
| takeWhile1P | |
| Nothing | |
| ( \x -> | |
| (x /= '*') | |
| && (x /= '_') | |
| && (x /= '\n') | |
| ) | |
| >>= pure . Only | |
| parseListItem :: Parser a -> Parser MListItem | |
| parseListItem numberingParser = do | |
| space | |
| void numberingParser | |
| content <- takeWhile1P Nothing (/='\n') | |
| subItems <- optional $ try (some (newline >> char ' ' >> char ' ' >> parseLine)) | |
| void $ optional newline | |
| case parseMaybe parseLine content of | |
| Nothing -> return $ MListItem (Only content) $ fromMaybe [] subItems | |
| Just parsedContent -> return $ MListItem parsedContent $ fromMaybe [] subItems | |
| -- | parse ordered list | |
| parseOrderedList :: Parser MarkDownElement | |
| parseOrderedList = MOrderedList <$> some (parseListItem (char '*')) | |
| -- | parse un-ordered list | |
| parseUnorderedList :: Parser MarkDownElement | |
| parseUnorderedList = MUnorderedList <$> some (parseListItem (char '-')) | |
| parseHeading :: Parser MarkDownElement | |
| parseHeading = do | |
| space | |
| hashesCnt <- T.length <$> takeWhile1P Nothing (== '#') | |
| MHeading hashesCnt <$> parseLine | |
| parseBold :: Parser MarkDownElement | |
| parseBold = space >> parseBold_ "**" <|> parseBold_ "__" | |
| where | |
| parseBold_ str = do | |
| space | |
| void $ string str | |
| content <- lookAhead $ takeWhile1P Nothing (/='\n') | |
| let res = T.Search.indices str content | |
| case res of | |
| [] -> parseError (TrivialError 1 Nothing mempty) -- if the ending ** is not found, parseBold will fail | |
| (indexOfStarStar:_) -> do | |
| void $ takeP Nothing indexOfStarStar | |
| void $ string str | |
| let contentWithoutStarStar = T.take indexOfStarStar content | |
| case parseMaybe parseLineForBold contentWithoutStarStar of | |
| Nothing -> return $ MBold (Only contentWithoutStarStar) | |
| Just x -> return $ MBold x | |
| parseItalic :: Parser MarkDownElement | |
| parseItalic = space >> go '*' <|> go '_' | |
| where | |
| go ch = do | |
| space | |
| void $ char ch | |
| content <- lookAhead $ takeWhile1P Nothing (/='\n') | |
| let mRes = T.findIndex (==ch) content | |
| case mRes of | |
| Nothing -> parseError (TrivialError 1 Nothing mempty) -- if the ending ** is not found, parseBold will fail | |
| Just indexOfStarStar -> do | |
| void $ takeP Nothing indexOfStarStar | |
| void $ char ch | |
| let contentWithoutStarStar = T.take indexOfStarStar content | |
| case parseMaybe parseLineForBold contentWithoutStarStar of | |
| Nothing -> return $ MItalic (Only contentWithoutStarStar) | |
| Just x -> return $ MItalic x | |
| parseLineForItalic :: Parser MarkDownElement | |
| parseLineForItalic = MLine <$> some (choice parserListForItalic) | |
| parseLineForBold :: Parser MarkDownElement | |
| parseLineForBold = MLine <$> some (choice parserListForBold) | |
| parserListForItalic :: [Parser MarkDownElement] | |
| parserListForItalic = [ | |
| try parseBold | |
| , try parseUnorderedList | |
| , try parseOrderedList | |
| , try parseOnly | |
| , try parseEverythingTillStarOrUnderscore | |
| ] | |
| parserListForBold :: [Parser MarkDownElement] | |
| parserListForBold = [ | |
| try parseItalic | |
| , try parseUnorderedList | |
| , try parseOrderedList | |
| , try parseOnly | |
| , try parseEverythingTillStarOrUnderscore | |
| ] | |
| parseBoldItalic :: Parser MarkDownElement | |
| parseBoldItalic = space >> parseBold_ "***" <|> parseBold_ "___" | |
| where | |
| parseBold_ str = do | |
| space | |
| void $ string str | |
| content <- lookAhead $ takeWhile1P Nothing (/='\n') | |
| let res = T.Search.indices str content | |
| case res of | |
| [] -> parseError (TrivialError 1 Nothing mempty) -- if the ending ** is not found, parseBold will fail | |
| (indexOfStarStar:_) -> do | |
| void $ takeP Nothing indexOfStarStar | |
| void $ string str | |
| let contentWithoutStarStar = T.take indexOfStarStar content | |
| case parseMaybe parseLineForBold contentWithoutStarStar of | |
| Nothing -> return $ MBoldItalic (Only contentWithoutStarStar) | |
| Just x -> return $ MBoldItalic x | |
| main :: IO () | |
| main | |
| = do | |
| case parse mainParser "" "# Heading-1\nDescription\n# Heading-2\nMore descriptions\n" of | |
| Left _ -> putStrLn "error in parsing" | |
| Right x -> print $ toHTML [x] |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
usage:
Should print