ptitfred · May 2, 2024 14:55
diff --git a/convert-netex-tarifs.hs b/convert-netex-tarifs.hs
 #!/usr/bin/env nix-shell
 #!nix-shell --pure -i runghc -p "haskellPackages.ghcWithPackages (p: [ p.pandoc p.pretty-show ])"

 {-# OPTIONS_GHC -Wall #-}

 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}

 import           Control.Monad          ((<=<))
 import           Control.Monad.IO.Class (liftIO)
 import qualified Data.ByteString.Char8  as C8
 import           Data.Foldable          (fold, traverse_)
 import           Data.List              (delete)
 import           Data.Maybe             (fromMaybe, mapMaybe)
 import           Data.String            (IsString, fromString)
 import qualified Data.Text              as T
 import           Data.Text.Encoding     (decodeUtf8, encodeUtf8)
 import           System.FilePath        (replaceExtension, (</>))
 import           Text.Pandoc
 import           Text.Pandoc.App        (Opt (..), defaultOpts)
 import           Text.Pandoc.Builder    (Blocks, Inlines, cell, emph, fromList,
                                         header, headerWith, para, plain,
                                         singleton, spanWith, str, strong, text,
                                         toList)
 import           Text.Pandoc.MediaBag   (mediaDirectory)
 import           Text.Pandoc.MIME       (MimeType)
 import           Text.Pandoc.Walk       as Walk

 {-
 -- Requires package pretty-show
 import qualified Debug.Trace            as DT
 import qualified Text.Show.Pretty       as P

 -- `walk` and `query` can be helpful to debug the AST and what's wrong in the source.

 ppDebug :: Show a => a -> b -> b
 ppDebug = DT.traceShow . P.ppDoc
 -}

 main :: IO ()
 main =
  let source = "originaux/NF_Profil.NeTEx.pour.les.Tarifs.F.-.v1.3-nettoye.patched.docx"
      destination = "NeTEx/tarifs"
      result = destination </> "index.md"
   in runIOorExplode $ do
        -- Parse the Word document
        document <- extractMedia destination =<< parseSourceDocument source

        -- This is the main entry point
        patchedDoc <- cleanDocument destination document

        -- Final touch and output
        markdown <- renderMarkdown patchedDoc
        frontMatter <- readFrontMatter
        stylesheet <- readStylesheet
        write result (frontMatter <> markdown <> stylesheet)

        -- Lists the medias needing a conversion (not automated)
        advertiseMediaConversions

 -- https://gohugo.io/content-management/front-matter/
 readFrontMatter :: PandocIO C8.ByteString
 readFrontMatter =
  let wrap content = content <> "\n"
   in wrap <$> readFileStrict "./tarifs.yml"

 readStylesheet :: PandocMonad m => m C8.ByteString
 readStylesheet =
  let wrap content = "\n<style>\n" <> content <> "</style>\n"
   in wrap <$> readFileStrict "./tarifs.css"

 -- Document procession

 cleanDocument :: PandocMonad m => FilePath -> Pandoc -> m Pandoc
 cleanDocument destination =
  pure . wrapAnnexes <=<
  injectXMLExamples <=<
  reformatTablesCaptions .
  cleanupStyling .
  cleanupDocumentStructure destination

 -- Table captions

 reformatTablesCaptions :: PandocMonad m => Pandoc -> m Pandoc
 reformatTablesCaptions =
  let wrap = wrapDiv "table-title" . concatMap dropEmphasis
   in walkM $ \case
        Para (Str "Table" : Space : Str _nbr : Space : Str "\8211" : Space : rest) -> wrap rest
        Para (Str "Table" : Space : Str _nbr : Space : Str "\8211" :         rest) -> wrap rest
        Para (Str "Table" : Space : Str _nbr : Space : Str "\8212" :         rest) -> wrap rest
        untouched -> pure untouched

 wrapDiv :: PandocMonad m => T.Text -> [Inline] -> m Block
 wrapDiv className inner =
  let render = writeMarkdown def . Pandoc nullMeta . pure . Plain
   in do
       -- This is dirty but required to avoid wrapping the content in a <p> after rendering by hugo
        md <- render inner
        pure $
          RawBlock (Format "html") $
            "<div class='" <> className <> "'>" <> T.strip md <> "</div>"

 -- Styling

 cleanupStyling :: Pandoc -> Pandoc
 cleanupStyling =
  adjustDefinitionsStyling .
  enforceTypographyRules .
  colors

 adjustDefinitionsStyling :: Pandoc -> Pandoc
 adjustDefinitionsStyling =
  let boldenDefinition b1 [] = [b1]
      boldenDefinition b1 (b2: bs) =
        case (b1, b2) of
          (Para something, Div ( "" , [ "Definition" ] , [] ) definition) -> Header 2 nullAttr something : definition ++ bs
          _ -> b1 : b2 : bs
   in walk $ topLevel $ adjoin boldenDefinition

 -- Typographic rules

 enforceTypographyRules :: Pandoc -> Pandoc
 enforceTypographyRules = fixSpaces . fixQuotes . cleanChevrons

 fixSpaces :: Pandoc -> Pandoc
 fixSpaces =
  fixNonBreakingSpaces .
  walk (concatMap fixSpacingAroundDashes)

 fixNonBreakingSpaces :: Pandoc -> Pandoc
 fixNonBreakingSpaces =
  let glueSpaces i1 [] = [i1]
      glueSpaces i1 (i2: is) =
        case (i1, i2) of
          (Str left, Space)     -> if "\160" `T.isSuffixOf` left  then Str left  : is else Str left : Space     : is
          (Space,    Str right) -> if "\160" `T.isPrefixOf` right then Str right : is else Space    : Str right : is
          _ -> i1 : i2 : is
   in walk $ adjoin glueSpaces

 fixSpacingAroundDashes :: Inline -> [ Inline ]
 fixSpacingAroundDashes (Str "\8211") = [ Str "\8211" ]
 fixSpacingAroundDashes (Str s)
  | "\8211" `T.isPrefixOf` s = [ Str "\8211", Space, Str (T.replace "\8211" "" s) ]
  | "\8211" `T.isSuffixOf` s = [ Str (T.replace "\8211" "" s), Space, Str "\8211" ]
 fixSpacingAroundDashes untouched = [ untouched ]

 fixQuotes :: Pandoc -> Pandoc
 fixQuotes = walk (fixClosingQuotes . fixOpeningQuotes)

 fixOpeningQuotes :: Inline -> Inline
 fixOpeningQuotes (Str "\171") = Str "\171\160"
 fixOpeningQuotes (Str s)
  | "\171\160" `T.isInfixOf` s = Str s
    -- Ensure non breaking space:
  | "\171 " `T.isInfixOf` s = Str (T.replace "\171 " "\171\160" s)
  | "\171"  `T.isInfixOf` s = Str (T.replace "\171"  "\171\160" s)
 fixOpeningQuotes untouched = untouched

 fixClosingQuotes :: Inline -> Inline
 fixClosingQuotes (Str "\187") = Str "\160\187"
 fixClosingQuotes (Str s)
  | "\160\187" `T.isInfixOf` s = Str s
    -- Ensure non breaking space:
  | " \187" `T.isInfixOf` s = Str (T.replace " \187" "\160\187" s)
  |  "\187" `T.isInfixOf` s = Str (T.replace  "\187" "\160\187" s)
 fixClosingQuotes untouched = untouched

 cleanChevrons :: Pandoc -> Pandoc
 cleanChevrons = walk $ \case
  Str t -> Str
         . T.replace "&lt;" "<"
         . T.replace "&gt;" ">"
         $ t
  inline -> inline

 colors :: Pandoc -> Pandoc
 colors = adjustMarkingColors . redText

 adjustMarkingColors :: Pandoc -> Pandoc
 adjustMarkingColors = walk $ \case
  original@(Span (id_, classes, extras) inlines) ->
    if "mark" `elem` classes && isKnownBlueMarkings inlines
    then Span (id_, "mark-blue" : delete "mark" classes, extras) inlines
    else original
  original -> original

 isKnownBlueMarkings :: [Inline] -> Bool
 isKnownBlueMarkings inlines =
  fromList inlines `elem`
    [ text "bleu"
    , text "Nombe d'unités désignée."
    , str "GeneralGroupOfEntitiesRef"
    , str "Units" -- FIXME not precise enough, might hit wrong markings
    , str "xsd:decimal" -- FIXME not precise enough, might hit wrong markings
    , str "0:1" -- FIXME not precise enough, might hit wrong markings
    , text "RRefrence to GENERAL GROUP OF ENTITIES associé au Third PARTY product."
    ]

 redText :: Pandoc -> Pandoc
 redText = walk $ \case
  untouched@(Para [ Strong inlines ]) ->
    if fromList inlines == text "UNIQUEMENT UTILE SI L’ALIMENTATION D’UN SYSTÈME BILLETTIQUE EST ENVISAGEE !"
    then Para [ Span ("", ["red"], []) [ Strong inlines ] ]
    else untouched
  untouched -> untouched

 -- Document structure

 cleanupDocumentStructure :: FilePath -> Pandoc -> Pandoc
 cleanupDocumentStructure destination =
  fixTermsAndDefinitions .
  cleanImages destination .
  cleanAnnexBNumbering .
  adjoinFigureCaptions .
  walk (concatMap (toList . cleanUpSummaryTitles)) .
  dropHiddenBlocks .
  patchTables .
  dropOriginalToC .
  dropCoverPage

 patchTables :: Pandoc -> Pandoc
 patchTables =
  dropEmptyTables .
  dropEmptyRows .
  fixMissingTableHead .
  fixOrphanTable .
  dropTableColWidths

 dropEmptyTables :: Pandoc -> Pandoc
 dropEmptyTables =
  let isEmptyTableHead (TableHead _     rows) = allRowsAreEmpty rows
      isEmptyTableBody (TableBody _ _ _ rows) = allRowsAreEmpty rows
      allRowsAreEmpty = all isEmptyRow
   in walk . mapMaybe $ \case
        table@(Table _ _ _ tableHead tableBodies _) ->
          if all isEmptyTableBody tableBodies && isEmptyTableHead tableHead
          then Nothing
          else Just table
        untouched -> Just untouched

 dropEmptyRows :: Pandoc -> Pandoc
 dropEmptyRows =
  let dropEmptyRows' (TableBody attr rhc rows' rows) = TableBody attr rhc (filter (not . isEmptyRow) rows') (filter (not . isEmptyRow) rows)
   in walk $ \case
        Table attr caption colSpecs tableHead tableBodies tableFoot ->
          Table attr caption colSpecs tableHead (dropEmptyRows' <$> tableBodies) tableFoot
        untouched -> untouched

 isEmptyRow :: Row -> Bool
 isEmptyRow (Row _ cells) =
  let isEmptyInline = \case
        Plain [] -> True
        _        -> False
      isEmptyCell = \case
        Cell _ _ _ _ inlines -> all isEmptyInline inlines
      isHiddenRow cells' = cells' `elem` hardcodedHiddenRows
   in all isEmptyCell cells || isHiddenRow cells

 hardcodedHiddenRows :: [ [ Cell ] ]
 hardcodedHiddenRows =
  mkNameOnlyRow <$>
    -- Table numbers in this list are according to the source Word document
    [ -- Last row of "Table 19 – TimeDemandType – Element"
      "TimeDemandTimingsGroup"
    , -- 3rd row of "Table 32 – TimeStructureFactor – Element"
      "TariffRef"
    , -- last row of "Table 66 – SalesOfferPackage – Element"
      "salesOfferPackageSubstitutions"
    , -- 5th row of "Table 68 – SalesOfferPackageElement – Element"
      "SalesOffer-PackageRef"
    , -- last row of "Table 88 – UsageParameter – Element (abstrait)"
      "fareTables"
    ]

 mkNameOnlyRow :: T.Text -> [ Cell ]
 mkNameOnlyRow name =
  let mkCell = cell AlignDefault (RowSpan 1) (ColSpan 1)
   in mkCell <$>
        [ mempty
        , plain $ emph $ strong $ marking (str name)
        , mempty
        , mempty
        , mempty
        ]

 dropTableColWidths :: Pandoc -> Pandoc
 dropTableColWidths = walk $ \case
  Table attr caption colSpecs tableHead tableBodies tableFoot ->
    Table attr caption ((AlignDefault, ColWidthDefault) <$ colSpecs) tableHead tableBodies tableFoot
  untouched -> untouched

 fixMissingTableHead :: Pandoc -> Pandoc
 fixMissingTableHead = walk $ \case
  table@(Table attr caption colSpecs tableHead tableBodies tableFoot) ->
    let headIsEmpty (TableHead _ rows) = null rows
        tableHead' =
          case head tableBodies of
            TableBody attr' _ _ rows -> TableHead attr' (take 1 rows)
        tableBodies' =
          case head tableBodies of
            TableBody attr' rhc r' rows -> TableBody attr' rhc r' (drop 1 rows) : tail tableBodies
     in if headIsEmpty tableHead
        then Table attr caption colSpecs tableHead' tableBodies' tableFoot
        else table
  untouched -> untouched

 fixOrphanTable :: Pandoc -> Pandoc
 fixOrphanTable =
  let joinSplitTables b1 [] = [b1]
      joinSplitTables b1 (b2:bs) =
        case (b1, b2) of
          (Table attr caption colSpecs tableHead tableBodies1 _, Table _ _ _ _ tableBodies2 tableFoot) ->
            Table attr caption colSpecs tableHead (tableBodies1 <> tableBodies2) tableFoot : bs
          _                            -> b1 : b2 : bs
   in walk $ topLevel $ adjoin joinSplitTables

 dropHiddenBlocks :: Pandoc -> Pandoc
 dropHiddenBlocks =
  walk $ topLevel $ mapMaybe $ \b ->
    if b `elem` hardcodedHiddenBlocks
    then Nothing
    else Just b

 marking :: Inlines -> Inlines
 marking = spanWith ("", ["mark"], [])

 hardcodedHiddenBlocks :: Blocks
 hardcodedHiddenBlocks =
  fold
    [ para $ marking $
        text "Table 45 \8211 " <> emph (str "AccessRightInProduct") <> text " \8211 Element"
    , para $ marking $
        text "Table 109 \8211 " <> emph (str "LimitingRule") <> text " \8211 Element"
    ]

 cleanAnnexBNumbering :: Pandoc -> Pandoc
 cleanAnnexBNumbering = walk $ concatMap $ \case
  OrderedList ( 2 , UpperAlpha , DefaultDelim )
    [ untouched@[ Header 1 ( "" , [] , [] ) [ Str "Exemples" ] ] ] -> untouched
  untouched -> [ untouched ]

 cleanUpSummaryTitles :: Block -> Blocks
 cleanUpSummaryTitles b
  | isAnchored "Avant-propos"  b = para $ strong $ text "Avant-propos"
  | isAnchored "Introduction"  b = para $ strong $ text "Introduction"

  | isAnchored "Usage Parameters" b = header 1 $ text "Usage Parameters"
  | isAnchored "Exemples"         b = header 1 $ text "Exemples"
  | isAnchored "Bibliographie"    b = headerWith ("bibliographie", [], []) 1 $ text "Bibliographie"

  | otherwise = singleton b

 dropCoverPage :: Pandoc -> Pandoc
 dropCoverPage = walk $ topLevel $ dropUntil (isAnchored "Avant-propos")

 isAnchored :: T.Text -> Block -> Bool
 isAnchored textContent = \case
  Para (Span (_, ["anchor"], []) [] : content) ->
    content == toList (text textContent)
  OrderedList _ [ [ Para ( LineBreak : Span ( _ , [ "anchor" ] , [] ) [] : content) ] ] ->
    content == toList (text textContent)
  _ -> False

 dropOriginalToC :: Pandoc -> Pandoc
 dropOriginalToC =
  let isSommaireTitle b =
        b == Para [ Str "Sommaire" , Space , Str "Page" ]
      isIntroductionTitle = isAnchored "Introduction"
   in walk $ dropWindow isSommaireTitle isIntroductionTitle

 -- Wrapping annexes in a div let us properly number the annexes in the document via css
 wrapAnnexes :: Pandoc -> Pandoc
 wrapAnnexes =
  let startingPoint = \case
        Header 1 _ content -> fromList content == text "Usage Parameters"
        _                  -> False
      keepBody = takeUntil startingPoint
      cutAnnexes = dropUntil startingPoint
      doWrap blocks = [ Div ("", ["annexes"], []) blocks ]
   in walk $ topLevel $ keepBody <> doWrap . cutAnnexes

 injectXMLExamples :: PandocMonad m => Pandoc -> m Pandoc
 injectXMLExamples = walkM (topLevelM replaceSnippets)

 replaceSnippets :: PandocMonad m => [Block] -> m [Block]
 replaceSnippets =
  let replaceExamples examples = foldl (<=<) pure (replaceExample' <$> examples)
      replaceExample' (a,b,c) = replaceExample a b c
   in replaceExamples
        [ ( "6.4.1.1.md", "exemples"                     , "régle-dapplication-des-caractéristiques-qualitystructurefactor"  )
        , ( "6.4.2.1.md", "exemple"                      , "tarif-version-de-loffre-tarifaire-tariff"                        )
        , ( "6.4.3.1.md", "exemple-1"                    , "les-éléments-de-structure-de-tarification-temporelle"            )
        , ( "6.4.4.4.md", "exemple-2"                    , "les-éléments-de-structure-de-tarification-géographique"          )
        , ( "6.4.5.4.md", "exemple-3"                    , "élément-de-matrice-de-distances-distancematrixelement"           )
        , ( "6.4.5.7.md", "exemple-4"                    , "les-élément-validables-validableelement"                         )
        , ( "6.5.1.md"  , "exemple-5"                    , "les-élément-contrôlable-controllableelement"                     )
        , ( "6.7.2.md"  , "exemple-6"                    , "les-offre-à-la-vente-salespackageoffer"                          )
        , ( "6.8.1.md"  , "exemple-7"                    , "document-de-voyage"                                              )
        , ( "6.8.2.1.md", "exemples-1"                   , "distributionchannel"                                             )
        , ( "6.8.3.1.md", "exemples-2"                   , "les-droits-daccès-et-conditions-de-validité-validity-parameters" )
        , ( "6.9.2.md"  , "exemples-3"                   , "conditions-dutilisation-usage-parameter"                         )
        , ( "6.10.1.md" , "exemples-4"                   , "les-grilles-tarifaires-faretable"                                )
        , ( "6.11.2.md" , "exemples-5"                   , "les-prix"                                                        )
        , ( "6.12.1.md" , "exemple-minimal"              , "entêtes-netex"                                                   )
        , ( "B.2.md"    , "tarif-simple"                 , "tarif-minimal-ultra-simplifié"                                   )
        , ( "B.3.md"    , "tarif-minimal-ultra-simplifié", "bus-ile-de-france"                                               )
        , ( "B.4.md"    , "bus-ile-de-france"            , "tgv-paris-lille"                                                 )
        , ( "B.5.md"    , "tgv-paris-lille"              , "leman-express"                                                   )
        , ( "B.6.md"    , "leman-express"                , "tarif-kilométrique-ferré-ter"                                    )
        , ( "B.7.md"    , "tarif-kilométrique-ferré-ter" , "bibliographie"                                                        )
        ]

 replaceExample :: PandocMonad m => FilePath -> BlockTarget -> BlockTarget -> [Block] -> m [Block]
 replaceExample source startTarget endTarget content =
  let isStart = matchTarget startTarget
      isEnd   = matchTarget endTarget
   in do
        replacement <- readExample source
        pure $ replaceWindow replacement isStart isEnd content

 readExample :: PandocMonad m => FilePath -> m [Block]
 readExample path =
  let getContent (Pandoc _ bs) = bs
      opts = def
        { readerExtensions = githubMarkdownExtensions
        }
   in getContent <$> (readMarkdown opts . decodeUtf8 =<< readFileStrict ("NeTEx/tarifs/examples" </> path))

 cleanImages :: FilePath -> Pandoc -> Pandoc
 cleanImages mediaLocation = walk $ \case
  Image _attrs inlines target -> Image nullAttr inlines (cleanImageTarget mediaLocation target)
  inline -> inline

 cleanImageTarget :: FilePath -> Target -> Target
 cleanImageTarget prefix (url, title) =
  let url' = T.replace ".emf" ".png"
           $ fromMaybe url
           $ T.stripPrefix (fromString prefix <> "/") url
   in (url', title)

 fixTermsAndDefinitions :: Pandoc -> Pandoc
 fixTermsAndDefinitions =
  let joinNumberingWithTitle b1 [] = [b1]
      joinNumberingWithTitle b1 (b2:bs) =
        case (b1, b2) of
          (Para [ Str "3." ], Para i2) -> Header 2 nullAttr (walk (concatMap dropStrong) i2) : bs
          _                            -> b1 : b2 : bs
   in walk $ topLevel $ adjoin joinNumberingWithTitle

 adjoinFigureCaptions :: Pandoc -> Pandoc
 adjoinFigureCaptions =
  let fixEmphasis inlines = [ Emph (dropFigureNumber $ walk (concatMap dropEmphasis) inlines) ]
      dropFigureNumber = \case
        Str "Figure" : Space : Str _ : Space : Str "\8211" : Space : rest -> rest
        untouched -> untouched
      joinFigureWithCaption b1 [] = [b1]
      joinFigureWithCaption b1 (b2:bs) =
        case (b1, b2) of
          (Para i1@[Image {}], Para i2) -> Para (i1 <> [SoftBreak] <> fixEmphasis i2) : bs
          _ -> b1 : b2 : bs
   in walk $ adjoin joinFigureWithCaption

 -- Pandoc utilities

 parseSourceDocument :: FilePath -> PandocIO Pandoc
 parseSourceDocument = readDocx def <=< readFileLazy

 renderMarkdown :: Pandoc -> PandocIO C8.ByteString
 renderMarkdown doc =
  let opts = defaultOpts
      writerOptions = def
        { writerTabStop           = optTabStop opts
        , writerTableOfContents   = optTableOfContents opts
        , writerHTMLMathMethod    = optHTMLMathMethod opts
        , writerIncremental       = optIncremental opts
        , writerCiteMethod        = optCiteMethod opts
        , writerNumberSections    = optNumberSections opts
        , writerNumberOffset      = optNumberOffset opts
        , writerSectionDivs       = optSectionDivs opts
        , writerExtensions        = githubMarkdownExtensions
        , writerReferenceLinks    = optReferenceLinks opts
        , writerReferenceLocation = optReferenceLocation opts
        , writerDpi               = optDpi opts
        , writerWrapText          = optWrap opts
        , writerColumns           = optColumns opts
        , writerEmailObfuscation  = optEmailObfuscation opts
        , writerIdentifierPrefix  = optIdentifierPrefix opts
        , writerHtmlQTags         = optHtmlQTags opts
        , writerTopLevelDivision  = optTopLevelDivision opts
        , writerListings          = optListings opts
        , writerSlideLevel        = optSlideLevel opts
        , writerSetextHeaders     = optSetextHeaders opts
        , writerListTables        = optListTables opts
        , writerEpubSubdirectory  = T.pack $ optEpubSubdirectory opts
        , writerEpubFonts         = optEpubFonts opts
        , writerEpubTitlePage     = optEpubTitlePage opts
        , writerSplitLevel        = optSplitLevel opts
        , writerTOCDepth          = optTOCDepth opts
        , writerReferenceDoc      = optReferenceDoc opts
        , writerPreferAscii       = optAscii opts
        }
   in encodeUtf8 <$> writeCommonMark writerOptions doc

 write :: FilePath -> C8.ByteString -> PandocIO ()
 write path = liftIO . C8.writeFile path

 advertiseMediaConversions :: PandocIO ()
 advertiseMediaConversions = do
  medias <- mediaDirectory <$> getMediaBag
  liftIO $ traverse_ advertiseMediaConversion (filter isToBeConverted medias)

 isToBeConverted :: (FilePath, MimeType, Int) -> Bool
 isToBeConverted (_, mt, _) = mt == "image/x-emf"

 advertiseMediaConversion :: (FilePath, MimeType, Int) -> IO ()
 advertiseMediaConversion (file, mt, _) =
  case mt of
    "image/x-emf" -> let convertedFile = replaceExtension file ".png"
                      in print $ file <> " -> " <> convertedFile
    _             -> pure ()

 -- Pandoc AST utilities

 -- Utility to potentially merge 2 following items in a list
 adjoin :: (item -> [item] -> [item]) -> [item] -> [item]
 adjoin f = foldr f []

 -- Restrict walking to the top level (to be used with `walk`).
 topLevel :: ([Block] -> [Block]) -> Pandoc -> Pandoc
 topLevel f (Pandoc meta blocks) = Pandoc meta (f blocks)

 -- Monadic variant of `topLevel` (to be used with `walkM`).
 topLevelM :: Functor m => ([Block] -> m [Block]) -> Pandoc -> m Pandoc
 topLevelM f (Pandoc meta blocks) = Pandoc meta <$> f blocks

 data BlockTarget = HasId T.Text | IsHeader Int

 instance IsString BlockTarget where
  fromString = HasId . fromString

 matchTarget :: BlockTarget -> Block -> Bool
 matchTarget = \case
  HasId    i   -> hasId i
  IsHeader lvl -> isHeaderOfLevel lvl

 hasId :: T.Text -> Block -> Bool
 hasId expected = \case
  Header _ (observed, _, _) _ -> observed == expected
  _                           -> False

 isHeaderOfLevel :: Int -> Block -> Bool
 isHeaderOfLevel expected = \case
  Header observed _ _ -> observed == expected
  _ -> False

 dropWindow :: (a -> Bool) -> (a -> Bool) -> [a] -> [a]
 dropWindow = replaceWindow []

 replaceWindow :: [a] -> (a -> Bool) -> (a -> Bool) -> [a] -> [a]
 replaceWindow replacement isStart isEndExcluded =
  let cutEnd = dropUntil isEndExcluded . dropUntil isStart
      cutStart = takeUntil isStart
   in cutStart <> const replacement <> cutEnd

 dropEmphasis :: Inline -> [Inline]
 dropEmphasis (Emph inlines) = inlines
 dropEmphasis inline         = pure inline

 dropStrong :: Inline -> [ Inline ]
 dropStrong = \case
  Strong inlines -> inlines
  inline -> [ inline ]

 dropUntil :: (a -> Bool) -> [a] -> [a]
 dropUntil p = dropWhile (not . p)

 takeUntil :: (a -> Bool) -> [a] -> [a]
 takeUntil p = takeWhile (not . p)
	#!/usr/bin/env nix-shell
	#!nix-shell --pure -i runghc -p "haskellPackages.ghcWithPackages (p: [ p.pandoc p.pretty-show ])"

	{-# OPTIONS_GHC -Wall #-}

	{-# LANGUAGE LambdaCase #-}
	{-# LANGUAGE OverloadedStrings #-}

	import Control.Monad ((<=<))
	import Control.Monad.IO.Class (liftIO)
	import qualified Data.ByteString.Char8 as C8
	import Data.Foldable (fold, traverse_)
	import Data.List (delete)
	import Data.Maybe (fromMaybe, mapMaybe)
	import Data.String (IsString, fromString)
	import qualified Data.Text as T
	import Data.Text.Encoding (decodeUtf8, encodeUtf8)
	import System.FilePath (replaceExtension, (</>))
	import Text.Pandoc
	import Text.Pandoc.App (Opt (..), defaultOpts)
	import Text.Pandoc.Builder (Blocks, Inlines, cell, emph, fromList,
	header, headerWith, para, plain,
	singleton, spanWith, str, strong, text,
	toList)
	import Text.Pandoc.MediaBag (mediaDirectory)
	import Text.Pandoc.MIME (MimeType)
	import Text.Pandoc.Walk as Walk

	{-
	-- Requires package pretty-show
	import qualified Debug.Trace as DT
	import qualified Text.Show.Pretty as P

	-- `walk` and `query` can be helpful to debug the AST and what's wrong in the source.

	ppDebug :: Show a => a -> b -> b
	ppDebug = DT.traceShow . P.ppDoc
	-}

	main :: IO ()
	main =
	let source = "originaux/NF_Profil.NeTEx.pour.les.Tarifs.F.-.v1.3-nettoye.patched.docx"
	destination = "NeTEx/tarifs"
	result = destination </> "index.md"
	in runIOorExplode $ do
	-- Parse the Word document
	document <- extractMedia destination =<< parseSourceDocument source

	-- This is the main entry point
	patchedDoc <- cleanDocument destination document

	-- Final touch and output
	markdown <- renderMarkdown patchedDoc
	frontMatter <- readFrontMatter
	stylesheet <- readStylesheet
	write result (frontMatter <> markdown <> stylesheet)

	-- Lists the medias needing a conversion (not automated)
	advertiseMediaConversions

	-- https://gohugo.io/content-management/front-matter/
	readFrontMatter :: PandocIO C8.ByteString
	readFrontMatter =
	let wrap content = content <> "\n"
	in wrap <$> readFileStrict "./tarifs.yml"

	readStylesheet :: PandocMonad m => m C8.ByteString
	readStylesheet =
	let wrap content = "\n<style>\n" <> content <> "</style>\n"
	in wrap <$> readFileStrict "./tarifs.css"

	-- Document procession

	cleanDocument :: PandocMonad m => FilePath -> Pandoc -> m Pandoc
	cleanDocument destination =
	pure . wrapAnnexes <=<
	injectXMLExamples <=<
	reformatTablesCaptions .
	cleanupStyling .
	cleanupDocumentStructure destination

	-- Table captions

	reformatTablesCaptions :: PandocMonad m => Pandoc -> m Pandoc
	reformatTablesCaptions =
	let wrap = wrapDiv "table-title" . concatMap dropEmphasis
	in walkM $ \case
	Para (Str "Table" : Space : Str _nbr : Space : Str "\8211" : Space : rest) -> wrap rest
	Para (Str "Table" : Space : Str _nbr : Space : Str "\8211" : rest) -> wrap rest
	Para (Str "Table" : Space : Str _nbr : Space : Str "\8212" : rest) -> wrap rest
	untouched -> pure untouched

	wrapDiv :: PandocMonad m => T.Text -> [Inline] -> m Block
	wrapDiv className inner =
	let render = writeMarkdown def . Pandoc nullMeta . pure . Plain
	in do
	-- This is dirty but required to avoid wrapping the content in a <p> after rendering by hugo
	md <- render inner
	pure $
	RawBlock (Format "html") $
	"<div class='" <> className <> "'>" <> T.strip md <> "</div>"

	-- Styling

	cleanupStyling :: Pandoc -> Pandoc
	cleanupStyling =
	adjustDefinitionsStyling .
	enforceTypographyRules .
	colors

	adjustDefinitionsStyling :: Pandoc -> Pandoc
	adjustDefinitionsStyling =
	let boldenDefinition b1 [] = [b1]
	boldenDefinition b1 (b2: bs) =
	case (b1, b2) of
	(Para something, Div ( "" , [ "Definition" ] , [] ) definition) -> Header 2 nullAttr something : definition ++ bs
	_ -> b1 : b2 : bs
	in walk $ topLevel $ adjoin boldenDefinition

	-- Typographic rules

	enforceTypographyRules :: Pandoc -> Pandoc
	enforceTypographyRules = fixSpaces . fixQuotes . cleanChevrons

	fixSpaces :: Pandoc -> Pandoc
	fixSpaces =
	fixNonBreakingSpaces .
	walk (concatMap fixSpacingAroundDashes)

	fixNonBreakingSpaces :: Pandoc -> Pandoc
	fixNonBreakingSpaces =
	let glueSpaces i1 [] = [i1]
	glueSpaces i1 (i2: is) =
	case (i1, i2) of
	(Str left, Space) -> if "\160" `T.isSuffixOf` left then Str left : is else Str left : Space : is
	(Space, Str right) -> if "\160" `T.isPrefixOf` right then Str right : is else Space : Str right : is
	_ -> i1 : i2 : is
	in walk $ adjoin glueSpaces

	fixSpacingAroundDashes :: Inline -> [ Inline ]
	fixSpacingAroundDashes (Str "\8211") = [ Str "\8211" ]
	fixSpacingAroundDashes (Str s)
	\| "\8211" `T.isPrefixOf` s = [ Str "\8211", Space, Str (T.replace "\8211" "" s) ]
	\| "\8211" `T.isSuffixOf` s = [ Str (T.replace "\8211" "" s), Space, Str "\8211" ]
	fixSpacingAroundDashes untouched = [ untouched ]

	fixQuotes :: Pandoc -> Pandoc
	fixQuotes = walk (fixClosingQuotes . fixOpeningQuotes)

	fixOpeningQuotes :: Inline -> Inline
	fixOpeningQuotes (Str "\171") = Str "\171\160"
	fixOpeningQuotes (Str s)
	\| "\171\160" `T.isInfixOf` s = Str s
	-- Ensure non breaking space:
	\| "\171 " `T.isInfixOf` s = Str (T.replace "\171 " "\171\160" s)
	\| "\171" `T.isInfixOf` s = Str (T.replace "\171" "\171\160" s)
	fixOpeningQuotes untouched = untouched

	fixClosingQuotes :: Inline -> Inline
	fixClosingQuotes (Str "\187") = Str "\160\187"
	fixClosingQuotes (Str s)
	\| "\160\187" `T.isInfixOf` s = Str s
	-- Ensure non breaking space:
	\| " \187" `T.isInfixOf` s = Str (T.replace " \187" "\160\187" s)
	\| "\187" `T.isInfixOf` s = Str (T.replace "\187" "\160\187" s)
	fixClosingQuotes untouched = untouched

	cleanChevrons :: Pandoc -> Pandoc
	cleanChevrons = walk $ \case
	Str t -> Str
	. T.replace "<" "<"
	. T.replace ">" ">"
	$ t
	inline -> inline

	colors :: Pandoc -> Pandoc
	colors = adjustMarkingColors . redText

	adjustMarkingColors :: Pandoc -> Pandoc
	adjustMarkingColors = walk $ \case
	original@(Span (id_, classes, extras) inlines) ->
	if "mark" `elem` classes && isKnownBlueMarkings inlines
	then Span (id_, "mark-blue" : delete "mark" classes, extras) inlines
	else original
	original -> original

	isKnownBlueMarkings :: [Inline] -> Bool
	isKnownBlueMarkings inlines =
	fromList inlines `elem`
	[ text "bleu"
	, text "Nombe d'unités désignée."
	, str "GeneralGroupOfEntitiesRef"
	, str "Units" -- FIXME not precise enough, might hit wrong markings
	, str "xsd:decimal" -- FIXME not precise enough, might hit wrong markings
	, str "0:1" -- FIXME not precise enough, might hit wrong markings
	, text "RRefrence to GENERAL GROUP OF ENTITIES associé au Third PARTY product."
	]

	redText :: Pandoc -> Pandoc
	redText = walk $ \case
	untouched@(Para [ Strong inlines ]) ->
	if fromList inlines == text "UNIQUEMENT UTILE SI L’ALIMENTATION D’UN SYSTÈME BILLETTIQUE EST ENVISAGEE !"
	then Para [ Span ("", ["red"], []) [ Strong inlines ] ]
	else untouched
	untouched -> untouched

	-- Document structure

	cleanupDocumentStructure :: FilePath -> Pandoc -> Pandoc
	cleanupDocumentStructure destination =
	fixTermsAndDefinitions .
	cleanImages destination .
	cleanAnnexBNumbering .
	adjoinFigureCaptions .
	walk (concatMap (toList . cleanUpSummaryTitles)) .
	dropHiddenBlocks .
	patchTables .
	dropOriginalToC .
	dropCoverPage

	patchTables :: Pandoc -> Pandoc
	patchTables =
	dropEmptyTables .
	dropEmptyRows .
	fixMissingTableHead .
	fixOrphanTable .
	dropTableColWidths

	dropEmptyTables :: Pandoc -> Pandoc
	dropEmptyTables =
	let isEmptyTableHead (TableHead _ rows) = allRowsAreEmpty rows
	isEmptyTableBody (TableBody _ _ _ rows) = allRowsAreEmpty rows
	allRowsAreEmpty = all isEmptyRow
	in walk . mapMaybe $ \case
	table@(Table _ _ _ tableHead tableBodies _) ->
	if all isEmptyTableBody tableBodies && isEmptyTableHead tableHead
	then Nothing
	else Just table
	untouched -> Just untouched

	dropEmptyRows :: Pandoc -> Pandoc
	dropEmptyRows =
	let dropEmptyRows' (TableBody attr rhc rows' rows) = TableBody attr rhc (filter (not . isEmptyRow) rows') (filter (not . isEmptyRow) rows)
	in walk $ \case
	Table attr caption colSpecs tableHead tableBodies tableFoot ->
	Table attr caption colSpecs tableHead (dropEmptyRows' <$> tableBodies) tableFoot
	untouched -> untouched

	isEmptyRow :: Row -> Bool
	isEmptyRow (Row _ cells) =
	let isEmptyInline = \case
	Plain [] -> True
	_ -> False
	isEmptyCell = \case
	Cell _ _ _ _ inlines -> all isEmptyInline inlines
	isHiddenRow cells' = cells' `elem` hardcodedHiddenRows
	in all isEmptyCell cells \|\| isHiddenRow cells

	hardcodedHiddenRows :: [ [ Cell ] ]
	hardcodedHiddenRows =
	mkNameOnlyRow <$>
	-- Table numbers in this list are according to the source Word document
	[ -- Last row of "Table 19 – TimeDemandType – Element"
	"TimeDemandTimingsGroup"
	, -- 3rd row of "Table 32 – TimeStructureFactor – Element"
	"TariffRef"
	, -- last row of "Table 66 – SalesOfferPackage – Element"
	"salesOfferPackageSubstitutions"
	, -- 5th row of "Table 68 – SalesOfferPackageElement – Element"
	"SalesOffer-PackageRef"
	, -- last row of "Table 88 – UsageParameter – Element (abstrait)"
	"fareTables"
	]

	mkNameOnlyRow :: T.Text -> [ Cell ]
	mkNameOnlyRow name =
	let mkCell = cell AlignDefault (RowSpan 1) (ColSpan 1)
	in mkCell <$>
	[ mempty
	, plain $ emph $ strong $ marking (str name)
	, mempty
	, mempty
	, mempty
	]

	dropTableColWidths :: Pandoc -> Pandoc
	dropTableColWidths = walk $ \case
	Table attr caption colSpecs tableHead tableBodies tableFoot ->
	Table attr caption ((AlignDefault, ColWidthDefault) <$ colSpecs) tableHead tableBodies tableFoot
	untouched -> untouched

	fixMissingTableHead :: Pandoc -> Pandoc
	fixMissingTableHead = walk $ \case
	table@(Table attr caption colSpecs tableHead tableBodies tableFoot) ->
	let headIsEmpty (TableHead _ rows) = null rows
	tableHead' =
	case head tableBodies of
	TableBody attr' _ _ rows -> TableHead attr' (take 1 rows)
	tableBodies' =
	case head tableBodies of
	TableBody attr' rhc r' rows -> TableBody attr' rhc r' (drop 1 rows) : tail tableBodies
	in if headIsEmpty tableHead
	then Table attr caption colSpecs tableHead' tableBodies' tableFoot
	else table
	untouched -> untouched

	fixOrphanTable :: Pandoc -> Pandoc
	fixOrphanTable =
	let joinSplitTables b1 [] = [b1]
	joinSplitTables b1 (b2:bs) =
	case (b1, b2) of
	(Table attr caption colSpecs tableHead tableBodies1 _, Table _ _ _ _ tableBodies2 tableFoot) ->
	Table attr caption colSpecs tableHead (tableBodies1 <> tableBodies2) tableFoot : bs
	_ -> b1 : b2 : bs
	in walk $ topLevel $ adjoin joinSplitTables

	dropHiddenBlocks :: Pandoc -> Pandoc
	dropHiddenBlocks =
	walk $ topLevel $ mapMaybe $ \b ->
	if b `elem` hardcodedHiddenBlocks
	then Nothing
	else Just b

	marking :: Inlines -> Inlines
	marking = spanWith ("", ["mark"], [])

	hardcodedHiddenBlocks :: Blocks
	hardcodedHiddenBlocks =
	fold
	[ para $ marking $
	text "Table 45 \8211 " <> emph (str "AccessRightInProduct") <> text " \8211 Element"
	, para $ marking $
	text "Table 109 \8211 " <> emph (str "LimitingRule") <> text " \8211 Element"
	]

	cleanAnnexBNumbering :: Pandoc -> Pandoc
	cleanAnnexBNumbering = walk $ concatMap $ \case
	OrderedList ( 2 , UpperAlpha , DefaultDelim )
	[ untouched@[ Header 1 ( "" , [] , [] ) [ Str "Exemples" ] ] ] -> untouched
	untouched -> [ untouched ]

	cleanUpSummaryTitles :: Block -> Blocks
	cleanUpSummaryTitles b
	\| isAnchored "Avant-propos" b = para $ strong $ text "Avant-propos"
	\| isAnchored "Introduction" b = para $ strong $ text "Introduction"

	\| isAnchored "Usage Parameters" b = header 1 $ text "Usage Parameters"
	\| isAnchored "Exemples" b = header 1 $ text "Exemples"
	\| isAnchored "Bibliographie" b = headerWith ("bibliographie", [], []) 1 $ text "Bibliographie"

	\| otherwise = singleton b

	dropCoverPage :: Pandoc -> Pandoc
	dropCoverPage = walk $ topLevel $ dropUntil (isAnchored "Avant-propos")

	isAnchored :: T.Text -> Block -> Bool
	isAnchored textContent = \case
	Para (Span (_, ["anchor"], []) [] : content) ->
	content == toList (text textContent)
	OrderedList _ [ [ Para ( LineBreak : Span ( _ , [ "anchor" ] , [] ) [] : content) ] ] ->
	content == toList (text textContent)
	_ -> False

	dropOriginalToC :: Pandoc -> Pandoc
	dropOriginalToC =
	let isSommaireTitle b =
	b == Para [ Str "Sommaire" , Space , Str "Page" ]
	isIntroductionTitle = isAnchored "Introduction"
	in walk $ dropWindow isSommaireTitle isIntroductionTitle

	-- Wrapping annexes in a div let us properly number the annexes in the document via css
	wrapAnnexes :: Pandoc -> Pandoc
	wrapAnnexes =
	let startingPoint = \case
	Header 1 _ content -> fromList content == text "Usage Parameters"
	_ -> False
	keepBody = takeUntil startingPoint
	cutAnnexes = dropUntil startingPoint
	doWrap blocks = [ Div ("", ["annexes"], []) blocks ]
	in walk $ topLevel $ keepBody <> doWrap . cutAnnexes

	injectXMLExamples :: PandocMonad m => Pandoc -> m Pandoc
	injectXMLExamples = walkM (topLevelM replaceSnippets)

	replaceSnippets :: PandocMonad m => [Block] -> m [Block]
	replaceSnippets =
	let replaceExamples examples = foldl (<=<) pure (replaceExample' <$> examples)
	replaceExample' (a,b,c) = replaceExample a b c
	in replaceExamples
	[ ( "6.4.1.1.md", "exemples" , "régle-dapplication-des-caractéristiques-qualitystructurefactor" )
	, ( "6.4.2.1.md", "exemple" , "tarif-version-de-loffre-tarifaire-tariff" )
	, ( "6.4.3.1.md", "exemple-1" , "les-éléments-de-structure-de-tarification-temporelle" )
	, ( "6.4.4.4.md", "exemple-2" , "les-éléments-de-structure-de-tarification-géographique" )
	, ( "6.4.5.4.md", "exemple-3" , "élément-de-matrice-de-distances-distancematrixelement" )
	, ( "6.4.5.7.md", "exemple-4" , "les-élément-validables-validableelement" )
	, ( "6.5.1.md" , "exemple-5" , "les-élément-contrôlable-controllableelement" )
	, ( "6.7.2.md" , "exemple-6" , "les-offre-à-la-vente-salespackageoffer" )
	, ( "6.8.1.md" , "exemple-7" , "document-de-voyage" )
	, ( "6.8.2.1.md", "exemples-1" , "distributionchannel" )
	, ( "6.8.3.1.md", "exemples-2" , "les-droits-daccès-et-conditions-de-validité-validity-parameters" )
	, ( "6.9.2.md" , "exemples-3" , "conditions-dutilisation-usage-parameter" )
	, ( "6.10.1.md" , "exemples-4" , "les-grilles-tarifaires-faretable" )
	, ( "6.11.2.md" , "exemples-5" , "les-prix" )
	, ( "6.12.1.md" , "exemple-minimal" , "entêtes-netex" )
	, ( "B.2.md" , "tarif-simple" , "tarif-minimal-ultra-simplifié" )
	, ( "B.3.md" , "tarif-minimal-ultra-simplifié", "bus-ile-de-france" )
	, ( "B.4.md" , "bus-ile-de-france" , "tgv-paris-lille" )
	, ( "B.5.md" , "tgv-paris-lille" , "leman-express" )
	, ( "B.6.md" , "leman-express" , "tarif-kilométrique-ferré-ter" )
	, ( "B.7.md" , "tarif-kilométrique-ferré-ter" , "bibliographie" )
	]

	replaceExample :: PandocMonad m => FilePath -> BlockTarget -> BlockTarget -> [Block] -> m [Block]
	replaceExample source startTarget endTarget content =
	let isStart = matchTarget startTarget
	isEnd = matchTarget endTarget
	in do
	replacement <- readExample source
	pure $ replaceWindow replacement isStart isEnd content

	readExample :: PandocMonad m => FilePath -> m [Block]
	readExample path =
	let getContent (Pandoc _ bs) = bs
	opts = def
	{ readerExtensions = githubMarkdownExtensions
	}
	in getContent <$> (readMarkdown opts . decodeUtf8 =<< readFileStrict ("NeTEx/tarifs/examples" </> path))

	cleanImages :: FilePath -> Pandoc -> Pandoc
	cleanImages mediaLocation = walk $ \case
	Image _attrs inlines target -> Image nullAttr inlines (cleanImageTarget mediaLocation target)
	inline -> inline

	cleanImageTarget :: FilePath -> Target -> Target
	cleanImageTarget prefix (url, title) =
	let url' = T.replace ".emf" ".png"
	$ fromMaybe url
	$ T.stripPrefix (fromString prefix <> "/") url
	in (url', title)

	fixTermsAndDefinitions :: Pandoc -> Pandoc
	fixTermsAndDefinitions =
	let joinNumberingWithTitle b1 [] = [b1]
	joinNumberingWithTitle b1 (b2:bs) =
	case (b1, b2) of
	(Para [ Str "3." ], Para i2) -> Header 2 nullAttr (walk (concatMap dropStrong) i2) : bs
	_ -> b1 : b2 : bs
	in walk $ topLevel $ adjoin joinNumberingWithTitle

	adjoinFigureCaptions :: Pandoc -> Pandoc
	adjoinFigureCaptions =
	let fixEmphasis inlines = [ Emph (dropFigureNumber $ walk (concatMap dropEmphasis) inlines) ]
	dropFigureNumber = \case
	Str "Figure" : Space : Str _ : Space : Str "\8211" : Space : rest -> rest
	untouched -> untouched
	joinFigureWithCaption b1 [] = [b1]
	joinFigureWithCaption b1 (b2:bs) =
	case (b1, b2) of
	(Para i1@[Image {}], Para i2) -> Para (i1 <> [SoftBreak] <> fixEmphasis i2) : bs
	_ -> b1 : b2 : bs
	in walk $ adjoin joinFigureWithCaption

	-- Pandoc utilities

	parseSourceDocument :: FilePath -> PandocIO Pandoc
	parseSourceDocument = readDocx def <=< readFileLazy

	renderMarkdown :: Pandoc -> PandocIO C8.ByteString
	renderMarkdown doc =
	let opts = defaultOpts
	writerOptions = def
	{ writerTabStop = optTabStop opts
	, writerTableOfContents = optTableOfContents opts
	, writerHTMLMathMethod = optHTMLMathMethod opts
	, writerIncremental = optIncremental opts
	, writerCiteMethod = optCiteMethod opts
	, writerNumberSections = optNumberSections opts
	, writerNumberOffset = optNumberOffset opts
	, writerSectionDivs = optSectionDivs opts
	, writerExtensions = githubMarkdownExtensions
	, writerReferenceLinks = optReferenceLinks opts
	, writerReferenceLocation = optReferenceLocation opts
	, writerDpi = optDpi opts
	, writerWrapText = optWrap opts
	, writerColumns = optColumns opts
	, writerEmailObfuscation = optEmailObfuscation opts
	, writerIdentifierPrefix = optIdentifierPrefix opts
	, writerHtmlQTags = optHtmlQTags opts
	, writerTopLevelDivision = optTopLevelDivision opts
	, writerListings = optListings opts
	, writerSlideLevel = optSlideLevel opts
	, writerSetextHeaders = optSetextHeaders opts
	, writerListTables = optListTables opts
	, writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts
	, writerEpubFonts = optEpubFonts opts
	, writerEpubTitlePage = optEpubTitlePage opts
	, writerSplitLevel = optSplitLevel opts
	, writerTOCDepth = optTOCDepth opts
	, writerReferenceDoc = optReferenceDoc opts
	, writerPreferAscii = optAscii opts
	}
	in encodeUtf8 <$> writeCommonMark writerOptions doc

	write :: FilePath -> C8.ByteString -> PandocIO ()
	write path = liftIO . C8.writeFile path

	advertiseMediaConversions :: PandocIO ()
	advertiseMediaConversions = do
	medias <- mediaDirectory <$> getMediaBag
	liftIO $ traverse_ advertiseMediaConversion (filter isToBeConverted medias)

	isToBeConverted :: (FilePath, MimeType, Int) -> Bool
	isToBeConverted (_, mt, _) = mt == "image/x-emf"

	advertiseMediaConversion :: (FilePath, MimeType, Int) -> IO ()
	advertiseMediaConversion (file, mt, _) =
	case mt of
	"image/x-emf" -> let convertedFile = replaceExtension file ".png"
	in print $ file <> " -> " <> convertedFile
	_ -> pure ()

	-- Pandoc AST utilities

	-- Utility to potentially merge 2 following items in a list
	adjoin :: (item -> [item] -> [item]) -> [item] -> [item]
	adjoin f = foldr f []

	-- Restrict walking to the top level (to be used with `walk`).
	topLevel :: ([Block] -> [Block]) -> Pandoc -> Pandoc
	topLevel f (Pandoc meta blocks) = Pandoc meta (f blocks)

	-- Monadic variant of `topLevel` (to be used with `walkM`).
	topLevelM :: Functor m => ([Block] -> m [Block]) -> Pandoc -> m Pandoc
	topLevelM f (Pandoc meta blocks) = Pandoc meta <$> f blocks

	data BlockTarget = HasId T.Text \| IsHeader Int

	instance IsString BlockTarget where
	fromString = HasId . fromString

	matchTarget :: BlockTarget -> Block -> Bool
	matchTarget = \case
	HasId i -> hasId i
	IsHeader lvl -> isHeaderOfLevel lvl

	hasId :: T.Text -> Block -> Bool
	hasId expected = \case
	Header _ (observed, _, _) _ -> observed == expected
	_ -> False

	isHeaderOfLevel :: Int -> Block -> Bool
	isHeaderOfLevel expected = \case
	Header observed _ _ -> observed == expected
	_ -> False

	dropWindow :: (a -> Bool) -> (a -> Bool) -> [a] -> [a]
	dropWindow = replaceWindow []

	replaceWindow :: [a] -> (a -> Bool) -> (a -> Bool) -> [a] -> [a]
	replaceWindow replacement isStart isEndExcluded =
	let cutEnd = dropUntil isEndExcluded . dropUntil isStart
	cutStart = takeUntil isStart
	in cutStart <> const replacement <> cutEnd

	dropEmphasis :: Inline -> [Inline]
	dropEmphasis (Emph inlines) = inlines
	dropEmphasis inline = pure inline

	dropStrong :: Inline -> [ Inline ]
	dropStrong = \case
	Strong inlines -> inlines
	inline -> [ inline ]

	dropUntil :: (a -> Bool) -> [a] -> [a]
	dropUntil p = dropWhile (not . p)

	takeUntil :: (a -> Bool) -> [a] -> [a]
	takeUntil p = takeWhile (not . p)