Created
May 2, 2024 14:55
-
-
Save ptitfred/be104d1d87292f74cc2ddd10c1e46775 to your computer and use it in GitHub Desktop.
Pandoc script to transform NeTEx tarifs docx to a suitable markdown file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env nix-shell | |
#!nix-shell --pure -i runghc -p "haskellPackages.ghcWithPackages (p: [ p.pandoc p.pretty-show ])" | |
{-# OPTIONS_GHC -Wall #-} | |
{-# LANGUAGE LambdaCase #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
import Control.Monad ((<=<)) | |
import Control.Monad.IO.Class (liftIO) | |
import qualified Data.ByteString.Char8 as C8 | |
import Data.Foldable (fold, traverse_) | |
import Data.List (delete) | |
import Data.Maybe (fromMaybe, mapMaybe) | |
import Data.String (IsString, fromString) | |
import qualified Data.Text as T | |
import Data.Text.Encoding (decodeUtf8, encodeUtf8) | |
import System.FilePath (replaceExtension, (</>)) | |
import Text.Pandoc | |
import Text.Pandoc.App (Opt (..), defaultOpts) | |
import Text.Pandoc.Builder (Blocks, Inlines, cell, emph, fromList, | |
header, headerWith, para, plain, | |
singleton, spanWith, str, strong, text, | |
toList) | |
import Text.Pandoc.MediaBag (mediaDirectory) | |
import Text.Pandoc.MIME (MimeType) | |
import Text.Pandoc.Walk as Walk | |
{- | |
-- Requires package pretty-show | |
import qualified Debug.Trace as DT | |
import qualified Text.Show.Pretty as P | |
-- `walk` and `query` can be helpful to debug the AST and what's wrong in the source. | |
ppDebug :: Show a => a -> b -> b | |
ppDebug = DT.traceShow . P.ppDoc | |
-} | |
main :: IO () | |
main = | |
let source = "originaux/NF_Profil.NeTEx.pour.les.Tarifs.F.-.v1.3-nettoye.patched.docx" | |
destination = "NeTEx/tarifs" | |
result = destination </> "index.md" | |
in runIOorExplode $ do | |
-- Parse the Word document | |
document <- extractMedia destination =<< parseSourceDocument source | |
-- This is the main entry point | |
patchedDoc <- cleanDocument destination document | |
-- Final touch and output | |
markdown <- renderMarkdown patchedDoc | |
frontMatter <- readFrontMatter | |
stylesheet <- readStylesheet | |
write result (frontMatter <> markdown <> stylesheet) | |
-- Lists the medias needing a conversion (not automated) | |
advertiseMediaConversions | |
-- https://gohugo.io/content-management/front-matter/ | |
readFrontMatter :: PandocIO C8.ByteString | |
readFrontMatter = | |
let wrap content = content <> "\n" | |
in wrap <$> readFileStrict "./tarifs.yml" | |
readStylesheet :: PandocMonad m => m C8.ByteString | |
readStylesheet = | |
let wrap content = "\n<style>\n" <> content <> "</style>\n" | |
in wrap <$> readFileStrict "./tarifs.css" | |
-- Document procession | |
cleanDocument :: PandocMonad m => FilePath -> Pandoc -> m Pandoc | |
cleanDocument destination = | |
pure . wrapAnnexes <=< | |
injectXMLExamples <=< | |
reformatTablesCaptions . | |
cleanupStyling . | |
cleanupDocumentStructure destination | |
-- Table captions | |
reformatTablesCaptions :: PandocMonad m => Pandoc -> m Pandoc | |
reformatTablesCaptions = | |
let wrap = wrapDiv "table-title" . concatMap dropEmphasis | |
in walkM $ \case | |
Para (Str "Table" : Space : Str _nbr : Space : Str "\8211" : Space : rest) -> wrap rest | |
Para (Str "Table" : Space : Str _nbr : Space : Str "\8211" : rest) -> wrap rest | |
Para (Str "Table" : Space : Str _nbr : Space : Str "\8212" : rest) -> wrap rest | |
untouched -> pure untouched | |
wrapDiv :: PandocMonad m => T.Text -> [Inline] -> m Block | |
wrapDiv className inner = | |
let render = writeMarkdown def . Pandoc nullMeta . pure . Plain | |
in do | |
-- This is dirty but required to avoid wrapping the content in a <p> after rendering by hugo | |
md <- render inner | |
pure $ | |
RawBlock (Format "html") $ | |
"<div class='" <> className <> "'>" <> T.strip md <> "</div>" | |
-- Styling | |
cleanupStyling :: Pandoc -> Pandoc | |
cleanupStyling = | |
adjustDefinitionsStyling . | |
enforceTypographyRules . | |
colors | |
adjustDefinitionsStyling :: Pandoc -> Pandoc | |
adjustDefinitionsStyling = | |
let boldenDefinition b1 [] = [b1] | |
boldenDefinition b1 (b2: bs) = | |
case (b1, b2) of | |
(Para something, Div ( "" , [ "Definition" ] , [] ) definition) -> Header 2 nullAttr something : definition ++ bs | |
_ -> b1 : b2 : bs | |
in walk $ topLevel $ adjoin boldenDefinition | |
-- Typographic rules | |
enforceTypographyRules :: Pandoc -> Pandoc | |
enforceTypographyRules = fixSpaces . fixQuotes . cleanChevrons | |
fixSpaces :: Pandoc -> Pandoc | |
fixSpaces = | |
fixNonBreakingSpaces . | |
walk (concatMap fixSpacingAroundDashes) | |
fixNonBreakingSpaces :: Pandoc -> Pandoc | |
fixNonBreakingSpaces = | |
let glueSpaces i1 [] = [i1] | |
glueSpaces i1 (i2: is) = | |
case (i1, i2) of | |
(Str left, Space) -> if "\160" `T.isSuffixOf` left then Str left : is else Str left : Space : is | |
(Space, Str right) -> if "\160" `T.isPrefixOf` right then Str right : is else Space : Str right : is | |
_ -> i1 : i2 : is | |
in walk $ adjoin glueSpaces | |
fixSpacingAroundDashes :: Inline -> [ Inline ] | |
fixSpacingAroundDashes (Str "\8211") = [ Str "\8211" ] | |
fixSpacingAroundDashes (Str s) | |
| "\8211" `T.isPrefixOf` s = [ Str "\8211", Space, Str (T.replace "\8211" "" s) ] | |
| "\8211" `T.isSuffixOf` s = [ Str (T.replace "\8211" "" s), Space, Str "\8211" ] | |
fixSpacingAroundDashes untouched = [ untouched ] | |
fixQuotes :: Pandoc -> Pandoc | |
fixQuotes = walk (fixClosingQuotes . fixOpeningQuotes) | |
fixOpeningQuotes :: Inline -> Inline | |
fixOpeningQuotes (Str "\171") = Str "\171\160" | |
fixOpeningQuotes (Str s) | |
| "\171\160" `T.isInfixOf` s = Str s | |
-- Ensure non breaking space: | |
| "\171 " `T.isInfixOf` s = Str (T.replace "\171 " "\171\160" s) | |
| "\171" `T.isInfixOf` s = Str (T.replace "\171" "\171\160" s) | |
fixOpeningQuotes untouched = untouched | |
fixClosingQuotes :: Inline -> Inline | |
fixClosingQuotes (Str "\187") = Str "\160\187" | |
fixClosingQuotes (Str s) | |
| "\160\187" `T.isInfixOf` s = Str s | |
-- Ensure non breaking space: | |
| " \187" `T.isInfixOf` s = Str (T.replace " \187" "\160\187" s) | |
| "\187" `T.isInfixOf` s = Str (T.replace "\187" "\160\187" s) | |
fixClosingQuotes untouched = untouched | |
cleanChevrons :: Pandoc -> Pandoc | |
cleanChevrons = walk $ \case | |
Str t -> Str | |
. T.replace "<" "<" | |
. T.replace ">" ">" | |
$ t | |
inline -> inline | |
colors :: Pandoc -> Pandoc | |
colors = adjustMarkingColors . redText | |
adjustMarkingColors :: Pandoc -> Pandoc | |
adjustMarkingColors = walk $ \case | |
original@(Span (id_, classes, extras) inlines) -> | |
if "mark" `elem` classes && isKnownBlueMarkings inlines | |
then Span (id_, "mark-blue" : delete "mark" classes, extras) inlines | |
else original | |
original -> original | |
isKnownBlueMarkings :: [Inline] -> Bool | |
isKnownBlueMarkings inlines = | |
fromList inlines `elem` | |
[ text "bleu" | |
, text "Nombe d'unités désignée." | |
, str "GeneralGroupOfEntitiesRef" | |
, str "Units" -- FIXME not precise enough, might hit wrong markings | |
, str "xsd:decimal" -- FIXME not precise enough, might hit wrong markings | |
, str "0:1" -- FIXME not precise enough, might hit wrong markings | |
, text "RRefrence to GENERAL GROUP OF ENTITIES associé au Third PARTY product." | |
] | |
redText :: Pandoc -> Pandoc | |
redText = walk $ \case | |
untouched@(Para [ Strong inlines ]) -> | |
if fromList inlines == text "UNIQUEMENT UTILE SI L’ALIMENTATION D’UN SYSTÈME BILLETTIQUE EST ENVISAGEE !" | |
then Para [ Span ("", ["red"], []) [ Strong inlines ] ] | |
else untouched | |
untouched -> untouched | |
-- Document structure | |
cleanupDocumentStructure :: FilePath -> Pandoc -> Pandoc | |
cleanupDocumentStructure destination = | |
fixTermsAndDefinitions . | |
cleanImages destination . | |
cleanAnnexBNumbering . | |
adjoinFigureCaptions . | |
walk (concatMap (toList . cleanUpSummaryTitles)) . | |
dropHiddenBlocks . | |
patchTables . | |
dropOriginalToC . | |
dropCoverPage | |
patchTables :: Pandoc -> Pandoc | |
patchTables = | |
dropEmptyTables . | |
dropEmptyRows . | |
fixMissingTableHead . | |
fixOrphanTable . | |
dropTableColWidths | |
dropEmptyTables :: Pandoc -> Pandoc | |
dropEmptyTables = | |
let isEmptyTableHead (TableHead _ rows) = allRowsAreEmpty rows | |
isEmptyTableBody (TableBody _ _ _ rows) = allRowsAreEmpty rows | |
allRowsAreEmpty = all isEmptyRow | |
in walk . mapMaybe $ \case | |
table@(Table _ _ _ tableHead tableBodies _) -> | |
if all isEmptyTableBody tableBodies && isEmptyTableHead tableHead | |
then Nothing | |
else Just table | |
untouched -> Just untouched | |
dropEmptyRows :: Pandoc -> Pandoc | |
dropEmptyRows = | |
let dropEmptyRows' (TableBody attr rhc rows' rows) = TableBody attr rhc (filter (not . isEmptyRow) rows') (filter (not . isEmptyRow) rows) | |
in walk $ \case | |
Table attr caption colSpecs tableHead tableBodies tableFoot -> | |
Table attr caption colSpecs tableHead (dropEmptyRows' <$> tableBodies) tableFoot | |
untouched -> untouched | |
isEmptyRow :: Row -> Bool | |
isEmptyRow (Row _ cells) = | |
let isEmptyInline = \case | |
Plain [] -> True | |
_ -> False | |
isEmptyCell = \case | |
Cell _ _ _ _ inlines -> all isEmptyInline inlines | |
isHiddenRow cells' = cells' `elem` hardcodedHiddenRows | |
in all isEmptyCell cells || isHiddenRow cells | |
hardcodedHiddenRows :: [ [ Cell ] ] | |
hardcodedHiddenRows = | |
mkNameOnlyRow <$> | |
-- Table numbers in this list are according to the source Word document | |
[ -- Last row of "Table 19 – TimeDemandType – Element" | |
"TimeDemandTimingsGroup" | |
, -- 3rd row of "Table 32 – TimeStructureFactor – Element" | |
"TariffRef" | |
, -- last row of "Table 66 – SalesOfferPackage – Element" | |
"salesOfferPackageSubstitutions" | |
, -- 5th row of "Table 68 – SalesOfferPackageElement – Element" | |
"SalesOffer-PackageRef" | |
, -- last row of "Table 88 – UsageParameter – Element (abstrait)" | |
"fareTables" | |
] | |
mkNameOnlyRow :: T.Text -> [ Cell ] | |
mkNameOnlyRow name = | |
let mkCell = cell AlignDefault (RowSpan 1) (ColSpan 1) | |
in mkCell <$> | |
[ mempty | |
, plain $ emph $ strong $ marking (str name) | |
, mempty | |
, mempty | |
, mempty | |
] | |
dropTableColWidths :: Pandoc -> Pandoc | |
dropTableColWidths = walk $ \case | |
Table attr caption colSpecs tableHead tableBodies tableFoot -> | |
Table attr caption ((AlignDefault, ColWidthDefault) <$ colSpecs) tableHead tableBodies tableFoot | |
untouched -> untouched | |
fixMissingTableHead :: Pandoc -> Pandoc | |
fixMissingTableHead = walk $ \case | |
table@(Table attr caption colSpecs tableHead tableBodies tableFoot) -> | |
let headIsEmpty (TableHead _ rows) = null rows | |
tableHead' = | |
case head tableBodies of | |
TableBody attr' _ _ rows -> TableHead attr' (take 1 rows) | |
tableBodies' = | |
case head tableBodies of | |
TableBody attr' rhc r' rows -> TableBody attr' rhc r' (drop 1 rows) : tail tableBodies | |
in if headIsEmpty tableHead | |
then Table attr caption colSpecs tableHead' tableBodies' tableFoot | |
else table | |
untouched -> untouched | |
fixOrphanTable :: Pandoc -> Pandoc | |
fixOrphanTable = | |
let joinSplitTables b1 [] = [b1] | |
joinSplitTables b1 (b2:bs) = | |
case (b1, b2) of | |
(Table attr caption colSpecs tableHead tableBodies1 _, Table _ _ _ _ tableBodies2 tableFoot) -> | |
Table attr caption colSpecs tableHead (tableBodies1 <> tableBodies2) tableFoot : bs | |
_ -> b1 : b2 : bs | |
in walk $ topLevel $ adjoin joinSplitTables | |
dropHiddenBlocks :: Pandoc -> Pandoc | |
dropHiddenBlocks = | |
walk $ topLevel $ mapMaybe $ \b -> | |
if b `elem` hardcodedHiddenBlocks | |
then Nothing | |
else Just b | |
marking :: Inlines -> Inlines | |
marking = spanWith ("", ["mark"], []) | |
hardcodedHiddenBlocks :: Blocks | |
hardcodedHiddenBlocks = | |
fold | |
[ para $ marking $ | |
text "Table 45 \8211 " <> emph (str "AccessRightInProduct") <> text " \8211 Element" | |
, para $ marking $ | |
text "Table 109 \8211 " <> emph (str "LimitingRule") <> text " \8211 Element" | |
] | |
cleanAnnexBNumbering :: Pandoc -> Pandoc | |
cleanAnnexBNumbering = walk $ concatMap $ \case | |
OrderedList ( 2 , UpperAlpha , DefaultDelim ) | |
[ untouched@[ Header 1 ( "" , [] , [] ) [ Str "Exemples" ] ] ] -> untouched | |
untouched -> [ untouched ] | |
cleanUpSummaryTitles :: Block -> Blocks | |
cleanUpSummaryTitles b | |
| isAnchored "Avant-propos" b = para $ strong $ text "Avant-propos" | |
| isAnchored "Introduction" b = para $ strong $ text "Introduction" | |
| isAnchored "Usage Parameters" b = header 1 $ text "Usage Parameters" | |
| isAnchored "Exemples" b = header 1 $ text "Exemples" | |
| isAnchored "Bibliographie" b = headerWith ("bibliographie", [], []) 1 $ text "Bibliographie" | |
| otherwise = singleton b | |
dropCoverPage :: Pandoc -> Pandoc | |
dropCoverPage = walk $ topLevel $ dropUntil (isAnchored "Avant-propos") | |
isAnchored :: T.Text -> Block -> Bool | |
isAnchored textContent = \case | |
Para (Span (_, ["anchor"], []) [] : content) -> | |
content == toList (text textContent) | |
OrderedList _ [ [ Para ( LineBreak : Span ( _ , [ "anchor" ] , [] ) [] : content) ] ] -> | |
content == toList (text textContent) | |
_ -> False | |
dropOriginalToC :: Pandoc -> Pandoc | |
dropOriginalToC = | |
let isSommaireTitle b = | |
b == Para [ Str "Sommaire" , Space , Str "Page" ] | |
isIntroductionTitle = isAnchored "Introduction" | |
in walk $ dropWindow isSommaireTitle isIntroductionTitle | |
-- Wrapping annexes in a div let us properly number the annexes in the document via css | |
wrapAnnexes :: Pandoc -> Pandoc | |
wrapAnnexes = | |
let startingPoint = \case | |
Header 1 _ content -> fromList content == text "Usage Parameters" | |
_ -> False | |
keepBody = takeUntil startingPoint | |
cutAnnexes = dropUntil startingPoint | |
doWrap blocks = [ Div ("", ["annexes"], []) blocks ] | |
in walk $ topLevel $ keepBody <> doWrap . cutAnnexes | |
injectXMLExamples :: PandocMonad m => Pandoc -> m Pandoc | |
injectXMLExamples = walkM (topLevelM replaceSnippets) | |
replaceSnippets :: PandocMonad m => [Block] -> m [Block] | |
replaceSnippets = | |
let replaceExamples examples = foldl (<=<) pure (replaceExample' <$> examples) | |
replaceExample' (a,b,c) = replaceExample a b c | |
in replaceExamples | |
[ ( "6.4.1.1.md", "exemples" , "régle-dapplication-des-caractéristiques-qualitystructurefactor" ) | |
, ( "6.4.2.1.md", "exemple" , "tarif-version-de-loffre-tarifaire-tariff" ) | |
, ( "6.4.3.1.md", "exemple-1" , "les-éléments-de-structure-de-tarification-temporelle" ) | |
, ( "6.4.4.4.md", "exemple-2" , "les-éléments-de-structure-de-tarification-géographique" ) | |
, ( "6.4.5.4.md", "exemple-3" , "élément-de-matrice-de-distances-distancematrixelement" ) | |
, ( "6.4.5.7.md", "exemple-4" , "les-élément-validables-validableelement" ) | |
, ( "6.5.1.md" , "exemple-5" , "les-élément-contrôlable-controllableelement" ) | |
, ( "6.7.2.md" , "exemple-6" , "les-offre-à-la-vente-salespackageoffer" ) | |
, ( "6.8.1.md" , "exemple-7" , "document-de-voyage" ) | |
, ( "6.8.2.1.md", "exemples-1" , "distributionchannel" ) | |
, ( "6.8.3.1.md", "exemples-2" , "les-droits-daccès-et-conditions-de-validité-validity-parameters" ) | |
, ( "6.9.2.md" , "exemples-3" , "conditions-dutilisation-usage-parameter" ) | |
, ( "6.10.1.md" , "exemples-4" , "les-grilles-tarifaires-faretable" ) | |
, ( "6.11.2.md" , "exemples-5" , "les-prix" ) | |
, ( "6.12.1.md" , "exemple-minimal" , "entêtes-netex" ) | |
, ( "B.2.md" , "tarif-simple" , "tarif-minimal-ultra-simplifié" ) | |
, ( "B.3.md" , "tarif-minimal-ultra-simplifié", "bus-ile-de-france" ) | |
, ( "B.4.md" , "bus-ile-de-france" , "tgv-paris-lille" ) | |
, ( "B.5.md" , "tgv-paris-lille" , "leman-express" ) | |
, ( "B.6.md" , "leman-express" , "tarif-kilométrique-ferré-ter" ) | |
, ( "B.7.md" , "tarif-kilométrique-ferré-ter" , "bibliographie" ) | |
] | |
replaceExample :: PandocMonad m => FilePath -> BlockTarget -> BlockTarget -> [Block] -> m [Block] | |
replaceExample source startTarget endTarget content = | |
let isStart = matchTarget startTarget | |
isEnd = matchTarget endTarget | |
in do | |
replacement <- readExample source | |
pure $ replaceWindow replacement isStart isEnd content | |
readExample :: PandocMonad m => FilePath -> m [Block] | |
readExample path = | |
let getContent (Pandoc _ bs) = bs | |
opts = def | |
{ readerExtensions = githubMarkdownExtensions | |
} | |
in getContent <$> (readMarkdown opts . decodeUtf8 =<< readFileStrict ("NeTEx/tarifs/examples" </> path)) | |
cleanImages :: FilePath -> Pandoc -> Pandoc | |
cleanImages mediaLocation = walk $ \case | |
Image _attrs inlines target -> Image nullAttr inlines (cleanImageTarget mediaLocation target) | |
inline -> inline | |
cleanImageTarget :: FilePath -> Target -> Target | |
cleanImageTarget prefix (url, title) = | |
let url' = T.replace ".emf" ".png" | |
$ fromMaybe url | |
$ T.stripPrefix (fromString prefix <> "/") url | |
in (url', title) | |
fixTermsAndDefinitions :: Pandoc -> Pandoc | |
fixTermsAndDefinitions = | |
let joinNumberingWithTitle b1 [] = [b1] | |
joinNumberingWithTitle b1 (b2:bs) = | |
case (b1, b2) of | |
(Para [ Str "3." ], Para i2) -> Header 2 nullAttr (walk (concatMap dropStrong) i2) : bs | |
_ -> b1 : b2 : bs | |
in walk $ topLevel $ adjoin joinNumberingWithTitle | |
adjoinFigureCaptions :: Pandoc -> Pandoc | |
adjoinFigureCaptions = | |
let fixEmphasis inlines = [ Emph (dropFigureNumber $ walk (concatMap dropEmphasis) inlines) ] | |
dropFigureNumber = \case | |
Str "Figure" : Space : Str _ : Space : Str "\8211" : Space : rest -> rest | |
untouched -> untouched | |
joinFigureWithCaption b1 [] = [b1] | |
joinFigureWithCaption b1 (b2:bs) = | |
case (b1, b2) of | |
(Para i1@[Image {}], Para i2) -> Para (i1 <> [SoftBreak] <> fixEmphasis i2) : bs | |
_ -> b1 : b2 : bs | |
in walk $ adjoin joinFigureWithCaption | |
-- Pandoc utilities | |
parseSourceDocument :: FilePath -> PandocIO Pandoc | |
parseSourceDocument = readDocx def <=< readFileLazy | |
renderMarkdown :: Pandoc -> PandocIO C8.ByteString | |
renderMarkdown doc = | |
let opts = defaultOpts | |
writerOptions = def | |
{ writerTabStop = optTabStop opts | |
, writerTableOfContents = optTableOfContents opts | |
, writerHTMLMathMethod = optHTMLMathMethod opts | |
, writerIncremental = optIncremental opts | |
, writerCiteMethod = optCiteMethod opts | |
, writerNumberSections = optNumberSections opts | |
, writerNumberOffset = optNumberOffset opts | |
, writerSectionDivs = optSectionDivs opts | |
, writerExtensions = githubMarkdownExtensions | |
, writerReferenceLinks = optReferenceLinks opts | |
, writerReferenceLocation = optReferenceLocation opts | |
, writerDpi = optDpi opts | |
, writerWrapText = optWrap opts | |
, writerColumns = optColumns opts | |
, writerEmailObfuscation = optEmailObfuscation opts | |
, writerIdentifierPrefix = optIdentifierPrefix opts | |
, writerHtmlQTags = optHtmlQTags opts | |
, writerTopLevelDivision = optTopLevelDivision opts | |
, writerListings = optListings opts | |
, writerSlideLevel = optSlideLevel opts | |
, writerSetextHeaders = optSetextHeaders opts | |
, writerListTables = optListTables opts | |
, writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts | |
, writerEpubFonts = optEpubFonts opts | |
, writerEpubTitlePage = optEpubTitlePage opts | |
, writerSplitLevel = optSplitLevel opts | |
, writerTOCDepth = optTOCDepth opts | |
, writerReferenceDoc = optReferenceDoc opts | |
, writerPreferAscii = optAscii opts | |
} | |
in encodeUtf8 <$> writeCommonMark writerOptions doc | |
write :: FilePath -> C8.ByteString -> PandocIO () | |
write path = liftIO . C8.writeFile path | |
advertiseMediaConversions :: PandocIO () | |
advertiseMediaConversions = do | |
medias <- mediaDirectory <$> getMediaBag | |
liftIO $ traverse_ advertiseMediaConversion (filter isToBeConverted medias) | |
isToBeConverted :: (FilePath, MimeType, Int) -> Bool | |
isToBeConverted (_, mt, _) = mt == "image/x-emf" | |
advertiseMediaConversion :: (FilePath, MimeType, Int) -> IO () | |
advertiseMediaConversion (file, mt, _) = | |
case mt of | |
"image/x-emf" -> let convertedFile = replaceExtension file ".png" | |
in print $ file <> " -> " <> convertedFile | |
_ -> pure () | |
-- Pandoc AST utilities | |
-- Utility to potentially merge 2 following items in a list | |
adjoin :: (item -> [item] -> [item]) -> [item] -> [item] | |
adjoin f = foldr f [] | |
-- Restrict walking to the top level (to be used with `walk`). | |
topLevel :: ([Block] -> [Block]) -> Pandoc -> Pandoc | |
topLevel f (Pandoc meta blocks) = Pandoc meta (f blocks) | |
-- Monadic variant of `topLevel` (to be used with `walkM`). | |
topLevelM :: Functor m => ([Block] -> m [Block]) -> Pandoc -> m Pandoc | |
topLevelM f (Pandoc meta blocks) = Pandoc meta <$> f blocks | |
data BlockTarget = HasId T.Text | IsHeader Int | |
instance IsString BlockTarget where | |
fromString = HasId . fromString | |
matchTarget :: BlockTarget -> Block -> Bool | |
matchTarget = \case | |
HasId i -> hasId i | |
IsHeader lvl -> isHeaderOfLevel lvl | |
hasId :: T.Text -> Block -> Bool | |
hasId expected = \case | |
Header _ (observed, _, _) _ -> observed == expected | |
_ -> False | |
isHeaderOfLevel :: Int -> Block -> Bool | |
isHeaderOfLevel expected = \case | |
Header observed _ _ -> observed == expected | |
_ -> False | |
dropWindow :: (a -> Bool) -> (a -> Bool) -> [a] -> [a] | |
dropWindow = replaceWindow [] | |
replaceWindow :: [a] -> (a -> Bool) -> (a -> Bool) -> [a] -> [a] | |
replaceWindow replacement isStart isEndExcluded = | |
let cutEnd = dropUntil isEndExcluded . dropUntil isStart | |
cutStart = takeUntil isStart | |
in cutStart <> const replacement <> cutEnd | |
dropEmphasis :: Inline -> [Inline] | |
dropEmphasis (Emph inlines) = inlines | |
dropEmphasis inline = pure inline | |
dropStrong :: Inline -> [ Inline ] | |
dropStrong = \case | |
Strong inlines -> inlines | |
inline -> [ inline ] | |
dropUntil :: (a -> Bool) -> [a] -> [a] | |
dropUntil p = dropWhile (not . p) | |
takeUntil :: (a -> Bool) -> [a] -> [a] | |
takeUntil p = takeWhile (not . p) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment