Skip to content

Instantly share code, notes, and snippets.

@kowey
Created March 16, 2010 13:55
Show Gist options
  • Select an option

  • Save kowey/333974 to your computer and use it in GitHub Desktop.

Select an option

Save kowey/333974 to your computer and use it in GitHub Desktop.
-- ----------------------------------------------------------------------
-- Simplify wiki text
-- ----------------------------------------------------------------------
dropEndOfArticle = takeWhile (not . end)
where
end (Header _ [ Str "See", Space, Str "also"]) = True
end (Header _ [ Str "External", Space, Str "links"]) = True
end (Header _ [ Str "References"]) = True
end _ = False
onPandoc f (Pandoc x y) = Pandoc x (f y)
tweak = onPandoc (map tweakB)
tweakB x@(Header _ _) = x
tweakB x@(Para xs) = Para (tweakEs xs)
tweakB x@(Plain xs) = Plain (tweakEs xs)
tweakB x@(BulletList xs) = BulletList (map (map tweakB) xs)
tweakB x@(DefinitionList xs) = DefinitionList (map tweakD xs)
tweakB x = x
tweakEs = concatMap tweakE
tweakE (Image _ _) = []
tweakE (Emph xs) = tweakEs xs
tweakE (Strong xs) = tweakEs xs
tweakE (Link (Str ltxt : _) _) | "Category:" `isPrefixOf` ltxt = []
tweakE (Link xs _) = tweakEs xs
tweakE (Note _) = []
tweakE x = [ x ]
tweakD (ts,defs) = (tweakEs ts, map (map tweakB) defs)
-- ----------------------------------------------------------------------
-- Pre-process (before feeding into pandoc reader)
-- ----------------------------------------------------------------------
data PrePandoc = Template String [PrePandoc] -- ^ this should actually be [[PrePandoc]]
-- for recursive templates
| Chunk String
pure = (: [])
preprocessTemplates s =
case parse parsePrePandoc "" s of
Left _ -> error $ "preprocessTemplates should accept everything"
Right ps -> concatMap expandPrePandoc ps
expandPrePandoc (Chunk " ") = " "
expandPrePandoc (Chunk s) = s
-- editorial control
expandPrePandoc (Template x _)
| x `elem` [ "Citation needed", "Context", "Coord missing", "Expand", "Fact", "Fix"
, "Notability", "Orphan", "Refimprove", "Short pages monitor"
, "Trivia", "Unreferenced", "Vague", "Who", "Why" ] = ""
-- navigational content
expandPrePandoc (Template x _)
| x `elem` [ "For", "Cite", "Cite book", "Cite web", "Commonscat"
, "Main", "Reflist", "See also" ] = ""
expandPrePandoc (Template x _)
| "Scottish Munros section " `isPrefixOf` x = ""
| "-geo-stub" `isSuffixOf` x = ""
expandPrePandoc (Template x _)
| x `elem` [ "Arrochar Alps", "British hills", "Cornwall", "Isle of Wight box"
, "Lewis and Harris"
, "Marilyns N Eng", "Northern Dales", "North Pennines", "North Western Fells"
, "Outlying Fells", "Peaks of the Peak District", "SCO"
, "Scotland-stub", "Southern Fells", "Western Fells", "UK primary routes 1"
, "UK-SSSI-stub" ] = ""
-- formatting
expandPrePandoc (Template "-" _) = ""
expandPrePandoc (Template "Clear" _) = ""
expandPrePandoc (Template "Flagicon" _) = ""
expandPrePandoc (Template ('D':'E':'F':'A':'U':'L':'T':'S':'O':'R':'T':_) _) = ""
expandPrePandoc (Template "Linktext" xs) = concatMap expandPrePandoc xs
-- fancy content
expandPrePandoc (Template i _)
| i `elem` [ "Image", "Image label", "Wide image" ] = ""
expandPrePandoc (Template "Cquote" _) = ""
expandPrePandoc (Template "Quotation" _) = ""
expandPrePandoc (Template "Location map" _) = ""
expandPrePandoc (Template "Coord" _) = "" -- FIXME: losing too much?
expandPrePandoc (Template i _) | "Infobox" `isPrefixOf` i = "" -- (!) this is dropping quite a lot
expandPrePandoc (Template i _) | "GB summits entry" `isPrefixOf` i = "" -- (!) this is dropping quite a lot
expandPrePandoc (Template i _) | "GB summits start" `isPrefixOf` i = "" -- (!) this is dropping quite a lot
--
expandPrePandoc (Template "Convert" (Chunk n : Chunk "C" : _)) = n ++ " degrees Celcius"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "cm" : _)) = n ++ " centimetres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "mm" : _)) = n ++ " millimetres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "ha" : _)) = n ++ " hectares"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "km" : _)) = n ++ " kilometres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "km2" : _)) = n ++ " square kilometres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "m" : _)) = n ++ " metres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "acre" : _)) = convertStr (* 0.4046) n ++ " hectares"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "ft" : _)) = convertStr (* 0.3048) n ++ " metres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "yd" : _)) = convertStr (* 0.9144) n ++ " metres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "mi" : _)) = convertStr (* 1.1609) n ++ " kilometres"
expandPrePandoc (Template "Convert" (Chunk n : Chunk "lb" : _)) = convertStr (* 0.4539) n ++ " kilograms"
expandPrePandoc (Template "Ft to m" (Chunk n : _)) = convertStr (* 0.3048) n ++ " metres"
expandPrePandoc (Template "Pop density mi2 to km2" (Chunk n : _)) = convertStr (* 2.590) n ++ " inhabitants per square kilometer"
--
expandPrePandoc (Template "IPA" [x]) = expandPrePandoc x
expandPrePandoc (Template "IPA-gd" xs) = tPronounciation "Scottish Gaelic" xs
expandPrePandoc (Template "IPA-cy" xs) = tPronounciation "Welsh" xs
expandPrePandoc (Template "PronEng" xs) = tPronounced xs
expandPrePandoc (Template "Pron-en" xs) = tPronounced xs
expandPrePandoc (Template "Pron" xs) = tPronounced xs
--
expandPrePandoc (Template "Lang-gv" [x]) = "Manx: " ++ expandPrePandoc x
expandPrePandoc (Template "Lang-gd" [x]) = "Scottish Gaelic: " ++ expandPrePandoc x
expandPrePandoc (Template "Lang-cy" [x]) = "Welsh: " ++ expandPrePandoc x
expandPrePandoc (Template "Lang-en" [x]) = "English: " ++ expandPrePandoc x
expandPrePandoc (Template "Lang-non" [x]) = "Old Norse: " ++ expandPrePandoc x
expandPrePandoc (Template "Lang" [Chunk "cy",x]) = "Welsh: " ++ expandPrePandoc x
expandPrePandoc (Template "Langcy" [x]) = "Welsh: " ++ expandPrePandoc x
--
expandPrePandoc (Template "Gbmapping" (x:_)) = "grid reference " ++ expandPrePandoc x
expandPrePandoc (Template "Gbm4ibx" [x]) = expandPrePandoc x
expandPrePandoc (Template "Gbmappingsmall" [x]) = expandPrePandoc x
expandPrePandoc (Template "Oscoor" [_,y]) = expandPrePandoc y
-- unknown template!
expandPrePandoc (Template s xs) = "{{" ++ s ++ "|" ++ intercalate "|" (expandPrePandocs xs) ++ "}}"
tPronounciation t (x:_) = t ++ " pronounciation: " ++ expandPrePandoc x
tPronounced (x:_) = "Pronounced: " ++ expandPrePandoc x
expandPrePandocs = map expandPrePandoc
parsePrePandoc = many $
Chunk `fmap` try (string " ")
<|> parseTemplate
<|> Chunk `fmap` many1 (noneOf special)
<|> Chunk `fmap` many1 (oneOf special) -- last resort
where
special = "&{"
parseTemplate = try $ between (string "{{") (string "}}") $ do
ps <- many1 (noneOf "|}") `sepBy1` char '|'
return (Template (tweakHead toUpper . trim . head $ ps) (map Chunk $ tail ps)) -- TODO actually parse
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment