Created
March 16, 2010 13:55
-
-
Save kowey/333974 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- ---------------------------------------------------------------------- | |
| -- Simplify wiki text | |
| -- ---------------------------------------------------------------------- | |
| dropEndOfArticle = takeWhile (not . end) | |
| where | |
| end (Header _ [ Str "See", Space, Str "also"]) = True | |
| end (Header _ [ Str "External", Space, Str "links"]) = True | |
| end (Header _ [ Str "References"]) = True | |
| end _ = False | |
| onPandoc f (Pandoc x y) = Pandoc x (f y) | |
| tweak = onPandoc (map tweakB) | |
| tweakB x@(Header _ _) = x | |
| tweakB x@(Para xs) = Para (tweakEs xs) | |
| tweakB x@(Plain xs) = Plain (tweakEs xs) | |
| tweakB x@(BulletList xs) = BulletList (map (map tweakB) xs) | |
| tweakB x@(DefinitionList xs) = DefinitionList (map tweakD xs) | |
| tweakB x = x | |
| tweakEs = concatMap tweakE | |
| tweakE (Image _ _) = [] | |
| tweakE (Emph xs) = tweakEs xs | |
| tweakE (Strong xs) = tweakEs xs | |
| tweakE (Link (Str ltxt : _) _) | "Category:" `isPrefixOf` ltxt = [] | |
| tweakE (Link xs _) = tweakEs xs | |
| tweakE (Note _) = [] | |
| tweakE x = [ x ] | |
| tweakD (ts,defs) = (tweakEs ts, map (map tweakB) defs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- ---------------------------------------------------------------------- | |
| -- Pre-process (before feeding into pandoc reader) | |
| -- ---------------------------------------------------------------------- | |
| data PrePandoc = Template String [PrePandoc] -- ^ this should actually be [[PrePandoc]] | |
| -- for recursive templates | |
| | Chunk String | |
| pure = (: []) | |
| preprocessTemplates s = | |
| case parse parsePrePandoc "" s of | |
| Left _ -> error $ "preprocessTemplates should accept everything" | |
| Right ps -> concatMap expandPrePandoc ps | |
| expandPrePandoc (Chunk " ") = " " | |
| expandPrePandoc (Chunk s) = s | |
| -- editorial control | |
| expandPrePandoc (Template x _) | |
| | x `elem` [ "Citation needed", "Context", "Coord missing", "Expand", "Fact", "Fix" | |
| , "Notability", "Orphan", "Refimprove", "Short pages monitor" | |
| , "Trivia", "Unreferenced", "Vague", "Who", "Why" ] = "" | |
| -- navigational content | |
| expandPrePandoc (Template x _) | |
| | x `elem` [ "For", "Cite", "Cite book", "Cite web", "Commonscat" | |
| , "Main", "Reflist", "See also" ] = "" | |
| expandPrePandoc (Template x _) | |
| | "Scottish Munros section " `isPrefixOf` x = "" | |
| | "-geo-stub" `isSuffixOf` x = "" | |
| expandPrePandoc (Template x _) | |
| | x `elem` [ "Arrochar Alps", "British hills", "Cornwall", "Isle of Wight box" | |
| , "Lewis and Harris" | |
| , "Marilyns N Eng", "Northern Dales", "North Pennines", "North Western Fells" | |
| , "Outlying Fells", "Peaks of the Peak District", "SCO" | |
| , "Scotland-stub", "Southern Fells", "Western Fells", "UK primary routes 1" | |
| , "UK-SSSI-stub" ] = "" | |
| -- formatting | |
| expandPrePandoc (Template "-" _) = "" | |
| expandPrePandoc (Template "Clear" _) = "" | |
| expandPrePandoc (Template "Flagicon" _) = "" | |
| expandPrePandoc (Template ('D':'E':'F':'A':'U':'L':'T':'S':'O':'R':'T':_) _) = "" | |
| expandPrePandoc (Template "Linktext" xs) = concatMap expandPrePandoc xs | |
| -- fancy content | |
| expandPrePandoc (Template i _) | |
| | i `elem` [ "Image", "Image label", "Wide image" ] = "" | |
| expandPrePandoc (Template "Cquote" _) = "" | |
| expandPrePandoc (Template "Quotation" _) = "" | |
| expandPrePandoc (Template "Location map" _) = "" | |
| expandPrePandoc (Template "Coord" _) = "" -- FIXME: losing too much? | |
| expandPrePandoc (Template i _) | "Infobox" `isPrefixOf` i = "" -- (!) this is dropping quite a lot | |
| expandPrePandoc (Template i _) | "GB summits entry" `isPrefixOf` i = "" -- (!) this is dropping quite a lot | |
| expandPrePandoc (Template i _) | "GB summits start" `isPrefixOf` i = "" -- (!) this is dropping quite a lot | |
| -- | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "C" : _)) = n ++ " degrees Celcius" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "cm" : _)) = n ++ " centimetres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "mm" : _)) = n ++ " millimetres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "ha" : _)) = n ++ " hectares" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "km" : _)) = n ++ " kilometres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "km2" : _)) = n ++ " square kilometres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "m" : _)) = n ++ " metres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "acre" : _)) = convertStr (* 0.4046) n ++ " hectares" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "ft" : _)) = convertStr (* 0.3048) n ++ " metres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "yd" : _)) = convertStr (* 0.9144) n ++ " metres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "mi" : _)) = convertStr (* 1.1609) n ++ " kilometres" | |
| expandPrePandoc (Template "Convert" (Chunk n : Chunk "lb" : _)) = convertStr (* 0.4539) n ++ " kilograms" | |
| expandPrePandoc (Template "Ft to m" (Chunk n : _)) = convertStr (* 0.3048) n ++ " metres" | |
| expandPrePandoc (Template "Pop density mi2 to km2" (Chunk n : _)) = convertStr (* 2.590) n ++ " inhabitants per square kilometer" | |
| -- | |
| expandPrePandoc (Template "IPA" [x]) = expandPrePandoc x | |
| expandPrePandoc (Template "IPA-gd" xs) = tPronounciation "Scottish Gaelic" xs | |
| expandPrePandoc (Template "IPA-cy" xs) = tPronounciation "Welsh" xs | |
| expandPrePandoc (Template "PronEng" xs) = tPronounced xs | |
| expandPrePandoc (Template "Pron-en" xs) = tPronounced xs | |
| expandPrePandoc (Template "Pron" xs) = tPronounced xs | |
| -- | |
| expandPrePandoc (Template "Lang-gv" [x]) = "Manx: " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Lang-gd" [x]) = "Scottish Gaelic: " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Lang-cy" [x]) = "Welsh: " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Lang-en" [x]) = "English: " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Lang-non" [x]) = "Old Norse: " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Lang" [Chunk "cy",x]) = "Welsh: " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Langcy" [x]) = "Welsh: " ++ expandPrePandoc x | |
| -- | |
| expandPrePandoc (Template "Gbmapping" (x:_)) = "grid reference " ++ expandPrePandoc x | |
| expandPrePandoc (Template "Gbm4ibx" [x]) = expandPrePandoc x | |
| expandPrePandoc (Template "Gbmappingsmall" [x]) = expandPrePandoc x | |
| expandPrePandoc (Template "Oscoor" [_,y]) = expandPrePandoc y | |
| -- unknown template! | |
| expandPrePandoc (Template s xs) = "{{" ++ s ++ "|" ++ intercalate "|" (expandPrePandocs xs) ++ "}}" | |
| tPronounciation t (x:_) = t ++ " pronounciation: " ++ expandPrePandoc x | |
| tPronounced (x:_) = "Pronounced: " ++ expandPrePandoc x | |
| expandPrePandocs = map expandPrePandoc | |
| parsePrePandoc = many $ | |
| Chunk `fmap` try (string " ") | |
| <|> parseTemplate | |
| <|> Chunk `fmap` many1 (noneOf special) | |
| <|> Chunk `fmap` many1 (oneOf special) -- last resort | |
| where | |
| special = "&{" | |
| parseTemplate = try $ between (string "{{") (string "}}") $ do | |
| ps <- many1 (noneOf "|}") `sepBy1` char '|' | |
| return (Template (tweakHead toUpper . trim . head $ ps) (map Chunk $ tail ps)) -- TODO actually parse | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment