Last active
February 11, 2020 14:06
-
-
Save DKurilo/611d736cfb76947a5684a8c8470967b9 to your computer and use it in GitHub Desktop.
Just example for https://codereview.stackexchange.com/questions/236901/color-highlighter-annotator-in-haskell/236955
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- to add colors like dark-green or dark\ngreen in this algorithm you need to add each different phrase | |
-- as sparate phrase or to modify AhoCorassick to use wildcards. It's possible and even easy to do it. | |
-- But you need to change package. | |
module Main where | |
import Data.Char (isAlphaNum) | |
import Data.List.Split (splitOn) | |
import qualified Data.Map as M | |
import System.Environment | |
import System.IO | |
import Text.AhoCorasick | |
processStringWithMachine :: StateMachine Char String -> M.Map String String -> String -> String | |
processStringWithMachine sm m cs = (lastStep . foldl go ("", 0, Position 0 0 "", cs)) ps | |
where ps = findAll sm cs | |
go :: (String, Int, Position String, String) -> Position String -> (String, Int, Position String, String) | |
go (cs', cursor, lastPos, rest) pos | |
| pIndex pos > cursor' && wholeWord = (cs' ++ take (pIndex lastPos - cursor) rest | |
++ (replace . pVal) lastPos | |
, cursor' | |
, pos | |
, drop (cursor' - cursor) rest | |
) -- next word, applying previous | |
| pIndex pos == pIndex lastPos && pLength pos > pLength lastPos && wholeWord = | |
(cs', cursor, pos, rest) -- new match is longer, use it | |
| otherwise = (cs', cursor, lastPos, rest) -- just skip this match | |
where cursor' = pIndex lastPos + pLength lastPos | |
wholeWord = isWordBoundary rest (pIndex pos - cursor - 1) -- before phrase | |
&& isWordBoundary rest (pIndex pos + pLength pos - cursor) -- after phrase | |
lastStep :: (String, Int, Position String, String) -> String | |
lastStep (cs', cursor, pos, rest) = cs' | |
++ take (pIndex pos - cursor) rest | |
++ (replace . pVal) pos | |
++ drop (pIndex pos - cursor + pLength pos) rest | |
replace :: String -> String | |
replace "" = "" | |
replace cs' = case cs' `M.lookup` m of | |
Just color -> "<span class=\"color\" style=\"color: " ++ color ++ "\">" ++ cs' ++ "</span>" | |
_ -> cs' | |
isWordBoundary :: String -> Int -> Bool | |
isWordBoundary cs n | |
| n < 0 || n >= length cs = True | |
| otherwise = (not . isAlphaNum) c | |
where c = cs !! n | |
main :: IO () | |
main = do | |
colorMap <- M.fromList | |
. map (\(w1: w2: _) -> (w1, w2)) . filter (\ws -> length ws > 1) | |
. map (splitOn "\t") . filter (\cs -> (not . null) cs && head cs /= '#') . lines | |
<$> readFile "./rgb.txt" | |
let acMachine = (makeSimpleStateMachine . M.keys) colorMap | |
filename: _ <- getArgs | |
updatedString <- processStringWithMachine acMachine colorMap <$> readFile filename | |
writeFile (filename ++ ".out") updatedString |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ nixpkgs ? import <nixpkgs> {} }: | |
let | |
inherit (nixpkgs) pkgs; | |
myAhoCorasickSrc = pkgs.fetchFromGitHub { | |
owner = "stackbuilders"; | |
repo = "AhoCorasick"; | |
rev = "9a825aef5d19c707d2306befca688a1a72d50bb0"; | |
sha256 = "1hgpbiqslqskrbgkv60vdzbak7sg4kxhi8qvghfw6fnngxd8sdb1"; | |
}; | |
myHaskellPackages = pkgs.haskellPackages.override { | |
overrides = self: super: with pkgs.haskell.lib; { | |
AhoCorasick = super.callCabal2nix "AhoCorasick" myAhoCorasickSrc {}; | |
}; | |
}; | |
ghc = myHaskellPackages.ghcWithPackages (ps: with ps; [ | |
containers | |
split | |
AhoCorasick | |
]); | |
in | |
pkgs.mkShell { | |
name = "color-highlighter-annotator-env"; | |
buildInputs = [ ghc ]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment