Created
October 17, 2024 20:25
-
-
Save l-Luna/24259241e2ac0de7224f791c099fb0fd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedRecordDot #-} | |
import Data.List (sortBy, isInfixOf) | |
import Data.Ord (comparing) | |
import Data.Array ((!), bounds, listArray, Array) | |
suffixes :: Int -> [a] -> [(Int, [a])] | |
suffixes idx [] = [] | |
suffixes idx l@(_:xs) = (idx, l) : suffixes (idx + 1) xs | |
listToArray :: [a] -> Array Int a | |
listToArray xs = listArray (0, length xs - 1) xs | |
-- it's just Maybe again, but it means the names are more descriptive | |
data Symbol a = Term | Sym a | |
deriving (Show, Eq, Ord) | |
data SuffixArray a = SuffixArray { | |
sfxStr :: Array Int (Symbol a), | |
sfxOrd :: Array Int Int | |
} deriving (Show, Eq) | |
mkSuffixArray :: [Symbol a] -> [Int] -> SuffixArray a | |
mkSuffixArray xs ord = SuffixArray (listToArray xs) (listToArray ord) | |
buildSuffixArray :: Ord a => [a] -> SuffixArray a | |
buildSuffixArray str = | |
let adj = (Sym <$> str) ++ [Term] in | |
mkSuffixArray adj $ fst <$> sortBy (comparing snd) (suffixes 0 adj) | |
data Slice a = Slice { | |
sliceArray :: SuffixArray a, | |
sliceStart :: Int, | |
sliceLength :: Int, | |
sliceOffset :: Int | |
} deriving (Show, Eq) | |
initSlice :: SuffixArray a -> Slice a | |
initSlice sfx = Slice{ sliceArray = sfx, sliceStart = 0, sliceLength = snd (bounds sfx.sfxStr) + 1, sliceOffset = 0 } | |
nextSlice :: Slice a -> (Int, Int) -> Slice a | |
nextSlice slc (start, end) = slc{ sliceStart = slc.sliceStart + start, sliceLength = end - start + 1, sliceOffset = slc.sliceOffset + 1 } | |
idxSlice :: Slice a -> Int -> Symbol a | |
idxSlice slc idx = | |
-- Nth element of a slice: add sliceStart, get Nth member of ord, then add sliceOffset to the result | |
let ordIdx = idx + slc.sliceStart | |
pfixIdx = slc.sliceArray.sfxOrd ! ordIdx | |
charIdx = pfixIdx + slc.sliceOffset in | |
slc.sliceArray.sfxStr ! charIdx | |
findBiased :: Ord a => Int -> (Ordering -> Ordering -> Ordering) -> a -> Slice a -> Maybe Int | |
findBiased biasDir bias c slc = go 0 (slc.sliceLength - 1) | |
where | |
go :: Int -> Int -> Maybe Int | |
-- look at middle of the array, compare; | |
-- if it's not equal, split the range up/down; | |
-- consider "c in the middle of the list" to be greater than c | |
go start end | start < 0 || start > end || end >= slc.sliceLength = | |
Nothing | |
go start end | start == end = | |
if idxSlice slc start == Sym c then Just start else Nothing | |
go start end = | |
let midpoint = (start + (end - start) `div` 2) | |
midElem = idxSlice slc midpoint | |
biasPoint = midpoint + biasDir | |
biasElem = if biasPoint >= 0 && biasPoint < slc.sliceStart + slc.sliceLength then Just $ idxSlice slc biasPoint else Nothing | |
compar = bias (Sym c `compare` midElem) (Just (Sym c) `compare` biasElem) in | |
case compar of | |
EQ -> Just midpoint | |
LT -> go start midpoint | |
GT -> go (midpoint + 1) end | |
findFirst :: Ord a => a -> Slice a -> Maybe Int | |
findFirst = findBiased (-1) top | |
where | |
top :: Ordering -> Ordering -> Ordering | |
top EQ GT = EQ | |
top EQ _ = LT | |
top x _ = x | |
findLast :: Ord a => a -> Slice a -> Maybe Int | |
findLast = findBiased 1 bottom | |
where | |
bottom :: Ordering -> Ordering -> Ordering | |
bottom EQ LT = EQ | |
bottom EQ _ = GT | |
bottom x _ = x | |
check :: Ord a => [a] -> Slice a -> Bool | |
check [] _ = True | |
check (c:cs) slc | |
| Just start <- findFirst c slc, | |
Just end <- findLast c slc | |
= check cs (nextSlice slc (start, end)) | |
check _ _ = False | |
-- quickcheck yaaay | |
prop_check_isinfixof :: String -> String -> Bool | |
prop_check_isinfixof needle haystack = needle `isInfixOf` haystack == check needle (initSlice $ buildSuffixArray haystack) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment