Skip to content

Instantly share code, notes, and snippets.

@l-Luna
Created October 17, 2024 20:25
Show Gist options
  • Save l-Luna/24259241e2ac0de7224f791c099fb0fd to your computer and use it in GitHub Desktop.
Save l-Luna/24259241e2ac0de7224f791c099fb0fd to your computer and use it in GitHub Desktop.
{-# LANGUAGE OverloadedRecordDot #-}
import Data.List (sortBy, isInfixOf)
import Data.Ord (comparing)
import Data.Array ((!), bounds, listArray, Array)
suffixes :: Int -> [a] -> [(Int, [a])]
suffixes idx [] = []
suffixes idx l@(_:xs) = (idx, l) : suffixes (idx + 1) xs
listToArray :: [a] -> Array Int a
listToArray xs = listArray (0, length xs - 1) xs
-- it's just Maybe again, but it means the names are more descriptive
data Symbol a = Term | Sym a
deriving (Show, Eq, Ord)
data SuffixArray a = SuffixArray {
sfxStr :: Array Int (Symbol a),
sfxOrd :: Array Int Int
} deriving (Show, Eq)
mkSuffixArray :: [Symbol a] -> [Int] -> SuffixArray a
mkSuffixArray xs ord = SuffixArray (listToArray xs) (listToArray ord)
buildSuffixArray :: Ord a => [a] -> SuffixArray a
buildSuffixArray str =
let adj = (Sym <$> str) ++ [Term] in
mkSuffixArray adj $ fst <$> sortBy (comparing snd) (suffixes 0 adj)
data Slice a = Slice {
sliceArray :: SuffixArray a,
sliceStart :: Int,
sliceLength :: Int,
sliceOffset :: Int
} deriving (Show, Eq)
initSlice :: SuffixArray a -> Slice a
initSlice sfx = Slice{ sliceArray = sfx, sliceStart = 0, sliceLength = snd (bounds sfx.sfxStr) + 1, sliceOffset = 0 }
nextSlice :: Slice a -> (Int, Int) -> Slice a
nextSlice slc (start, end) = slc{ sliceStart = slc.sliceStart + start, sliceLength = end - start + 1, sliceOffset = slc.sliceOffset + 1 }
idxSlice :: Slice a -> Int -> Symbol a
idxSlice slc idx =
-- Nth element of a slice: add sliceStart, get Nth member of ord, then add sliceOffset to the result
let ordIdx = idx + slc.sliceStart
pfixIdx = slc.sliceArray.sfxOrd ! ordIdx
charIdx = pfixIdx + slc.sliceOffset in
slc.sliceArray.sfxStr ! charIdx
findBiased :: Ord a => Int -> (Ordering -> Ordering -> Ordering) -> a -> Slice a -> Maybe Int
findBiased biasDir bias c slc = go 0 (slc.sliceLength - 1)
where
go :: Int -> Int -> Maybe Int
-- look at middle of the array, compare;
-- if it's not equal, split the range up/down;
-- consider "c in the middle of the list" to be greater than c
go start end | start < 0 || start > end || end >= slc.sliceLength =
Nothing
go start end | start == end =
if idxSlice slc start == Sym c then Just start else Nothing
go start end =
let midpoint = (start + (end - start) `div` 2)
midElem = idxSlice slc midpoint
biasPoint = midpoint + biasDir
biasElem = if biasPoint >= 0 && biasPoint < slc.sliceStart + slc.sliceLength then Just $ idxSlice slc biasPoint else Nothing
compar = bias (Sym c `compare` midElem) (Just (Sym c) `compare` biasElem) in
case compar of
EQ -> Just midpoint
LT -> go start midpoint
GT -> go (midpoint + 1) end
findFirst :: Ord a => a -> Slice a -> Maybe Int
findFirst = findBiased (-1) top
where
top :: Ordering -> Ordering -> Ordering
top EQ GT = EQ
top EQ _ = LT
top x _ = x
findLast :: Ord a => a -> Slice a -> Maybe Int
findLast = findBiased 1 bottom
where
bottom :: Ordering -> Ordering -> Ordering
bottom EQ LT = EQ
bottom EQ _ = GT
bottom x _ = x
check :: Ord a => [a] -> Slice a -> Bool
check [] _ = True
check (c:cs) slc
| Just start <- findFirst c slc,
Just end <- findLast c slc
= check cs (nextSlice slc (start, end))
check _ _ = False
-- quickcheck yaaay
prop_check_isinfixof :: String -> String -> Bool
prop_check_isinfixof needle haystack = needle `isInfixOf` haystack == check needle (initSlice $ buildSuffixArray haystack)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment