Skip to content

Instantly share code, notes, and snippets.

@reverendpaco
Created July 26, 2010 22:31
Show Gist options
  • Save reverendpaco/491358 to your computer and use it in GitHub Desktop.
Save reverendpaco/491358 to your computer and use it in GitHub Desktop.
data PrefixSuffixTree a = ExactMatchDatum { shared :: a } |
SharedPrefixDatum { shared :: a, suffixT :: a, suffixS :: a } |
TargetIsSmallerDatum { pre :: a, suffixS :: a } |
SourceIsSmallerDatum { pre :: a, suffixT :: a } |
NoMatchDatum deriving (Show,Eq)
data MatchType = TargetIsPrefix | SourceIsPrefix |
SharedPrefix | ExactMatch | NotEqual deriving (Show)
-- utility functions
switchToWord node = node{nodeType = Word}
takeTogetherWhile :: (Eq a) => [a] -> [a] -> PrefixSuffixTree [a]
takeTogetherWhile [] _ = NoMatchDatum
takeTogetherWhile _ [] = NoMatchDatum
takeTogetherWhile source target = case match of
ExactMatch -> ExactMatchDatum {shared=source}
TargetIsPrefix -> TargetIsSmallerDatum {pre=target,
suffixS= suffixStringOfSource}
SourceIsPrefix -> SourceIsSmallerDatum {pre=source,
suffixT= suffixStringOfTarget}
NotEqual -> NoMatchDatum
SharedPrefix -> SharedPrefixDatum { shared = sharedPrefix ,
suffixS = utilStrip sharedPrefix source ,
suffixT = utilStrip sharedPrefix target}
where match = findMatchOnString source target
suffixStringOfSource = utilStrip target source
suffixStringOfTarget = utilStrip source target
utilStrip s t = case L.stripPrefix s t of
Just s -> s
Nothing -> []
sharedPrefix = [ fst x | x <- takeWhile ( \ (x,y) -> x==y) $ zip source target ]
source =><= target = takeTogetherWhile source target
findMatchOnString sourceS@(s:ss) targetS@(t:ts) | sourceS == targetS = ExactMatch
| sourceS `L.isPrefixOf` targetS = SourceIsPrefix
| targetS `L.isPrefixOf` sourceS = TargetIsPrefix
| s /= t = NotEqual
| otherwise = SharedPrefix
insertNewChildWordNode word@(x:xs) childNodes = (wordNode word) *-> childNodes
insertNewChildNode node@TrieNode{wordFragment=word@(x:xs)} childNodes
= Map.insert x node childNodes
x *-> y = insertNewChildNode x y
-- match for the top node
addWordToTrie word@(x:xs) node@(TrieNode{children=childNodes,nodeType=Top} )
| Map.notMember x childNodes = node{children= (insertNewChildWordNode word childNodes)}
| otherwise = node{ children= newlyChangedNode *-> childNodes}
where subNode s = childNodes ! s
newlyChangedNode = addWordToTrie word (subNode x)
-- match for the others
addWordToTrie source@(x:xs) node@(TrieNode{children=childNodes, wordFragment=target} )
= case matchData of
TargetIsSmallerDatum {pre=target,
suffixS= suffixStringOfSource@(s:ss)}
| Map.notMember s childNodes -> insertNode node matchData
| otherwise -> node{ children = newlyChangedNode *-> childNodes}
where subNode = (childNodes ! )
newlyChangedNode = addWordToTrie suffixStringOfSource (subNode s)
otherwise -> insertNode node matchData
where matchData = source =><= target
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment