Created
July 26, 2010 22:31
-
-
Save reverendpaco/491358 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data PrefixSuffixTree a = ExactMatchDatum { shared :: a } | | |
SharedPrefixDatum { shared :: a, suffixT :: a, suffixS :: a } | | |
TargetIsSmallerDatum { pre :: a, suffixS :: a } | | |
SourceIsSmallerDatum { pre :: a, suffixT :: a } | | |
NoMatchDatum deriving (Show,Eq) | |
data MatchType = TargetIsPrefix | SourceIsPrefix | | |
SharedPrefix | ExactMatch | NotEqual deriving (Show) | |
-- utility functions | |
switchToWord node = node{nodeType = Word} | |
takeTogetherWhile :: (Eq a) => [a] -> [a] -> PrefixSuffixTree [a] | |
takeTogetherWhile [] _ = NoMatchDatum | |
takeTogetherWhile _ [] = NoMatchDatum | |
takeTogetherWhile source target = case match of | |
ExactMatch -> ExactMatchDatum {shared=source} | |
TargetIsPrefix -> TargetIsSmallerDatum {pre=target, | |
suffixS= suffixStringOfSource} | |
SourceIsPrefix -> SourceIsSmallerDatum {pre=source, | |
suffixT= suffixStringOfTarget} | |
NotEqual -> NoMatchDatum | |
SharedPrefix -> SharedPrefixDatum { shared = sharedPrefix , | |
suffixS = utilStrip sharedPrefix source , | |
suffixT = utilStrip sharedPrefix target} | |
where match = findMatchOnString source target | |
suffixStringOfSource = utilStrip target source | |
suffixStringOfTarget = utilStrip source target | |
utilStrip s t = case L.stripPrefix s t of | |
Just s -> s | |
Nothing -> [] | |
sharedPrefix = [ fst x | x <- takeWhile ( \ (x,y) -> x==y) $ zip source target ] | |
source =><= target = takeTogetherWhile source target | |
findMatchOnString sourceS@(s:ss) targetS@(t:ts) | sourceS == targetS = ExactMatch | |
| sourceS `L.isPrefixOf` targetS = SourceIsPrefix | |
| targetS `L.isPrefixOf` sourceS = TargetIsPrefix | |
| s /= t = NotEqual | |
| otherwise = SharedPrefix | |
insertNewChildWordNode word@(x:xs) childNodes = (wordNode word) *-> childNodes | |
insertNewChildNode node@TrieNode{wordFragment=word@(x:xs)} childNodes | |
= Map.insert x node childNodes | |
x *-> y = insertNewChildNode x y | |
-- match for the top node | |
addWordToTrie word@(x:xs) node@(TrieNode{children=childNodes,nodeType=Top} ) | |
| Map.notMember x childNodes = node{children= (insertNewChildWordNode word childNodes)} | |
| otherwise = node{ children= newlyChangedNode *-> childNodes} | |
where subNode s = childNodes ! s | |
newlyChangedNode = addWordToTrie word (subNode x) | |
-- match for the others | |
addWordToTrie source@(x:xs) node@(TrieNode{children=childNodes, wordFragment=target} ) | |
= case matchData of | |
TargetIsSmallerDatum {pre=target, | |
suffixS= suffixStringOfSource@(s:ss)} | |
| Map.notMember s childNodes -> insertNode node matchData | |
| otherwise -> node{ children = newlyChangedNode *-> childNodes} | |
where subNode = (childNodes ! ) | |
newlyChangedNode = addWordToTrie suffixStringOfSource (subNode s) | |
otherwise -> insertNode node matchData | |
where matchData = source =><= target |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment