Created
September 16, 2012 16:51
-
-
Save paulcc/3733182 to your computer and use it in GitHub Desktop.
Code for the Word Chain kata, see the October edition of PragPub magazine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- word list is at https://gist.github.com/3799331 | |
import Data.Set(Set, fromDistinctAscList, member) | |
import System.IO.Unsafe(unsafePerformIO) | |
import Data.Tree | |
import Data.List(inits,tails) | |
-------------------- | |
-- first step - generating valid next words | |
one_step :: String -> [String] | |
one_step = filter is_valid_word . generate_candidates | |
generate_candidates :: String -> [String] | |
generate_candidates [] | |
= [] | |
generate_candidates (c:cs) | |
= [ x : cs | x <- ['a'..'z'], x /= c ] -- new letter at front | |
++ [ c : xs | xs <- generate_candidates cs ] -- old letter at front | |
generate_v2 :: String -> [String] | |
generate_v2 w = [ before ++ d : tail after | |
| (before, after) <- all_splits_of w | |
, not (null after) | |
, d <- ['a' .. 'z'] | |
, d /= head after ] | |
all_splits_of :: [a] -> [([a], [a])] | |
all_splits_of w = zip (inits w) (tails w) | |
-------------------- | |
-- second step - load dictionary and add validity test | |
dict :: Set String | |
dict = fromDistinctAscList | |
$ lines | |
$ unsafePerformIO (readFile "fours.txt") | |
is_valid_word :: String -> Bool | |
is_valid_word w = member w dict | |
-------------------- | |
-- third step - generating and traversing the state space | |
state_tree :: (s -> [s]) -> s -> Tree s | |
state_tree children_of state | |
= Node state [ state_tree children_of s | s <- children_of state ] | |
prune :: Int -> Tree a -> Tree a | |
prune d (Node x cs) | |
| d <= 0 = Node x [] | |
| otherwise = Node x [ prune (d - 1) c | c <- cs ] | |
bft :: Tree a -> [a] | |
bft t = bft_ [t] | |
bft_ :: [Tree a] -> [a] | |
bft_ trees = [ x | Node x _ <- trees ] ++ bft_ (concat $ map subForest trees) | |
-------------------- | |
-- step four: doing a simple search | |
breadth_first_search :: (s -> Bool) -- goal test | |
-> (s -> [s]) -- state generator | |
-> s -- initial state | |
-> [s] -- solutions out | |
breadth_first_search is_goal children_of | |
= filter is_goal | |
. bft | |
. state_tree children_of | |
word_chain a b = breadth_first_search (\w -> w == b) one_step a | |
-------------------- | |
-- step five - ensuring we get paths out | |
type Path = [String] | |
goal_path :: String -> Path -> Bool | |
goal_path target ws = target == head ws | |
-- there's an extra optimization here, of disallowing child words which have already | |
-- been seen in the current path, hence avoiding lead -> head -> lead -> ... | |
-- it doesn't cause BFS to fall over, but is fatal for DFS! | |
-- see discussion at end | |
child_paths :: Path -> [Path] | |
child_paths (w:ws) | |
= [ s : w : ws | s <- one_step w, s `notElem` ws ] | |
init_path :: String -> Path | |
init_path w = [w] | |
word_chain_path a b = breadth_first_search (goal_path b) child_paths (init_path a) | |
-------------------- | |
-- step six: some quick tests | |
-- abbreviations for common tests | |
-- yes you can build X-unit assertions for stuff like this | |
[t1,t2] = map word_chain_path ["ruby", "lead"] | |
t_r_c = t1 "code" | |
-- stress-test, for profiling | |
main = sequence [ print $ take (min 1 n) $ word_chain_path "ruby" "code" | n <- [1..1000]] >>= print | |
{- ------------------- | |
-- last words | |
"child_paths" contains an extra test to prevent looping between the same words on a path. | |
I forgot about this detail when writing the article - oops! You definitely need it when | |
trying DFS or best-first. | |
Another optimization: we don't want to repeat work already done, ie if "lead" gave | |
"head" and "bead", then it's pointless having "bead" explored as a child of "head" - so | |
when searching "head" we do not need to visit its child "bead" because it was already | |
covered as a sibling of "head". | |
EXERCISE: try adding this. Hint: when you generate child_paths from W then you have a | |
list of the words under W. Why not pass this around with the path value? (You can | |
define a new record type for this, or use tuples. | |
See http://book.realworldhaskell.org/read/types-and-functions.html#tuple.ghci:book for | |
a quick intro to tuples. | |
-} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment