fizbin · January 24, 2022 21:03
diff --git a/wordle_entropy.hs b/wordle_entropy.hs
 -- stack --resolver lts-18.18 script --package pqueue --package aeson --package containers
 {-# LANGUAGE Haskell2010 #-}
 {-# OPTIONS_GHC -Wall -O2 #-}

 import Data.Aeson
 import Data.List
 import Data.Map (Map)
 import qualified Data.Map as M
 import qualified Data.PQueue.Max as MQ

 getSignature :: String -> String -> String
 getSignature guess solution =
  let list1 = zipWith (==) guess solution
      unmatched = map snd $ filter (not . fst) $ zip list1 solution
   in reverse $ fst $ foldl' folder ("", unmatched) $ zip list1 guess
  where
    folder :: (String, [Char]) -> (Bool, Char) -> (String, [Char])
    folder (acc, unmatched) (isEq, guessC)
      | isEq = ('G' : acc, unmatched)
      | guessC `elem` unmatched = ('Y' : acc, delete guessC unmatched)
      | otherwise = ('B' : acc, unmatched)

 -- Gets the "remaining entropy" left in the pool of possible solutions. To compare with
 -- most "entropy based" solutions online, you'll likely need
 -- (logBase 2.0 wlen - getEntropy guess possibleSolutions)
 --
 -- Also, most entropy-based solutions will refer to "maximizing entropy" whereas of course
 -- what we do with this code is minimize the output of this function.
 getEntropy :: String -> [String] -> Double
 getEntropy guess possibleWords =
  let wlen = fromIntegral $ length possibleWords
      sigmap = M.fromListWith (+) $ map (\k -> (getSignature guess k, 1::Int)) possibleWords
   in sum $ map ((\n -> logBase 2.0 n * n / wlen) . fromIntegral . snd) $ M.toList sigmap

 getEntropy2 :: String -> [String] -> [String] -> [(String, String, Double)]
 getEntropy2 guess1 possibleGuess2 possibleSolutions =
  let wlen = fromIntegral $ length possibleSolutions
      sigmap = M.fromListWith (++) $ map (\k -> (getSignature guess1 k, [k])) possibleSolutions
   in do
        guess2 <- possibleGuess2
        let ent = sum $ do
              (_, wordSlice) <- M.toList sigmap
              let sigmap2 = M.fromListWith (+) $ map (\k -> (getSignature guess2 k, 1)) wordSlice :: Map String Int
              pure $ sum $ map ((\n -> logBase 2.0 n * n / wlen) . fromIntegral . snd) $ M.toList sigmap2
        pure (guess1, guess2, ent)

 bottomN :: (Ord a) => Int -> [a] -> [a]
 bottomN szLim lst = MQ.toAscList (go MQ.empty lst)
  where
    go q [] = q
    go q (a : as) =
      let nqueue = MQ.insert a q
       in if MQ.size nqueue > szLim then go (MQ.deleteMax nqueue) as else go nqueue as

 main :: IO ()
 main = do
  words1' <- eitherDecodeFileStrict "wordle1.json"
  words1 <- case words1' of
    Left err -> ioError (userError err)
    Right x -> pure x
  words2' <- eitherDecodeFileStrict "wordle2.json"
  words2 <- case words2' of
    Left err -> ioError (userError err)
    Right x -> pure x
  print $ length words1
  putStrLn $ "Bits initially " ++ show (logBase 2.0 (fromIntegral $ length words1) :: Double)
  putStrLn (getSignature "raise" "cigar")
  print $ getEntropy "raise" words1
  print $ getEntropy "train" words1
  print $ getEntropy "rathe" words1
  print $ getEntropy "raree" words1

  let goodWords = bottomN 2000 $ map (\k -> (getEntropy k words1, k)) (words1 ++ words2)
  mapM_ print $ take 5 $ sort goodWords
  mapM_ print $ zip [1::Int ..] $ bottomN 20 $ map (\k -> (getEntropy k words1, k)) words1

  print $ getEntropy2 "salon" ["trice"] words1

  -- Uncomment only if you can compile this with -O2 and have a bunch of time
  -- This hunts for the best two-word pairs, assuming easy mode and assuming the first
  -- of the pair is one of the 2000 best single words.
  --mapM_ print $ zip [1::Int ..] $ bottomN 50 $ map (\(a, b, c) -> (c, a, b)) $ concatMap ((\w -> getEntropy2 w (words1 ++ words2) words1) . snd) goodWords
	-- stack --resolver lts-18.18 script --package pqueue --package aeson --package containers
	{-# LANGUAGE Haskell2010 #-}
	{-# OPTIONS_GHC -Wall -O2 #-}

	import Data.Aeson
	import Data.List
	import Data.Map (Map)
	import qualified Data.Map as M
	import qualified Data.PQueue.Max as MQ

	getSignature :: String -> String -> String
	getSignature guess solution =
	let list1 = zipWith (==) guess solution
	unmatched = map snd $ filter (not . fst) $ zip list1 solution
	in reverse $ fst $ foldl' folder ("", unmatched) $ zip list1 guess
	where
	folder :: (String, [Char]) -> (Bool, Char) -> (String, [Char])
	folder (acc, unmatched) (isEq, guessC)
	\| isEq = ('G' : acc, unmatched)
	\| guessC `elem` unmatched = ('Y' : acc, delete guessC unmatched)
	\| otherwise = ('B' : acc, unmatched)

	-- Gets the "remaining entropy" left in the pool of possible solutions. To compare with
	-- most "entropy based" solutions online, you'll likely need
	-- (logBase 2.0 wlen - getEntropy guess possibleSolutions)
	--
	-- Also, most entropy-based solutions will refer to "maximizing entropy" whereas of course
	-- what we do with this code is minimize the output of this function.
	getEntropy :: String -> [String] -> Double
	getEntropy guess possibleWords =
	let wlen = fromIntegral $ length possibleWords
	sigmap = M.fromListWith (+) $ map (\k -> (getSignature guess k, 1::Int)) possibleWords
	in sum $ map ((\n -> logBase 2.0 n * n / wlen) . fromIntegral . snd) $ M.toList sigmap

	getEntropy2 :: String -> [String] -> [String] -> [(String, String, Double)]
	getEntropy2 guess1 possibleGuess2 possibleSolutions =
	let wlen = fromIntegral $ length possibleSolutions
	sigmap = M.fromListWith (++) $ map (\k -> (getSignature guess1 k, [k])) possibleSolutions
	in do
	guess2 <- possibleGuess2
	let ent = sum $ do
	(_, wordSlice) <- M.toList sigmap
	let sigmap2 = M.fromListWith (+) $ map (\k -> (getSignature guess2 k, 1)) wordSlice :: Map String Int
	pure $ sum $ map ((\n -> logBase 2.0 n * n / wlen) . fromIntegral . snd) $ M.toList sigmap2
	pure (guess1, guess2, ent)

	bottomN :: (Ord a) => Int -> [a] -> [a]
	bottomN szLim lst = MQ.toAscList (go MQ.empty lst)
	where
	go q [] = q
	go q (a : as) =
	let nqueue = MQ.insert a q
	in if MQ.size nqueue > szLim then go (MQ.deleteMax nqueue) as else go nqueue as

	main :: IO ()
	main = do
	words1' <- eitherDecodeFileStrict "wordle1.json"
	words1 <- case words1' of
	Left err -> ioError (userError err)
	Right x -> pure x
	words2' <- eitherDecodeFileStrict "wordle2.json"
	words2 <- case words2' of
	Left err -> ioError (userError err)
	Right x -> pure x
	print $ length words1
	putStrLn $ "Bits initially " ++ show (logBase 2.0 (fromIntegral $ length words1) :: Double)
	putStrLn (getSignature "raise" "cigar")
	print $ getEntropy "raise" words1
	print $ getEntropy "train" words1
	print $ getEntropy "rathe" words1
	print $ getEntropy "raree" words1

	let goodWords = bottomN 2000 $ map (\k -> (getEntropy k words1, k)) (words1 ++ words2)
	mapM_ print $ take 5 $ sort goodWords
	mapM_ print $ zip [1::Int ..] $ bottomN 20 $ map (\k -> (getEntropy k words1, k)) words1

	print $ getEntropy2 "salon" ["trice"] words1

	-- Uncomment only if you can compile this with -O2 and have a bunch of time
	-- This hunts for the best two-word pairs, assuming easy mode and assuming the first
	-- of the pair is one of the 2000 best single words.
	--mapM_ print $ zip [1::Int ..] $ bottomN 50 $ map (\(a, b, c) -> (c, a, b)) $ concatMap ((\w -> getEntropy2 w (words1 ++ words2) words1) . snd) goodWords