Created
January 15, 2014 23:31
-
-
Save startling/8446881 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE GeneralizedNewtypeDeriving #-} | |
{-# LANGUAGE DeriveFunctor #-} | |
{-# LANGUAGE DeriveFoldable #-} | |
{-# LANGUAGE DeriveTraversable #-} | |
-- base | |
import Data.List | |
import Data.Monoid | |
import Data.Foldable (Foldable) | |
import Data.Traversable (Traversable) | |
import Control.Applicative | |
import Control.Monad | |
-- QuickCheck | |
import Test.QuickCheck | |
-- Hspec | |
import Test.Hspec | |
newtype NGram a = NGram { toList :: [Maybe a] } | |
deriving | |
( Eq | |
, Ord | |
, Show | |
, Monoid | |
, Functor | |
, Foldable | |
, Traversable | |
) | |
tests :: IO () | |
tests = hspec $ do | |
describe "match" $ do | |
it "matches straightforward wildcardless ngrams" $ do | |
NGram [Just 'a', Just 'b'] `shouldMatch` "ab" | |
it "matches when the string has a suffix" $ do | |
NGram [Just 'a', Just 'b'] `shouldMatch` "abc" | |
it "matches when the string has a prefix" $ do | |
NGram [Just 'a', Just 'b'] `shouldMatch` "9ab" | |
it "doesn't match the empty string the NGram is empty" $ do | |
NGram [Just 'a'] `shouldn'tMatch` "" | |
NGram [] `shouldMatch` "" | |
it "the empty ngram matches everything" . property $ | |
\xs -> (NGram [] :: NGram Char) `matches` xs | |
it "the wildcard-only ngram matches every nonempty string" . property $ | |
\xs -> null xs || (NGram [Nothing] :: NGram Char) `matches` xs | |
it "some ngrams with wildcards match things" $ do | |
NGram [Just "quick", Nothing, Just "fox"] | |
`shouldMatch` words "the quick red fox" | |
describe "ngramsFrom" $ do | |
it "only creates ngrams that match the string given" . property $ | |
\xs -> all (`matches` xs) $ ngramsFrom 5 (xs :: [Char]) | |
where | |
-- Convenience expectations. | |
shouldMatch :: Eq a => NGram a -> [a] -> Expectation | |
a `shouldMatch` b = matches a b `shouldBe` True | |
shouldn'tMatch :: Eq a => NGram a -> [a] -> Expectation | |
a `shouldn'tMatch` b = matches a b `shouldBe` False | |
-- | Test whether an 'NGram' matches a list. | |
matches :: Eq a => NGram a -> [a] -> Bool | |
matches (NGram []) = const True | |
matches ns = any (matchInit ns) . init . tails | |
where | |
matchInit :: Eq a => NGram a -> [a] -> Bool | |
matchInit (NGram ns) = all (uncurry match) . zip ns | |
match :: Eq a => Maybe a -> a -> Bool | |
match Nothing _ = True | |
match (Just a) b = a == b | |
-- | Find all the ngrams no longer than a particular length that | |
-- match a given list. | |
ngramsFrom :: Int -> [a] -> [NGram a] | |
ngramsFrom n = concatMap (map NGram . replacingWith Nothing . toList) | |
. wildcardless n | |
where | |
-- Given a list, generate all possible lists with a given | |
-- item inserted at arbitrary places an arbitrary number of times. | |
replacingWith :: a -> [a] -> [[a]] | |
replacingWith a as = forM as $ (: a : []) | |
-- Generate all the wildcard-free ngrams that match a string | |
-- no longer than a given length. | |
wildcardless :: Int -> [a] -> [NGram a] | |
wildcardless n = map (NGram . map Just) . sublistsLengthN n | |
-- Generate all the sublists of a list with a length no longer | |
-- than the given 'Int'. | |
sublistsLengthN :: Int -> [a] -> [[a]] | |
sublistsLengthN 0 _ = [[]] | |
sublistsLengthN n ns = (sublistsLengthN (n - 1) ns ++) | |
. filter (\x -> length x == n) . map (take n) | |
. takeWhile (not . null) . iterate tail $ ns |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment