Last active
May 24, 2021 12:34
-
-
Save rampion/6337593 to your computer and use it in GitHub Desktop.
The Lazy Length monoid allows you to compare two lists by length without calculating the full length of both lists (unless they are of equal length). Though `lazyLength :: [a] -> LazyLength` is O(n), two `LazyLength` values for lists of length n and m may be compared in time O(min(log n, log m)).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
module LazyLength ( | |
LazyLength(), | |
fromLazyLength, | |
toLazyLength, | |
lazyLength, | |
-- QuickCheck properties | |
prop_invariant, | |
prop_invertible, | |
prop_addition, | |
prop_ordered, | |
prop_bounded, | |
prop_accurate | |
) where | |
import Data.Function (fix) | |
import Data.List | |
import Data.Monoid | |
import Test.QuickCheck | |
-- Lazy representation of a length. Stores the largest non-zero bit | |
-- position as a unary number, then the rest of the bits at the tail | |
-- of the unary number. | |
-- | |
-- This allows lengths N and M to be compared in O(min(log(N),log(M))) | |
-- steps, and, in particular, only one must be fully evaluated. | |
-- | |
-- We'll use smart constructors rather than expose the | |
-- default constructors to maintain an invariant: a LazyLength | |
-- made of `b` `Bit`s and `Rem n` has `0 <= n < 2^b`. | |
data LazyLength = Rem !Integer | Bit LazyLength deriving (Eq, Ord, Show) | |
-- NOTE: derived `Ord` instance has a `compare` which takes O(min(log n, log m)) | |
instance Bounded LazyLength where | |
minBound = Rem 0 | |
maxBound = fix Bit | |
-- Convert the given non-negative value to LazyLength, O(n). | |
toLazyLength :: Integral a => a -> LazyLength | |
toLazyLength n | n < 0 = error "can't have a negative length" | |
| otherwise = toLazyLength' n 0 1 | |
where toLazyLength' n lo !hi = if n < hi | |
then Rem . fromIntegral $ n - lo | |
else Bit $ toLazyLength' n hi (2*hi) | |
-- Convert the given LazyLength to a number n, O(log n). | |
fromLazyLength :: Integral a => LazyLength -> a | |
fromLazyLength (Rem _) = 0 | |
fromLazyLength (Bit x) = fromLazyLength' 0 x | |
where fromLazyLength' !b (Rem n) = fromInteger $ 2^b + n | |
fromLazyLength' !b (Bit x) = fromLazyLength' (b+1) x | |
instance Monoid LazyLength where | |
mempty = minBound | |
-- mappend just does addition on the represented naturals | |
mappend (Bit x) (Bit y) = Bit $ mappend' 0 x y | |
where -- (2^{b + i} + n) + (2^{b + j} + m) > 2^b | |
mappend' !b (Bit x) (Bit y) = Bit $ mappend' (b+1) x y | |
-- (2^b + n) + (2^b + m) = 2^{b+1} + (n+m) | |
-- 0 <= n < 2^b | |
-- 0 <= m < 2^b | |
-- => | |
-- 0 <= n+m < 2^{b+1} | |
mappend' !b (Rem n) (Rem m) = Bit . Rem $ n + m | |
mappend' !b (Rem n) (Bit y) = Bit $ cleanup (2^b + n) (b+1) y | |
mappend' !b (Bit x) (Rem m) = Bit $ cleanup (2^b + m) (b+1) x | |
cleanup n !b (Bit y) = Bit $ cleanup n (b+1) y | |
-- 0 <= n < 2^b | |
-- 0 <= m < 2^b | |
-- => either | |
-- 0 <= n + m < 2^b | |
-- 2^b <= n + m < 2^{b+1} => 0 <= n + m - 2^b < 2^b | |
-- => either | |
-- n + (2^b + m) = 2^b + (n+m) | |
-- n + (2^b + m) = 2^{b+1} + (n+m-2^n) | |
cleanup n !b (Rem m) = let p = 2^b | |
q = n + m | |
in if q >= p | |
then Bit . Rem $ q - p | |
else Rem q | |
mappend _ _ = mempty | |
-- compute the length of a list, O(n). | |
lazyLength :: [a] -> LazyLength | |
lazyLength [] = minBound | |
lazyLength (_:as) = Bit $ lazyLength' 1 as | |
where lazyLength' !n as = case genericSplitAt (n-1) as of | |
(rs,[]) -> Rem $ genericLength rs | |
(_,_:qs) -> Bit $ lazyLength' (2*n) qs | |
-- make sure we can test the invariant w/ QuickCheck | |
instance Arbitrary LazyLength where | |
arbitrary = fmap (toLazyLength . abs) (arbitrary :: Gen Integer) | |
instance CoArbitrary LazyLength where | |
coarbitrary = variant . fromLazyLength | |
-- it's in canonical form | |
prop_invariant :: LazyLength -> Bool | |
prop_invariant = invariant' 0 | |
where invariant' !b (Rem n) = 0 <= n && n < 2^b | |
invariant' !b (Bit x) = invariant' (b+1) x | |
-- there's a bijection between LazyLength and the naturals | |
prop_invertible :: LazyLength -> Bool | |
prop_invertible x = toLazyLength (fromLazyLength x) == x | |
-- mempty is the identity for mappend | |
prop_zero :: LazyLength -> Bool | |
prop_zero x = x `mappend` mempty == x | |
-- mappend works like addition over the naturals | |
prop_addition :: LazyLength -> LazyLength -> Bool | |
prop_addition x y = x `mappend` y == toLazyLength (fromLazyLength x + fromLazyLength y) | |
-- order is preserved | |
prop_ordered :: LazyLength -> LazyLength -> Bool | |
prop_ordered x y = compare x y == compare (fromLazyLength x) (fromLazyLength y) | |
-- stays inside the given bounds | |
prop_bounded :: LazyLength -> Bool | |
prop_bounded x = minBound <= x && x < maxBound | |
-- lazyLength accurately calculates list length | |
prop_accurate :: [()] -> Bool | |
prop_accurate as = toLazyLength (length as) == lazyLength as |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@treeowl yeah, you’re right