Skip to content

Instantly share code, notes, and snippets.

@chessai
Created February 20, 2018 00:58
Show Gist options
  • Save chessai/5da19e5c608623be6d3b2e8d2ba5cd8a to your computer and use it in GitHub Desktop.
Save chessai/5da19e5c608623be6d3b2e8d2ba5cd8a to your computer and use it in GitHub Desktop.
{-# language BangPatterns #-}
{-# language DataKinds #-}
{-# language GADTs #-}
{-# language KindSignatures #-}
{-# language MagicHash #-}
{-# language NoImplicitPrelude #-}
{-# language ScopedTypeVariables #-}
{-# language StandaloneDeriving #-}
{-# language TypeFamilies #-}
{-# language TypeOperators #-}
{-# OPTIONS_GHC -O2 #-}
module BOI where
import Control.Applicative (Applicative(..))
import Data.ByteString.Internal (ByteString(..), w2c, c2w)
import Data.Foldable
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Semigroup
import Data.String (String)
import Data.Word (Word8)
import GHC.Base hiding (empty, Nat)
import Flow
import Prelude ( (+), (*), (/), (<$>) )
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Unsafe as BU
import qualified Data.List as L
import qualified Data.Map.Strict as DMS
import qualified Prelude as P
data Nat = Z | S Nat
deriving (Eq, Ord)
data SNat (n :: Nat) where
SZ :: SNat Z
SS :: SNat n -> SNat (S n)
(-) :: SNat n -> SNat m -> SNat (n :- m)
SZ - _ = SZ
(SS n) - SZ = (SS n)
type family (n :: Nat) :- (m :: Nat) :: Nat where
Z :- m = Z
(S n) :- Z = S n
(S n) :- m = S (n :- m)
data Vector (a :: *) (n :: Nat) where
Nil :: Vector a Z
Cons :: a -> Vector a n -> Vector a (S n)
deriving instance Eq a => Eq (Vector a n)
deriving instance Ord a => Ord (Vector a n)
fromList :: SNat n -> [a] -> Vector a n
fromList SZ _ = Nil
fromList (SS n) (x:xs) = Cons x (fromList n xs)
sNatToInt :: SNat n -> Int
sNatToInt SZ = 0
sNatToInt (SS n) = 1 + sNatToInt n
type NGram n = Vector Char n
newtype Probability = Prob { prob :: Double }
wrap :: Double -> Probability
unwrap :: Probability -> Double
wrap d = Prob d
unwrap (Prob d) = d
-- multiplicative semigroup
instance Semigroup Probability where
Prob d1 <> Prob d2 = Prob (d1 * d2)
-- multiplicative monoid
instance Monoid Probability where
mempty = Prob 0
Prob d1 `mappend` Prob d2 = Prob (d1 * d2)
--takes first n chars, turns into ngram
--runs function, monoidal combines result of function with foldWindow with rest of text
group :: BC.ByteString -> [BC.ByteString]
group b = BC.groupBy (\_ _ -> False) b
foldWindow :: (Monoid m) => SNat n -> (NGram n -> m) -> BC.ByteString -> m
foldWindow SZ _ _ = mempty
foldWindow k@(SS n) f b = go (group b)
where
i = sNatToInt k :: Int
go :: (Monoid m) => [BC.ByteString] -> m
go [] = mempty
go bs = (f $ fromList k $ BC.head <$> L.take i bs) `mappend` foldWindow n _ (BC.concat $ L.drop i bs)
--createModel :: BC.ByteString -> Map (NGram n) Probability
--createModel (PS _ _ 0) = DSM.empty
--createModel !b = go 0 l b
-- where
-- l = BC.length b :: Int
--
-- go :: Int -> Int -> BC.ByteString -> Map (NGram n) Probability
-- go !p !q bs
-- | p == q = DMS.empty
-- | otherwise =
--
--
-- let k = BU.unsafeIndex bs <$> [p..q]
--
-- in
--f will lookup b in the map, default to 0, then wrap it in the multiplicative monoid,
classify :: Map (NGram n) Probability -> BC.ByteString -> Probability
classify m b = undefined
--foldWindow (wrap . fromMaybe 0 . DMS.lookup b) b
-- unwrap . foldWindow f
--
--
--type NGram = Vector N Char
--foldWindow :: (Monoid m) => (NGram -> m) -> Text -> m
--createModel :: Text -> Map NGram Probability
--classify :: Map NGram Probability -> Text -> Probability
--classify m = foldWindow (lookup m .> fromMaybe 0 .> wrapMultiplicative) .> unwrapMultiplicative
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment