Created
February 20, 2018 00:58
-
-
Save chessai/5da19e5c608623be6d3b2e8d2ba5cd8a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# language BangPatterns #-} | |
{-# language DataKinds #-} | |
{-# language GADTs #-} | |
{-# language KindSignatures #-} | |
{-# language MagicHash #-} | |
{-# language NoImplicitPrelude #-} | |
{-# language ScopedTypeVariables #-} | |
{-# language StandaloneDeriving #-} | |
{-# language TypeFamilies #-} | |
{-# language TypeOperators #-} | |
{-# OPTIONS_GHC -O2 #-} | |
module BOI where | |
import Control.Applicative (Applicative(..)) | |
import Data.ByteString.Internal (ByteString(..), w2c, c2w) | |
import Data.Foldable | |
import Data.Map.Strict (Map) | |
import Data.Maybe (fromMaybe) | |
import Data.Monoid | |
import Data.Semigroup | |
import Data.String (String) | |
import Data.Word (Word8) | |
import GHC.Base hiding (empty, Nat) | |
import Flow | |
import Prelude ( (+), (*), (/), (<$>) ) | |
import qualified Data.ByteString.Char8 as BC | |
import qualified Data.ByteString.Unsafe as BU | |
import qualified Data.List as L | |
import qualified Data.Map.Strict as DMS | |
import qualified Prelude as P | |
data Nat = Z | S Nat | |
deriving (Eq, Ord) | |
data SNat (n :: Nat) where | |
SZ :: SNat Z | |
SS :: SNat n -> SNat (S n) | |
(-) :: SNat n -> SNat m -> SNat (n :- m) | |
SZ - _ = SZ | |
(SS n) - SZ = (SS n) | |
type family (n :: Nat) :- (m :: Nat) :: Nat where | |
Z :- m = Z | |
(S n) :- Z = S n | |
(S n) :- m = S (n :- m) | |
data Vector (a :: *) (n :: Nat) where | |
Nil :: Vector a Z | |
Cons :: a -> Vector a n -> Vector a (S n) | |
deriving instance Eq a => Eq (Vector a n) | |
deriving instance Ord a => Ord (Vector a n) | |
fromList :: SNat n -> [a] -> Vector a n | |
fromList SZ _ = Nil | |
fromList (SS n) (x:xs) = Cons x (fromList n xs) | |
sNatToInt :: SNat n -> Int | |
sNatToInt SZ = 0 | |
sNatToInt (SS n) = 1 + sNatToInt n | |
type NGram n = Vector Char n | |
newtype Probability = Prob { prob :: Double } | |
wrap :: Double -> Probability | |
unwrap :: Probability -> Double | |
wrap d = Prob d | |
unwrap (Prob d) = d | |
-- multiplicative semigroup | |
instance Semigroup Probability where | |
Prob d1 <> Prob d2 = Prob (d1 * d2) | |
-- multiplicative monoid | |
instance Monoid Probability where | |
mempty = Prob 0 | |
Prob d1 `mappend` Prob d2 = Prob (d1 * d2) | |
--takes first n chars, turns into ngram | |
--runs function, monoidal combines result of function with foldWindow with rest of text | |
group :: BC.ByteString -> [BC.ByteString] | |
group b = BC.groupBy (\_ _ -> False) b | |
foldWindow :: (Monoid m) => SNat n -> (NGram n -> m) -> BC.ByteString -> m | |
foldWindow SZ _ _ = mempty | |
foldWindow k@(SS n) f b = go (group b) | |
where | |
i = sNatToInt k :: Int | |
go :: (Monoid m) => [BC.ByteString] -> m | |
go [] = mempty | |
go bs = (f $ fromList k $ BC.head <$> L.take i bs) `mappend` foldWindow n _ (BC.concat $ L.drop i bs) | |
--createModel :: BC.ByteString -> Map (NGram n) Probability | |
--createModel (PS _ _ 0) = DSM.empty | |
--createModel !b = go 0 l b | |
-- where | |
-- l = BC.length b :: Int | |
-- | |
-- go :: Int -> Int -> BC.ByteString -> Map (NGram n) Probability | |
-- go !p !q bs | |
-- | p == q = DMS.empty | |
-- | otherwise = | |
-- | |
-- | |
-- let k = BU.unsafeIndex bs <$> [p..q] | |
-- | |
-- in | |
--f will lookup b in the map, default to 0, then wrap it in the multiplicative monoid, | |
classify :: Map (NGram n) Probability -> BC.ByteString -> Probability | |
classify m b = undefined | |
--foldWindow (wrap . fromMaybe 0 . DMS.lookup b) b | |
-- unwrap . foldWindow f | |
-- | |
-- | |
--type NGram = Vector N Char | |
--foldWindow :: (Monoid m) => (NGram -> m) -> Text -> m | |
--createModel :: Text -> Map NGram Probability | |
--classify :: Map NGram Probability -> Text -> Probability | |
--classify m = foldWindow (lookup m .> fromMaybe 0 .> wrapMultiplicative) .> unwrapMultiplicative | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment