Created
May 21, 2015 17:21
-
-
Save cocreature/7f32b77cef97a873c68b to your computer and use it in GitHub Desktop.
frames experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Note: I am not quite sure if frames is actually aimed at this kind | |
-- of usage but it seems to be made for data science/machine learning | |
-- so I thought it would be a good fit | |
-- I am taking some coursera class on machine learning and wanted to | |
-- implement linear regression using gradient descent using haskell | |
-- instead of octave The first two values in my csv represent the | |
-- parameters and the third the output of the function I am trying to | |
-- calculate | |
-- One problem I have is that I feel like I have to duplicate the code | |
-- a lot. since I just have the parameters and the value I would like | |
-- to write code that works on an arbitrary amount of paramaters, but | |
-- I am not sure how to do this | |
{-# LANGUAGE TypeOperators #-} | |
{-# LANGUAGE FlexibleContexts #-} | |
{-# LANGUAGE DataKinds #-} | |
{-# LANGUAGE TemplateHaskell #-} | |
module Ex1 where | |
import Frames | |
import Frames.CSV | |
import Pipes | |
import Control.Lens | |
import qualified Control.Foldl as L | |
import qualified Data.Foldable as F | |
-- this initializes x1 and x2 as Ints while Doubles would be better, | |
-- but I couldn't figure out how to set the types | |
tableTypes' | |
rowGen {rowTypeName = "Data2" | |
,columnNames = ["x1","x2","y"]} | |
"data/ex1data2.txt" | |
data2Stream :: Producer Data2 IO () | |
data2Stream = readTableOpt data2Parser "data/ex1data2.txt" | |
loadData2 :: IO (Frame Data2) | |
loadData2 = inCoreAoS data2Stream | |
-- needed for the constant of the linear function | |
prependOne :: Frame Data2 -> Frame (Record '["x0" :-> Int, "x1" :-> Int, "x2" :-> Int, "y" :-> Int]) | |
prependOne = fmap ((1::Int)&:) | |
-- all good :) | |
mean :: Frame s -> Getting Int s Int -> Float | |
mean d col = (fromIntegral (L.fold L.sum (view col <$> d))) / (fromIntegral $ frameLength d) | |
-- works just fine :) | |
std :: Frame s -> Getting Int s Int -> Float | |
std d col = | |
let m = mean d col | |
in sqrt $ | |
(/ (fromIntegral (frameLength d) - 1)) $ | |
L.fold L.sum $ | |
(((\x -> | |
(x - m) ** | |
2) . | |
fromIntegral . view col) <$> | |
d) | |
-- my attempt to change the type | |
type X1' = "x1" :-> Double | |
-- this should be polymorphic enough, to be able to apply this to any | |
-- column, but I just tried to apply it to one first | |
-- This should | |
-- ideally apply (x-mean)/std to the first two columns, however just | |
-- changing the type from int to double doesn't seem to work | |
normalizer1 :: Record '[X1] -> Record '[X1'] | |
normalizer1 = mapMono (fromIntegral :: Int -> Double) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment