Skip to content

Instantly share code, notes, and snippets.

@killerswan
Created April 25, 2012 23:06
Show Gist options
  • Save killerswan/2494260 to your computer and use it in GitHub Desktop.
Save killerswan/2494260 to your computer and use it in GitHub Desktop.
Haskell BLOWS UP
-- Kevin Cantu
-- April 2012
-- Count location
--
-- runtime: 16s on one million lines of CSV
-- infinity secs on 3 million lines (RAM blows up)
-- (ip,lat.000,lon.000)
import Control.DeepSeq
import Control.Monad (when)
import Data.Map (Map)
import Data.List
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
import System.Environment
import System.IO
import Text.CSV
import Text.Printf
-- | Read and parse the CSV file at @path@ with 'parseCSVFromFile'.
--
-- A parse failure is turned into a call to 'error' carrying the shown parse
-- error; the result is wrapped lazily, so that error only fires once the
-- returned records are demanded.  The result type is the expansion of
-- Text.CSV's @CSV@ alias (a list of records, each a list of string fields).
--
-- NOTE: this is where ALL our allocation goes
loadCSV :: FilePath -> IO [[String]]
loadCSV path = fmap (either (error . show) id) (parseCSVFromFile path)
{-
-- an arbitrary test filter
lessThan7 xs =
length xs == 4 && field1' < 7
where
field1 :: String
field1 = xs !! 1
field1' = read field1 :: Int
-}
-- | Key of the count map: a (latitude, longitude) pair, kept as the raw
-- field strings.  Experimentally, strings are faster than actual numbers.
--type Location = (Rational, Rational)
type Location = (String, String)

-- | The initial, empty location map: (lat,lon) --> count.
newMap :: Map Location Integer
newMap = M.empty
-- | Increment the count stored under @key@ in @mm@; a key that is not yet
-- present is inserted with a count of 1.
--
-- Uses the strict 'MS.insertWith' from "Data.Map.Strict", which evaluates the
-- combined count before storing it.  This replaces @insertWith'@, which was
-- deprecated and later removed from "Data.Map" in newer containers releases.
--
-- NOTE: profiling indicates this gets called as many times as readCoord
-- NOTE: sprinkling bangs here makes no improvement
-- NOTE: deepseq (key) makes no improvement
updateMap :: (Ord k, Num a) => Map k a -> k -> Map k a
updateMap mm key = MS.insertWith (+) key 1 mm
-- | Header line printed before the CSV rows of the result.
showTitle :: String
showTitle = "LATITUDE,LONGITUDE,COUNT"
-- | Fold step that turns one (location, count) entry into a CSV row and
-- appends it to the END of the rows collected so far.
--
-- The row is @[lat, lon, show count]@.  The types are written out in full;
-- they coincide with the file's @Location@ alias and Text.CSV's @Record@.
-- Appending with '++' re-walks @acc@ on every call -- this might be slow.
resultToRecord :: Show a => (String, String) -> a -> [[String]] -> [[String]]
resultToRecord loc count acc = acc ++ [row]
  where
    --row = [showRationalDeg (fst loc), showRationalDeg (snd loc), show count]
    row = [fst loc, snd loc, show count]
{-
-- convert a rational degree into string
showRationalDeg deg =
showF . toF $ deg
where
toF rr = (fromRational rr :: Float)
showF ff = printf "%0.4f" ff
-}
-- | Extract the (lat, lon) fields -- the second and third columns -- from a
-- three-field record.  Any other record size is rejected with 'error'.
--
-- NOTE: profiling indicates all our time is here
-- NOTE: sprinkling bangs here makes no improvement
readCoord :: Show a => [a] -> (a, a)
readCoord record =
  case record of
    [_, lat, lon] -> (lat, lon)
    _             -> error $ "wrong size record: " ++ show record
{-
where
--ip = read (record !! 0) :: Integer
lat = read (record !! 1) :: Float
lon = read (record !! 2) :: Float
lat' = toRational lat :: Rational
lon' = toRational lon :: Rational
-}
-- | Pick the CSV input path out of the command-line arguments.
-- Exactly one argument is expected; anything else aborts with a usage error.
getInputFile :: [String] -> FilePath
getInputFile [csvPath] = csvPath
getInputFile _         = error "usage: count csvInputFile"
-- | Logging switch: progress messages are printed only when this is 'True'.
verbose :: Bool
verbose = True

-- | Print a progress message with 'putStrLn' when 'verbose' is on;
-- otherwise do nothing.
now :: String -> IO ()
now msg
  | verbose   = putStrLn msg
  | otherwise = return ()
-- | Entry point: load the CSV file named by the single command-line
-- argument, count how many three-field records share each (lat, lon) pair,
-- then print the title line followed by one CSV row per location.
main :: IO ()
main = do
  args <- getArgs
  now "counting IPs per coordinate..."
  recs <- loadCSV $ getInputFile args
  let coordsMap = createMapFromRecords recs
  now "saving/printing..."
  putStrLn showTitle
  putStrLn . printCSV . M.foldlWithKey prependRecord [] $ coordsMap
  now "ok."
  where
    -- Records that do not have exactly three fields are dropped before
    -- counting; the remaining ones are folded strictly into the map.
    -- NOTE: wrapping the fold in deepseq (on the accumulator and the input)
    -- still doesn't help
    createMapFromRecords =
      foldl' updateMap newMap . map readCoord . filter (\xs -> length xs == 3)

    -- Walk the map left-to-right and put each new row in FRONT of the rows
    -- built so far.  This produces the same row order as right-folding with
    -- an append at the end, but each step is a constant-time prepend instead
    -- of a re-walk of the whole accumulated list.
    prependRecord rows loc count = resultToRecord loc count [] ++ rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment