Created
June 16, 2017 17:25
-
-
Save IronGremlin/f037823daa1a1febbb2e2382cd64eff7 to your computer and use it in GitHub Desktop.
conduitMMapParser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
passesPredicate :: (POExpinDoc -> Bool) -> (Either ParseError POExpinDoc -> Bool) | |
passesPredicate p = either (\_ -> True) (p) | |
both :: (a->c) -> (b->c) -> Either a b -> Either c c | |
both fl fr inp = | |
case inp of | |
Left l -> Left (fl $! l) | |
Right r -> Right (fr $! r) | |
loadFromFile :: (ACustomDoc -> Bool)-> [FilePath] -> Format -> IO (V.Vector (CString,Int,Int)) | |
{-# INLINE loadFromFile #-} | |
loadFromFile fPred iPath format = do | |
let pred = passesPredicate $! fPred | |
let xmap = case format of | |
Wrapped -> DC.lines .| D.filter isDoctype .| D.concatMapAccum unWrap False | |
Unwrapped -> D.map id | |
res <- runConduitRes $ | |
(sequence_ $ map D.sourceFile $ iPath) | |
.| D.filterE (/= (C.toWord8 '\r')) | |
.| xmap | |
.| conduitParserEither (p_UWrapped) | |
.| D.map (fmap snd) | |
.| D.filter pred | |
.| D.map (both (B.toStrict . B.pack . show) (showRaw)) | |
.| forkBS | |
act <- mapFile res | |
return act | |
projectPositions :: MonadIO m => (Either ByteString ByteString) -> Int -> m (Int, [(Int,Int)]) | |
projectPositions inp eofM = do | |
case inp of | |
Right pex -> do | |
let propLen = C.length pex | |
let eofM' = eofM + propLen | |
return $! (eofM', [(eofM,propLen)]) | |
Left _ -> do | |
return $! (eofM, []) | |
mapFile :: (V.Vector (Int,Int)) -> IO (V.Vector (CString,Int,Int)) | |
mapFile inVec = do | |
let totalSize = V.sum . V.map snd $ inVec | |
(ptr, _ , _ , _) <- mmapFilePtr "./outputTempFile" ReadOnly (Just (0,totalSize)) | |
return (V.map (\(x,y) -> (ptr,x,y)) inVec) | |
left :: Either l r -> Maybe l | |
left = either Just (const Nothing) | |
right :: Either l r -> Maybe r | |
right = either (const Nothing) Just | |
forkBS :: MonadResource m => Sink (Either ByteString ByteString) m (V.Vector (Int,Int)) | |
forkBS = | |
getZipSink $ | |
( ZipSink (D.concatMap left .| D.sinkFileBS "./Errors.log") | |
*> ZipSink (D.concatMap right .| D.sinkFileBS "./outputTempFile") | |
*> ZipSink ( D.concatMapAccumM projectPositions 0 | |
.| D.foldMapM (return . V.singleton) ) ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment