Skip to content

Instantly share code, notes, and snippets.

@plaster
Created February 17, 2013 15:18
Show Gist options
  • Select an option

  • Save plaster/4971850 to your computer and use it in GitHub Desktop.

Select an option

Save plaster/4971850 to your computer and use it in GitHub Desktop.
ISO-2022-JP な行と UTF-8 な行の混ざったファイルを読んでみるテスト
import Codec.Text.IConv as IConv
import Codec.Binary.UTF8.String as UTF8
import Data.ByteString as B
import Data.ByteString.Lazy as BL
import System.IO as IO
-- | Find the first candidate encoding under which the input converts cleanly.
--
-- The candidates are tried in list order with 'IConv.convertStrictly'.
-- Returns @Right (encoding, converted)@ for the first candidate that
-- succeeds, or @Left input@ (the input, unchanged) once every candidate has
-- been rejected or when the candidate list is empty.
convertDetect
  :: [IConv.EncodingName]  -- ^ candidate source encodings, tried in order
  -> IConv.EncodingName    -- ^ destination encoding
  -> BL.ByteString         -- ^ bytes to convert
  -> Either BL.ByteString (IConv.EncodingName, BL.ByteString)
convertDetect [] _ src = Left src
convertDetect (candidate : rest) dstEnc src =
  -- 'IConv.convertStrictly' delivers the converted bytes in 'Left'; its
  -- 'Right' carries the failure, which is dropped in favour of a retry.
  either accept (const retry) (IConv.convertStrictly candidate dstEnc src)
  where
    accept converted = Right (candidate, converted)
    retry = convertDetect rest dstEnc src
-- | Read one line from the handle as raw bytes and decode it to a 'String'.
--
-- The line is converted to UTF-8 via 'convertDetect' using the given
-- candidate encodings, and the resulting UTF-8 bytes are then decoded.
-- Returns @Right (encoding, text)@ naming the candidate that matched, or
-- @Left bytes@ with the unconverted line when no candidate matched.
hGetConvertedLine'
  :: [IConv.EncodingName]  -- ^ candidate source encodings, tried in order
  -> IO.Handle             -- ^ handle to read the line from
  -> IO (Either BL.ByteString (IConv.EncodingName, String))
hGetConvertedLine' srcEncs h = fmap detectAndDecode (B.hGetLine h)
  where
    -- The line is read as a strict ByteString; wrap it as a single lazy
    -- chunk because 'convertDetect' works on lazy ByteStrings.
    detectAndDecode rawLine =
      fmap decodeMatched (convertDetect srcEncs "UTF-8" (BL.fromChunks [rawLine]))
    decodeMatched (matchedEnc, utf8Bytes) =
      (matchedEnc, UTF8.decode (BL.unpack utf8Bytes))
-- | Like @hGetConvertedLine'@, but without reporting which encoding matched.
--
-- Returns @Right text@ for a successfully converted line, or @Left bytes@
-- with the unconverted line when no candidate encoding matched.
hGetConvertedLine
  :: [IConv.EncodingName]  -- ^ candidate source encodings, tried in order
  -> IO.Handle             -- ^ handle to read the line from
  -> IO (Either BL.ByteString String)
hGetConvertedLine srcEncs h =
  -- Outer 'fmap' maps over the IO action, inner 'fmap' over the 'Right'
  -- branch; 'snd' drops the encoding name and keeps the decoded text.
  fmap (fmap snd) (hGetConvertedLine' srcEncs h)
-- | Run an action on the handle repeatedly until the handle reports EOF.
--
-- EOF is checked before every run, so the action is never invoked on a
-- handle that is already at end of file. The results are returned in the
-- order the action produced them.
hRepeatToEOF
  :: (IO.Handle -> IO a)  -- ^ action to run while input remains
  -> IO.Handle            -- ^ handle to check for EOF and pass to the action
  -> IO [a]
hRepeatToEOF op h = loop
  where
    loop = do
      atEnd <- IO.hIsEOF h
      if atEnd
        then return []
        else do
          x <- op h
          -- Collect the remaining results, then put this one in front.
          fmap (x :) loop
-- | Line reader that tries ISO-2022-JP first and UTF-8 second, and reports
-- which of the two matched.
myHGetConvertedLine
  :: IO.Handle
  -> IO (Either BL.ByteString (IConv.EncodingName, String))
myHGetConvertedLine h = hGetConvertedLine' ["ISO-2022-JP","UTF-8"] h
-- | Path of the input file that 'main' reads.
testFile :: FilePath
testFile = "/home/plaster/work/_testdata/2011/02/02/log"
-- | Read every line of 'testFile' with 'myHGetConvertedLine' and print one
-- result per line.
--
-- All lines are collected inside 'IO.withFile', i.e. before the handle is
-- closed, because 'hRepeatToEOF' keeps reading until the handle reports EOF.
main :: IO ()
main = do
  ls <- IO.withFile testFile IO.ReadMode $ hRepeatToEOF myHGetConvertedLine
  -- mapM_ rather than mapM: the unit results of 'print' are not needed, so
  -- there is no reason to build a list of them, and 'main' stays @IO ()@.
  mapM_ print ls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment