Created
February 17, 2013 15:18
-
-
Save plaster/4971850 to your computer and use it in GitHub Desktop.
ISO-2022-JP な行と UTF-8 な行の混ざったファイルを読んでみるテスト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import Codec.Text.IConv as IConv | |
| import Codec.Binary.UTF8.String as UTF8 | |
| import Data.ByteString as B | |
| import Data.ByteString.Lazy as BL | |
| import System.IO as IO | |
-- | Try each candidate source encoding, in list order, and convert @src@ to
-- @dstEnc@ with the first candidate for which 'IConv.convertStrictly'
-- produces a 'Left' result.
--
-- Returns 'Right' with the candidate that was used and the converted bytes,
-- or 'Left' with the untouched input once the candidate list is exhausted.
convertDetect :: [IConv.EncodingName] -> IConv.EncodingName -> BL.ByteString
              -> Either BL.ByteString (IConv.EncodingName, BL.ByteString)
convertDetect candidates dstEnc src = go candidates
  where
    -- No candidates left: hand the original bytes back unchanged.
    go [] = Left src
    go (enc : rest) =
      -- The 'Left' side of 'IConv.convertStrictly' carries the converted
      -- output; any 'Right' result means "move on to the next candidate".
      either
        (\converted -> Right (enc, converted))
        (const (go rest))
        (IConv.convertStrictly enc dstEnc src)
-- | Read one line of raw bytes from the handle and convert it to UTF-8 via
-- 'convertDetect', trying the encodings in @srcEncs@ in order.
--
-- On success the result is 'Right' with the encoding that was used and the
-- line decoded to a 'String'; otherwise it is 'Left' with the raw line
-- bytes wrapped as a lazy 'BL.ByteString'.
hGetConvertedLine' :: [IConv.EncodingName] -> IO.Handle
                   -> IO (Either BL.ByteString (IConv.EncodingName, String))
hGetConvertedLine' srcEncs h = fmap detect (B.hGetLine h)
  where
    -- The strict line is lifted to a single-chunk lazy ByteString because
    -- 'convertDetect' works on lazy ByteStrings.
    detect rawLine =
      either Left (Right . decodeSecond)
        (convertDetect srcEncs "UTF-8" (BL.fromChunks [rawLine]))

    -- Keep the detected encoding, decode the UTF-8 bytes into a String.
    decodeSecond (enc, utf8Bytes) = (enc, UTF8.decode (BL.unpack utf8Bytes))
-- | Like 'hGetConvertedLine'', but discards the name of the encoding that
-- was used: 'Right' carries only the decoded line, 'Left' still carries the
-- raw bytes of a line no candidate could convert.
hGetConvertedLine :: [IConv.EncodingName] -> IO.Handle
                  -> IO (Either BL.ByteString String)
hGetConvertedLine srcEncs h =
  fmap
    (either Left (\(_, decoded) -> Right decoded))
    (hGetConvertedLine' srcEncs h)
-- | Run @op@ on the handle repeatedly until 'IO.hIsEOF' reports end of
-- file, collecting the results in the order they were produced.
--
-- The end-of-file check happens before every call to @op@, so a handle that
-- is already at end of file yields an empty list without calling @op@.
hRepeatToEOF :: (IO.Handle -> IO a) -> IO.Handle -> IO [a]
hRepeatToEOF op h = loop
  where
    loop = do
      atEnd <- IO.hIsEOF h
      if atEnd
        then return []
        else op h >>= \item -> fmap (item :) loop
-- | 'hGetConvertedLine'' fixed to the candidate encodings used by 'main':
-- ISO-2022-JP is tried first, then UTF-8.
myHGetConvertedLine :: IO.Handle
                    -> IO (Either BL.ByteString (IConv.EncodingName, String))
myHGetConvertedLine h = hGetConvertedLine' ["ISO-2022-JP", "UTF-8"] h
-- | Hard-coded path of the input file that 'main' reads.
testFile :: FilePath
testFile = "/home/plaster/work/_testdata/2011/02/02/log"
-- | Read every line of 'testFile' with 'myHGetConvertedLine' and print each
-- conversion result ('Left' raw bytes or 'Right' (encoding, text)).
main :: IO ()
main = do
  -- All lines are read inside 'IO.withFile', before the handle is closed.
  results <- IO.withFile testFile IO.ReadMode (hRepeatToEOF myHGetConvertedLine)
  -- 'mapM_' rather than 'mapM': the printed results are only needed for
  -- their effect, so no list of units is built and 'main' is a plain 'IO ()'.
  mapM_ print results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment