Created
December 10, 2013 00:53
-
-
Save dpwright/7884004 to your computer and use it in GitHub Desktop.
Super-simple base 64 decoder done for the sake of the exercise. Implemented entirely in terms of base; thus it doesn't make use of ByteString or Text which would probably be faster/better.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Word8 | |
import Data.Bits | |
import Data.Char | |
import Data.List | |
-- Sample text taken from Wikipedia | |
sampleText :: String | |
sampleText = "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=" | |
-- splitEvery is defined in the Data.List.Splits library from package | |
-- "split".. implementing it here to avoid pulling in the dependency. | |
splitEvery :: Int -> [a] -> [[a]] | |
splitEvery n l | null l = [] | |
| otherwise = first : splitEvery n rest | |
where (first, rest) = splitAt n l | |
-- Performs the actual decode. Could get rid of some of the repetitiveness | |
-- of the `cv` function by constructing a list of pairs [(b1, b2), (b2, b3), b3, b4)] | |
-- and then zipping that with the indices 1, 2, 3 to run getByte on, but | |
-- I think just writing it out explicitly is much easier to read. | |
decode :: String -> String | |
decode = concatMap convert . splitEvery 4 | |
where convert = map (chr . fromIntegral) . cv . map ctob64 . stripPadding | |
stripPadding = filter (/= '=') | |
cv (b1:b2:[]) = [ getByte1 b1 b2 ] | |
cv (b1:b2:b3:[]) = [ getByte1 b1 b2, getByte2 b2 b3 ] | |
cv (b1:b2:b3:b4:[]) = [ getByte1 b1 b2, getByte2 b2 b3, getByte3 b3 b4 ] | |
getByte1 b1 b2 = ( b1 `shiftL` 2) .|. ( b2 `shiftR` 4) | |
getByte2 b2 b3 = ( (b2 .&. 0x0f) `shiftL` 4) .|. ( (b3 .&. 0x3c) `shiftR` 2) | |
getByte3 b3 b4 = ( (b3 .&. 0x03) `shiftL` 6) .|. b4 | |
-- Returning a Maybe directly and allowing `decode` to fail with a Maybe | |
-- would be better, but for the sake of the exercise just throwing an error | |
-- on failure. | |
ctob64 :: Char -> Word8 | |
ctob64 c = maybe lookupError fromIntegral $ elemIndex c b64table | |
where b64table = concat [['A'..'Z'], ['a'..'z'], ['0'..'9'], "+/"] | |
lookupError = error $ "Invalid base64 value " ++ show c | |
main :: IO () | |
main = putStrLn $ decode sampleText |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment