Created
April 14, 2010 15:21
-
-
Save dgfitch/365910 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(*
A parser needs to compress input as it receives it,
but it may receive the characters in small chunks.
How does the .NET System.IO.Compression namespace
deal with small chunks? Let's see the worst case!
*)
(* Turns the raw contents of a MemoryStream into a string by mapping each
   byte to the char with the same numeric value (no text decoding is done).
   The resulting string has exactly one char per byte in the stream. *)
let streamToString (x:System.IO.MemoryStream) =
    (* The type is fully qualified, like every other .NET name in this file,
       so the definition does not depend on an `open System.IO`. *)
    let bytes = x.ToArray()
    System.String(Array.map char bytes)
(* Gzip-compresses the ASCII encoding of `s` into a fresh MemoryStream and
   returns that stream.
     oneAtATime = true  : every byte is passed to the GZipStream in its own Write call
     oneAtATime = false : the whole byte array is passed in a single Write call
   The GZipStream is disposed before the function returns so that all
   compressed output has been flushed into the MemoryStream.
   NOTE(review): disposing the GZipStream presumably closes the returned
   MemoryStream as well; callers here only read it through ToArray(). *)
let compress (oneAtATime:bool) (s:string) =
    let stream = new System.IO.MemoryStream()
    let bytes = System.Text.Encoding.ASCII.GetBytes s
    (* `use` disposes the GZipStream when this scope exits, even if a Write throws *)
    use gzip = new System.IO.Compression.GZipStream(stream, System.IO.Compression.CompressionMode.Compress)
    (* the main difference, write the bytes in teeny chunks or all at once *)
    if oneAtATime then
        for b in bytes do
            gzip.Write([|b|], 0, 1)
    else
        gzip.Write(bytes, 0, bytes.Length)
    stream
(* Compresses `s` with one bulk Write and returns the compressed bytes as a string. *)
let compressToString s =
    streamToString (compress false s)
(* Compresses `s` writing a single byte per Write call and returns the compressed bytes as a string. *)
let compressOneAtATimeToString s =
    streamToString (compress true s)
(* Compresses `s` both ways and prints the input length followed by the
   length of the bulk-write result and of the byte-at-a-time result. *)
let comparison (s:string) =
    let bulkLength = (compressToString s).Length
    let chunkedLength = (compressOneAtATimeToString s).Length
    printfn "compressing from %i: %i chars vs. %i chars" s.Length bulkLength chunkedLength
(* Builds the test input: a string consisting of `len` copies of 'X'.
   Uses the String(char, count) constructor directly instead of appending in
   a loop over a materialised [1..len] list.  A negative `len` raises
   ArgumentOutOfRangeException. *)
let input (len:int) =
    System.String('X', len)
(* Runs the comparison on a generated input of the given length. *)
let check len = comparison (input len)
(* Run the experiment for growing input sizes, smallest first. *)
List.iter check [1; 10; 100; 100000]
(*
compressing from 1: 121 chars vs. 121 chars
compressing from 10: 122 chars vs. 137 chars
compressing from 100: 124 chars vs. 294 chars
compressing from 100000: 972 chars vs. 175119 chars
Progressively worse!
*)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Oddly, the worst case gets BETTER when the input data is randomized.
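Solution, thanks to TheOnionKnight: add a BufferedStream, i.e. write into a System.IO.BufferedStream wrapped around the GZipStream so that the tiny writes are collected into larger chunks before they reach the compressor.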