Skip to content

Instantly share code, notes, and snippets.

@dgfitch
Created April 14, 2010 15:21
Show Gist options
  • Save dgfitch/365910 to your computer and use it in GitHub Desktop.
Save dgfitch/365910 to your computer and use it in GitHub Desktop.
(*
A parser needs to compress input as it receives it,
but it may receive the characters in small chunks.
How does the .NET System.IO.Compression namespace
deal with small chunks? Let's see the worst case!
*)
(*
Renders the bytes held by a MemoryStream as a string, turning every byte
into the char with the same numeric value (no text decoding is applied).
The script uses this only so that compressed output can be measured with
String.Length.

x - the stream whose contents are converted; ToArray returns the whole
    contents irrespective of the stream's current Position.
*)
let streamToString (x:System.IO.MemoryStream) =
  (* fully qualified type name: the script never opens System.IO *)
  System.String(x.ToArray() |> Array.map char)
(*
GZip-compresses the ASCII encoding of s into a fresh MemoryStream and
returns that stream.

oneAtATime - true: hand the compressor one byte per Write call (the worst
             case this script is probing); false: one Write call for the
             whole input.
s          - text to compress; it is encoded with Encoding.ASCII first, so
             non-ASCII characters are not preserved.

The returned stream has already been closed along with the compressor;
callers in this script only read it through ToArray().
*)
let compress (oneAtATime:bool) (s:string) =
  let stream = new System.IO.MemoryStream()
  (* `use` disposes the compressor when the function exits, even if a Write
     throws. Disposal flushes the remaining gzip data into `stream` before
     the caller receives the return value. *)
  use gzip = new System.IO.Compression.GZipStream(stream, System.IO.Compression.CompressionMode.Compress)
  let bytes = System.Text.Encoding.ASCII.GetBytes s
  (* the main difference, write the bytes in teeny chunks or all at once *)
  if oneAtATime then
    for b in bytes do
      gzip.Write([|b|], 0, 1)
  else
    gzip.Write(bytes, 0, bytes.Length)
  stream
(* Compresses s with a single Write call and returns the compressed bytes
   rendered as a string. *)
let compressToString (s:string) =
  s |> compress false |> streamToString
(* Compresses s one byte per Write call and returns the compressed bytes
   rendered as a string. *)
let compressOneAtATimeToString (s:string) =
  s |> compress true |> streamToString
(*
Compresses s both ways and prints the input length followed by the length
of each result: the single-write result first, the byte-at-a-time result
second.
*)
let comparison (s:string) =
  let allAtOnce = (compressToString s).Length
  let byteByByte = (compressOneAtATimeToString s).Length
  printfn "compressing from %i: %i chars vs. %i chars" s.Length allAtOnce byteByByte
(*
Builds a test input: the character 'X' repeated len times.

len - number of characters wanted. 0 yields the empty string; a negative
      value raises System.ArgumentOutOfRangeException.
*)
let input (len:int) =
  (* String(char, count) builds the run directly, without materialising a
     [1..len] list or appending one character at a time *)
  System.String('X', len)
(* Generates an all-'X' input of the given length and prints how the two
   compression strategies compare on it. *)
let check len = len |> input |> comparison
(* run the experiment on progressively larger inputs *)
[1; 10; 100; 100000] |> List.iter check
(*
compressing from 1: 121 chars vs. 121 chars
compressing from 10: 122 chars vs. 137 chars
compressing from 100: 124 chars vs. 294 chars
compressing from 100000: 972 chars vs. 175119 chars
Progressively worse!
*)
@dgfitch
Copy link
Author

dgfitch commented Apr 14, 2010

Oddly, the worst case gets BETTER when the input data is randomized.

Solution, thanks to TheOnionKnight: Add a BufferedStream.

(*
GZip-compresses the ASCII encoding of s into a fresh MemoryStream and
returns that stream. Writes go through a 10000-byte BufferedStream placed
in front of the compressor.

oneAtATime - true: issue one Write call per byte; false: one Write call
             for the whole input. Either way the calls land on the buffer,
             not directly on the compressor.
s          - text to compress; it is encoded with Encoding.ASCII first, so
             non-ASCII characters are not preserved.

The returned stream has already been closed along with the compressor;
read it through ToArray().
*)
let compress (oneAtATime:bool) (s:string) =
  let stream = new System.IO.MemoryStream()
  (* `use` values are disposed in reverse order when the function exits,
     even if a Write throws. The buffer is flushed into the compressor
     first, then the compressor finishes the gzip data in `stream`. This
     happens before the caller receives the return value. *)
  use gzip = new System.IO.Compression.GZipStream(stream, System.IO.Compression.CompressionMode.Compress)
  use buffered = new System.IO.BufferedStream(gzip, 10000)
  let bytes = System.Text.Encoding.ASCII.GetBytes s
  if oneAtATime then
    for b in bytes do
      buffered.Write([|b|], 0, 1)
  else
    buffered.Write(bytes, 0, bytes.Length)
  stream

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment