Skip to content

Instantly share code, notes, and snippets.

@owainlewis
Last active June 21, 2023 05:30
Show Gist options
  • Save owainlewis/1e7d1e68a6818ee4d50e to your computer and use it in GitHub Desktop.
Save owainlewis/1e7d1e68a6818ee4d50e to your computer and use it in GitHub Desktop.
Gzip Scala
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
import scala.util.Try
/** Helpers for gzip-compressing byte arrays and decompressing them back to text. */
object Gzip {

  /** Gzip-compresses `input` and returns the compressed bytes.
    *
    * @param input raw bytes to compress
    * @return the gzip-encoded bytes
    */
  def compress(input: Array[Byte]): Array[Byte] = {
    val bos = new ByteArrayOutputStream(input.length)
    val gzip = new GZIPOutputStream(bos)
    // close() writes the gzip trailer and also closes `bos`; run it even if write fails.
    try gzip.write(input)
    finally gzip.close()
    bos.toByteArray
  }

  /** Decompresses gzip data and decodes the result as UTF-8 text.
    *
    * The charset is fixed to UTF-8 rather than taken from the implicit default codec,
    * so the result does not depend on JVM or platform settings.
    *
    * @param compressed gzip-encoded bytes
    * @return `Some(text)` on success, `None` if reading or decoding throws
    *         (for example when the input is not gzip data)
    */
  def decompress(compressed: Array[Byte]): Option[String] =
    Try {
      val inputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
      // Always close the stream, even when reading fails.
      try scala.io.Source.fromInputStream(inputStream)(scala.io.Codec.UTF8).mkString
      finally inputStream.close()
    }.toOption
}
/** Round-trip check: text compressed with `Gzip.compress` comes back unchanged from `Gzip.decompress`. */
class GzipSpec extends WordSpecLike with Matchers {
  "The GZIP object" should {
    "decompress a compressed string" in {
      val original = "Hello World"
      val roundTripped = Gzip.decompress(Gzip.compress(original.getBytes("UTF-8")))
      roundTripped shouldBe Some(original)
    }
  }
}
@JThakrar
Copy link

Thanks @owainlewis!
This is very nice and useful.
Just wondering - why does decompress return a String and not an Array[Byte]?
I think returning Array[Byte] would make this useful for a variety of binary and non-binary data.

@phderome
Copy link

phderome commented Jul 2, 2017

I have same question as JThakrar.

@JThakrar
Copy link

Here's my version of decompress.

  /** Decompresses gzip data and returns the raw decompressed bytes.
    *
    * @param compressed gzip-encoded bytes
    * @return the decompressed bytes
    * @note exceptions thrown while opening or reading the gzip stream propagate to the caller
    */
  def decompress(compressed: Array[Byte]): Array[Byte] = {
    val gzipInputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
    try {
      val output = new ByteArrayOutputStream()
      val buffer = new Array[Byte](1024)
      // Loop on read's own end-of-stream signal (-1) instead of polling available().
      var byteCount = gzipInputStream.read(buffer)
      while (byteCount != -1) {
        output.write(buffer, 0, byteCount)
        byteCount = gzipInputStream.read(buffer)
      }
      output.toByteArray
    } finally gzipInputStream.close()
  }

@dalegaspi
Copy link

similar to @JThakrar's...probably simpler but requires Apache Commons IO

/** Decompresses gzip data to raw bytes using Apache Commons IO.
  *
  * @param compressed gzip-encoded bytes
  * @return `Some(bytes)` on success, `None` if opening or reading the gzip stream throws
  */
def decompress(compressed: Array[Byte]): Option[Array[Byte]] =
    Try {
      val inputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
      // Close the stream explicitly once the bytes are read, even if reading fails.
      try org.apache.commons.io.IOUtils.toByteArray(inputStream)
      finally inputStream.close()
    }.toOption

@moneroexamples
Copy link

Thanks. Here is an extra example of sequentially generating gzipped CSV files with os-lib:

Add the following to build.sbt

libraryDependencies += "com.lihaoyi" %% "os-lib" % "0.8.0"

then

package example

import scala.util.{Try, Success, Failure, Random}

import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}

/** Example: writes a gzip-compressed CSV header to a file, then appends batches of
  * separately compressed random rows to the same file.
  */
object Main extends  App {

  // Single destination for the header and every appended batch.
  private val target = os.Path("/tmp/file.gz")

  // Compress the CSV header line and (over)write it as the start of the file.
  val header = List("column1","column2", "column3")
  val header_compressed = Gzip.compress((header.mkString(",")+"\n").getBytes("UTF-8"))
  os.write.over(target, header_compressed)

  // Each pass compresses one batch of rows on its own and appends it to the file.
  for (_ <- 0 to 10) {
    // One CSV row per iteration: three random integers below 100.
    val rows = for (_ <- 0 to 100) yield
      Seq.fill(3)(Random.nextInt(100).toString).mkString(",")

    // Newline-separated rows, with a trailing newline after the last one.
    val batch = Gzip.compress(rows.mkString("", "\n", "\n").getBytes("UTF-8"))

    os.write.append(target, batch)
  }
}

  
/** Helpers for gzip-compressing byte arrays and decompressing them back to text. */
object Gzip {

  /** Gzip-compresses `input` and returns the compressed bytes.
    *
    * @param input raw bytes to compress
    * @return the gzip-encoded bytes
    */
  def compress(input: Array[Byte]): Array[Byte] = {
    val bos = new ByteArrayOutputStream(input.length)
    val gzip = new GZIPOutputStream(bos)
    // close() writes the gzip trailer and also closes `bos`; run it even if write fails.
    try gzip.write(input)
    finally gzip.close()
    bos.toByteArray
  }

  /** Decompresses gzip data and decodes the result as UTF-8 text.
    *
    * The charset is fixed to UTF-8 rather than taken from the implicit default codec,
    * so the result does not depend on JVM or platform settings.
    *
    * @param compressed gzip-encoded bytes
    * @return `Some(text)` on success, `None` if reading or decoding throws
    *         (for example when the input is not gzip data)
    */
  def decompress(compressed: Array[Byte]): Option[String] =
    Try {
      val inputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
      // Always close the stream, even when reading fails.
      try scala.io.Source.fromInputStream(inputStream)(scala.io.Codec.UTF8).mkString
      finally inputStream.close()
    }.toOption
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment