-
-
Save owainlewis/1e7d1e68a6818ee4d50e to your computer and use it in GitHub Desktop.
import java.io.{ByteArrayOutputStream, ByteArrayInputStream} | |
import java.util.zip.{GZIPOutputStream, GZIPInputStream} | |
import scala.util.Try | |
/** Helpers for gzip-compressing byte arrays and reading them back as text. */
object Gzip {

  /** Compresses `input` into gzip format.
    *
    * @param input raw bytes to compress; may be empty
    * @return the complete gzip stream (header, deflated data and trailer)
    */
  def compress(input: Array[Byte]): Array[Byte] = {
    val bos = new ByteArrayOutputStream(input.length)
    val gzip = new GZIPOutputStream(bos)
    // close() finishes the gzip stream, so it must run before the bytes are
    // read out, and it must also run when write() fails.
    try gzip.write(input)
    finally gzip.close()
    bos.toByteArray
  }

  /** Decompresses gzip data and decodes the result as UTF-8 text.
    *
    * The charset is fixed to UTF-8 so the result does not depend on the
    * platform default encoding of the JVM doing the decompression.
    *
    * @param compressed bytes in gzip format
    * @return `Some(text)` on success, `None` if any non-fatal error occurs
    *         (for example the input is not valid gzip data)
    */
  def decompress(compressed: Array[Byte]): Option[String] =
    Try {
      val inputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
      // Close the stream on both the success and the failure path.
      try scala.io.Source.fromInputStream(inputStream, "UTF-8").mkString
      finally inputStream.close()
    }.toOption
}
/** Round-trip spec: text compressed with `Gzip.compress` must come back
  * unchanged from `Gzip.decompress`.
  */
class GzipSpec extends WordSpecLike with Matchers {

  "The GZIP object" should {

    "decompress a compressed string" in {
      val original = "Hello World"
      val compressed = Gzip.compress(original.getBytes("UTF-8"))
      val roundTripped = Gzip.decompress(compressed)

      roundTripped shouldBe Some("Hello World")
    }
  }
}
@sbhola only if you need it in an ASCII string for sending via a transport like SMTP or embedding within some other file format. The raw binary data will be smaller than the Base64 representation.
Is there any dependency we need to include to run this code? I am getting the following error message: "Cannot resolve symbol wordspeclike".
Thanks @owainlewis!
This is very nice and useful.
Just wondering - why does decompress return a String and not an Array[Byte]?
I think having Array[Byte] will make this useful for a variety of binary and non binary items.
I have the same question as JThakrar.
Here's my version of decompress.
/** Decompresses gzip data into the raw bytes it contains.
  *
  * @param compressed bytes in gzip format
  * @return the decompressed bytes (empty if the gzip stream holds no data)
  * @throws java.io.IOException if `compressed` is not valid gzip data
  */
def decompress(compressed: Array[Byte]): Array[Byte] = {
  val gzipInputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
  try {
    val output = new ByteArrayOutputStream()
    val buffer = new Array[Byte](1024)
    // read() returns -1 at end of stream; drive the loop off that rather than
    // off available(), which is not meant to signal end of data.
    Iterator
      .continually(gzipInputStream.read(buffer))
      .takeWhile(_ != -1)
      .foreach(byteCount => output.write(buffer, 0, byteCount))
    output.toByteArray
  } finally gzipInputStream.close()
}
similar to @JThakrar's...probably simpler but requires Apache Commons IO
/** Decompresses gzip data into raw bytes using Apache Commons IO.
  *
  * @param compressed bytes in gzip format
  * @return `Some(bytes)` on success, `None` if any non-fatal error occurs
  *         (for example the input is not valid gzip data)
  */
def decompress(compressed: Array[Byte]): Option[Array[Byte]] =
  Try {
    val inputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
    // IOUtils.toByteArray leaves closing to the caller, so close the stream
    // here on both the success and the failure path.
    try org.apache.commons.io.IOUtils.toByteArray(inputStream)
    finally inputStream.close()
  }.toOption
Thanks. Here is an extra example that sequentially generates gzipped CSV data with os-lib:
Add the following to build.sbt
libraryDependencies += "com.lihaoyi" %% "os-lib" % "0.8.0"
then
package example
import scala.util.{Try, Success, Failure, Random}
import java.io.{ByteArrayOutputStream, ByteArrayInputStream}
import java.util.zip.{GZIPOutputStream, GZIPInputStream}
/** Demo: writes a gzip-compressed CSV header to /tmp/file.gz, then appends
  * eleven further gzip-compressed batches of random rows to the same file.
  *
  * Every chunk is compressed separately with `Gzip.compress`, so the file is
  * the concatenation of those independently compressed chunks.
  */
object Main extends App {
  // create header for a csv file
  val header = List("column1","column2", "column3")
  val header_compressed = Gzip.compress(s"${header.mkString(",")}\n".getBytes("UTF-8"))

  // Destination shared by the initial write and every append below.
  private val target = os.Path("/tmp/file.gz")
  os.write.over(target, header_compressed)

  for (_ <- 0 to 10) {
    // One batch: 101 rows of three random integers in [0, 100).
    val rows = Seq.fill(101) {
      Seq.fill(3)(Random.nextInt(100).toString).mkString(",")
    }
    val batch = Gzip.compress(s"${rows.mkString("\n")}\n".getBytes("UTF-8"))
    os.write.append(target, batch)
  }
}
/** Helpers for gzip-compressing byte arrays and reading them back as text. */
object Gzip {

  /** Compresses `input` into gzip format.
    *
    * @param input raw bytes to compress; may be empty
    * @return the complete gzip stream (header, deflated data and trailer)
    */
  def compress(input: Array[Byte]): Array[Byte] = {
    val bos = new ByteArrayOutputStream(input.length)
    val gzip = new GZIPOutputStream(bos)
    // close() finishes the gzip stream, so it must run before the bytes are
    // read out, and it must also run when write() fails.
    try gzip.write(input)
    finally gzip.close()
    bos.toByteArray
  }

  /** Decompresses gzip data and decodes the result as UTF-8 text.
    *
    * The charset is fixed to UTF-8 so the result does not depend on the
    * platform default encoding of the JVM doing the decompression.
    *
    * @param compressed bytes in gzip format
    * @return `Some(text)` on success, `None` if any non-fatal error occurs
    *         (for example the input is not valid gzip data)
    */
  def decompress(compressed: Array[Byte]): Option[String] =
    Try {
      val inputStream = new GZIPInputStream(new ByteArrayInputStream(compressed))
      // Close the stream on both the success and the failure path.
      try scala.io.Source.fromInputStream(inputStream, "UTF-8").mkString
      finally inputStream.close()
    }.toOption
}
Shouldn't you Base64-encode the output here?