Skip to content

Instantly share code, notes, and snippets.

@ktoso
Created June 20, 2013 10:22
Show Gist options
  • Save ktoso/5821704 to your computer and use it in GitHub Desktop.
Save ktoso/5821704 to your computer and use it in GitHub Desktop.
unzipper
package tv.yap.common.zip
import collection.mutable
import java.util.zip.{ZipEntry, ZipFile}
import java.io._
import tv.yap.logging.Logging
import collection.JavaConversions._
import tv.yap.common.util.{TimedVerb, StreamOperations}
import org.apache.commons.io.{IOUtils, FilenameUtils}
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import com.google.common.io.{ByteStreams, Files}
import tv.yap.common.format.PrettyFileSize
/**
 * Unpacks archives (zip files and bzip2-compressed tarballs) into a target directory.
 *
 * Zip archives are read through `java.util.zip`; bzip2 tarballs are extracted by
 * shelling out to the system `tar` command.
 */
class Unzipper extends Logging
  with StreamOperations {

  /** Size, in bytes, of the buffer used when writing extracted zip entries to disk. */
  private val BufferSize = 2048

  /**
   * Unpack an archive's contents and return the set of files extracted.
   *
   * The archive type is detected from the file extension (compared case-insensitively):
   * `zip` is delegated to `unZip`, while `tbz` and `bz` are delegated to `unBZip`.
   * The `ZipFile` opened for a zip archive is closed before this method returns.
   *
   * @param file the archive to extract
   * @param targetLocation the directory the archive's contents are extracted into
   * @param filesToExtractFilter a (String) => Boolean function which gets a filename passed in and should decide
   *                             if we should extract this file (we can for example only extract 1 file out of a zip).
   *                             By default all files are extracted.
   * @return a set of extracted files from the archive. May be shorter than the total number of files - due to filtering
   * @throws IllegalArgumentException if the file's extension is not one of the supported archive types
   */
  def unpack(file: File, targetLocation: File, filesToExtractFilter: (String) => Boolean = (it: String) => true): Set[File] =
    FilenameUtils.getExtension(file.getName).toLowerCase(java.util.Locale.ROOT) match {
      case "zip" =>
        val zipFile = new ZipFile(file)
        // unZip is eager (it materialises a Set), so closing right afterwards is safe
        try unZip(zipFile, targetLocation, filesToExtractFilter)
        finally zipFile.close()
      case "tbz" | "bz" =>
        unBZip(file, targetLocation, filesToExtractFilter)
      case other =>
        throw new IllegalArgumentException(
          "Unsupported archive extension [%s] for file [%s]".format(other, file.getAbsolutePath))
    }

  /**
   * Extract the entries of an already opened zip file into `targetLocation`.
   *
   * The filter receives each entry's name as stored in the archive (i.e. including any
   * directory prefix). Directory entries that pass the filter are created on disk but are
   * not part of the returned set. The passed-in `ZipFile` is NOT closed by this method.
   *
   * @param file the opened zip file to read entries from
   * @param targetLocation the directory the entries are written into
   * @param filesToExtractFilter decides, per entry name, whether the entry is extracted
   * @return the files written to disk
   * @throws IOException if an entry would be written outside of `targetLocation`, or on any I/O failure
   */
  private[zip] def unZip(file: ZipFile, targetLocation: File, filesToExtractFilter: (String) => Boolean = (it: String) => true): Set[File] = {
    val canonicalRoot = targetLocation.getCanonicalPath
    val entries = file.entries()

    // Walk the java.util.Enumeration explicitly instead of relying on implicit Java conversions
    Iterator.continually(entries)
      .takeWhile(_.hasMoreElements)
      .map[ZipEntry](_.nextElement())
      .filter(entry => filesToExtractFilter(entry.getName))
      .flatMap(entry => extractEntry(file, entry, targetLocation, canonicalRoot).iterator)
      .toSet
  }

  /**
   * Write a single zip entry below `targetLocation`.
   *
   * @return the written file, or `None` when the entry is a directory
   */
  private def extractEntry(zip: ZipFile, entry: ZipEntry, targetLocation: File, canonicalRoot: String): Option[File] = {
    val targetFilename = targetLocation.getAbsolutePath + File.separator + entry.getName
    val target = new File(targetFilename)

    // Guard against "zip slip": entry names such as "../../etc/passwd" must not escape the target directory
    val rootPrefix = if (canonicalRoot.endsWith(File.separator)) canonicalRoot else canonicalRoot + File.separator
    val canonicalTarget = target.getCanonicalPath
    if (canonicalTarget != canonicalRoot && !canonicalTarget.startsWith(rootPrefix))
      throw new IOException(
        "Zip entry [%s] would be extracted outside of [%s]".format(entry.getName, canonicalRoot))

    if (entry.isDirectory) {
      target.mkdirs()
      None
    } else {
      logger.info("Extracting [%s]".format(targetFilename))
      // entries inside sub-directories need their parent directories to exist first
      Option(target.getParentFile).foreach(_.mkdirs())

      val in = new BufferedInputStream(zip.getInputStream(entry))
      try {
        val out = new BufferedOutputStream(new FileOutputStream(target), BufferSize)
        try {
          StreamOperations.copy(in, out)
          out.flush()
        } finally out.close()
      } finally in.close()

      Some(target)
    }
  }

  /**
   * Extract a bzip2-compressed tarball by invoking the system `tar` command.
   *
   * The archive is extracted into an `out` directory below `targetLocation`. Afterwards every
   * regular file found under that directory is checked against the filter; files that do not
   * pass are deleted. Note that, unlike `unZip`, the filter receives only the file's base name
   * (without any directory prefix).
   *
   * @param file the tarball to extract
   * @param targetLocation the directory below which the `out` extraction directory is created
   * @param filesToExtractFilter decides, per file name, whether the extracted file is kept
   * @return the extracted files that passed the filter
   * @throws IOException if the output directory cannot be created or `tar` exits with a non-zero code
   */
  private[zip] def unBZip(file: File, targetLocation: File, filesToExtractFilter: (String) => Boolean = (it: String) => true): Set[File] = {
    val targetOutLocation = new File(targetLocation, "out")
    // mkdirs (rather than mkdir) so that a not-yet-existing targetLocation is created as well
    if (!targetOutLocation.isDirectory && !targetOutLocation.mkdirs())
      throw new IOException("Unable to create directory [%s]".format(targetOutLocation.getAbsolutePath))

    import scala.sys.process._

    logger.info("Extracting [%s]...".format(file.getAbsolutePath))

    // Pass the arguments as a sequence: a single command string would be split on whitespace,
    // breaking on paths that contain spaces
    val command = Seq("tar", "-C", targetOutLocation.getAbsolutePath, "-xjvf", file.getAbsolutePath)
    val exitCode = command.!
    if (exitCode != 0)
      throw new IOException(
        "Extracting [%s] failed, tar exited with code %d".format(file.getAbsolutePath, exitCode))

    // Recursively collect all regular files below the given directory
    def listFiles(d: File): List[File] = {
      // File.listFiles returns null when the path is not a directory or cannot be read
      val children = Option(d.listFiles).map(_.toList).getOrElse(Nil)
      val (directories, files) = children.partition(_.isDirectory)
      files ++ directories.flatMap(listFiles)
    }

    val fileSet = listFiles(targetOutLocation).toSet
    val (keep, remove) = fileSet.partition(f => filesToExtractFilter(f.getName))

    remove foreach { f =>
      logger.debug("Deleting [%s]".format(f.getAbsolutePath))
      f.delete()
    }

    logger.debug("Returning filtered files: " + keep.map(_.getName).mkString(", "))
    keep
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment