Created
June 20, 2013 10:22
-
-
Save ktoso/5821704 to your computer and use it in GitHub Desktop.
unzipper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package tv.yap.common.zip | |
import collection.mutable | |
import java.util.zip.{ZipEntry, ZipFile} | |
import java.io._ | |
import tv.yap.logging.Logging | |
import collection.JavaConversions._ | |
import tv.yap.common.util.{TimedVerb, StreamOperations} | |
import org.apache.commons.io.{IOUtils, FilenameUtils} | |
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream | |
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream | |
import com.google.common.io.{ByteStreams, Files} | |
import tv.yap.common.format.PrettyFileSize | |
/**
 * Unpacks archives into a target directory (typically a temporary location).
 *
 * Zip archives are read in-process through `java.util.zip`; bzip2-compressed tarballs are
 * handed to the external `tar` command.
 */
class Unzipper extends Logging
  with StreamOperations {

  /** Buffer size, in bytes, of the output stream used when writing zip entries to disk. */
  private val BufferSize = 2048

  /**
   * Unpack an archive's contents and return the set of files extracted.
   *
   * The archive type is detected from the file extension (case-insensitively) and the work is delegated
   * to the appropriate implementation: `zip` goes to [[unZip]], `tbz` and `bz` go to [[unBZip]].
   *
   * @param file the archive to extract
   * @param targetLocation directory to extract into; zip entries land directly below it, tarball contents
   *                       land in an `out` sub-directory of it
   * @param filesToExtractFilter a (String) => Boolean function which gets a filename passed in and should decide
   *                             if we should extract this file (we can for example only extract 1 file out of a zip).
   *                             For zip archives the name is the entry name as stored in the archive; for tarballs
   *                             it is the base name of the extracted file. By default all files are extracted.
   * @return a set of extracted files from the archive. May be shorter than the total number of files - due to filtering
   * @throws IllegalArgumentException if the file extension is not one of the supported archive types
   */
  def unpack(file: File, targetLocation: File, filesToExtractFilter: (String) => Boolean = (it: String) => true): Set[File] =
    FilenameUtils.getExtension(file.getName).toLowerCase(java.util.Locale.ROOT) match {
      case "zip" =>
        // We open the ZipFile here, so we are responsible for releasing its file handle.
        val zip = new ZipFile(file)
        try unZip(zip, targetLocation, filesToExtractFilter)
        finally zip.close()

      case "tbz" | "bz" =>
        unBZip(file, targetLocation, filesToExtractFilter)

      case other =>
        throw new IllegalArgumentException(
          "Unsupported archive extension [%s] for file [%s]".format(other, file.getAbsolutePath))
    }

  /**
   * Extract the entries of an already opened zip file below `targetLocation`.
   *
   * Directory entries are created on disk but are not part of the returned set. The caller keeps
   * ownership of `file`; it is not closed here.
   *
   * @param file the opened zip archive
   * @param targetLocation directory the entries are written below
   * @param filesToExtractFilter predicate on the entry name (as stored in the archive); only matching entries are extracted
   * @return the files written to disk
   * @throws IOException if an entry name would resolve to a path outside of `targetLocation`
   */
  private[zip] def unZip(file: ZipFile, targetLocation: File, filesToExtractFilter: (String) => Boolean = (it: String) => true): Set[File] = {
    import scala.collection.JavaConverters._

    val extractedEntries = mutable.HashSet[File]()

    for (entry <- file.entries().asScala) {
      if (filesToExtractFilter(entry.getName)) {
        val targetFilename = targetLocation.getAbsolutePath + File.separator + entry.getName
        val target = new File(targetFilename)

        // Guard against "zip slip": entry names such as "../../etc/passwd" must not escape the target directory.
        if (!isInside(targetLocation, target))
          throw new IOException("Zip entry [%s] would be extracted outside of [%s]".format(entry.getName, targetLocation.getAbsolutePath))

        if (entry.isDirectory) {
          target.mkdirs()
        } else {
          logger.info("Extracting [%s]".format(targetFilename))

          // Entries may live in sub-directories which have no explicit directory entry of their own.
          Option(target.getParentFile).foreach(_.mkdirs())

          val is = new BufferedInputStream(file.getInputStream(entry))
          try {
            val dest = new BufferedOutputStream(new FileOutputStream(targetFilename), BufferSize)
            try {
              StreamOperations.copy(is, dest)
              dest.flush()
            } finally {
              dest.close()
            }
          } finally {
            is.close()
          }

          extractedEntries += target
        }
      }
    }

    extractedEntries.toSet // return immutable version
  }

  /**
   * Extract a bzip2-compressed tarball by running the external `tar` command, then drop every
   * extracted file that the filter rejects.
   *
   * The archive is always extracted completely into `targetLocation/out`; filtering happens afterwards
   * by deleting the unwanted files.
   *
   * @param file the tarball to extract
   * @param targetLocation directory in which the `out` extraction directory is created
   * @param filesToExtractFilter predicate on the base name of each extracted file; files it rejects are deleted
   * @return the extracted files which passed the filter
   * @throws IOException if `tar` exits with a non-zero status
   */
  private[zip] def unBZip(file: File, targetLocation: File, filesToExtractFilter: (String) => Boolean = (it: String) => true): Set[File] = {
    val targetOutLocation = new File(targetLocation, "out")
    targetOutLocation.mkdirs()

    import scala.sys.process._

    logger.info("Extracting [%s]...".format(file.getAbsolutePath))

    // Pass the arguments as a sequence: a single command string is split on whitespace,
    // which breaks as soon as one of the paths contains a space.
    val command = Seq("tar", "-C", targetOutLocation.getPath, "-xjvf", file.getAbsolutePath)
    val exitCode = command.!
    if (exitCode != 0)
      throw new IOException("tar exited with status [%s] while extracting [%s]".format(exitCode, file.getAbsolutePath))

    // Recursively collect regular files; `listFiles` returns null when the directory cannot be read.
    def listFiles(d: File): List[File] =
      Option(d.listFiles).map(_.toList).getOrElse(Nil).flatMap { f =>
        if (f.isDirectory) listFiles(f) else List(f)
      }

    val fileSet = listFiles(targetOutLocation).toSet
    val (keep, remove) = fileSet.partition(f => filesToExtractFilter(f.getName))

    remove foreach { f => logger.debug("Deleting [%s]".format(f.getAbsolutePath)); f.delete() }

    logger.debug("Returning filtered files: " + keep.map(_.getName).mkString(", "))
    keep
  }

  /** True when `candidate` resolves (after canonicalisation) to `root` itself or to a path below it. */
  private def isInside(root: File, candidate: File): Boolean = {
    val rootPath = root.getCanonicalPath
    val prefix = if (rootPath.endsWith(File.separator)) rootPath else rootPath + File.separator
    val candidatePath = candidate.getCanonicalPath
    candidatePath == rootPath || candidatePath.startsWith(prefix)
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment