Skip to content

Instantly share code, notes, and snippets.

@CsBigDataHub
Created May 21, 2019 19:44
Show Gist options
  • Save CsBigDataHub/54b51080f398da98e6f800b5f6cc1371 to your computer and use it in GitHub Desktop.
Save CsBigDataHub/54b51080f398da98e6f800b5f6cc1371 to your computer and use it in GitHub Desktop.
import scala.util.Try
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.IOUtils
import java.io.IOException
/**
 * Concatenates every regular file directly inside `srcDir` into the single
 * file `dstFile`, in ascending order of file name.
 *
 * Sub-directories of `srcDir` are skipped; only entries whose status reports
 * `isFile()` are copied.
 *
 * Any failure while listing, opening or copying propagates to the caller.
 * In that case the source directory is NOT deleted, so a failed merge can
 * never destroy the only intact copy of the data. The (possibly partial)
 * destination file is left in place for the caller to inspect or remove.
 *
 * @param srcFS        file system that holds the source directory
 * @param srcDir       directory whose files are merged
 * @param dstFS        file system on which the merged file is created
 * @param dstFile      path of the merged output file; must not exist yet
 * @param deleteSource when true, `srcDir` is deleted recursively after a
 *                     successful merge
 * @param conf         configuration handed to `IOUtils.copyBytes`
 * @return `false` when `srcDir` is not a directory; otherwise `true`, or the
 *         result of deleting `srcDir` when `deleteSource` is set
 * @throws IOException if `dstFile` already exists or an I/O operation fails
 */
def copyMerge(
    srcFS: FileSystem, srcDir: Path,
    dstFS: FileSystem, dstFile: Path,
    deleteSource: Boolean, conf: Configuration
): Boolean = {
  if (dstFS.exists(dstFile))
    throw new IOException(s"Target $dstFile already exists")

  // Source path is expected to be a directory:
  if (srcFS.getFileStatus(srcDir).isDirectory()) {
    val outputFile = dstFS.create(dstFile)
    try {
      srcFS
        .listStatus(srcDir)
        .sortBy(_.getPath.getName)
        .filter(_.isFile())
        .foreach { status =>
          val inputFile = srcFS.open(status.getPath())
          // close = false: this block owns both streams and closes them itself,
          // so the shared output stream stays open for the next input file.
          try IOUtils.copyBytes(inputFile, outputFile, conf, false)
          finally inputFile.close()
        }
    } finally {
      outputFile.close()
    }

    // Only reached when every file was copied without error.
    if (deleteSource) srcFS.delete(srcDir, true) else true
  }
  else false
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment