Last active
August 2, 2016 13:46
-
-
Save bdkosher/fe42c12b11edfbb7394a45869dd6dbe8 to your computer and use it in GitHub Desktop.
Command line utility for batch downloading.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// batchdl: command line utility for batch downloading.
// This section parses the command line and validates the input file,
// destination directory and log file before any download starts.
def cli = new CliBuilder(usage:'batchdl [options] <inputfile>')
cli.with {
    start(args:1, argName:'int', 'starting index (1-based) of input file to download')
    size(args:1, argName:'int', 'number of URLs to download')
    dest(args:1, argName:'dir', 'where the files should be downloaded to')
    log(args:1, argName: 'logfile', 'where output should be logged')
}

def options = cli.parse(args)
if (!options) {
    cli.usage()
    System.exit(1)
}

// The first positional argument is the file holding one URL per line.
String inputFile = options.arguments()[0]
if (!inputFile) {
    println "Must provide an input file containing a URL per line"
    cli.usage()
    System.exit(1)
}

def input = new File(inputFile)
if (input.exists() && input.isDirectory()) {
    println "Invalid input file. $input is a directory."
    System.exit(1)
}
// Fail early with a readable message instead of an exception when the file is read later.
if (!input.exists()) {
    println "Invalid input file. $input does not exist."
    System.exit(1)
}

// Negative values are folded to their absolute value; defaults are line 1 and 100 URLs.
int start = options.start ? Math.abs(options.start.toInteger()) : 1
int size = options.size ? Math.abs(options.size.toInteger()) : 100

def dest = options.dest ? new File(options.dest) : new File('.')
if (dest.exists() && !dest.isDirectory()) {
    println "Invalid destination directory. $dest is a file."
    System.exit(1)
}
// Create the destination up front; downloads and the default log file are written into it.
if (!dest.exists() && !dest.mkdirs()) {
    println "Unable to create destination directory $dest."
    System.exit(1)
}

def fmt = 'yyyy-MM-dd_HHmmss'
def defaultLogfileName = "batchdl-${new Date().format(fmt)}.log"
// Honour the -log option (the option is declared as 'log', not 'cli').
def log = options.log ? new File(options.log) : new File(dest, defaultLogfileName)
if (log.exists() && log.isDirectory()) {
    // A directory was given: place the timestamped default log file inside it.
    log = new File(log, defaultLogfileName)
}
// Download loop: walks the input file line by line, skipping lines before
// `start`, and stops once `size` URLs have been attempted. Each attempt
// appends one CSV-style record to the log file:
//   <line number>,<SUCCESS|ERROR>,<elapsed seconds>,<url>,<bytes written | error message>
int downloaded = 0
input.eachLine { url, lineNumber ->
    // eachLine numbers lines from 1, matching the 1-based `start` option.
    if (lineNumber < start) return
    if (downloaded >= size) {
        println "Processing complete."
        System.exit(0)
    }
    println "Downloading data from $url"
    // Output file name is derived from the URL ('/' replaced, first ':' removed) plus a timestamp.
    def out = new File(dest, "${url.replaceAll('/', '-') - ':'}_${new Date().format(fmt)}.out")
    // Declared outside the try so the catch block can report elapsed time as well.
    long startTime = System.nanoTime()
    try {
        new URL(url).withInputStream { stream ->
            long bytesWritten = java.nio.file.Files.copy(stream, out.toPath())
            // The input line number identifies the record in the log.
            log << "$lineNumber,SUCCESS,${(System.nanoTime() - startTime) / 1e9},$url,$bytesWritten \n"
        }
    } catch (e) {
        // A failed download is logged and counted; processing continues with the next URL.
        log << "$lineNumber,ERROR,${(System.nanoTime() - startTime) / 1e9},$url,$e.message \n"
    }
    ++downloaded
}
println "Processed $downloaded of $size entries"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment