Last active
April 8, 2019 10:11
-
-
Save cvan/da899090fa6c38f87dacbef95ea5d785 to your computer and use it in GitHub Desktop.
download URLs to disk (without any third-party npm dependencies)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Source: https://gist.github.com/cvan/da899090fa6c38f87dacbef95ea5d785#file-downloader-js
// Author: CVAN <https://cvan.io>
const fs = require('fs'); | |
const http = require('http'); | |
const https = require('https'); | |
const path = require('path'); | |
const URL = require('url'); | |
// Filename helpers. They are used as the fallback filename formatter when no
// character-escaping formatter can be loaded.
const utils = {};

/**
 * Returns the current time as an ISO 8601 UTC string without the trailing `Z`
 * and with every `:` replaced by `.`, so it can be embedded in a filename.
 *
 * @returns {string} For example `2019-04-08T10.11.12.345`.
 */
utils.getSafeTimestamp = () => new Date().toISOString().slice(0, 23).replace(/:/g, '.');

/**
 * Inserts `str` immediately before the file extension of `filename`.
 *
 * The extension is determined with `path.extname`, so a leading dot (as in
 * `.gitignore`) or a dot inside a directory name is not mistaken for one.
 *
 * @param {string} filename - File name (or path) to modify.
 * @param {string} str - Text to insert before the extension.
 * @returns {string} `filename` with `str` inserted, or appended when there is no extension.
 */
utils.prependToFileExtname = (filename, str) => {
  const extname = path.extname(filename);
  const stem = extname ? filename.slice(0, -extname.length) : filename;
  return `${stem}${str}${extname}`;
};

/**
 * Makes a filename unique by inserting a separator and the current timestamp
 * before its extension.
 *
 * @param {string} safeFilename - Filename to decorate.
 * @param {Object} [opts] - Options.
 * @param {string} [opts.replacement] - Separator placed before the timestamp.
 *   Defaults to `__` when `opts` is omitted; an options object without
 *   `replacement` (or a `null` options value) yields no separator.
 * @returns {string} The decorated filename.
 */
utils.getSafeFilenameWithTimestamp = (safeFilename, opts = {replacement: '__'}) => {
  const separator = (opts && opts.replacement) || '';
  return utils.prependToFileExtname(safeFilename, `${separator}${utils.getSafeTimestamp()}`);
};
// Default to timestamped filenames, then upgrade to the character-escaping
// formatter from `./filenamify.js` when that optional file is present.
utils.getSafeFilename = utils.getSafeFilenameWithTimestamp;
try {
  utils.getSafeFilename = require('./filenamify.js');
} catch (err) {
  // A missing file is the expected "not installed" case and stays silent.
  // Anything else (for example a syntax error inside the file) still falls
  // back to the timestamp formatter, but is reported instead of being hidden.
  if (!err || err.code !== 'MODULE_NOT_FOUND') {
    console.warn(`Could not load "./filenamify.js" (${err && err.message}); falling back to timestamped filenames`);
  }
}
/**
 * Downloads the resource at a URL into a file stored next to this module,
 * using only Node's built-in `http`/`https` modules.
 *
 * Supported call shapes:
 *   new Downloader(url)
 *   new Downloader(url, options)
 *   new Downloader({url, ...options})
 *
 * Options (all optional):
 *   - followRedirect {boolean}        Follow 301/302/303/307/308 responses. Default: true.
 *   - maxRedirects {number}           Maximum number of redirects to follow. Default: 10.
 *   - getFormattedFilename {Function} `(filename, {replacement}) => string` filename formatter.
 *   - defaultProtocol {string}        Protocol used when the URL has none. Default: 'https'.
 *   - timeout {number}                Request timeout in milliseconds. Default: 3000.
 *   - userAgent {string}              Value of the `User-Agent` request header.
 *
 * When a URL is supplied to the constructor the download starts immediately
 * and the resulting promise is exposed as `this.promise`.
 */
class Downloader {
  /**
   * @param {string|Object} [url] - URL to download, or an options object that may contain `url`.
   * @param {Object} [options] - Options (see the class description).
   */
  constructor (url, options) {
    this.counter = 0;

    // Normalise the supported call shapes into one options object.
    if (url !== null && typeof url === 'object') {
      this.options = Object.assign({}, url, options);
    } else {
      this.options = Object.assign({}, options);
      if (url != null) {
        this.options.url = url;
      }
    }

    this.defaults = {
      followRedirect: true,
      maxRedirects: 10,
      getFormattedFilename: utils.getSafeFilename,
      defaultProtocol: 'https', // Default protocol to use when URL passed does not start with a protocol.
      timeout: 3000, // Give up on request after (in milliseconds). Default: 3000ms (three seconds).
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.39 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    };

    // Accept the common spellings of the user-agent option; the first match wins.
    if (!('userAgent' in this.options)) {
      const userAgentAliases = ['useragent', 'user-agent', 'User-agent', 'User-Agent', 'UserAgent'];
      const alias = userAgentAliases.find(key => key in this.options);
      if (typeof alias !== 'undefined') {
        this.options.userAgent = this.options[alias];
      }
    }

    this.options = Object.assign({}, this.defaults, this.options);
    // Allow values such as 'HTTPS' or 'https://' by keeping only the scheme name.
    this.options.defaultProtocol = String(this.options.defaultProtocol).toLowerCase().replace(/:.*/, '');

    if (this.options.url != null) {
      this.promise = this.fetch(this.options.url);
    }
  }

  /**
   * The URL most recently assigned (after a protocol was added when missing).
   *
   * @returns {string|undefined}
   */
  get url () {
    return this._url;
  }

  /**
   * Stores the URL, parses it into `this.urlObj` and rebuilds `this.requestOptions`.
   *
   * @param {string} url - Absolute URL, protocol-relative URL (`//host/path`) or bare `host/path`.
   * @throws {TypeError} When `url` is not a string.
   */
  set url (url) {
    if (typeof url !== 'string') {
      throw new TypeError('Expected the URL to be a string');
    }
    if (url.startsWith('//')) {
      url = `${this.options.defaultProtocol}://${url.slice(2)}`;
    } else if (!/^https?:\/\//i.test(url)) {
      url = `${this.options.defaultProtocol}://${url}`;
    }
    this._url = url;
    this.urlObj = URL.parse(url);
    this.requestOptions = {
      method: 'GET',
      hostname: this.urlObj.hostname,
      path: this.urlObj.path || '/',
      headers: {
        'User-Agent': this.options.userAgent
      },
      timeout: this.options.timeout
    };
    // Only pass an explicit port; otherwise the transport picks its own default.
    if (this.urlObj.port) {
      this.requestOptions.port = this.urlObj.port;
    }
  }

  /**
   * Chooses a file extension for the downloaded resource.
   *
   * @param {Object} params
   * @param {string} params.pathname - Path component of the URL.
   * @param {string} [params.contentType] - Value of the `Content-Type` response header.
   * @returns {string} The extension of `pathname` (with leading dot), or `.html`
   *   when there is none or the response is served as HTML.
   */
  getFileExtnameFromUrl ({pathname, contentType}) {
    const normalizedType = (contentType || '').toLowerCase();
    const extname = path.extname(pathname);
    if (!extname || (extname !== '.html' && normalizedType.includes('html'))) {
      return '.html';
    }
    return extname;
  }

  /**
   * Builds the destination filename for a response.
   *
   * The URL path is only included when the configured formatter declares (via
   * its `_willEscapeUnsafeCharacters` flag) that it escapes path separators.
   *
   * @param {Object} params
   * @param {string} params.host - Host name the resource was fetched from.
   * @param {string} params.pathname - Path component of the URL.
   * @param {string} [params.contentType] - Value of the `Content-Type` response header.
   * @returns {string} The formatted filename.
   */
  getSafeFilename ({host, pathname, contentType}) {
    const formatter = this.options.getFormattedFilename;
    const includePathname = Boolean(formatter._willEscapeUnsafeCharacters) &&
      pathname !== '/' &&
      pathname !== '/index.html';
    let filename = includePathname ? `${host}${pathname}` : host;
    if (!includePathname || !path.extname(path.basename(filename.toLowerCase()))) {
      filename += this.getFileExtnameFromUrl({pathname, contentType});
    }
    return formatter(filename, {replacement: '__'});
  }

  /**
   * Downloads `url` (or the previously assigned URL) to disk.
   *
   * When this file is run directly, the outcome is printed and the process
   * exits; when it is loaded as a module, the promise settles normally.
   *
   * @param {string} [url] - URL to download.
   * @returns {Promise} Resolves with a human-readable success message; rejects with an `Error`.
   */
  fetch (url) {
    const maxRedirects = Number.isFinite(this.options.maxRedirects) ? this.options.maxRedirects : 10;
    return this._download(url, maxRedirects).then(successMsg => {
      if (module.parent) {
        return successMsg;
      }
      console.log(successMsg);
      process.exit(0);
    }).catch(err => {
      if (!module.parent) {
        console.error(`Error occurred: ${err.message}`);
        process.exit(1);
      }
      throw err;
    });
  }

  /**
   * Performs one request and follows redirects until a final response is stored.
   *
   * @param {string} [url] - URL to request; falls back to the currently assigned URL.
   * @param {number} redirectsLeft - How many more redirects may be followed.
   * @returns {Promise} Resolves with the success message once the file is fully written.
   */
  _download (url, redirectsLeft) {
    return new Promise((resolve, reject) => {
      // Errors thrown in this executor (for example by the `url` setter) reject the promise.
      if (url != null) {
        this.url = url;
      }
      if (!this.urlObj) {
        throw new Error('No URL was provided to download');
      }

      // Snapshot the parsed URL: `this.urlObj` changes when a redirect is followed.
      const urlObj = this.urlObj;
      const transport = urlObj.protocol === 'https:' ? https : http;

      // TODO: Improve handling of media assets that have compressed response bodies (e.g., gzip, Deflate, Brotli).
      const request = transport.request(this.requestOptions, res => {
        try {
          // Handle redirects.
          const redirectStatusCodes = [301, 302, 303, 307, 308];
          if (this.options.followRedirect && redirectStatusCodes.includes(res.statusCode)) {
            request.abort();
            const location = res.headers.location;
            if (!location) {
              return reject(new Error('Encountered a URL redirect without a "Location" HTTP response header'));
            }
            // `Location` may be relative, so resolve it against the URL just requested.
            const nextUrl = URL.resolve(urlObj.href, location);
            if (nextUrl === urlObj.href) {
              return reject(new Error('Encountered an infinite URL redirect'));
            }
            if (redirectsLeft <= 0) {
              return reject(new Error('Encountered too many URL redirects'));
            }
            // Settle this promise with the outcome of the redirected request.
            return resolve(this._download(nextUrl, redirectsLeft - 1));
          }

          if (res.statusCode !== 200) {
            request.abort();
            return reject(new Error(`Encountered an unexpected server response: ${res.statusCode} – ${res.statusMessage}`));
          }

          res.on('error', err => reject(err));

          const filename = this.getSafeFilename({
            host: urlObj.hostname,
            pathname: urlObj.pathname || '/',
            contentType: res.headers['content-type']
          });
          const filenameAbsolute = path.join(__dirname, filename);

          // The body is piped as raw bytes (no text decoding) so binary files are stored intact.
          const destStream = fs.createWriteStream(filenameAbsolute);
          destStream.on('finish', () => resolve(`Downloaded "${urlObj.href}" to ${filenameAbsolute}`));
          destStream.on('error', err => {
            request.abort();
            reject(err);
          });
          res.pipe(destStream);
        } catch (err) {
          request.abort();
          reject(err);
        }
      });

      // Without this listener a connection failure would be thrown as an unhandled 'error' event.
      request.on('error', err => reject(err));
      request.on('timeout', () => {
        request.abort();
        reject(new Error('Network timeout'));
      });
      request.end();
    });
  }
}
// Command-line entry point: the URL comes from the first argument or, failing
// that, from the DOWNLOADER_URL environment variable.
if (!module.parent) {
  const cliUrl = process.argv[2] || process.env.DOWNLOADER_URL;
  if (cliUrl) {
    new Downloader(cliUrl);
  } else {
    // Without a URL there is nothing to download; say so instead of exiting silently.
    console.error(`Usage: node ${path.basename(__filename)} URL (or set the DOWNLOADER_URL environment variable)`);
    process.exitCode = 1;
  }
}

module.exports.Downloader = Downloader;
module.exports.utils = utils;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Source: Adapted from https://github.com/sindresorhus/filenamify/blob/master/index.js
// Returns a fresh global regex matching characters that are not allowed in
// filenames. A new object per call means no `lastIndex` state is shared.
const filenameReservedRegex = () => (/[<>:"\/\\|?*\x00-\x1F]/g);

// Returns a regex matching device names that Windows reserves (case-insensitive, whole string).
filenameReservedRegex.windowsNames = () => (/^(con|prn|aux|nul|com[0-9]|lpt[0-9])$/i);

// Doesn't make sense to have longer filenames
const MAX_FILENAME_LENGTH = 100;

// Characters that have a special meaning inside a regular expression.
const matchOperatorsRe = /[|\\{}()[\]^$+*?.]/g;

// C0 and C1 control characters.
const reControlChars = /[\u0000-\u001f\u0080-\u009f]/g; // eslint-disable-line no-control-regex

// One or more leading dots (as in `.` / `..` relative path prefixes).
const reRelativePath = /^\.+/;
/**
 * Collapses every run of two or more consecutive occurrences of `target`
 * inside `str` into a single occurrence.
 *
 * @param {string} str - Text to process.
 * @param {string} target - Substring whose repetitions are collapsed (matched literally).
 * @returns {string} The collapsed text.
 * @throws {TypeError} When either argument is not a string.
 */
function trimRepeated (str, target) {
  if (typeof str !== 'string' || typeof target !== 'string') {
    throw new TypeError('Expected a string');
  }
  const repeatedTarget = new RegExp(`(?:${escapeStringRegexp(target)}){2,}`, 'g');
  return str.replace(repeatedTarget, target);
}
/**
 * Escapes every regular-expression operator in `str` so the result can be
 * embedded in a `RegExp` pattern and match the text literally.
 *
 * This is a plain function that takes the string directly, which is how the
 * callers in this file invoke it.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 * @throws {TypeError} When `str` is not a string.
 */
function escapeStringRegexp (str) {
  if (typeof str !== 'string') {
    throw new TypeError('Expected a string');
  }
  return str.replace(matchOperatorsRe, '\\$&');
}
/**
 * Removes one occurrence of `substring` from the start and one from the end
 * of `input`, when present.
 *
 * @param {string} input - Text to trim.
 * @param {string} substring - Text to remove from both ends (matched literally).
 * @returns {string} The trimmed text.
 * @throws {TypeError} When either argument is not a string.
 */
function stripOuter (input, substring) {
  if (typeof input !== 'string' || typeof substring !== 'string') {
    throw new TypeError('Expected a string');
  }
  const escaped = escapeStringRegexp(substring);
  return input.replace(new RegExp(`^${escaped}|${escaped}$`, 'g'), '');
}
/**
 * Converts an arbitrary string into one that is usable as a filename.
 *
 * Reserved characters, control characters and leading dots are replaced with
 * `options.replacement`; repeated replacements are collapsed and stripped
 * from both ends; reserved Windows device names get the replacement appended;
 * the result is cut to `MAX_FILENAME_LENGTH` characters.
 *
 * @param {string} string - Text to convert.
 * @param {Object} [options] - Options.
 * @param {string} [options.replacement='!'] - Text substituted for unsafe characters.
 * @returns {string} The sanitized filename.
 * @throws {TypeError} When `string` is not a string.
 * @throws {Error} When the replacement itself contains a reserved or control character.
 */
function filenamify (string, options = {}) {
  if (typeof string !== 'string') {
    throw new TypeError('Expected a string');
  }
  const replacement = options.replacement === undefined ? '!' : options.replacement;

  // Either kind of unsafe character disqualifies the replacement. The control
  // character check uses a fresh non-global regex because `.test` on the
  // shared global one would carry `lastIndex` over between calls.
  const hasReservedChar = filenameReservedRegex().test(replacement);
  const hasControlChar = new RegExp(reControlChars.source).test(replacement);
  if (hasReservedChar || hasControlChar) {
    throw new Error('Replacement string cannot contain reserved filename characters');
  }

  let result = string
    .replace(filenameReservedRegex(), replacement)
    .replace(reControlChars, replacement)
    .replace(reRelativePath, replacement);

  if (replacement.length > 0) {
    result = trimRepeated(result, replacement);
    // A string consisting only of the replacement is left as is rather than emptied.
    if (result.length > 1) {
      result = stripOuter(result, replacement);
    }
  }

  if (filenameReservedRegex.windowsNames().test(result)) {
    result += replacement;
  }
  return result.slice(0, MAX_FILENAME_LENGTH);
}
// Marker read by consumers of this module: it declares that path separators
// and other unsafe characters are escaped, so a URL path may safely be
// included in the string passed to `filenamify`.
filenamify._willEscapeUnsafeCharacters = true;

module.exports = filenamify;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment