Skip to content

Instantly share code, notes, and snippets.

@wizardforcel
Last active July 5, 2017 13:40
Show Gist options
  • Save wizardforcel/e8c03320511efc38e873af55aaed2810 to your computer and use it in GitHub Desktop.
Save wizardforcel/e8c03320511efc38e873af55aaed2810 to your computer and use it in GitHub Desktop.
Download the images referenced in HTML files
var fs = require('fs');
var request = require('sync-request');
var crypto = require('crypto');
var cheerio = require('cheerio');
// Directory containing the .html files to process (first CLI argument).
const dirname = process.argv[2];
if (!dirname) {
  // Message reads "Please specify a directory."; the exit status stays 0.
  console.log('请指定目录。');
  process.exit(0);
}

// Create the image output directory. A directory that already exists is
// fine; any other failure (missing parent, permissions, ...) is rethrown
// instead of being silently ignored.
try {
  fs.mkdirSync(dirname + '/img');
} catch (err) {
  if (err.code !== 'EEXIST') {
    throw err;
  }
}

// File names of the images already downloaded during this run; read and
// updated by dealWithHtml so a URL shared by several pages is fetched once.
const imgs = new Set();

// Rewrite, in place, every .html file found directly inside dirname.
for (const fname of fs.readdirSync(dirname)) {
  if (!fname.endsWith('.html')) {
    continue;
  }
  console.log('file: ' + fname);
  const filePath = dirname + '/' + fname;
  const content = fs.readFileSync(filePath, 'utf-8');
  fs.writeFileSync(filePath, dealWithHtml(content));
}
/**
 * Downloads the remote images referenced by an HTML document and points the
 * corresponding <img> tags at the downloaded copies.
 *
 * Only tags whose `src` starts with "http" are handled. Each image is saved
 * as `<dirname>/img/<md5 of the URL>.jpg`, and the module-level `imgs` set
 * ensures a given URL is fetched at most once per run. A tag without a `src`
 * attribute is skipped, and a tag whose download fails is logged and left
 * pointing at its original URL.
 *
 * @param {string} html - Markup of the document to process.
 * @returns {string} The document serialized by cheerio after rewriting.
 */
function dealWithHtml(html) {
  const $ = cheerio.load(html);
  const $imgs = $('img');
  for (let i = 0; i < $imgs.length; i++) {
    const $img = $imgs.eq(i);
    const url = $img.attr('src');
    // attr() gives undefined for an <img> without src; calling startsWith
    // on it would throw and abort the whole run.
    if (typeof url !== 'string' || !url.startsWith('http')) {
      continue;
    }
    // The local name is derived from the URL, so the same URL always maps
    // to the same file. The ".jpg" suffix is used regardless of the real
    // image format.
    const picname = crypto.createHash('md5').update(url).digest('hex') + '.jpg';
    console.log(picname);
    if (!imgs.has(picname)) {
      try {
        const data = request('GET', url).getBody();
        fs.writeFileSync(dirname + '/img/' + picname, data);
      } catch (err) {
        // One unreachable image should not abort the batch: keep the remote
        // src so a later run can retry it.
        console.error('download failed: ' + url + ' (' + err.message + ')');
        continue;
      }
      imgs.add(picname);
    }
    // NOTE(review): files are written to <dirname>/img/ but the src is
    // rewritten to ../Images/ - presumably a later packaging step moves
    // them; confirm before changing either path.
    $img.attr('src', '../Images/' + picname);
  }
  return $.html();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment