Skip to content

Instantly share code, notes, and snippets.

@gnipbao
Created December 26, 2016 10:26
Show Gist options
  • Save gnipbao/ca38835928cc0fa3f4e85dbc4b3c5bf4 to your computer and use it in GitHub Desktop.
Save gnipbao/ca38835928cc0fa3f4e85dbc4b3c5bf4 to your computer and use it in GitHub Desktop.
Node.js 爬虫抓取图片
// Dependencies
var fs = require('fs');
var request = require("request");
var cheerio = require("cheerio");
var mkdirp = require('mkdirp');
// Target site to crawl
var url = 'http://www.mmjpg.com/';
// Local directory where downloaded images are stored
var dir = './images';
// Create the output directory synchronously so that it is guaranteed to exist
// before any download tries to open a file inside it. The synchronous API is
// available in both legacy (0.x) and current mkdirp releases, whereas the
// callback form used previously is not supported by mkdirp >= 1.0.
try {
  mkdirp.sync(dir);
} catch (err) {
  // Keep the original best-effort behavior: report the problem and carry on.
  console.log(err);
}
// Fetch the landing page, work out how many listing pages exist, then crawl
// every listing page for images.
request(url, (error, response, body) => {
  if (error || response.statusCode !== 200) {
    console.log('请求失败 ' + url, error || response.statusCode);
    return;
  }
  const $ = cheerio.load(body);
  // Keep only the digits of the pager text; presumably this is the total
  // number of listing pages — verify against the site's markup.
  const page = $('.page .info').text().replace(/[^0-9]*/g, '');
  const pageCount = page === '' ? 0 : parseInt(page, 10);
  console.log(page);
  const urls = [];
  for (let i = 1; i <= pageCount; i++) {
    const u = 'http://www.mmjpg.com/home/' + i;
    urls.push(u);
    console.log(u);
  }
  console.log(urls);
  urls.forEach((pageUrl) => crawlPage(pageUrl));
});

/**
 * Fetch one listing page and start a download for every image matched by
 * the `.pic ul img` selector.
 *
 * @param {string} pageUrl - Absolute URL of the listing page.
 */
function crawlPage(pageUrl) {
  request(pageUrl, (error, res, body) => {
    if (error || res.statusCode !== 200) {
      console.log('请求失败 ' + pageUrl, error || res.statusCode);
      return;
    }
    const $ = cheerio.load(body);
    $('.pic ul img').each(function() {
      const src = $(this).attr('src');
      // An <img> may have no src, or a relative one that cannot be requested
      // as-is; skip it instead of throwing inside the callback.
      if (!/^https?:\/\//i.test(src || '')) {
        console.log('跳过无效图片地址', src);
        return;
      }
      console.log('正在下载' + src);
      download(src, dir, filenameFromUrl(src), (err) => {
        if (err) {
          console.log('下载失败 ' + src, err);
          return;
        }
        // Logged only once the file has actually been written.
        console.log('下载完成 ' + src);
      });
    });
  });
}

/**
 * Derive a deterministic, filesystem-safe file name from an image URL.
 * Using the URL itself (host + path) instead of a random number means two
 * different images cannot overwrite each other, and the real extension is
 * kept whatever its length.
 *
 * @param {string} src - Absolute image URL.
 * @returns {string} A name containing only letters, digits, '.', '_' and '-'.
 */
function filenameFromUrl(src) {
  const name = src
    .replace(/[?#].*$/, '') // drop query string and fragment
    .replace(/^[a-z][a-z0-9+.-]*:\/\//i, '') // drop the scheme
    .replace(/[^A-Za-z0-9._-]+/g, '_') // path separators and unsafe characters
    .replace(/^\.+/, ''); // never produce '.', '..' or a hidden file
  return name || 'image';
}

/**
 * Stream the resource at `url` into the file `dir/filename`.
 *
 * @param {string} url - Absolute URL of the resource to fetch.
 * @param {string} dir - Directory to write into.
 * @param {string} filename - Name of the file to create inside `dir`.
 * @param {function(Error=)} [callback] - Optional. Invoked exactly once: with
 *   an error if the request or the file write fails, or with no argument
 *   after the file stream has finished.
 */
function download(url, dir, filename, callback) {
  let settled = false;
  const done = (err) => {
    if (settled) {
      return;
    }
    settled = true;
    if (typeof callback === 'function') {
      callback(err);
    }
  };

  const file = fs.createWriteStream(dir + "/" + filename);
  // Without these listeners a failed write would surface as an unhandled
  // 'error' event and terminate the process.
  file.on('error', done);
  file.on('finish', () => done());

  const onRequestError = (err) => {
    done(err);
    // pipe() does not end the destination when the source fails.
    file.end();
  };

  try {
    request(url).on('error', onRequestError).pipe(file);
  } catch (err) {
    // request can throw synchronously for a URL it cannot parse.
    onRequestError(err);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment