Created
December 26, 2016 10:26
-
-
Save gnipbao/ca38835928cc0fa3f4e85dbc4b3c5bf4 to your computer and use it in GitHub Desktop.
nodejs爬虫抓取图片
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Dependencies
var fs = require('fs');
var request = require("request");
var cheerio = require("cheerio");
var mkdirp = require('mkdirp');
// Target site: the landing page that is fetched first
var url = 'http://www.mmjpg.com/';
// Local directory the downloaded images are written into
var dir = './images';
// Create the local output directory; a failure is only logged, not fatal.
// NOTE(review): the callback-style call suggests creation completes
// asynchronously, and the crawl that follows does not wait for it — confirm
// the directory always exists before the first image is written.
mkdirp(dir, function(err) {
    if (err) {
        console.log(err);
    }
});
// Entry point of the crawl: fetch the landing page, read the number of listing
// pages from its pager, then fetch every listing page and hand each image found
// under '.pic ul' to download().
request(url, function(error, response, body) {
    if (error || response.statusCode !== 200) {
        // Report the failure ("请求失败" = "request failed") instead of ending
        // silently with nothing downloaded.
        console.log('请求失败 ' + url, error || response.statusCode);
        return;
    }
    const $ = cheerio.load(body);
    // Keep only the digits of the pager text.
    // NOTE(review): assumes the only digits inside '.page .info' are the total
    // page count — confirm against the live markup.
    const page = $('.page .info').text().replace(/[^0-9]*/g, '');
    console.log(page);
    // Parse explicitly instead of comparing a number with a string in the loop
    // condition; an empty string parses to NaN, which yields zero iterations.
    const pageCount = parseInt(page, 10);
    const urls = [];
    for (let i = 1; i <= pageCount; i++) {
        const u = 'http://www.mmjpg.com/home/' + i;
        urls.push(u);
        console.log(u);
    }
    console.log(urls);
    urls.forEach((pageUrl, pageIndex) => {
        request(pageUrl, (pageError, res, pageBody) => {
            if (pageError || res.statusCode !== 200) {
                // One broken listing page must not hide the reason it was skipped.
                console.log('请求失败 ' + pageUrl, pageError || res.statusCode);
                return;
            }
            const $page = cheerio.load(pageBody);
            $page('.pic ul img').each(function(imgIndex) {
                const src = $page(this).attr('src');
                if (!src) {
                    // An <img> without a src attribute has nothing to download.
                    return;
                }
                console.log('正在下载' + src);
                // Name files by page and position so that two images can never
                // share a name; the previous random 0-99999 prefix collided (and
                // overwrote files) once many images were fetched. The last four
                // characters of the URL are kept as the extension, as before.
                const filename = (pageIndex + 1) + '_' + (imgIndex + 1) + src.slice(-4);
                // download() only starts the transfer and returns before it has
                // finished, so no "download complete" message is printed here.
                download(src, dir, filename);
            });
        });
    });
});
/**
 * Download a single resource and write it to `dir + "/" + filename`.
 *
 * A HEAD request is issued first; only if it succeeds at the transport level is
 * the body streamed into the target file. Failures are logged and reported to
 * the optional callback instead of surfacing as unhandled stream 'error' events.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {string} dir - Directory the file is written into.
 * @param {string} filename - Name of the file inside `dir`.
 * @param {function(?Error)} [callback] - Optional; called exactly once, with
 *     `null` after the file has been fully written or with the error otherwise.
 */
var download = function(url, dir, filename, callback) {
    const target = dir + "/" + filename;
    let settled = false;
    // Funnel every outcome through one place so the callback fires only once,
    // even if both the network stream and the file stream report a problem.
    const finish = function(err) {
        if (settled) {
            return;
        }
        settled = true;
        if (err) {
            // "下载失败" = "download failed"
            console.log('下载失败 ' + url, err.message || err);
        }
        if (typeof callback === 'function') {
            callback(err || null);
        }
    };
    request.head(url, function(err) {
        if (err) {
            // The resource is unreachable; do not create an empty file for it.
            finish(err);
            return;
        }
        const file = fs.createWriteStream(target);
        file.on('error', finish);
        file.on('finish', function() {
            finish(null);
        });
        // Listen for 'error' on the request stream as well: pipe() does not
        // forward errors from the source to the destination.
        request(url).on('error', finish).pipe(file);
    });
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment