Created
December 14, 2018 15:25
-
-
Save taikulawo/107aa08fab15d24c0a75c4d383f50953 to your computer and use it in GitHub Desktop.
douban
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const cheerio = require('cheerio') | |
const { promises: fs } = require('fs') | |
const axios = require('axios').default.create({ | |
proxy: false, | |
headers: { | |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36', | |
'Referer': 'https://book.douban.com/subject/1322455/comments/hot?p=3', | |
'Accept': 'application/json, text/javascript, */*; q=0.01', | |
'Accept-Encoding': 'gzip, deflate, br', | |
'Host': 'book.douban.com', | |
'X-Requested-With': 'XMLHttpRequest' | |
} | |
}) | |
const maxLoopCount = 2 | |
async function getLine() { | |
var rl = require('readline').createInterface({ | |
input: process.stdin, | |
output: process.stdout, | |
terminal: false | |
}); | |
var continuation; | |
var getline = (() => { | |
var thenable = { | |
then: resolve => { | |
continuation = resolve; | |
} | |
}; | |
return () => thenable; | |
})(); | |
rl.on('line', line => continuation(line)) | |
rl.on('close', function () { | |
console.debug('console close') | |
}) | |
return await getline() | |
} | |
function findNumFromString(s) { | |
return s.match(/\d+/g) | |
} | |
async function getCommentsCounts(path) { | |
let axios = require('axios').default.create({ | |
proxy: false, | |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36', | |
'Accept': 'application/json, text/javascript, */*; q=0.01', | |
'Accept-Encoding': 'gzip, deflate, br', | |
}) | |
let res = await axios.get(path) | |
let $ = cheerio.load(res.data) | |
let html = $("#total-comments")[0].children[0].data | |
let count = findNumFromString(html)[0] | |
count = Number.parseInt(count) | |
if (!Number.isInteger(count)) { | |
throw `Count is ${count}, Is not a number` | |
} | |
return count | |
} | |
async function goToReq(target, path) { | |
//get total | |
let count = await getCommentsCounts(target) | |
let loopCount = (count - (count % 20)) / 20 + 1 | |
if (typeof maxLoopCount !== 'undefined') { | |
loopCount = maxLoopCount | |
} | |
let workers = [] | |
for (let i = 1; i <= loopCount; ++i) { | |
workers.push(new Worker(axios, `${target}?p=${i}`)) | |
} | |
let promises = workers.map(async worker => { | |
return await worker.do() | |
}) | |
let allComments = await Promise.all(promises) | |
allComments = [].concat(...allComments); | |
await fs.writeFile(path, JSON.stringify(allComments)) | |
console.log(`done: view json file in ${path}`) | |
} | |
class Worker { | |
constructor(axios, target) { | |
this.axios = axios | |
this.target = target | |
} | |
async do() { | |
let { data: { content } } = await axios.get(this.target) | |
return this.getAllList(content) | |
} | |
getAllList(content) { | |
let $ = cheerio.load(content, { | |
decodeEntities: false | |
}) | |
return $("#comments .comment-item .comment").map((index, comment) => { | |
let user = {} | |
user.name = $(comment).find("h3 .comment-info a").html() | |
user.comment = $(comment).find(".comment-content span").html() | |
return user | |
}).get() | |
} | |
} | |
; (async () => { | |
for (; ;) { | |
console.log('Input URL (for example: https://book.douban.com/subject/1322455/comments/hot)') | |
let target = await getLine() | |
let defaultPath = "D:\\doubancomments.json" | |
console.log(`Input store path(default ${defaultPath})`) | |
let path = await getLine() | |
if (!path) { | |
path = defaultPath | |
} | |
await goToReq(target, path) | |
} | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment