Skip to content

Instantly share code, notes, and snippets.

@tranchausky
Created August 22, 2019 03:11
Show Gist options
  • Save tranchausky/77065c6f9641cff73487c83e7e8b7098 to your computer and use it in GitHub Desktop.
Save tranchausky/77065c6f9641cff73487c83e7e8b7098 to your computer and use it in GitHub Desktop.
// Write Javascript code here
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Single-page address. In this file it is only referenced by the commented-out
// one-shot scraper near the bottom; inside craw_url the parameter of the same
// name shadows it.
const URL = "https://danhngoncuocsong.vn/chu-de/danh-ngon-cong-viec_1.html";
/*
var i;
var max = 10;
var result_data=[]
for (i = 0; i < max; i++) {
var link = "https://danhngoncuocsong.vn/chu-de/danh-ngon-cong-viec_"+i+".html";
var data = craw_url(link,function(result){
//console.log(result)
result_data.concat(data);
});
if(i ==max-1){
save_data(result_data)
}
}
*/
// Entry point: request 10 pages, print the combined result, then write it to disk.
get_result_callback(10, (pages) => {
  console.log(pages);
  save_data(pages);
});
/**
 * Request pages `danh-ngon-cong-viec_0.html` .. `danh-ngon-cong-viec_{n-1}.html`
 * through craw_url (all requests are started before any result is awaited)
 * and hand the collected per-page results to `callback` once every page has
 * reported back.
 *
 * @param {number} max - How many pages to request. Page indexes start at 0.
 *   A fractional value is rounded up, matching the number of iterations a
 *   plain `i < max` loop performs.
 * @param {function(Array): void} callback - Invoked once with an array that
 *   holds one entry per page, positioned by page index (not by the order in
 *   which the responses happened to arrive). Invoked immediately with `[]`
 *   when there is nothing to request.
 */
function get_result_callback(max, callback) {
  const pageCount = max > 0 ? Math.ceil(max) : 0;

  // With no requests in flight no craw_url callback would ever fire, so the
  // caller would wait forever; report the empty result right away instead.
  if (pageCount === 0) {
    callback([]);
    return;
  }

  const result_data = new Array(pageCount);
  let finished = 0;

  // `let` gives every iteration its own `page`, so each response handler
  // writes into the slot of the page it belongs to.
  for (let page = 0; page < pageCount; page++) {
    const link = "https://danhngoncuocsong.vn/chu-de/danh-ngon-cong-viec_" + page + ".html";
    craw_url(link, function (result) {
      result_data[page] = result;
      finished++;
      if (finished === pageCount) {
        console.log('end');
        callback(result_data);
      }
    });
  }
}
/**
 * Download one listing page and extract the quotes found in its `.boxy`
 * elements.
 *
 * @param {string} URL - Address of the page to fetch.
 * @param {function(string[]): void} callback - Always invoked exactly once.
 *   Receives an array of JSON strings, one per quote, each encoding
 *   `{ name, autho, tag }`. When the request fails the error is logged and
 *   the callback receives an empty array, so callers that count completed
 *   pages are never left waiting on a failed one.
 */
function craw_url(URL,callback){
  request(URL, function (err, res, body) {
    if (err) {
      console.log(err);
      // Still report back: a missing callback here would stall any caller
      // that waits for every page to finish.
      callback([]);
      return;
    }

    const arr = [];
    const $ = cheerio.load(body);
    $('.boxy').each(function () {
      const box = $(this);
      const name = myTrim(box.find('.bqQuoteLink a').text());
      // Boxes without quote text are skipped.
      if (name === '') {
        return;
      }
      const autho = myTrim(box.find('.bq-aut a').text());
      const tag = myTrim(box.find('.boxyBottom .body').text()).replace(/\n/g, '');
      arr.push(JSON.stringify({
        name: name,
        autho: autho,
        tag: tag
      }));
    });
    console.log('111');
    callback(arr);
  });
}
/**
 * Write the crawl result to `data_craw.txt` in the current working directory
 * and log either the write error or "success".
 *
 * @param {*} arr - Data to store. Strings and binary views (Buffer,
 *   TypedArray, DataView) are written as they are. Anything else, including
 *   the nested arrays produced by the crawler, is converted with
 *   `String(arr)`, which for arrays yields the elements joined by commas.
 */
function save_data(arr){
  // fs.writeFile only accepts strings and binary views in current Node.js;
  // older releases stringified other values implicitly. Converting here keeps
  // the same file content everywhere instead of throwing a TypeError.
  const payload = (typeof arr === 'string' || ArrayBuffer.isView(arr)) ? arr : String(arr);
  fs.writeFile('data_craw.txt', payload, function (err) {
    if (err) {
      console.log(err);
    }
    else {
      console.log("success");
    }
  });
}
/*
request(URL, function (err, res, body) {
if(err)
{
console.log(err);
}
else
{
const arr = [];
let $ = cheerio.load(body);
$('.boxy').each(function(index){
//const data = $(this).find('div._1UoZlX>a').attr('href');
const data = $(this).find('.bqQuoteLink a').attr('href');
const name = myTrim($(this).find('.bqQuoteLink a').text());
if(name!=''){
const autho = myTrim($(this).find('.bq-aut a').text());
var tag = myTrim($(this).find('.boxyBottom .body').text());
tag = tag.replace(/\n/g,'');
const obj = {
//data : data,
name : name,
autho : autho,
tag : tag
};
console.log(obj);
arr.push(JSON.stringify(obj));
}
});
console.log(arr.toString());
fs.writeFile('data.txt', arr, function (err) {
if(err) {
console.log(err);
}
else{
console.log("success");
}
});
}
});
*/
/**
 * Remove whitespace at the start and end of every line of `x`.
 *
 * The pattern runs in multiline mode and `\s` also matches line breaks, so
 * blank lines between two lines of text are removed as well.
 *
 * @param {string} x - Text to clean up.
 * @returns {string} The text without leading/trailing whitespace per line.
 */
function myTrim(x) {
  const edgeWhitespace = /^\s+|\s+$/gm;
  const cleaned = x.replace(edgeWhitespace, '');
  return cleaned;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment