Last active
April 20, 2018 17:41
-
-
Save mkhizeryounas/7dd15ad30d2b8827f65754d0094499ac to your computer and use it in GitHub Desktop.
GetCeleb Apify Crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Apify crawler page function: walks the gotceleb.com listing pages and, for
 * every post found, collects the image URLs from that post's gallery page.
 *
 * The crawl is asynchronous, so the function calls context.willFinishLater()
 * and later reports its output through context.finish(result), where result is
 * an array of { pageUrl: string, imageUrl: string[] } objects in listing order.
 *
 * @param {Object} context - Crawler context; this function uses
 *     context.jQuery, context.willFinishLater() and context.finish().
 */
function pageFunction(context) {
    var $ = context.jQuery;
    var result = [];
    // Highest listing page index to crawl (inclusive).
    var lastPage = 3;
    var finished = false;

    // Hands the collected data to the crawler exactly once.
    function finish() {
        if (finished) {
            return;
        }
        finished = true;
        console.log("Done process");
        context.finish(result);
    }

    // GETs `url` and always calls `onDone`: with the response body on success,
    // or with null on failure, so one bad request cannot stall the whole crawl.
    function fetch(url, onDone) {
        $.ajax({
            url: url,
            type: 'GET',
            success: function (res) {
                onDone(res);
            },
            error: function () {
                console.log("Request failed: " + url);
                onDone(null);
            }
        });
    }

    // Processes listing page `count`, then moves on to the next one once every
    // gallery request started for this page has settled.
    // NOTE(review): the crawl starts at page 0, as the original did; confirm
    // the site does not serve the same listing for /page/0 and /page/1.
    function extractData(count) {
        // Check the bound before requesting anything, so no page past the
        // limit is fetched and termination does not depend on page content.
        if (count > lastPage) {
            finish();
            return;
        }

        fetch("http://www.gotceleb.com/page/" + count, function (res) {
            // Starts at 1 to account for this handler itself; that keeps the
            // counter from reaching zero while posts are still being queued.
            var pending = 1;

            function settle() {
                pending -= 1;
                if (pending === 0) {
                    extractData(count + 1);
                }
            }

            if (res !== null) {
                $(res).find('.post-inner').each(function () {
                    var pageUrl = $(this).find('.post-title').find('a').attr('href');
                    if (!pageUrl) {
                        // No link to follow; requesting an undefined URL would
                        // fetch the wrong page.
                        return;
                    }
                    console.log(pageUrl);

                    var imgs = [];
                    // Pushed right away to keep listing order; `imgs` is
                    // filled in by reference when the gallery page arrives.
                    result.push({
                        pageUrl: pageUrl,
                        imageUrl: imgs
                    });

                    pending += 1;
                    fetch(pageUrl, function (res2) {
                        if (res2 !== null) {
                            $(res2).find('.gallery-item').each(function () {
                                var src = $(this).find('img').attr('src');
                                if (src) {
                                    imgs.push(src);
                                }
                            });
                        }
                        settle();
                    });
                });
            }

            settle();
        });
    }

    // Announce the deferred finish before any request can possibly complete.
    context.willFinishLater();
    extractData(0);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment