Created
September 10, 2017 22:50
-
-
Save tommelo/b1336621e28e5e81b8a5f1424081a2a0 to your computer and use it in GitHub Desktop.
Seeks for Instagram Links
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
var util = require('util'); | |
var Scraper = require('./scraper'); | |
/**
 * Creates a Hunter, the Scraper specialisation defined in this module.
 *
 * The Scraper constructor is invoked on the new instance, with no arguments,
 * so that any state the base constructor sets up exists before the instance
 * is used.
 *
 * @constructor
 */
function Hunter() {
  Scraper.call(this);
}
/**
 * The implementation of the abstract method transform().
 *
 * Flattens the given array of arrays by one level and removes duplicate
 * entries, keeping the first occurrence of each value.
 *
 * @param {Array} result The array of arrays to merge
 * @return {Array} A flat array of the distinct values, in first-seen order
 */
Hunter.prototype.transform = function(result) {
  // concat() spreads every array argument into its output, so applying it
  // with `result` as the argument list flattens exactly one level.
  var flattened = Array.prototype.concat.apply([], result);

  // A Set iterates in insertion order, so the first occurrence of each
  // value determines its position in the returned array.
  return Array.from(new Set(flattened));
};
/**
 * The implementation of the abstract method scrape().
 *
 * Collects the href of every anchor that points at instagram.com or
 * www.instagram.com (http or https), skipping:
 *   - links whose path starts with "p/" or "d/";
 *   - links to the bare site root, which do not name a profile.
 * A single trailing slash is removed from each collected href.
 *
 * @param {Function} $ The cheerio instance loaded with the page's html
 * @return {Array} links The Instagram account links, in document order
 */
Hunter.prototype.scrape = function($) {
  // Every scheme/host combination that is accepted as an Instagram link.
  var origins = [
    'http://instagram.com/',
    'https://instagram.com/',
    'http://www.instagram.com/',
    'https://www.instagram.com/'
  ];
  // Path prefixes that are excluded from the result.
  var excludedPaths = ['p/', 'd/'];

  var selector = origins.map(function(origin) {
    return 'a[href^="' + origin + '"]';
  }).join(',');

  var links = $(selector);
  origins.forEach(function(origin) {
    excludedPaths.forEach(function(path) {
      links = links.not('[href^="' + origin + path + '"]');
    });
  });

  var profiles = [];
  links.each(function(i, link) {
    var href = $(link).attr('href');

    // A link to the site root carries no account name, so it is not a profile.
    if (origins.indexOf(href) !== -1) {
      return;
    }

    if (href.endsWith('/')) {
      href = href.substring(0, href.length - 1);
    }
    profiles.push(href);
  });

  return profiles;
};
/**
 * Searches for instagram profile links in the given pages.
 *
 * Each url is passed to this.prepare() and the collected results are handed
 * to this.execute() as one array. Neither method is defined in this file;
 * presumably both are inherited from Scraper (verify against ./scraper).
 *
 * @param {Array} links The urls of the pages to search; a missing value is
 *                      treated as an empty list
 * @return {Promise} promise Whatever this.execute() returns (documented as a
 *                           promise of the execution chain)
 **/
Hunter.prototype.hunt = function(links) {
  var promises = [];

  // forEach visits only the array's own indexed elements. A for...in loop
  // would also visit enumerable properties added to Array.prototype and pass
  // them to prepare() as if they were urls.
  (links || []).forEach(function(link) {
    promises.push(this.prepare(link));
  }, this);

  return this.execute(promises);
};
util.inherits(Hunter, Scraper); | |
module.exports = Hunter; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment