Last active
August 15, 2018 12:44
-
-
Save kayslay/d732ac513a02f9299e1c2001b5dbbd65 to your computer and use it in GitHub Desktop.
crawl.js for the article "Build a CLI to crawl a web page with web-crawljs"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/** | |
* Created by kayslay on 5/31/17. | |
*/ | |
const crawler = require('web-crawljs'); | |
const program = require('commander'); | |
//commander configuration | |
/**
 * Commander option parser: turns a comma separated string into a list.
 *
 * Whitespace around each item is trimmed and empty items (produced by
 * doubled or trailing commas) are dropped, so "a, b," yields ['a', 'b'].
 *
 * @param {string} val - the raw option value, e.g. "http://a.com,http://b.com"
 * @returns {string[]} the trimmed, non-empty items in their original order
 */
function list(val) {
    "use strict";
    return val
        .split(',')
        .map((item) => item.trim())
        .filter((item) => item.length > 0);
}
// Declare the command line flags and parse process.argv:
//   -x / --execute  name of the configuration to run (value required)
//   -d / --depth    overrides the crawl depth (value optional)
//   -u / --urls     overrides the urls; the raw value is passed through `list`
program
    .option('-x --execute <string>', 'the configuration to execute')
    .option('-d --depth [number]', 'the depth of the crawl')
    .option('-u --urls [items]', 'change the urls', list)
    .parse(process.argv);
//the -x flag is mandatory: abort with an error when no configuration was named
const hasConfiguration = Boolean(program.execute);
if (hasConfiguration === false) {
    const missingConfigurationMessage = [
        'the configuration to use must be set use the -x flag to define configuration;',
        ' use the --help for help'
    ].join('');
    throw new Error(missingConfigurationMessage);
}
//holds the additional configuration that will be added to crawlConfig
const additionalConfig = {};
//set the object that will override the default crawlConfig
(function (config) {
    //depth: the option value arrives as text (or as `true` when -d is given
    //without a value), so validate it and store a real integer
    if (program.depth !== undefined) {
        const rawDepth = String(program.depth).trim();
        if (!/^\d+$/.test(rawDepth)) {
            throw new Error(`the depth must be a non-negative integer, got: ${program.depth}`);
        }
        config['depth'] = Number.parseInt(rawDepth, 10);
    }
    //urls: only a non-empty list may replace the urls of the configuration;
    //-u without a value yields `true`, which is not a usable url list
    if (program.urls !== undefined) {
        if (!Array.isArray(program.urls) || program.urls.length === 0) {
            throw new Error('the -u flag must be followed by a comma separated list of urls');
        }
        config['urls'] = program.urls;
    }
})(additionalConfig);
//the action is the file name that holds the crawlConfig
const action = program.execute;
try {
    //set the crawlConfig
    //the command line overrides are merged into a fresh object so the
    //configuration object cached by require() is left untouched
    const crawlConfig = Object.assign({}, require(`./config/${action}`), additionalConfig);
    const Crawler = crawler(crawlConfig);
    Crawler.CrawlAllUrl();
} catch (err) {
    console.error(`An Error occurred: ${err.message}`);
    //report the failure to the calling shell instead of exiting with status 0
    process.exitCode = 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment