Last active
January 3, 2017 21:20
-
-
Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*** HOME ***/
// Root URL of the site to scrape; the crawl starts at HOME + 'index.html'.
var HOME = 'http://www.plotandesign.net/';

/*** WHERE ***/
// jQuery selectors handed to the in-page extraction functions.
// Container whose children are read by getHome on the index page.
var HOME_CONTENT = '#content .row2 > .main > .wrapper > .col1.cols';
// Anchors whose hrefs are collected by getPostMenu.
var POST_MENU = 'ul.left a';
// Anchors indexed by getMenu to find the next page to visit.
var TOP_MENU = '#header .menu li > a';
// Container whose children are read by getPostOrPages.
var CONTENT_POST = '#content .row2 .main > .wrapper';

/*** DATA ***/
// Scraped results, keyed by the URL they were read from.
var PAGES = {};
// URLs returned by getPostMenu for the index page.
var POST = [];
// Incremented on every startPage call; used as the index into the top menu
// and, once in post mode, into POST.
var stepIndex = 0;
/**
 * Handler assigned to page.onConsoleMessage: echoes a message logged inside
 * the page to this script's console, prefixed with '>'.
 * @param {String} msg Message text
 * @param {*} line (unused)
 * @param {*} source (unused)
 */
function posibleJS (msg, line, source) {
	var marker = '>';
	console.log(marker, msg);
}
/**
 * Handler assigned to page.onAlert: echoes the alert text to this script's
 * console, prefixed with '<'.
 * @param {String} msg Alert text
 */
function posibleAlert (msg) {
	var marker = '<';
	console.log(marker, msg);
}
/**
 * Handler assigned to page.onResourceError: prints the error's `errorString`
 * on the error console.
 * @param {Object} resourceError Object carrying an `errorString` property
 */
function posibleError (resourceError) {
	var reason = resourceError.errorString;
	console.error(reason);
}
/**
 * Extract one post/page from the current document.
 *
 * The function is handed to page.evaluate, so it runs inside the scraped
 * page: it uses the page-global `$` and must stay self-contained (no
 * references to names defined in the PhantomJS script).
 *
 * The children of the `name` container are flattened (DIVs are unwrapped
 * recursively, with <br> turned into paragraph breaks) and filtered down to
 * a whitelist of content tags. Then:
 *   - an H1 becomes `title`,
 *   - the first I element becomes `autor`,
 *   - everything else is appended to `text` as outer HTML.
 * Elements with empty innerHTML are skipped, except IMG, which never has any.
 *
 * @param  {String} name Selector
 * @return {Object}      Post/Pages: { text: String[], title?: String, autor?: String }
 */
function getPostOrPages (name) {
	// Anchored on both ends: unanchored, tags such as SPAN or SCRIPT would
	// match merely because their name contains a "P" or an "I".
	var KEEP = /^(?:P|H[1-6]|UL|I|TABLE|LI|IMG)$/i;

	// Flatten an array-like of nodes into the whitelisted content elements.
	function clean (node) {
		var newNode = [];
		for (var i = 0; i < node.length; i++) {
			var el = node[i];
			if (!el || el.body || !el.tagName) {
				continue;
			}
			if (el.tagName === 'DIV') {
				// Re-parse the DIV's markup so its content is handled like
				// top-level content; <br> becomes a paragraph boundary.
				var html = el.innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>');
				var inner = clean($(html));
				for (var w = 0; w < inner.length; w++) {
					newNode.push(inner[w]);
				}
			} else if (KEEP.test(el.tagName)) {
				newNode.push(el);
			}
		}
		return newNode;
	}

	var child = clean($(name).children());
	var post = { text : [] };
	for (var i = 0; i < child.length; i++) {
		var el = child[i];
		// IMG is a void element, so its innerHTML is always empty; do not
		// let the empty-content filter discard it.
		if (el.tagName !== 'IMG' && !el.innerHTML.length) {
			continue;
		}
		if (el.tagName === 'H1') {
			post.title = el.innerText;
		} else if (el.tagName === 'I' && !post.autor) {
			// Must be tested before the generic branch, otherwise it can
			// never be reached and the author is never recorded.
			post.autor = el.innerText;
		} else {
			post.text.push(el.outerHTML);
		}
	}
	return post;
}
/**
 * Split the home page's content container into a list of posts.
 *
 * The function is handed to page.evaluate, so it runs inside the scraped
 * page and relies on the page-global `$`.
 *
 * Every H2 child starts a new post whose title is the H2's text. Children
 * that follow it are attached to that post: an I element sets `autor`, BR
 * elements are ignored, anything else is appended to `text` as outer HTML
 * (tabs, newlines and whitespace runs stripped, <br> turned into a paragraph
 * break). Children that appear before the first H2 are discarded, and a
 * post whose title is empty is not emitted.
 *
 * @param  {String} name Selector
 * @return {Array}       Element: [{ title: String, text: String[], autor?: String }]
 */
function getHome(name) {
	var data = [],
		post = {},
		child = $(name).children();
	for (var i = 0; i < child.length; i++) {
		var el = child[i];
		if (el.tagName === 'H2') {
			// A new heading closes the post collected so far.
			if (post.title) {
				data.push(post);
			}
			post = { title : el.innerText, text : [] };
		} else if (post.text && el.tagName !== 'BR') {
			// post.text only exists once the first H2 has been seen.
			if (el.tagName === 'I') {
				post.autor = el.innerText;
			} else {
				post.text.push(el.outerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>'));
			}
		}
	}
	// Flush the last post, which no following H2 closed.
	if (post.title) {
		data.push(post);
	}
	return data;
}
/**
 * Collect the `href` of every element matched by a selector, in match order.
 *
 * Handed to page.evaluate, so it runs inside the scraped page and relies on
 * the page-global `$`.
 *
 * @param  {String} name Selector
 * @return {Array}       Array URL
 */
function getPostMenu (name) {
	return Array.prototype.map.call($(name), function (link) {
		return link.href;
	});
}
/**
 * Return the `href` of the i-th element matched by a selector.
 *
 * Handed to page.evaluate, so it runs inside the scraped page and relies on
 * the page-global `$`.
 *
 * When `i` is outside the matched set, `null` is returned instead of
 * throwing on `undefined.href`; the caller treats a falsy result as "no
 * more menu entries".
 *
 * @param  {Number} i   Number in array
 * @param  {String} top Selector
 * @return {String}     Go to (null when there is no i-th entry)
 */
function getMenu (i, top) {
	var entry = $(top)[i];
	return entry ? entry.href : null;
}
/**
 * Load one URL, scrape it into PAGES, work out the next URL and recurse.
 *
 * The crawl has two phases, both driven by the shared `stepIndex` counter
 * (incremented on every call):
 *   1. menu phase (`isPost` falsy): the next URL is the stepIndex-th entry
 *      of the top menu of the page just loaded. The index page additionally
 *      fills POST with the post URLs and is stored via getHome.
 *   2. post phase (`isPost` true): the next URL is POST[stepIndex].
 * When the menu yields no further URL, stepIndex is reset and the post
 * phase starts at POST[0]. When there is no URL left at all, PAGES is
 * printed as JSON and PhantomJS exits.
 *
 * @param {String}  url    Go to; falsy ends the crawl and dumps PAGES
 * @param {Boolean} isPost true while walking the POST list
 */
function startPage(url, isPost) {
	stepIndex++;

	if (!url) {
		console.log(JSON.stringify(PAGES, null, '\t'));
		phantom.exit();
		return;
	}

	// Created only when there is something to open, so the final call does
	// not allocate a page it never uses.
	var page = require('webpage').create();
	page.onConsoleMessage = posibleJS;
	page.onAlert = posibleAlert;
	page.onResourceError = posibleError;

	page.open(url, function (status) {
		if (status !== 'success') {
			console.log('mierda');
			console.error('Could not open ' + url + ' (status: ' + status + ')');
			// Without an explicit exit PhantomJS keeps running forever.
			page.close();
			phantom.exit(1);
			return;
		}

		console.log('Start :', url);
		if (!page.injectJs('jquery.min.js')) {
			console.error('Could not inject jquery.min.js into ' + url);
		}

		// NOTE(review): `phantom.state` is not set anywhere in this file, so
		// this branch looks dead; kept as-is pending confirmation.
		if (phantom.state) {
			phantom.state();
			return;
		}

		var isIndex = url.indexOf('index.html') >= 0;
		var newUrl;

		if (isPost || !isIndex) {
			// Fall back to an empty record if the in-page evaluation yields
			// nothing, so the flag assignments below cannot throw.
			PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST) || { text : [] };
		}

		if (isPost) {
			PAGES[ url ].post = true;
			newUrl = POST[stepIndex];
		} else {
			if (isIndex) {
				POST = page.evaluate(getPostMenu, POST_MENU) || [];
				PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT) || [];
				// NOTE(review): getHome returns an array, and JSON.stringify
				// drops non-index properties of arrays, so `list` does not
				// appear in the final dump.
				PAGES[ url ].list = true;
			} else {
				PAGES[ url ].page = true;
			}
			newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU);
		}

		// Top menu exhausted: switch to walking the collected post URLs.
		if (!newUrl && !isPost) {
			stepIndex = 0;
			newUrl = POST[stepIndex];
			isPost = true;
		}

		// Release this page before moving on; otherwise every visited URL
		// keeps a live WebPage around until the process exits.
		page.close();
		startPage(newUrl, isPost);
	});
}
/*** RUN ***/
// Announce the target site, then begin the crawl at its index page.
console.log('Start :', HOME);
startPage([HOME, 'index.html'].join(''));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2017-01-03T16:17:16 [DEBUG] CookieJar - Created but will not store cookies (use option '--cookies-file=<filename>' to enable persistent cookie storage) | |
2017-01-03T16:17:22 [DEBUG] Set "http" proxy to: "" : 1080 | |
2017-01-03T16:17:22 [DEBUG] Phantom - execute: Configuration | |
2017-01-03T16:17:22 [DEBUG] 0 objectName : "" | |
2017-01-03T16:17:22 [DEBUG] 1 cookiesFile : "" | |
2017-01-03T16:17:22 [DEBUG] 2 diskCacheEnabled : "false" | |
2017-01-03T16:17:22 [DEBUG] 3 maxDiskCacheSize : "-1" | |
2017-01-03T16:17:22 [DEBUG] 4 diskCachePath : "" | |
2017-01-03T16:17:22 [DEBUG] 5 ignoreSslErrors : "false" | |
2017-01-03T16:17:22 [DEBUG] 6 localUrlAccessEnabled : "true" | |
2017-01-03T16:17:22 [DEBUG] 7 localToRemoteUrlAccessEnabled : "false" | |
2017-01-03T16:17:22 [DEBUG] 8 outputEncoding : "UTF-8" | |
2017-01-03T16:17:22 [DEBUG] 9 proxyType : "http" | |
2017-01-03T16:17:22 [DEBUG] 10 proxy : ":1080" | |
2017-01-03T16:17:22 [DEBUG] 11 proxyAuth : ":" | |
2017-01-03T16:17:22 [DEBUG] 12 scriptEncoding : "UTF-8" | |
2017-01-03T16:17:22 [DEBUG] 13 webSecurityEnabled : "true" | |
2017-01-03T16:17:22 [DEBUG] 14 offlineStoragePath : "" | |
2017-01-03T16:17:22 [DEBUG] 15 localStoragePath : "" | |
2017-01-03T16:17:22 [DEBUG] 16 localStorageDefaultQuota : "-1" | |
2017-01-03T16:17:22 [DEBUG] 17 offlineStorageDefaultQuota : "-1" | |
2017-01-03T16:17:22 [DEBUG] 18 printDebugMessages : "true" | |
2017-01-03T16:17:22 [DEBUG] 19 javascriptCanOpenWindows : "true" | |
2017-01-03T16:17:22 [DEBUG] 20 javascriptCanCloseWindows : "true" | |
2017-01-03T16:17:22 [DEBUG] 21 sslProtocol : "default" | |
2017-01-03T16:17:22 [DEBUG] 22 sslCiphers : "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES128-SHA:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-RC4-SHA:ECDHE-RSA-RC4-SHA:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:RC4-SHA:RC4-MD5" | |
2017-01-03T16:17:22 [DEBUG] 23 sslCertificatesPath : "" | |
2017-01-03T16:17:22 [DEBUG] 24 sslClientCertificateFile : "" | |
2017-01-03T16:17:22 [DEBUG] 25 sslClientKeyFile : "" | |
2017-01-03T16:17:22 [DEBUG] 26 sslClientKeyPassphrase : "" | |
2017-01-03T16:17:22 [DEBUG] 27 webdriver : ":" | |
2017-01-03T16:17:22 [DEBUG] 28 webdriverLogFile : "" | |
2017-01-03T16:17:22 [DEBUG] 29 webdriverLogLevel : "INFO" | |
2017-01-03T16:17:22 [DEBUG] 30 webdriverSeleniumGridHub : "" | |
2017-01-03T16:17:22 [DEBUG] Phantom - execute: Script & Arguments | |
2017-01-03T16:17:22 [DEBUG] script: "import/getText.js" | |
2017-01-03T16:17:22 [DEBUG] Phantom - execute: Starting normal mode | |
2017-01-03T16:17:22 [DEBUG] WebPage - setupFrame "" | |
2017-01-03T16:17:22 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r"))) | |
2017-01-03T16:17:23 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r"))) | |
2017-01-03T16:17:23 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r"))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment