Last active
November 7, 2015 23:42
-
-
Save LinZap/17baa298060880aebd90 to your computer and use it in GitHub Desktop.
scroll to end and save web source
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var webdriver = require('selenium-webdriver'), | |
By = require('selenium-webdriver').By, | |
until = require('selenium-webdriver').until, | |
fs = require('fs'); | |
// Hashtag to scrape; it is used both in the page URL and in the output file name.
var keyword = "不是鐵馬";

// Launch a Chrome session.
var driver = new webdriver.Builder().withCapabilities(webdriver.Capabilities.chrome()).build();

// Entry point: open the Facebook landing page.
driver.get('https://www.facebook.com/');

// Main flow. Every step returns a promise so the next step only starts after
// the previous one has settled. `driver.wait(driver.getTitle(), randTime())`
// is used as a synchronisation point between steps, with a randomised timeout
// (in milliseconds) as its second argument.
var flow = driver.wait(driver.getTitle(), randTime())
  // Log in to Facebook.
  .then(function () {
    var ele = {
      email: driver.findElement(By.id('email')),
      pass: driver.findElement(By.id('pass')),
      btnlogin: driver.findElement(By.id('u_0_v'))
    };
    // Placeholder credentials: replace with a real account and password.
    ele.email.sendKeys('帳號');
    ele.pass.sendKeys('密碼');
    ele.btnlogin.click();
    return driver.wait(driver.getTitle(), randTime());
  })
  // Open the hashtag page for the keyword.
  .then(function () {
    driver.get('https://www.facebook.com/hashtag/' + keyword);
    return driver.wait(driver.getTitle(), randTime());
  })
  // Keep scrolling until the end-of-feed element shows up.
  .then(function () {
    return scrollToEnd('//*[@id="u_ps_0_3_a"]/div[1]/div[2]/div/div');
  })
  // Grab the <body> element of the fully loaded page ...
  .then(function () {
    return driver.findElement(By.tagName('body'));
  })
  // ... and read its inner HTML. If anything above failed, report the cause
  // and fall back to a placeholder so a file is still written (best effort).
  .then(function (el) {
    return el.getInnerHtml();
  }, function (err) {
    console.error('Could not fetch the page body:', err);
    return "can not fetch body";
  })
  // Save the page source as an HTML file under ./source/.
  .then(function (html) {
    var dir = __dirname + "/source";
    // writeFileSync does not create missing directories, so make sure it exists.
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir);
    }
    fs.writeFileSync(dir + "/" + keyword + ".html", html, 'utf8');
  })
  // Always close the browser, whether or not the file was written.
  .then(function () {
    return driver.quit();
  }, function (err) {
    console.error('Saving the page source failed:', err);
    return driver.quit();
  });
/*
  scrollToEnd(pattern, [offsetY])

  Scrolls the page down repeatedly until the element matched by `pattern`
  can be found, then resolves with that element.

  @arg1 pattern: XPath of the element that marks the end of the content
  @arg2 offsetY: scroll offset on the Y axis for each step (default 1000)
  @return promise resolved with the end element once it exists

  NOTE: any rejection (from the scroll action or from the lookup) triggers
  another attempt, and there is no upper bound on the number of attempts.
*/
function scrollToEnd(pattern, offsetY) {
  var y = offsetY || 1000;
  return driver.touchActions().scroll({x: 0, y: y}).perform()
    .then(function () {
      return driver.findElement(By.xpath(pattern));
    })
    .then(function (el) {
      return el;
    }, function () {
      // Returning the recursive call is essential: it chains the outer promise
      // onto the retry, so callers receive the end element instead of
      // `undefined` and do not continue before scrolling has finished.
      return scrollToEnd(pattern, y);
    });
}
/*
  randTime([min], [max])

  Returns a random number in the range [min, max), used as a wait timeout
  in milliseconds.

  @arg1 min: lower bound (default 500)
  @arg2 max: upper bound (default 3000)

  A default is applied only when the argument is not a finite number, so an
  explicit 0 is honoured instead of being replaced by the default.
*/
function randTime(min, max) {
  var lower = (typeof min === 'number' && isFinite(min)) ? min : 500,
      upper = (typeof max === 'number' && isFinite(max)) ? max : 3000;
  return Math.random() * (upper - lower) + lower;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Selenium ScrollToEnd
說明
上面這個範例,接續 QuickStart 的範例。
目標是抓取需要不斷觸底載入新資料的狀況。
這邊主要示範如何使用 Promise chain 做不斷觸底的行為。
scrollToEnd(xpath,[offsetY])
xpath 是「結尾元素」的 XPath,offsetY 是每次向下捲動的距離(預設為 1000)。換句話說,當這個元素存在時,表示已經沒有更多資料可以被載入。
FYI
小結
在進行需要類似 recursive 的流程時,需要注意 promise 的規劃,才能讓程式流程符合預期。
撰寫時盡量符合 Selenium 的風格,每個 Step 都回傳 Promise 以便讓後面的程式可以接續運作。