Skip to content

Instantly share code, notes, and snippets.

@dovydasvenckus
Last active January 16, 2018 20:53
Show Gist options
  • Save dovydasvenckus/72269a7429ffbc3967b9ed9c460cd5ec to your computer and use it in GitHub Desktop.
Save dovydasvenckus/72269a7429ffbc3967b9ed9c460cd5ec to your computer and use it in GitHub Desktop.
Pocket crawler
#!/usr/bin/env groovy
@Grab(group='org.seleniumhq.selenium', module='selenium-server', version='3.6.0')
@Grab(group='org.seleniumhq.selenium', module='selenium-java', version='3.6.0')
@Grab(group='org.seleniumhq.selenium', module='selenium-chrome-driver', version='3.6.0')
import org.openqa.selenium.By
import org.openqa.selenium.JavascriptExecutor
import org.openqa.selenium.WebDriver
import org.openqa.selenium.WebElement
import org.openqa.selenium.firefox.FirefoxDriver
import org.openqa.selenium.chrome.*
// Pocket crawler: logs in to Pocket with Chrome, scrolls the list until no new
// items load, then prints each item's id, title and original URL, followed by
// the total item count.

// Credentials are read from the environment; fail before launching a browser
// if either one is missing, instead of failing later inside sendKeys().
final String USERNAME = System.getenv("POCKET_USERNAME")
final String PASSWORD = System.getenv("POCKET_PASSWORD")
if (!USERNAME || !PASSWORD) {
    throw new IllegalStateException('POCKET_USERNAME and POCKET_PASSWORD environment variables must be set')
}

WebDriver driver = new ChromeDriver()
try {
    // Fill in and submit the login form.
    driver.get('http://www.getpocket.com/login')
    driver.findElement(By.id('feed_id')).sendKeys(USERNAME)
    driver.findElement(By.id('login_password')).sendKeys(PASSWORD)
    driver.findElement(By.className('login-btn-email')).click()
    // Fixed pause to let the page after login render before counting items.
    sleep(3000)

    // Keep scrolling to the bottom until a scroll no longer increases the
    // number of elements with class 'item'.
    int articleCount = driver.findElements(By.className('item')).size()
    while (true) {
        ((JavascriptExecutor) driver).executeScript("window.scrollTo(0, document.body.scrollHeight)")
        sleep(3000)
        int newArticleCount = driver.findElements(By.className('item')).size()
        if (newArticleCount == articleCount) {
            break
        }
        articleCount = newArticleCount
    }

    // Print id, title and the 'url' query parameter of each item's
    // original_url link.
    def items = driver.findElements(By.className('item'))
    items.each { WebElement item ->
        println item.getAttribute('id')
        println item.findElement(By.className('title')).text
        String url = item.findElement(By.className('original_url')).getAttribute('href')
        println getUrlParams(url).url
    }
    println items.size()
} finally {
    // Always close the browser and end the WebDriver session, even when a
    // lookup above throws.
    driver.quit()
}
/**
 * Parses the query string of a URL into a map of URL-decoded parameters.
 *
 * Each '&'-separated pair is split at its first '='. A pair without '=' is
 * stored with an empty-string value. When a key occurs more than once, the
 * last occurrence wins. Keys and values are decoded as UTF-8.
 *
 * @param url absolute URL string; must be parseable by java.net.URL
 * @return map of decoded parameter names to decoded values, in query order;
 *         empty when the URL has no query string
 * @throws MalformedURLException if url is not a valid URL
 */
Map<String, String> getUrlParams(String url) {
    String query = new URL(url).getQuery()
    // getQuery() returns null when the URL has no '?' part.
    if (!query) {
        return [:]
    }
    Map<String, String> params = [:]
    query.split('&').each { String pair ->
        // Skip empty segments produced by inputs such as 'a=1&&b=2'.
        if (!pair) {
            return
        }
        int idx = pair.indexOf('=')
        // substring() avoids Groovy's negative-index range semantics, which
        // mis-slice when '=' is missing, leading or trailing.
        String rawKey = idx < 0 ? pair : pair.substring(0, idx)
        String rawValue = idx < 0 ? '' : pair.substring(idx + 1)
        params[URLDecoder.decode(rawKey, 'UTF-8')] = URLDecoder.decode(rawValue, 'UTF-8')
    }
    return params
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment