Skip to content

Instantly share code, notes, and snippets.

@elbakramer
Created October 23, 2018 10:45
Show Gist options
  • Save elbakramer/612ebdf6ee5b278f430725413a22a0c8 to your computer and use it in GitHub Desktop.
Save elbakramer/612ebdf6ee5b278f430725413a22a0c8 to your computer and use it in GitHub Desktop.
import org.openqa.selenium.WebDriver
import org.openqa.selenium.remote.RemoteWebDriver
import org.openqa.selenium.chrome.ChromeDriver
import org.openqa.selenium.chrome.ChromeOptions
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import java.io.File
import java.io.PrintWriter
import java.net.URI
/**
 * Loads a page in headless Chrome (through Selenium) and flattens it, together
 * with the rendered content of its frames, into a single static HTML string.
 *
 * Every `frame`/`iframe` element is turned into a `div` that contains the
 * `html` element of the document rendered inside that frame; `frameset`
 * elements are turned into `div`s as well.
 *
 * Constructing an instance starts a Chrome session; call [[quit]] to end it.
 *
 * @param chromeDriverPath value stored in the `webdriver.chrome.driver` system
 *                         property before the driver is created
 */
class StaticPageDumper(chromeDriverPath: String = "/usr/local/bin/chromedriver") {

  /** The Chrome session shared by all calls on this instance. */
  private val driver: RemoteWebDriver = {
    System.setProperty("webdriver.chrome.driver", chromeDriverPath)
    val options = new ChromeOptions()
    options.addArguments("headless", "window-size=1920x1080", "disable-gpu")
    new ChromeDriver(options)
  }

  /**
   * The URL currently reported by the driver, parsed as a [[java.net.URI]].
   *
   * NOTE(review): the WebDriver specification defines "Get Current URL" in terms
   * of the top-level browsing context, so this is presumably the address of the
   * outermost page even while the driver is switched into a frame — confirm.
   */
  private def getCurrentURI(): URI = new URI(driver.getCurrentUrl())

  /**
   * Computes the value stored in a converted frame's `data-src` attribute.
   *
   * A `src` starting with "/" is resolved against [[getCurrentURI]]; any other
   * value is kept verbatim. If the current URL or the `src` cannot be parsed as
   * a URI, the raw `src` is kept instead of aborting the whole dump.
   */
  private def resolveFrameSrc(src: String): String =
    if (src.startsWith("/")) {
      scala.util.Try(getCurrentURI().resolve(src).toString()).getOrElse(src)
    } else {
      src
    }

  /**
   * Builds the static form of the document shown in the driver's current frame
   * and recursively does the same for each of its child frames.
   *
   * @param parentFrame the `frame`/`iframe` element (in the parent's parsed
   *                    document) that hosts the current frame, or `None` for the
   *                    top-level document. When given, it is renamed to `div`,
   *                    its `src` is moved to `data-src`, and this document's
   *                    `html` element is appended to it.
   * @return the parsed document of the current frame (for a child frame its
   *         `html` element has already been moved into `parentFrame`)
   */
  private def getStaticPageElementOfCurrentFrame(parentFrame: Option[Element] = None): Element = {
    // Scroll to the bottom before taking the snapshot. The guard skips the
    // scroll for documents without a body instead of raising a script error.
    driver.executeScript(
      "if (document.body) { window.scrollTo(0, document.body.scrollHeight); }")
    val document = Jsoup.parse(driver.getPageSource())

    // Frames are addressed by position: the i-th match of the selector is
    // assumed to correspond to the driver's frame index i.
    val frameElements = document.select("frame, iframe")
    for (i <- 0 until frameElements.size()) {
      driver.switchTo.frame(i)
      getStaticPageElementOfCurrentFrame(Some(frameElements.get(i)))
      driver.switchTo.parentFrame()
    }

    val frameSetElements = document.getElementsByTag("frameset")
    for (i <- 0 until frameSetElements.size()) {
      frameSetElements.get(i).tagName("div")
    }

    parentFrame.foreach { frame =>
      frame.tagName("div")
      if (frame.attributes().hasKey("src")) {
        frame.attr("data-src", resolveFrameSrc(frame.attributes().get("src")))
        frame.removeAttr("src")
      }
      frame.appendChild(document.selectFirst("html"))
    }

    document
  }

  /**
   * Loads `url` and returns the flattened page as an HTML string.
   *
   * Calls are serialized on this instance because they share one driver.
   *
   * @param url address of the page to load
   * @return outer HTML of the flattened top-level document
   */
  def getStaticPageSource(url: String): String = synchronized {
    driver.get(url)
    driver.switchTo.defaultContent()
    getStaticPageElementOfCurrentFrame().outerHtml()
  }

  /** Quits the underlying driver. */
  def quit(): Unit = {
    driver.quit()
  }
}
/**
 * Example entry point: dumps one page to a UTF-8 encoded file.
 *
 * Optional arguments: `args(0)` is the URL to dump (default
 * "https://github.com") and `args(1)` is the output file path (default
 * "index.html").
 */
object ChromeDriverExample {
  def main(args: Array[String]): Unit = {
    val url = args.headOption.getOrElse("https://github.com")
    val outputPath = args.lift(1).getOrElse("index.html")

    val dumper = new StaticPageDumper()
    try {
      val pageSource = dumper.getStaticPageSource(url)
      val writer = new PrintWriter(new File(outputPath), "UTF-8")
      try {
        writer.write(pageSource)
      } finally {
        // Close the file even if writing fails.
        writer.close()
      }
    } finally {
      // Always end the browser session, including when loading or writing throws.
      dumper.quit()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment