Skip to content

Instantly share code, notes, and snippets.

@josephpconley
Last active January 4, 2016 07:49
Show Gist options
  • Save josephpconley/8590984 to your computer and use it in GitHub Desktop.
Save josephpconley/8590984 to your computer and use it in GitHub Desktop.
package com.josephpconley.books
import com.gargoylesoftware.htmlunit.html.{HtmlAnchor, HtmlImage, HtmlDivision, HtmlPage}
import com.gargoylesoftware.htmlunit.WebClient
import org.apache.http.client.methods.HttpGet
import org.apache.http.HttpStatus
import org.apache.http.impl.client.HttpClients
import org.apache.http.protocol.BasicHttpContext
import org.apache.http.util.EntityUtils
import com.josephpconley.rss.{Feed, Item}
import scala.xml.Elem
import scalax.io.Resource
import java.io.{File, PrintWriter}
/**
 * RSS [[Feed]] whose items are scraped with HtmlUnit from the "New eBooks" page of the site at `baseUrl`.
 *
 * User: joe
 * Date: 10/21/13
 *
 * @param name        exposed as a public `val`; how [[Feed]] consumes it is not visible in this file
 * @param title       exposed as a public `val`; how [[Feed]] consumes it is not visible in this file
 * @param description exposed as a public `val`; how [[Feed]] consumes it is not visible in this file
 * @param link        exposed as a public `val` and also reused as `atomLink`
 * @param baseUrl     URL loaded as the starting page; also used as the prefix of every book URL
 */
class NewEBookFeed(val name: String, val title: String, val description: String, val link: String, baseUrl: String) extends Feed {
  val atomLink: String = link

  lazy val webClient = new WebClient()
  lazy val opts = webClient.getOptions

  // These two calls run in the constructor, so both lazy vals above are forced as soon as an instance is created.
  opts.setCssEnabled(false)
  opts.setJavaScriptEnabled(false)

  /**
   * Builds the HTML fragment used as an item's description: a link wrapping the title and author headings,
   * followed by the cover image.
   *
   * The lines of the XML literal are deliberately kept flush-left: whitespace between the tags is part of
   * the literal, so indenting them would change the generated markup.
   *
   * @param title  book title, rendered inside the `h3`
   * @param author author name, rendered inside the `h4` after "by "
   * @param url    target of the anchor's `href`
   * @param imgSrc value of the image's `src` attribute
   * @return the `div` element as a Scala XML node
   */
  def htmlDescription(title: String, author: String, url: String, imgSrc: String) =
    <div>
<a href={url}>
<h3>{title}</h3>
<h4>by {author}</h4>
</a>
<img src={imgSrc}/>
</div>

  /**
   * Loads `baseUrl`, follows the second anchor whose text starts with "New eBooks", and turns every
   * title/author/cover triple found on the resulting page into an [[Item]].
   *
   * Every call loads the pages again through `webClient`. The three XPath result lists are paired up by
   * position; if they differ in length only the positions present in all three are used, instead of
   * failing the whole feed with an index error.
   *
   * @return one item per book, in page order
   * @throws ArrayIndexOutOfBoundsException if the start page has fewer than two matching "New eBooks" anchors
   */
  def items: Seq[Item] = {
    //go to homepage
    val page: HtmlPage = webClient.getPage(baseUrl)

    //grab the new ebooks page
    val newBookLinks = page.getByXPath("//a[starts-with(text(),'New eBooks')]").toArray()
    // The anchor at index 1 (the second match) is the one that gets clicked, so at least two must exist.
    // Same exception type an unchecked index would raise, but with a message that explains the cause.
    if (newBookLinks.length < 2) {
      throw new ArrayIndexOutOfBoundsException(
        "Expected at least 2 anchors starting with 'New eBooks' on " + baseUrl + " but found " + newBookLinks.length)
    }
    val newBookPage: HtmlPage = newBookLinks(1).asInstanceOf[HtmlAnchor].click()

    val titleDivs = newBookPage.getByXPath("//div[@class='trunc-title-line']").toArray
    val authorDivs = newBookPage.getByXPath("//div[@class='trunc-author-line']").toArray
    val coverImages = newBookPage.getByXPath("//img[@class='lrgImg']").toArray

    //Should see a title of New ebooks with the 20 latest ebooks
    println(newBookPage.getTitleText)
    println(newBookPage.getPage.getUrl.getPath)
    println(titleDivs.length)

    // Only walk positions that exist in all three result lists.
    val bookCount = math.min(titleDivs.length, math.min(authorDivs.length, coverImages.length))

    (0 until bookCount).map { i =>
      // NOTE(review): assumes the first child of the title div is the anchor carrying `title` and `href`
      // attributes; a different first child would make getNamedItem return null here -- confirm against the page.
      val titleAnchor = titleDivs(i).asInstanceOf[HtmlDivision].getFirstChild
      val authorDiv = authorDivs(i).asInstanceOf[HtmlDivision]

      // Named bookTitle/bookAuthor so the class member `title` is not shadowed.
      val bookTitle = titleAnchor.getAttributes.getNamedItem("title").getNodeValue
      val bookAuthor = authorDiv.getAttribute("title")
      val imgSrc = coverImages(i).asInstanceOf[HtmlImage].getSrcAttribute
      val bookUrl = baseUrl + "/" + titleAnchor.getAttributes.getNamedItem("href").getNodeValue

      // bookUrl is passed twice, filling the last two Item arguments with the same value.
      Item(bookTitle + " by " + bookAuthor, htmlDescription(bookTitle, bookAuthor, bookUrl, imgSrc).toString, bookUrl, bookUrl)
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment