Skip to content

Instantly share code, notes, and snippets.

@rodloboz
Created January 26, 2020 23:27
Show Gist options
  • Save rodloboz/e1ad35537fb752a1381b770326091883 to your computer and use it in GitHub Desktop.
Save rodloboz/e1ad35537fb752a1381b770326091883 to your computer and use it in GitHub Desktop.
Goodreads Scrapper
require 'open-uri'
require 'nokogiri'
require 'byebug'
# Value object for a single scraped book.
#
# Example attributes hash:
#   {
#     cover: "goodread.com...",
#     title: "For Whom the Bell Tolls",
#     author: "Ernest Hemingway",
#     rating: 4.56
#   }
class Book
  # Readers so callers can use the stored data (previously only #to_s exposed
  # any of it, and the cover was not reachable at all).
  attr_reader :cover, :title, :author, :rating

  # @param attributes [Hash] may contain :cover, :title, :author and :rating;
  #   missing keys leave the field nil, except :rating which falls back to 0.0
  def initialize(attributes = {})
    @cover = attributes[:cover]
    @title = attributes[:title]
    @author = attributes[:author]
    @rating = attributes[:rating] || 0.0
  end

  # @return [String] one-line summary, e.g. "Title by Author | 4.56 rating"
  def to_s
    "#{@title} by #{@author} | #{@rating} rating"
  end
end
# Scraping with OOP
# GoodreadsBookScrapperService.new("fiction").perform
# Fetches the Goodreads search page for a term and turns the rows of the first
# table on that page into Book objects.
#
# Usage:
#   GoodreadsBookScrapperService.new("fiction").perform.books
class GoodreadsBookScrapperService
  BASE_URL = "https://www.goodreads.com/".freeze

  # @return [Array<Book>] books built by #perform (empty until it has run)
  attr_reader :books

  # @param term [#to_s] search term. It is form-encoded here, so pass it raw
  #   ("science fiction"), not pre-encoded.
  def initialize(term)
    @url = "#{BASE_URL}search?#{URI.encode_www_form(query: term.to_s)}"
    @books = []
  end

  # Downloads and parses the page, then fills #books.
  #
  # @return [GoodreadsBookScrapperService] self, so calls can be chained
  def perform
    build_books
    self
  end

  private

  # Rows that yield no book (see #build_book) are dropped.
  def build_books
    @books = rows.map { |row| build_book(row) }.compact
  end

  # Builds a Book from one table row.
  #
  # @param book [Nokogiri::XML::Node] a <tr> element from the results table
  # @return [Book, nil] nil when the row has no span[itemprop='name'] element
  def build_book(book)
    names = book.search("span[itemprop='name']")
    title = names.first&.text
    return unless title

    Book.new(
      # href of the first link inside the row's first cell; nil if absent
      cover: book.search("td:first a").attr("href")&.value,
      title: title,
      # the second itemprop='name' span is taken as the author
      author: names[1]&.text,
      # to_f reads the leading number of the second span.greyText and yields
      # 0.0 when the span is missing or does not start with a number
      rating: book.search("span.greyText")[1]&.text.to_s.strip.to_f
    )
  end

  # Raw HTML of the search page, fetched once and memoized.
  # URI.open is used because open-uri no longer patches Kernel#open on
  # Ruby 3.0+; the block form closes the stream after reading.
  def doc
    @doc ||= URI.open(@url, &:read)
  end

  def page
    @page ||= Nokogiri::HTML.parse(doc)
  end

  # First <table> on the page, or nil when the page has none.
  def table
    @table ||= page.search("table").first
  end

  # All <tr> elements of the table; empty when there is no table.
  def rows
    @rows ||= table ? table.search("tr") : []
  end
end
# Run the OOP scraper for "fiction" and print its books as a numbered list.
scrapper = GoodreadsBookScrapperService.new("fiction")
scrapper.perform
books = scrapper.books
books.each.with_index(1) do |book, position|
  # String interpolation calls Book#to_s on its own.
  puts "#{position}) #{book}"
end
puts "Finished!"
# Scraping (procedural version)
require 'open-uri'
require 'nokogiri'
require 'byebug'
# Procedural scrape: fetch the Goodreads search page for `term`, turn each row
# of the first table into a hash, then print the results as a numbered list.
BASE_URL = "https://www.goodreads.com/".freeze
term = "fiction"
# Form-encode the term so spaces and reserved characters give a valid URL.
url = "#{BASE_URL}search?#{URI.encode_www_form(query: term)}"
# URI.open is used because open-uri no longer patches Kernel#open on Ruby 3.0+;
# the block form closes the stream after reading.
doc = URI.open(url, &:read)
page = Nokogiri::HTML.parse(doc)
table = page.search("table").first
# A page without a table yields no rows instead of a NoMethodError on nil.
rows = table ? table.search("tr") : []
books = rows.each_with_object([]) do |row, found|
  names = row.search("span[itemprop='name']")
  # Rows without a name span carry no title, so they are skipped.
  next if names.empty?

  found << {
    # href of the first link inside the row's first cell; nil if absent
    cover: row.search("td:first a").attr("href")&.value,
    title: names.first.text,
    # the second itemprop='name' span is taken as the author
    author: names[1]&.text,
    # to_f reads the leading number and yields 0.0 when there is none
    rating: row.search("span.greyText")[1]&.text.to_s.strip.to_f
  }
end
books.each.with_index(1) do |book, position|
  puts "#{position}) #{book[:title]} by #{book[:author]} | #{book[:rating]} rating"
end
puts "Finished!"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment