Created
January 26, 2020 23:27
-
-
Save rodloboz/e1ad35537fb752a1381b770326091883 to your computer and use it in GitHub Desktop.
Goodreads Scrapper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri'
require 'uri'
require 'nokogiri'
require 'byebug'
# Plain value object for one scraped book.
#
# Built from an attributes hash such as:
#   {
#     cover: "goodread.com...",
#     title: "For Whom the Bell Tolls",
#     author: "Ernest Hemmingway",
#     rating: 4.56
#   }
class Book
  attr_reader :cover, :title, :author, :rating

  # @param attributes [Hash] may contain :cover, :title, :author and :rating;
  #   missing keys leave the attribute nil, except :rating which falls back to 0.0.
  def initialize(attributes = {})
    @cover = attributes[:cover]
    @title = attributes[:title]
    @author = attributes[:author]
    @rating = attributes[:rating] || 0.0
  end

  # @return [String] one-line summary in the form "<title> by <author> | <rating> rating".
  def to_s
    "#{title} by #{author} | #{rating} rating"
  end
end
# Scraping with OOP
# GoodreadsBookScrapperService.new("fiction").perform
# Fetches a Goodreads search page for a term and turns each row of the first
# table on that page into a Book.
#
# Usage:
#   GoodreadsBookScrapperService.new("fiction").perform.books
class GoodreadsBookScrapperService
  BASE_URL = "https://www.goodreads.com/".freeze

  # @return [Array<Book>] books built by the last #perform; empty before that.
  attr_reader :books

  # @param term [String] search term; it is form-encoded before being placed
  #   in the query string, so spaces and characters like "&" are safe.
  def initialize(term)
    @url = "#{BASE_URL}search?#{URI.encode_www_form(query: term)}"
    @books = []
  end

  # Downloads and parses the page, filling #books.
  #
  # @return [GoodreadsBookScrapperService] self, so calls can be chained.
  def perform
    build_books
    self
  end

  private

  def build_books
    @books = rows.map { |row| build_book(row) }
  end

  # Builds a Book from one table row. Every lookup is nil-safe, so a row that
  # lacks one of the expected elements yields a Book with that attribute nil
  # (or a 0.0 rating) instead of raising NoMethodError.
  def build_book(row)
    names = row.search("span[itemprop='name']")
    Book.new(
      cover: row.search("td:first a").attr("href")&.value,
      title: names[0]&.text,
      author: names[1]&.text,
      # nil.to_f is 0.0, so a missing span gives a 0.0 rating.
      rating: row.search("span.greyText")[1]&.text&.strip.to_f
    )
  end

  # Raw response body. URI.open replaces the bare Kernel#open on a URL, which
  # Ruby 3.0 removed; the block form closes the handle once the body is read.
  def doc
    @doc ||= URI.open(@url, &:read)
  end

  def page
    @page ||= Nokogiri::HTML.parse(doc)
  end

  # First <table> on the page, or nil when the page has none.
  def table
    @table ||= page.search("table").first
  end

  # Rows of the first table; an empty list when there is no table.
  def rows
    @rows ||= table ? table.search("tr") : []
  end
end
# Scrape the "fiction" search results and print them as a numbered list.
scrapper = GoodreadsBookScrapperService.new("fiction").perform
books = scrapper.books
# with_index(1) numbers from 1; interpolation calls Book#to_s implicitly.
books.each.with_index(1) do |book, position|
  puts "#{position}) #{book}"
end
puts "Finished!"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scraping
require 'open-uri' | |
require 'nokogiri' | |
require 'byebug' | |
# Procedural scraper: downloads the Goodreads search page for a term, reads the
# rows of the first table on it, and prints one numbered line per row.
BASE_URL = "https://www.goodreads.com/"
term = "fiction"
# Form-encode the term so spaces and characters like "&" cannot break the query string.
url = "#{BASE_URL}search?#{URI.encode_www_form(query: term)}"
# URI.open replaces the bare Kernel#open on a URL, which Ruby 3.0 removed;
# the block form closes the handle once the body is read.
doc = URI.open(url, &:read)
page = Nokogiri::HTML.parse(doc)
table = page.search("table").first
# No table on the page means nothing to list rather than a NoMethodError.
rows = table ? table.search("tr") : []
books = rows.map do |row|
  names = row.search("span[itemprop='name']")
  {
    cover: row.search("td:first a").attr("href")&.value,
    title: names[0]&.text,
    author: names[1]&.text,
    # nil.to_f is 0.0, so a missing span gives a 0.0 rating.
    rating: row.search("span.greyText")[1]&.text&.strip.to_f
  }
end
books.each.with_index(1) do |book, position|
  puts "#{position}) #{book[:title]} by #{book[:author]} | #{book[:rating]} rating"
end
puts "Finished!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment