Last active
January 2, 2025 02:25
-
-
Save abstractart/7b969dd9004cd11e04ba0a031fa4ab6c to your computer and use it in GitHub Desktop.
Free Programming Ebooks - O'Reilly Media. Codeship free ebooks here - https://bit.ly/2oQ0knQ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script generates links for downloading free books from O'Reilly site (http://www.oreilly.com/programming/free) | |
# Requirements | |
# ruby | |
# httparty, nokogiri (gem install httparty nokogiri --no-ri --no-rdoc) | |
# Execute | |
# ruby script.rb > books.md | |
require 'httparty' | |
require 'nokogiri' | |
require 'uri' | |
module OReillySite | |
URL = 'http://www.oreilly.com/' | |
THEME_TITLES = { | |
'programming' => 'Programming', | |
'iot' => 'IoT', | |
'data' => 'Data', | |
'webops-perf' => 'WebOps', | |
'web-platform' => 'Web Development', | |
'security' => 'Security', | |
'business' => 'Business', | |
'design' => 'Design' | |
} | |
THEMES = THEME_TITLES.keys | |
FORMATS = ['pdf', 'epub', 'mobi'] | |
end | |
module OReillySite::URLBuilder | |
def self.theme_url(theme) | |
OReillySite::URL + theme + '/' + 'free/' | |
end | |
def self.download_url(theme, book_filename, format) | |
theme_url(theme) + 'files/' + book_filename + '.' + format | |
end | |
end | |
module OReillySite::Crawler | |
def self.library | |
books = Hash.new { |hash, key| hash[key] = [] } | |
OReillySite::THEMES.each do |t| | |
books[t] = theme_books(OReillySite::URLBuilder.theme_url(t)) | |
end | |
books | |
end | |
private | |
def self.theme_books(theme_url) | |
Nokogiri.HTML(HTTParty.get(theme_url).body) | |
.css("section .product-row a") | |
.map { |link| get_book_info(link) } | |
end | |
def self.get_book_info(link) | |
splitted_url = URI(link.attributes['href'].value).path.split('/') | |
OpenStruct.new( | |
theme: splitted_url[1], | |
title: link.attributes['title'].value, | |
file_name: splitted_url.last.split('.').first | |
) | |
end | |
end | |
def markdown(library) | |
main_header = "# Free Programming Ebooks - O'Reilly Media \n" | |
head_of_contents = ["## Categories"] | |
theme_sections = [] | |
library.each do |theme, books| | |
theme_title = OReillySite::THEME_TITLES[theme] | |
theme_link = URI.escape(theme_title.downcase.sub ' ', '-') | |
head_of_contents << "- [#{theme_title}](##{theme_link})" | |
section_header = "\n## #{theme_title} \n" | |
section_books = books.map do |book| | |
book_title = "\n### #{book.title}" | |
links = OReillySite::FORMATS.map do |fmt| | |
"[#{fmt}](#{OReillySite::URLBuilder.download_url(book.theme, book.file_name, fmt)})" | |
end.join(" ") | |
[book_title, links].join("\n") | |
end.join("\n") | |
theme_sections << [section_header, section_books].join("\n") | |
end | |
[main_header, head_of_contents.join("\n"), theme_sections].join("\n") | |
end | |
def main | |
puts markdown(OReillySite::Crawler.library) | |
end | |
main |
There is one more section:
'design' => 'Design'
OMG!
Thanks a lot, man!
Oh, thank you very much
Created https://github.com/argv1/OReilly-Downloader to keep the list up-to-date
more url 404 not found
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thank you SO MUCH!