-
-
Save mdespuits/d317ba7c02e503b3e8d1fb971f504574 to your computer and use it in GitHub Desktop.
# Script to import books from Instapaper to Airtable.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Airrecord-backed record for the "Books" table that can look itself up on
# Goodreads and copy the Goodreads metadata into its own fields.
class Book < Airrecord::Table
  # Linked "Endorser" table, referenced by the has_many association below.
  class Endorser < Airrecord::Table
    self.base_key = ""
    self.table_name = "Endorser"
  end

  self.base_key = ""
  self.table_name = "Books"

  has_many :endorsements, class: 'Book::Endorser', column: 'Endorsements'

  # Goodreads shelf names that are discarded before shelves are mapped to categories.
  GOODREADS_BLACKLIST = %w(
    to-read favorites currently-reading owned
    series favourites re-read owned-books
    books-i-own wish-list si audiobook
    book-club ebook kindle to-buy
  ).freeze

  # Capitalized Goodreads shelf name => canonical category name.
  # The lookup is applied once per shelf, so every value must itself be an
  # entry of CATEGORIES (or deliberately absent from it).
  GOODREADS_MERGE = {
    "Non-fiction" => "Nonfiction",
    "Classic" => "Classics",
    "Cookbook" => "Cooking",
    "Cookbooks" => "Cooking",
    "Biography" => "Memoir",
    "Biographies" => "Memoir",
    "Autobiography" => "Memoir",
    "Auto-biography" => "Memoir",
    "Sci-fi" => "Science Fiction",
    "Scifi" => "Science Fiction",
    "Management" => "Leadership",
    "Self-help" => "Personal Development",
    "Selfhelp" => "Personal Development",
    "Personal-development" => "Personal Development",
    "Self-improvement" => "Personal Development",
    "Science-fiction" => "Science Fiction",
    # Was "Young-adult", which is not a category and is never re-merged,
    # so "ya" shelves were silently dropped.
    "Ya" => "Young Adult",
    "Tech" => "Technology",
    "Young-adult" => "Young Adult",
    "Computer-science" => "Programming",
    "Investing" => "Economics",
    "Fitness" => "Health",
    "Food" => "Cooking",
    "Finance" => "Economics",
    "Software" => "Programming",
    "Literature" => "Classics",
  }.freeze

  # The only category names that goodreads_categories may return.
  CATEGORIES = [
    "Business", "Psychology", "Science", "Personal Development", "Philosophy",
    "History", "Fiction", "Memoir", "Leadership", "Classics", "Economics",
    "Cooking", "Programming", "Health", "Politics", "Technology", "Science Fiction",
    "Entrepreneurship", "Design", "Writing", "Fantasy", "Young Adult", "Nonfiction",
  ].freeze

  # Memoized Goodreads client shared by all records. It stays public because
  # instances reach it through `self.class.goodreads_client`; a bare `private`
  # would not have applied to a `def self.` method anyway.
  def self.goodreads_client
    @client ||= Goodreads::Client.new(api_key: '', api_secret: '')
  end

  # Searches Goodreads by ISBN when present, otherwise by the quoted title.
  #
  # Returns the id of the first search result whose author name is close to
  # this record's "Author" (see character_difference?), else the id of the
  # first result, else nil when the search has no works.
  def goodreads_id
    query = self["ISBN"] || "\"#{self["Title"]}\""
    search = goodreads_client.search_books(query)
    return unless search.results.respond_to?(:work)

    # `work` may be a single result or a list; normalize to an array.
    matches = [search.results.work].flatten
    author = self["Author"]
    best_match = author && matches.find { |match|
      character_difference?(match["best_book"]["author"]["name"], author)
    }
    best_match ||= matches.first
    return unless best_match

    best_match.best_book.id
  end

  # Returns the Goodreads book for goodreads_id, or nil when no id was found.
  # Only a found book is memoized; a failed lookup is retried on the next call.
  def goodreads_book
    @book ||= begin
      id = goodreads_id
      id && goodreads_client.book(id)
    end
  end

  # Maps the book's popular Goodreads shelves to names from CATEGORIES.
  #
  # n - how many non-blacklisted shelves to consider (default 5).
  #
  # Returns a de-duplicated array of category names; empty when the book was
  # not found or exposes no usable shelves.
  def goodreads_categories(n = 5)
    book = goodreads_book
    return [] unless book

    popular = book.popular_shelves
    return [] if popular.blank?

    shelves = popular.shelf
    return [] unless shelves.first.respond_to?(:name)

    shelves.map(&:name)
      .reject { |name| GOODREADS_BLACKLIST.include?(name) }
      .first(n)
      .map { |name|
        capitalized = name.capitalize
        GOODREADS_MERGE.fetch(capitalized, capitalized)
      }
      .select { |name| CATEGORIES.include?(name) }
      .uniq
  end

  # Overwrites this record's fields with Goodreads data, prints a colored
  # diff of the changes to stderr, and then creates/saves the record unless
  # the change looks wrong or the ISBN already exists.
  #
  # prevent_duplicates_from - records whose "ISBN" is compared against this
  #                           record's ISBN; a match skips persisting.
  #
  # Returns nil when the book cannot be found; otherwise the value of the
  # branch that ran (Rollbar.warn, the stderr write, create or save).
  def populate_from_goodreads(prevent_duplicates_from: [])
    book = goodreads_book
    unless book
      $stderr.puts "Unable to find book #{self["Title"]}"
      return
    end

    before = serializable_fields
    authors = assign_goodreads_fields(book)
    difference = HashDiff.diff(before, serializable_fields)

    $stderr.puts "\x1b[35m#{before["Title"]}\x1b[0m"
    flagged = report_changes(difference, authors)

    # NOTE(review): when both ISBNs are nil this also counts as a duplicate.
    if flagged
      Rollbar.warn("Skipping book", title: self["Title"])
    elsif prevent_duplicates_from.any? { |other| other["ISBN"] == self["ISBN"] }
      $stderr.puts "Skipping #{self["Title"]} due to duplicate"
    elsif new_record?
      create
    else
      save
    end
  end

  private

  def goodreads_client
    self.class.goodreads_client
  end

  # Copies the Goodreads attributes onto this record and returns the
  # normalized list of Goodreads authors.
  def assign_goodreads_fields(book)
    # `author` may be a single entry or a list; normalize to an array.
    authors = [book.authors.author].flatten

    self["Title"] = book.title
    self["ISBN"] = book.isbn13 || self["ISBN"]
    # Choose the year before converting: `x.to_s || y.to_s` never falls back
    # because to_s always returns a (possibly empty) string.
    self["Publication Year"] = (book.work.original_publication_year || book.publication_year).to_s
    self["Goodreads Rating"] = book.average_rating
    self["Pages"] = book.num_pages
    self["Author"] = authors.first.name
    self["Categories"] = goodreads_categories.sort
    self["Goodreads Ratings"] = book.work.ratings_count

    authors
  end

  # Prints each diff entry to stderr and returns true when the update should
  # not be persisted: the author changed beyond the similarity threshold, or
  # the author was rejected and the new title does not start with the old one.
  def report_changes(difference, authors)
    flagged = false
    author_ok = true

    difference.each do |(type, key, prev, new)|
      changed = type == "~"

      if changed && key == "Author"
        unless authors.any? { |author| character_difference?(author.name, prev) }
          $stderr.puts "Author changed too much"
          flagged = true
          author_ok = false
        end
      end

      if changed && key == "Title"
        unless new.downcase.start_with?(prev.downcase) || author_ok
          $stderr.puts "New title '#{new}' didn't start with old title '#{prev}'"
          flagged = true
        end
      end

      if changed
        $stderr.puts "\x1b[34m#{type} #{key}: \x1b[31m#{prev} => \x1b[32m#{new}\x1b[0m"
      elsif type == "+"
        # For additions the third tuple element holds the added value, so
        # `prev` is what gets printed here.
        $stderr.puts "\x1b[34m#{type} #{key}: \x1b[32m#{prev}\x1b[0m"
      end
    end

    flagged
  end

  # Despite the name, returns true when the two strings are *similar*: each
  # string has at most n characters (counted with repeats) that do not occur
  # anywhere in the other string.
  def character_difference?(a, b, n = 4)
    a_chars = a.chars
    b_chars = b.chars
    (a_chars - b_chars).size <= n && (b_chars - a_chars).size <= n
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Imports books from saved Instapaper bookmarks (Amazon / Goodreads links)
# and from highlights fetched through Readwise.
class BookImport
  AMAZON_HOST = /\A(www\.)?amazon\.(com|ca)/
  GOODREADS_URL = /goodreads\.com/
  # Third capture group is the ISBN/ASIN value following the bold label.
  AMAZON_ISBN = /(ISBN|ASIN)(-13|-10)?:\s*<\/b>\s*(\w{10,13})/

  # Walks up to 500 Instapaper bookmarks; for each Amazon or Goodreads link
  # it fetches the page, extracts the ISBN and hands it to
  # create_record_from_isbn.
  #
  # Bookmarks pointing elsewhere are ignored without being fetched. A page
  # on which no ISBN can be found is reported on stderr and skipped instead
  # of raising NoMethodError and aborting the whole run.
  #
  # Returns the non-nil results of create_record_from_isbn.
  def instapaper
    InstapaperClient.bookmarks(limit: 500).to_enum(:each).map { |bookmark|
      uri = URI(bookmark.url)
      amazon = uri.host =~ AMAZON_HOST
      next unless amazon || bookmark.url =~ GOODREADS_URL

      body = fetch_body(uri)
      isbn = amazon ? amazon_isbn(body) : goodreads_isbn(body)
      unless isbn
        $stderr.puts "Unable to find ISBN for #{bookmark.url}"
        next
      end

      create_record_from_isbn(isbn, bookmark.bookmark_id)
    }.compact
  end

  # Imports books mentioned in highlights; see books_from_highlights.
  def kindle
    books_from_highlights
  end

  private

  # TODO: Do like what we do with words, where it puts the source multiple times
  # TODO: Refactor to be consistent with Words?
  # It does work though :)
  #
  # For every highlight whose note starts with "book" (optionally preceded by
  # a dot, case-insensitive), treats the highlight text as a book title and
  # populates a new Book from Goodreads, skipping ISBNs already in Book.all.
  def books_from_highlights
    sources = JSON.parse(Readwise.get("/munger").body)["data"]
    existing_books = Book.all

    sources.each do |source|
      source["highlights"].each do |highlight|
        next unless highlight["note"] =~ /\A\.?book/i

        title = highlight["highlight"]
        next if title == "Randomness)." # ugh can't get rid of it

        Book.new("Title" => title).populate_from_goodreads(prevent_duplicates_from: existing_books)
      end
    end
  end

  # Populates a Book for the ISBN, then deletes the Instapaper bookmark.
  # NOTE(review): the bookmark is deleted even when the Goodreads lookup
  # found nothing — confirm that is intended.
  def create_record_from_isbn(isbn, bookmark_id)
    Book.new("ISBN" => isbn).populate_from_goodreads
    InstapaperClient.delete_bookmark(bookmark_id)
  end

  # Fetches the page body for uri using the per-host client.
  def fetch_body(uri)
    client_for("#{uri.scheme}://#{uri.hostname}").get(uri.path).body
  end

  # Returns the ISBN/ASIN found in an Amazon product page, or nil.
  def amazon_isbn(html)
    match = html.match(AMAZON_ISBN)
    match && match[3]
  end

  # Returns the content of the books:isbn meta tag of a Goodreads page, or nil.
  def goodreads_isbn(html)
    meta = Nokogiri::HTML(html).at('meta[property="books:isbn"]')
    meta && meta["content"]
  end

  # Returns a memoized Faraday connection for the given scheme://host,
  # configured with retries, redirect following and a browser User-Agent.
  def client_for(host)
    @clients ||= {}
    @clients[host] ||= Faraday.new(:url => host) do |b|
      b.request :retry, max: 10, interval: 1, interval_randomness: 2, backoff_factor: 2, exceptions: Semian::NetHTTP::DEFAULT_ERRORS
      b.use FaradayMiddleware::FollowRedirects
      b.adapter :net_http_persistent
      b.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36"
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Instapaper API client referenced as the InstapaperClient constant.
# All four credentials are blank here and must be filled in before use.
InstapaperClient = Instapaper::Client.new do |config|
  config.consumer_key = ""
  config.consumer_secret = ""
  config.oauth_token = ''
  config.oauth_token_secret = '' # check docs, need to email them for this
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment