hwatkins · December 2, 2011 23:15
diff --git a/ratings.rb b/ratings.rb
 #!/usr/bin/env ruby
 require 'iconv'
 require 'nokogiri'

 # This is a simple script to spider your Netflix paginated "What You've Rated" list.
 # It requires an OS X based system with Ruby 1.9+, Safari, and AppleScript
 #
 # I could not find a way to back up my ratings (for all titles, not just my rental activity)
 # without registering for a Netflix API key or handing my Netflix credentials over to someone
 # who had an API key, so I decided to take a brute force approach and just parse the HTML for
 # every page of my ratings history on Netflix's site.
 #
 # INSTRUCTIONS:
 #  1) Launch Safari, visit Netflix.com, log in if necessary, and visit your "What You've Rated"
 #     page. If the URL for page 1 differs from that of the STARTING_URL variable below, then 
 #     update the variable's value. Leave the browser open on that page.
 #     Ratings are printed to standard output; redirect it to a file if you want to keep them.
 #  2) Set the PAGE_LOAD_GRACE variable equal to the number of seconds that you would like to
 #     give Safari to fully render each individual ratings history page before grabbing the
 #     HTML source for the page.
 #  3) Execute this script ($> ruby <scriptname>) and be careful not to interfere with Safari
 #     while it visits each page in your ratings history.

 # Config
 # URL of the first page of the ratings history; the spider starts from here.
 STARTING_URL    = 'http://movies.netflix.com/MoviesYouveSeen'
 PAGE_LOAD_GRACE = 4 # seconds of grace to allow for Safari to finish rendering a single page of ratings

 # Character encoding converter instance used to force all HTML output into UTF-8 format
 # (UTF-8 to UTF-8 with //IGNORE, so byte sequences that cannot be converted are dropped).
 # NOTE(review): Iconv was removed from Ruby's standard library in 2.0; on newer Rubies
 # `require 'iconv'` presumably needs the separate iconv gem -- confirm before running.
 ICONV           = Iconv.new('UTF-8//IGNORE', 'UTF-8')

 # For the given page's worth of Netflix ratings, glean out the title, genre,
 # and rating for each entry, print them as "title|genre|rating", and record
 # them in the caller's array.
 #
 # html          - HTML source of one ratings page.
 # ratings_array - Array that receives one Hash per movie, with the keys
 #                 :title, :genre and :rating (all Strings).
 #
 # Returns the href of the page's "next" link, or "" when there is no such
 # link (or the link has no href), which tells the caller to stop.
 def glean_movie_info(html, ratings_array=[])
  page = Nokogiri::HTML(html)
  page.xpath('//table//tbody//tr').each do |row|
   title = row.xpath('.//td[@class="cell-title"]').text.strip
   genre = row.xpath('.//td[@class="cell-genre"]').text.strip
   star_text = row.xpath('.//td[@class="cell-starbar"]//span[contains(@class,"stbrMaskFg")]').text.strip
   # The star text is expected to look like "<label>: <value>". A row without
   # it yields nil from split()[1]; to_s keeps that from raising NoMethodError.
   rating = star_text.split(":")[1].to_s.strip
   # Rows carrying neither a title nor a rating are not movie entries.
   next if title.empty? && rating.empty?
   ratings_array << { title: title, genre: genre, rating: rating }
   puts "#{title}|#{genre}|#{rating}"
  end
  next_link = page.xpath('//a[contains(@class, "next")]').first
  # to_s turns a missing href attribute into "" so callers always get a String.
  next_link.nil? ? "" : next_link["href"].to_s
 end

 # Obtain the HTML source for the given URL by driving Safari with AppleScript.
 #
 # Safari is told to load the URL in document 1, the script waits
 # PAGE_LOAD_GRACE seconds, copies the document's source to the clipboard,
 # and the clipboard is then read back with pbpaste.
 #
 # url - URL to load (converted with to_s).
 #
 # Returns the captured output passed through ICONV. When osascript exits
 # unsuccessfully, pbpaste is not run and only osascript's own output is used.
 # Raises Errno::ENOENT if the osascript executable cannot be found.
 def fetch_html(url)
  # Escape backslashes and double quotes so the URL cannot terminate the
  # AppleScript string literal it is embedded in.
  safe_url = url.to_s.gsub(/[\\"]/) { |ch| "\\#{ch}" }
  applescript = <<-EOF
    tell application "Safari"
      activate
      set url of document 1 to "#{safe_url}"
      delay #{PAGE_LOAD_GRACE}
      set htmlSource to source of document 1
      set the clipboard to htmlSource as text
    end tell
  EOF
  # The argument-array form bypasses the shell, so a single quote in the URL
  # can no longer break out of the quoted -e argument.
  html = IO.popen(['osascript', '-e', applescript]) { |io| io.read }
  html << `pbpaste` if $?.success?
  # The padding space appended before conversion is stripped again by [0..-2].
  ICONV.iconv(html + ' ')[0..-2]
 end

 # Starting with the first page of ratings, keep gleaning ratings info
 # and moving on to the next page until the last page (which will not
 # have a "next" link at the bottom). The ratings array is handed to
 # glean_movie_info for every page.
 url_to_fetch = STARTING_URL
 ratings = []
 # to_s also ends the loop if a nil URL ever comes back (e.g. a "next" anchor
 # without an href) instead of re-fetching an empty URL forever.
 until url_to_fetch.to_s.empty?
  url_to_fetch = glean_movie_info(fetch_html(url_to_fetch), ratings)
 end
	#!/usr/bin/env ruby
	require 'iconv'
	require 'nokogiri'

	# This is a simple script to spider your Netflix paginated "What You've Rated" list.
	# It requires an OS X based system with Ruby 1.9+, Safari, and AppleScript
	#
	# I could not find a way to back up my ratings (for all titles, not just my rental activity)
	# without registering for a Netflix API key or handing my Netflix credentials over to someone
	# who had an API key, so I decided to take a brute force approach and just parse the HTML for
	# every page of my ratings history on Netflix's site.
	#
	# INSTRUCTIONS:
	# 1) Launch Safari, visit Netflix.com, log in if necessary, and visit your "What You've Rated"
	# page. If the URL for page 1 differs from that of the STARTING_URL variable below, then
	# update the variable's value. Leave the browser open on that page.
	# Ratings are printed to standard output; redirect it to a file if you want to keep them.
	# 2) Set the PAGE_LOAD_GRACE variable equal to the number of seconds that you would like to
	# give Safari to fully render each individual ratings history page before grabbing the
	# HTML source for the page.
	# 3) Execute this script ($> ruby <scriptname>) and be careful not to interfere with Safari
	# while it visits each page in your ratings history.

	# Config
	# URL of the first page of the ratings history; the spider starts from here.
	STARTING_URL = 'http://movies.netflix.com/MoviesYouveSeen'
	PAGE_LOAD_GRACE = 4 # seconds of grace to allow for Safari to finish rendering a single page of ratings

	# Character encoding converter instance used to force all HTML output into UTF-8 format
	# (UTF-8 to UTF-8 with //IGNORE, so byte sequences that cannot be converted are dropped).
	# NOTE(review): Iconv was removed from Ruby's standard library in 2.0; on newer Rubies
	# `require 'iconv'` presumably needs the separate iconv gem -- confirm before running.
	ICONV = Iconv.new('UTF-8//IGNORE', 'UTF-8')

	# For the given page's worth of Netflix ratings, glean out the title, genre,
	# and rating for each entry, print them as "title|genre|rating", and record
	# them in the caller's array.
	#
	# html          - HTML source of one ratings page.
	# ratings_array - Array that receives one Hash per movie, with the keys
	#                 :title, :genre and :rating (all Strings).
	#
	# Returns the href of the page's "next" link, or "" when there is no such
	# link (or the link has no href), which tells the caller to stop.
	def glean_movie_info(html, ratings_array=[])
	  page = Nokogiri::HTML(html)
	  page.xpath('//table//tbody//tr').each do |row|
	    title = row.xpath('.//td[@class="cell-title"]').text.strip
	    genre = row.xpath('.//td[@class="cell-genre"]').text.strip
	    star_text = row.xpath('.//td[@class="cell-starbar"]//span[contains(@class,"stbrMaskFg")]').text.strip
	    # The star text is expected to look like "<label>: <value>". A row without
	    # it yields nil from split()[1]; to_s keeps that from raising NoMethodError.
	    rating = star_text.split(":")[1].to_s.strip
	    # Rows carrying neither a title nor a rating are not movie entries.
	    next if title.empty? && rating.empty?
	    ratings_array << { title: title, genre: genre, rating: rating }
	    puts "#{title}|#{genre}|#{rating}"
	  end
	  next_link = page.xpath('//a[contains(@class, "next")]').first
	  # to_s turns a missing href attribute into "" so callers always get a String.
	  next_link.nil? ? "" : next_link["href"].to_s
	end

	# Obtain the HTML source for the given URL by driving Safari with AppleScript.
	#
	# Safari is told to load the URL in document 1, the script waits
	# PAGE_LOAD_GRACE seconds, copies the document's source to the clipboard,
	# and the clipboard is then read back with pbpaste.
	#
	# url - URL to load (converted with to_s).
	#
	# Returns the captured output passed through ICONV. When osascript exits
	# unsuccessfully, pbpaste is not run and only osascript's own output is used.
	# Raises Errno::ENOENT if the osascript executable cannot be found.
	def fetch_html(url)
	  # Escape backslashes and double quotes so the URL cannot terminate the
	  # AppleScript string literal it is embedded in.
	  safe_url = url.to_s.gsub(/[\\"]/) { |ch| "\\#{ch}" }
	  applescript = <<-EOF
	    tell application "Safari"
	      activate
	      set url of document 1 to "#{safe_url}"
	      delay #{PAGE_LOAD_GRACE}
	      set htmlSource to source of document 1
	      set the clipboard to htmlSource as text
	    end tell
	  EOF
	  # The argument-array form bypasses the shell, so a single quote in the URL
	  # can no longer break out of the quoted -e argument.
	  html = IO.popen(['osascript', '-e', applescript]) { |io| io.read }
	  html << `pbpaste` if $?.success?
	  # The padding space appended before conversion is stripped again by [0..-2].
	  ICONV.iconv(html + ' ')[0..-2]
	end

	# Starting with the first page of ratings, keep gleaning ratings info
	# and moving on to the next page until the last page (which will not
	# have a "next" link at the bottom). The ratings array is handed to
	# glean_movie_info for every page.
	url_to_fetch = STARTING_URL
	ratings = []
	# to_s also ends the loop if a nil URL ever comes back (e.g. a "next" anchor
	# without an href) instead of re-fetching an empty URL forever.
	until url_to_fetch.to_s.empty?
	  url_to_fetch = glean_movie_info(fetch_html(url_to_fetch), ratings)
	end
No results found