Created
August 1, 2012 14:19
-
-
Save lrowe/3227236 to your computer and use it in GitHub Desktop.
Ruby file causes SublimeLinter to crash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'tempfile' | |
require 'net/ntlm' | |
require 'kconv' | |
require 'webrobots' | |
## | |
# An HTTP (and local disk access) user agent. This class is an implementation | |
# detail and is subject to change at any time. | |
##
# An HTTP (and local disk access) user agent. This class is an implementation
# detail and is subject to change at any time.
class Mechanize::HTTP::Agent

  # :section: Headers

  # Disables If-Modified-Since conditional requests (enabled by default)
  attr_accessor :conditional_requests

  # Is gzip compression of requests enabled?
  attr_accessor :gzip_enabled

  # A hash of request headers to be used for every request
  attr_accessor :request_headers

  # The User-Agent header to send
  attr_reader :user_agent

  # :section: History

  # History of requests made (a Mechanize::History)
  attr_accessor :history

  # :section: Hooks

  # A list of hooks to call after retrieving a response. Hooks are called with
  # the agent and the response returned.
  attr_reader :post_connect_hooks

  # A list of hooks to call before making a request. Hooks are called with
  # the agent and the request to be performed.
  attr_reader :pre_connect_hooks

  # A list of hooks to call to handle the content-encoding of a request.
  attr_reader :content_encoding_hooks

  # :section: HTTP Authentication

  # Credential storage consulted when answering authentication challenges
  attr_reader :auth_store # :nodoc:

  # Per-URI, per-scheme record of realms already authenticated against
  attr_reader :authenticate_methods # :nodoc:

  # Digest challenges previously received, keyed by realm
  attr_reader :digest_challenges # :nodoc:

  # :section: Redirection

  # Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta
  # refresh tags outside of the head element will be followed.
  attr_accessor :follow_meta_refresh

  # Follow an HTML meta refresh that has no "url=" in the content attribute.
  #
  # Defaults to false to prevent infinite refresh loops.
  attr_accessor :follow_meta_refresh_self

  # Controls how this agent deals with redirects. The following values are
  # allowed:
  #
  # :all, true:: All 3xx redirects are followed (default)
  # :permanent:: Only 301 Moved Permanantly redirects are followed
  # false:: No redirects are followed
  attr_accessor :redirect_ok

  # Maximum number of redirects to follow
  attr_accessor :redirection_limit

  # :section: Allowed error codes

  # List of error codes to handle without raising an exception.
  attr_accessor :allowed_error_codes

  # :section: Robots

  # When true, this agent will consult the site's robots.txt for each access.
  attr_reader :robots

  # :section: SSL

  # OpenSSL key password (used when loading a private key from a file)
  attr_accessor :pass

  # :section: Timeouts

  # Set to false to disable HTTP/1.1 keep-alive requests
  attr_accessor :keep_alive

  # Length of time to wait until a connection is opened in seconds
  attr_accessor :open_timeout

  # Length of time to attempt to read data from the server
  attr_accessor :read_timeout

  # :section:

  # The cookies for this agent
  attr_accessor :cookie_jar

  # Responses larger than this will be written to a Tempfile instead of stored
  # in memory. Setting this to nil disables creation of Tempfiles.
  attr_accessor :max_file_buffer

  # :section: Utility

  # The context parses responses into pages (normally the owning Mechanize)
  attr_accessor :context

  # The underlying Net::HTTP::Persistent connection
  attr_reader :http # :nodoc:

  # When set to true mechanize will ignore an EOF during chunked transfer
  # encoding so long as at least one byte was received. Be careful when
  # enabling this as it may cause data loss.
  attr_accessor :ignore_bad_chunking

  # Handlers for various URI schemes
  attr_accessor :scheme_handlers
  # :section:

  ##
  # Creates a new Mechanize HTTP user agent. The user agent is an
  # implementation detail of mechanize and its API may change at any time.
  def initialize
    @allowed_error_codes      = []
    @conditional_requests     = true
    @context                  = nil
    @content_encoding_hooks   = []
    @cookie_jar               = Mechanize::CookieJar.new
    @follow_meta_refresh      = false
    @follow_meta_refresh_self = false
    @gzip_enabled             = true
    @history                  = Mechanize::History.new
    @ignore_bad_chunking      = false
    @keep_alive               = true
    @max_file_buffer          = 100_000 # bodies over ~100 kB spill to a Tempfile
    @open_timeout             = nil
    @post_connect_hooks       = []
    @pre_connect_hooks        = []
    @read_timeout             = nil
    @redirect_ok              = true
    @redirection_limit        = 20
    @request_headers          = {}
    @robots                   = false
    @user_agent               = nil
    @webrobots                = nil

    # HTTP Authentication
    @auth_store           = Mechanize::HTTP::AuthStore.new
    @authenticate_parser  = Mechanize::HTTP::WWWAuthenticateParser.new
    # uri -> auth scheme -> list of realms already authenticated against
    @authenticate_methods = Hash.new do |methods, uri|
      methods[uri] = Hash.new do |realms, auth_scheme|
        realms[auth_scheme] = []
      end
    end
    @digest_auth          = Net::HTTP::DigestAuth.new
    @digest_challenges    = {}

    # SSL
    @pass = nil

    # Unknown schemes raise; http/https/relative/file pass the link through
    @scheme_handlers = Hash.new { |h, scheme|
      h[scheme] = lambda { |link, page|
        raise Mechanize::UnsupportedSchemeError, scheme
      }
    }

    @scheme_handlers['http']     = lambda { |link, page| link }
    @scheme_handlers['https']    = @scheme_handlers['http']
    @scheme_handlers['relative'] = @scheme_handlers['http']
    @scheme_handlers['file']     = @scheme_handlers['http']

    @http = Net::HTTP::Persistent.new 'mechanize'
    @http.idle_timeout = 5
    @http.keep_alive   = 300
  end
## | |
# Adds credentials +user+, +pass+ for +uri+. If +realm+ is set the | |
# credentials are used only for that realm. If +realm+ is not set the | |
# credentials become the default for any realm on that URI. | |
# | |
# +domain+ and +realm+ are exclusive as NTLM does not follow RFC 2617. If | |
# +domain+ is given it is only used for NTLM authentication. | |
def add_auth uri, user, password, realm = nil, domain = nil | |
@auth_store.add_auth uri, user, password, realm, domain | |
end | |
## | |
# USE OF add_default_auth IS NOT RECOMMENDED AS IT MAY EXPOSE PASSWORDS TO | |
# THIRD PARTIES | |
# | |
# Adds credentials +user+, +pass+ as the default authentication credentials. | |
# If no other credentials are available these will be returned from | |
# credentials_for. | |
# | |
# If +domain+ is given it is only used for NTLM authentication. | |
def add_default_auth user, password, domain = nil # :nodoc: | |
@auth_store.add_default_auth user, password, domain | |
end | |
## | |
# Retrieves +uri+ and parses it into a page or other object according to | |
# PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP | |
# +method+ is used to retrieve it, along with the HTTP +headers+, request | |
# +params+ and HTTP +referer+. | |
# | |
# +redirects+ tracks the number of redirects experienced when retrieving the | |
# page. If it is over the redirection_limit an error will be raised. | |
def fetch uri, method = :get, headers = {}, params = [], | |
referer = current_page, redirects = 0 | |
referer_uri = referer ? referer.uri : nil | |
uri = resolve uri, referer | |
uri, params = resolve_parameters uri, method, params | |
request = http_request uri, method, params | |
connection = connection_for uri | |
request_auth request, uri | |
disable_keep_alive request | |
enable_gzip request | |
request_language_charset request | |
request_cookies request, uri | |
request_host request, uri | |
request_referer request, uri, referer_uri | |
request_user_agent request | |
request_add_headers request, headers | |
pre_connect request | |
# Consult robots.txt | |
if robots && uri.is_a?(URI::HTTP) | |
robots_allowed?(uri) or raise Mechanize::RobotsDisallowedError.new(uri) | |
end | |
# Add If-Modified-Since if page is in history | |
page = visited_page(uri) | |
if (page = visited_page(uri)) and page.response['Last-Modified'] | |
request['If-Modified-Since'] = page.response['Last-Modified'] | |
end if(@conditional_requests) | |
# Specify timeouts if given | |
connection.open_timeout = @open_timeout if @open_timeout | |
connection.read_timeout = @read_timeout if @read_timeout | |
request_log request | |
response_body_io = nil | |
# Send the request | |
begin | |
response = connection.request(uri, request) { |res| | |
response_log res | |
response_body_io = response_read res, request, uri | |
res | |
} | |
rescue Mechanize::ChunkedTerminationError => e | |
raise unless @ignore_bad_chunking | |
response = e.response | |
response_body_io = e.body_io | |
end | |
hook_content_encoding response, uri, response_body_io | |
response_body_io = response_content_encoding response, response_body_io if | |
request.response_body_permitted? | |
post_connect uri, response, response_body_io | |
page = response_parse response, response_body_io, uri | |
response_cookies response, uri, page | |
meta = response_follow_meta_refresh response, uri, page, redirects | |
return meta if meta | |
case response | |
when Net::HTTPSuccess | |
if robots && page.is_a?(Mechanize::Page) | |
page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri) | |
end | |
page | |
when Mechanize::FileResponse | |
page | |
when Net::HTTPNotModified | |
log.debug("Got cached page") if log | |
visited_page(uri) || page | |
when Net::HTTPRedirection | |
response_redirect response, method, page, redirects, headers, referer | |
when Net::HTTPUnauthorized | |
response_authenticate(response, page, uri, request, headers, params, | |
referer) | |
else | |
if @allowed_error_codes.include? page.code | |
if robots && page.is_a?(Mechanize::Page) | |
page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri) | |
end | |
page | |
else | |
raise Mechanize::ResponseCodeError.new(page, 'unhandled response') | |
end | |
end | |
end | |
  # URI for a proxy connection (delegates to Net::HTTP::Persistent)
  def proxy_uri
    @http.proxy_uri
  end

  # Retry non-idempotent requests? (delegates to Net::HTTP::Persistent)
  def retry_change_requests
    @http.retry_change_requests
  end

  # Sets whether non-idempotent requests may be retried
  def retry_change_requests= retri
    @http.retry_change_requests = retri
  end
# :section: Headers | |
def user_agent= user_agent | |
@webrobots = nil if user_agent != @user_agent | |
@user_agent = user_agent | |
end | |
  # :section: History

  # Equivalent to the browser back button. Returns the most recent page
  # visited (removing it from the history).
  def back
    @history.pop
  end

  ##
  # Returns the latest page loaded by the agent
  def current_page
    @history.last
  end

  # Maximum number of pages kept in the history
  def max_history
    @history.max_size
  end

  # Sets the maximum number of pages kept in the history
  def max_history=(length)
    @history.max_size = length
  end

  # Returns a visited page for the url passed in, otherwise nil
  def visited_page url
    @history.visited_page resolve url
  end
# :section: Hooks | |
def hook_content_encoding response, uri, response_body_io | |
@content_encoding_hooks.each do |hook| | |
hook.call self, uri, response, response_body_io | |
end | |
end | |
## | |
# Invokes hooks added to post_connect_hooks after a +response+ is returned | |
# and the response +body+ is handled. | |
# | |
# Yields the +context+, the +uri+ for the request, the +response+ and the | |
# response +body+. | |
def post_connect uri, response, body_io # :yields: agent, uri, response, body | |
@post_connect_hooks.each do |hook| | |
begin | |
hook.call self, uri, response, body_io.read | |
ensure | |
body_io.rewind | |
end | |
end | |
end | |
## | |
# Invokes hooks added to pre_connect_hooks before a +request+ is made. | |
# Yields the +agent+ and the +request+ that will be performed to each hook. | |
def pre_connect request # :yields: agent, request | |
@pre_connect_hooks.each do |hook| | |
hook.call self, request | |
end | |
end | |
# :section: Request | |
def connection_for uri | |
case uri.scheme.downcase | |
when 'http', 'https' then | |
return @http | |
when 'file' then | |
return Mechanize::FileConnection.new | |
end | |
end | |
  ##
  # Decodes a gzip-encoded +body_io+. If it cannot be decoded, inflate is
  # tried followed by raising an error.
  #
  # Returns a rewindable IO with the decompressed data. +body_io+ is always
  # closed on the way out.
  def content_encoding_gunzip body_io
    log.debug('gzip response') if log

    zio = Zlib::GzipReader.new body_io
    out_io = auto_io 'mechanize-gunzip', 16384, zio
    zio.finish
    return out_io
  rescue Zlib::Error => gz_error
    log.warn "unable to gunzip response: #{gz_error} (#{gz_error.class})" if
      log

    body_io.rewind
    # skip the 10-byte gzip header before attempting a raw inflate
    body_io.read 10

    begin
      log.warn "trying raw inflate on response" if log
      return inflate body_io, -Zlib::MAX_WBITS
    rescue Zlib::Error => e
      log.error "unable to inflate response: #{e} (#{e.class})" if log
      raise
    end
  ensure
    # do not close a second time if we failed the first time
    # (gz_error is set only when the rescue branch ran)
    zio.close if zio and not (zio.closed? or gz_error)
    body_io.close unless body_io.closed?
  end
  ##
  # Decodes a deflate-encoded +body_io+. If it cannot be decoded, raw inflate
  # is tried followed by raising an error.
  #
  # +body_io+ is always closed via the ensure clause.
  def content_encoding_inflate body_io
    log.debug('deflate body') if log

    # first assume a zlib-wrapped deflate stream
    return inflate body_io
  rescue Zlib::Error
    log.error('unable to inflate response, trying raw deflate') if log
    body_io.rewind

    begin
      # fall back to a raw deflate stream (negative window bits)
      return inflate body_io, -Zlib::MAX_WBITS
    rescue Zlib::Error => e
      log.error("unable to inflate response: #{e}") if log
      raise
    end
  ensure
    body_io.close
  end
def disable_keep_alive request | |
request['connection'] = 'close' unless @keep_alive | |
end | |
def enable_gzip request | |
request['accept-encoding'] = if @gzip_enabled | |
'gzip,deflate,identity' | |
else | |
'identity' | |
end | |
end | |
def http_request uri, method, params = nil | |
case uri.scheme.downcase | |
when 'http', 'https' then | |
klass = Net::HTTP.const_get(method.to_s.capitalize) | |
request ||= klass.new(uri.request_uri) | |
request.body = params.first if params | |
request | |
when 'file' then | |
Mechanize::FileRequest.new uri | |
end | |
end | |
def request_add_headers request, headers = {} | |
@request_headers.each do |k,v| | |
request[k] = v | |
end | |
headers.each do |field, value| | |
case field | |
when :etag then request["ETag"] = value | |
when :if_modified_since then request["If-Modified-Since"] = value | |
when Symbol then | |
raise ArgumentError, "unknown header symbol #{field}" | |
else | |
request[field] = value | |
end | |
end | |
end | |
  # Adds an Authorization header to +request+ when credentials for the base
  # URI of +uri+ are already known from a previous challenge. Digest (plain
  # or IIS-flavoured) takes precedence over Basic.
  def request_auth request, uri
    base_uri = uri + '/'
    schemes = @authenticate_methods[base_uri]

    if realm = schemes[:digest].find { |r| r.uri == base_uri } then
      request_auth_digest request, uri, realm, base_uri, false
    elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then
      request_auth_digest request, uri, realm, base_uri, true
    elsif realm = schemes[:basic].find { |r| r.uri == base_uri } then
      user, password, = @auth_store.credentials_for uri, realm.realm
      request.basic_auth user, password
    end
  end

  # Computes and sets a Digest Authorization header on +request+ from the
  # stored challenge for +realm+. +iis+ selects IIS-compatible digest
  # calculation.
  #
  # NOTE(review): mutates uri.user / uri.password in place before building
  # the header -- confirm callers expect the URI to carry credentials after
  # this call.
  def request_auth_digest request, uri, realm, base_uri, iis
    challenge = @digest_challenges[realm]

    user, password, = @auth_store.credentials_for uri, realm.realm
    uri.user = user
    uri.password = password

    auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis
    request['Authorization'] = auth
  end
def request_cookies request, uri | |
return if @cookie_jar.empty? uri | |
cookies = @cookie_jar.cookies uri | |
return if cookies.empty? | |
request.add_field 'Cookie', cookies.join('; ') | |
end | |
def request_host request, uri | |
port = [80, 443].include?(uri.port.to_i) ? nil : uri.port | |
host = uri.host | |
request['Host'] = [host, port].compact.join ':' | |
end | |
def request_language_charset request | |
request['accept-charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7' | |
request['accept-language'] = 'en-us,en;q=0.5' | |
end | |
# Log specified headers for the request | |
def request_log request | |
return unless log | |
log.info("#{request.class}: #{request.path}") | |
request.each_header do |k, v| | |
log.debug("request-header: #{k} => #{v}") | |
end | |
end | |
# Sets a Referer header. Fragment part is removed as demanded by | |
# RFC 2616 14.36, and user information part is removed just like | |
# major browsers do. | |
def request_referer request, uri, referer | |
return unless referer | |
return if 'https'.casecmp(referer.scheme) == 0 and | |
'https'.casecmp(uri.scheme) != 0 | |
if referer.fragment || referer.user || referer.password | |
referer = referer.dup | |
referer.fragment = referer.user = referer.password = nil | |
end | |
request['Referer'] = referer | |
end | |
def request_user_agent request | |
request['User-Agent'] = @user_agent if @user_agent | |
end | |
  # Resolves +uri+ (a URI, String or nil) against +referer+ into an absolute
  # http/https/file URI. Non-ASCII bytes and unescaped characters are
  # percent-escaped along the way. Raises ArgumentError when no absolute URL
  # can be produced or the resulting scheme is unsupported.
  def resolve(uri, referer = current_page)
    referer_uri = referer && referer.uri
    if uri.is_a?(URI)
      uri = uri.dup
    elsif uri.nil?
      # nil means "the referer page itself"
      if referer_uri
        return referer_uri
      end
      raise ArgumentError, "absolute URL needed (not nil)"
    else
      url = uri.to_s.strip
      if url.empty?
        # an empty URL also points at the referer, minus any fragment
        if referer_uri
          return referer_uri.dup.tap { |u| u.fragment = nil }
        end
        raise ArgumentError, "absolute URL needed (not #{uri.inspect})"
      end

      # percent-escape every byte outside printable ASCII
      url.gsub!(/[^#{0.chr}-#{126.chr}]/o) { |match|
        if RUBY_VERSION >= "1.9.0"
          Mechanize::Util.uri_escape(match)
        else
          begin
            sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C').first)
          rescue ArgumentError
            # workaround for ruby 1.8 with -Ku but ISO-8859-1 characters in
            # URIs. See #227. I can't wait to drop 1.8 support
            sprintf('%%%X', match.unpack('C').first)
          end
        end
      }

      # escape the unescaped runs between already-escaped sequences and '#'
      escaped_url = Mechanize::Util.html_unescape(
        url.split(/((?:%[0-9A-Fa-f]{2})+|#)/).each_slice(2).map { |x, y|
          "#{WEBrick::HTTPUtils.escape(x)}#{y}"
        }.join('')
      )

      begin
        uri = URI.parse(escaped_url)
      rescue
        # last resort: escape the whole thing and try again
        uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_url))
      end
    end

    # relative URIs are dispatched through the 'relative' scheme handler
    scheme = uri.relative? ? 'relative' : uri.scheme.downcase
    uri = @scheme_handlers[scheme].call(uri, referer)

    if referer_uri
      if uri.path.length == 0 && uri.relative?
        uri.path = referer_uri.path
      end
    end

    uri.path = '/' if uri.path.length == 0

    if uri.relative?
      raise ArgumentError, "absolute URL needed (not #{uri})" unless
        referer_uri

      # prefer the page's last absolute <base> element, when present
      if referer.respond_to?(:bases) && referer.parser &&
         (lbase = referer.bases.last) && lbase.uri && lbase.uri.absolute?
        base = lbase
      else
        base = nil
      end

      uri = referer_uri + (base ? base.uri : referer_uri) + uri
      # Strip initial "/.." bits from the path
      uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
    end

    unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
      raise ArgumentError, "unsupported scheme: #{uri.scheme}"
    end

    uri
  end
def resolve_parameters uri, method, parameters | |
case method | |
when :head, :get, :delete, :trace then | |
if parameters and parameters.length > 0 | |
uri.query ||= '' | |
uri.query << '&' if uri.query.length > 0 | |
uri.query << Mechanize::Util.build_query_string(parameters) | |
end | |
return uri, nil | |
end | |
return uri, parameters | |
end | |
  # :section: Response

  # Extracts a pending refresh from +page+ (meta refresh tag) or +response+
  # (Refresh header). Returns [delay, href] when a refresh should be
  # followed, nil otherwise. Self-refreshes are suppressed unless
  # follow_meta_refresh_self is set.
  def get_meta_refresh response, uri, page
    return nil unless @follow_meta_refresh

    if page.respond_to?(:meta_refresh) and
       (redirect = page.meta_refresh.first) then
      # suppress a refresh that points back at this same page
      [redirect.delay, redirect.href] unless
        not @follow_meta_refresh_self and redirect.link_self
    elsif refresh = response['refresh']
      delay, href, link_self = Mechanize::Page::MetaRefresh.parse refresh, uri
      raise Mechanize::Error, 'Invalid refresh http header' unless delay
      [delay.to_f, href] unless
        not @follow_meta_refresh_self and link_self
    end
  end
def response_authenticate(response, page, uri, request, headers, params, | |
referer) | |
www_authenticate = response['www-authenticate'] | |
unless www_authenticate = response['www-authenticate'] then | |
message = 'WWW-Authenticate header missing in response' | |
raise Mechanize::UnauthorizedError.new(page, nil, message) | |
end | |
challenges = @authenticate_parser.parse www_authenticate | |
unless @auth_store.credentials? uri, challenges then | |
message = "no credentials found, provide some with #add_auth" | |
raise Mechanize::UnauthorizedError.new(page, challenges, message) | |
end | |
if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then | |
realm = challenge.realm uri | |
auth_scheme = if response['server'] =~ /Microsoft-IIS/ then | |
:iis_digest | |
else | |
:digest | |
end | |
existing_realms = @authenticate_methods[realm.uri][auth_scheme] | |
if existing_realms.include? realm | |
message = 'Digest authentication failed' | |
raise Mechanize::UnauthorizedError.new(page, challeges, message) | |
end | |
existing_realms << realm | |
@digest_challenges[realm] = challenge | |
elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then | |
existing_realms = @authenticate_methods[uri + '/'][:ntlm] | |
if existing_realms.include?(realm) and not challenge.params then | |
message = 'NTLM authentication failed' | |
raise Mechanize::UnauthorizedError.new(page, challenges, message) | |
end | |
existing_realms << realm | |
if challenge.params then | |
type_2 = Net::NTLM::Message.decode64 challenge.params | |
user, password, domain = @auth_store.credentials_for uri, nil | |
type_3 = type_2.response({ :user => user, :password => password, | |
:domain => domain }, | |
{ :ntlmv2 => true }).encode64 | |
headers['Authorization'] = "NTLM #{type_3}" | |
else | |
type_1 = Net::NTLM::Message::Type1.new.encode64 | |
headers['Authorization'] = "NTLM #{type_1}" | |
end | |
elsif challenge = challenges.find { |c| c.scheme == 'Basic' } then | |
realm = challenge.realm uri | |
existing_realms = @authenticate_methods[realm.uri][:basic] | |
if existing_realms.include? realm then | |
message = 'Basic authentication failed' | |
raise Mechanize::UnauthorizedError.new(page, challenges, message) | |
end | |
existing_realms << realm | |
else | |
message = 'unsupported authentication scheme' | |
raise Mechanize::UnauthorizedError.new(page, challenges, message) | |
end | |
fetch uri, request.method.downcase.to_sym, headers, params, referer | |
end | |
  # Decodes +body_io+ according to the response's Content-Encoding header
  # (none/7bit, deflate, or gzip). Returns a rewound IO with the decoded
  # body; raises Mechanize::Error for unsupported encodings or Zlib
  # failures. A Tempfile-backed +body_io+ that was replaced by a new output
  # IO is deleted on the way out.
  def response_content_encoding response, body_io
    length = response.content_length ||
      case body_io
      when Tempfile, IO then
        body_io.stat.size
      else
        body_io.length
      end

    # nothing to decode for an empty body
    return body_io if length.zero?

    out_io = case response['Content-Encoding']
             when nil, 'none', '7bit' then
               body_io
             when 'deflate' then
               content_encoding_inflate body_io
             when 'gzip', 'x-gzip' then
               content_encoding_gunzip body_io
             else
               raise Mechanize::Error,
                     "unsupported content-encoding: #{response['Content-Encoding']}"
             end

    out_io.flush
    out_io.rewind

    out_io
  rescue Zlib::Error => e
    message = "error handling content-encoding #{response['Content-Encoding']}:"
    message << " #{e.message} (#{e.class})"
    raise Mechanize::Error, message
  ensure
    begin
      # drop the raw tempfile when decoding produced a different IO
      if Tempfile === body_io and
         (StringIO === out_io or out_io.path != body_io.path) then
        body_io.close!
      end
    rescue IOError
      # HACK ruby 1.8 raises IOError when closing the stream
    end
  end
  # Saves cookies from +response+ (Set-Cookie headers) and, for HTML pages,
  # from <meta http-equiv="Set-Cookie"> elements in the page head.
  def response_cookies response, uri, page
    if Mechanize::Page === page and page.body =~ /Set-Cookie/n
      page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
        save_cookies(uri, meta['content'])
      end
    end

    header_cookies = response.get_fields 'Set-Cookie'

    return unless header_cookies

    header_cookies.each do |set_cookie|
      save_cookies(uri, set_cookie)
    end
  end

  # Parses a single +set_cookie+ string for +uri+ and stores each resulting
  # cookie in the cookie jar, logging saves and rejections.
  def save_cookies(uri, set_cookie)
    log = log() # reduce method calls
    Mechanize::Cookie.parse(uri, set_cookie, log) { |c|
      if @cookie_jar.add(uri, c)
        log.debug("saved cookie: #{c}") if log
      else
        log.debug("rejected cookie: #{c}") if log
      end
    }
  end
  # Follows a meta refresh / Refresh header on +page+, if any. Sleeps for the
  # declared delay, pushes the current page into history and fetches the
  # refresh target. Returns nil when there is nothing to follow; raises
  # Mechanize::RedirectLimitReachedError past the redirection limit.
  def response_follow_meta_refresh response, uri, page, redirects
    delay, new_url = get_meta_refresh(response, uri, page)
    return nil unless delay

    # a refresh without a URL re-fetches the current URI
    new_url = new_url ? resolve(new_url, page) : uri

    raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
      redirects + 1 > @redirection_limit

    sleep delay
    @history.push(page, page.uri)
    # an empty Page is used as the referer for the refreshed request
    fetch new_url, :get, {}, [],
          Mechanize::Page.new, redirects + 1
  end
  # Logs the response status line and every response header. A no-op when no
  # logger is configured.
  def response_log response
    return unless log

    log.info("status: #{response.class} #{response.http_version} " \
             "#{response.code} #{response.message}")

    response.each_header do |k, v|
      log.debug("response-header: #{k} => #{v}")
    end
  end

  # Hands +response+ and its body off to the context (normally Mechanize)
  # for parsing into a page or other object via PluggableParser.
  def response_parse response, body_io, uri
    @context.parse uri, response, body_io
  end
  # Streams the body of +response+ into an IO: a StringIO for small bodies,
  # promoted to an unlinked Tempfile once the max_file_buffer threshold is
  # crossed. Returns the rewound IO. Raises ChunkedTerminationError for a
  # truncated chunked body, ResponseReadError for connection failures,
  # ResponseCodeError for unknown response classes and EOFError on a
  # Content-Length mismatch.
  def response_read response, request, uri
    content_length = response.content_length

    if use_tempfile? content_length then
      body_io = make_tempfile 'mechanize-raw'
    else
      body_io = StringIO.new
    end

    body_io.set_encoding Encoding::BINARY if body_io.respond_to? :set_encoding
    total = 0

    begin
      response.read_body { |part|
        total += part.length

        # promote to a Tempfile once the in-memory buffer grows too large
        if StringIO === body_io and use_tempfile? total then
          new_io = make_tempfile 'mechanize-raw'

          new_io.write body_io.string

          body_io = new_io
        end

        body_io.write(part)
        log.debug("Read #{part.length} bytes (#{total} total)") if log
      }
    rescue EOFError => e
      # terminating CRLF might be missing, let the user check the document
      raise unless response.chunked? and total.nonzero?

      body_io.rewind
      raise Mechanize::ChunkedTerminationError.new(e, response, body_io, uri,
                                                   @context)
    rescue Net::HTTP::Persistent::Error => e
      body_io.rewind
      raise Mechanize::ResponseReadError.new(e, response, body_io, uri,
                                             @context)
    end

    body_io.flush
    body_io.rewind

    raise Mechanize::ResponseCodeError.new(response, uri) if
      Net::HTTPUnknownResponse === response

    content_length = response.content_length

    # HEAD requests and redirects legitimately carry no body
    unless Net::HTTP::Head === request or Net::HTTPRedirection === response then
      raise EOFError, "Content-Length (#{content_length}) does not match " \
                      "response body length (#{body_io.length})" if
        content_length and content_length != body_io.length
    end

    body_io
  end
  # Follows the redirect in +response+ subject to the redirect_ok policy
  # (:all/true, :permanent, or false/nil). HEAD redirects stay HEAD; all
  # other methods become GET. Raises RedirectLimitReachedError past the
  # redirection limit; returns +page+ unchanged when redirects are disabled.
  def response_redirect(response, method, page, redirects, headers,
                        referer = current_page)
    case @redirect_ok
    when true, :all
      # shortcut
    when false, nil
      return page
    when :permanent
      return page unless Net::HTTPMovedPermanently === response
    end

    log.info("follow redirect to: #{response['Location']}") if log

    raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
      redirects + 1 > @redirection_limit

    redirect_method = method == :head ? :head : :get

    # Make sure we are not copying over the POST headers from the original request
    ['Content-Length', 'Content-MD5', 'Content-Type'].each do |key|
      headers.delete key
    end

    @history.push(page, page.uri)

    new_uri = resolve response['Location'].to_s, page

    fetch new_uri, redirect_method, headers, [], referer, redirects + 1
  end
# :section: Robots | |
def get_robots(uri) # :nodoc: | |
fetch(uri).body | |
rescue Mechanize::ResponseCodeError => e | |
return '' if e.response_code == '404' | |
raise e | |
end | |
def robots= value | |
require 'webrobots' if value | |
@webrobots = nil if value != @robots | |
@robots = value | |
end | |
  ##
  # Tests if this agent is allowed to access +url+, consulting the site's
  # robots.txt. Fetching robots.txt itself is always allowed.
  def robots_allowed? uri
    return true if uri.request_uri == '/robots.txt'

    webrobots.allowed? uri
  end

  # Opposite of robots_allowed?
  def robots_disallowed? url
    !robots_allowed? url
  end

  # Returns an error object if there is an error in fetching or parsing
  # robots.txt of the site +url+.
  def robots_error(url)
    webrobots.error(url)
  end

  # Raises the error if there is an error in fetching or parsing robots.txt of
  # the site +url+.
  def robots_error!(url)
    webrobots.error!(url)
  end

  # Removes robots.txt cache for the site +url+.
  def robots_reset(url)
    webrobots.reset(url)
  end

  # Lazily builds the WebRobots helper, fetching robots.txt through
  # get_robots under the current user agent string.
  def webrobots
    @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
  end
  # :section: SSL

  # Path to an OpenSSL CA certificate file
  def ca_file
    @http.ca_file
  end

  # Sets the path to an OpenSSL CA certificate file
  def ca_file= ca_file
    @http.ca_file = ca_file
  end

  # The SSL certificate store used for validating connections
  def cert_store
    @http.cert_store
  end

  # Sets the SSL certificate store used for validating connections
  def cert_store= cert_store
    @http.cert_store = cert_store
  end

  # The client X509 certificate
  def certificate
    @http.certificate
  end

  # Sets the client certificate to given X509 certificate. If a path is given
  # the certificate will be loaded and set.
  def certificate= certificate
    certificate = if OpenSSL::X509::Certificate === certificate then
                    certificate
                  else
                    OpenSSL::X509::Certificate.new File.read certificate
                  end

    @http.certificate = certificate
  end

  # An OpenSSL private key or the path to a private key
  def private_key
    @http.private_key
  end

  # Sets the client's private key. A path is read and decrypted with the
  # +pass+ attribute as the key password.
  def private_key= private_key
    private_key = if OpenSSL::PKey::PKey === private_key then
                    private_key
                  else
                    OpenSSL::PKey::RSA.new File.read(private_key), @pass
                  end

    @http.private_key = private_key
  end

  # SSL version to use (ruby 1.9+ only)
  def ssl_version
    @http.ssl_version
  end if RUBY_VERSION > '1.9'

  # Sets the SSL version to use (ruby 1.9+ only)
  def ssl_version= ssl_version
    @http.ssl_version = ssl_version
  end if RUBY_VERSION > '1.9'

  # A callback for additional certificate verification. See
  # OpenSSL::SSL::SSLContext#verify_callback
  #
  # The callback can be used for debugging or to ignore errors by always
  # returning +true+. Specifying nil uses the default method that was valid
  # when the SSLContext was created
  def verify_callback
    @http.verify_callback
  end

  # Sets the certificate verify callback
  def verify_callback= verify_callback
    @http.verify_callback = verify_callback
  end

  # How to verify SSL connections. Defaults to VERIFY_PEER
  def verify_mode
    @http.verify_mode
  end

  # Sets the mode for verifying SSL connections
  def verify_mode= verify_mode
    @http.verify_mode = verify_mode
  end

  # :section: Timeouts

  # Reset connections that have not been used in this many seconds
  def idle_timeout
    @http.idle_timeout
  end

  # Sets the connection idle timeout for persistent connections
  def idle_timeout= timeout
    @http.idle_timeout = timeout
  end
# :section: Utility | |
## | |
# Creates a new output IO by reading +input_io+ in +read_size+ chunks. If | |
# the output is over the max_file_buffer size a Tempfile with +name+ is | |
# created. | |
# | |
# If a block is provided, each chunk of +input_io+ is yielded for further | |
# processing. | |
def auto_io name, read_size, input_io | |
out_io = StringIO.new | |
out_io.set_encoding Encoding::BINARY if out_io.respond_to? :set_encoding | |
until input_io.eof? do | |
if StringIO === out_io and use_tempfile? out_io.size then | |
new_io = make_tempfile name | |
new_io.write out_io.string | |
out_io = new_io | |
end | |
chunk = input_io.read read_size | |
chunk = yield chunk if block_given? | |
out_io.write chunk | |
end | |
out_io.rewind | |
out_io | |
end | |
def inflate compressed, window_bits = nil | |
inflate = Zlib::Inflate.new window_bits | |
out_io = auto_io 'mechanize-inflate', 1024, compressed do |chunk| | |
inflate.inflate chunk | |
end | |
inflate.finish | |
out_io | |
ensure | |
inflate.close | |
end | |
  # The logger of the owning context.
  # NOTE(review): raises NoMethodError when @context is nil -- presumably a
  # context is always assigned before logging paths run; confirm.
  def log
    @context.log
  end
## | |
# Sets the proxy address, port, user, and password +addr+ should be a host, | |
# with no "http://", +port+ may be a port number, service name or port | |
# number string. | |
def set_proxy addr, port, user = nil, pass = nil | |
unless addr and port then | |
@http.proxy = nil | |
return | |
end | |
unless Integer === port then | |
begin | |
port = Socket.getservbyname port | |
rescue SocketError | |
begin | |
port = Integer port | |
rescue ArgumentError | |
raise ArgumentError, "invalid value for port: #{port.inspect}" | |
end | |
end | |
end | |
proxy_uri = URI "http://#{addr}" | |
proxy_uri.port = port | |
proxy_uri.user = user if user | |
proxy_uri.password = pass if pass | |
@http.proxy = proxy_uri | |
end | |
def make_tempfile name | |
io = Tempfile.new name | |
io.unlink | |
io.binmode if io.respond_to? :binmode | |
io | |
end | |
def use_tempfile? size | |
return false unless @max_file_buffer | |
return false unless size | |
size >= @max_file_buffer | |
end | |
end | |
require 'mechanize/http/auth_store' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment