Skip to content

Instantly share code, notes, and snippets.

@mtrolle
Last active December 3, 2018 02:51
Show Gist options
  • Save mtrolle/96f55822122ecabd3cc46190a6dc18a5 to your computer and use it in GitHub Desktop.
Save mtrolle/96f55822122ecabd3cc46190a6dc18a5 to your computer and use it in GitHub Desktop.
High memory consumption in a Ruby download script; see the question asked on Stack Overflow: https://stackoverflow.com/questions/53169751/ruby-memory-usage-gone-wild
# Memory-profiling harness for a RestClient-based download job. It
# authenticates, fetches an XML feed, looks up the image ids for every <item>
# in the feed, downloads each image, and finally prints a MemoryProfiler report.
require 'bundler/inline'
require 'json'
require 'logger' # Logger is instantiated below; require it explicitly instead of relying on a gem loading it

gemfile do
  source 'https://rubygems.org'
  gem 'rest-client'
  gem 'nokogiri'
  gem 'memory_profiler'
end

xml_base_url = 'http://noneed.dk/test.xml?date='
auth_end_point = 'http://noneed.dk/session.php'
# NOTE(review): the "user" value looks like a redaction placeholder - confirm before running.
auth_payload = {"user" => "[email protected]", "password" => "secret"}
image_search_base_url = 'http://noneed.dk/images.php?id='
image_base_url = 'http://noneed.dk/'

logger = Logger.new(STDOUT)
download_size = 0 # running total of response body bytes
# A monotonic clock is not affected by wall-clock adjustments while the job runs.
time_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
requests = 0

report = MemoryProfiler.report do
  # Authenticates
  response = RestClient::Request.execute({
    method: :post,
    url: auth_end_point,
    payload: auth_payload
  })
  cookie_jar = response.cookie_jar
  logger.debug "Got a cookiejar - #{cookie_jar.cookies}"
  download_size += response.body.bytesize
  requests += 1

  # Run X days of data
  1.times do |n|
    date = (Time.now + n * 86400).strftime("%F")
    logger.info "Downloads data for #{date}"

    response = RestClient.get(xml_base_url + date)
    xml_body = response.body
    download_size += xml_body.bytesize
    requests += 1
    logger.debug "Download completed with code #{response.code} with a total size of #{xml_body.bytesize/1024}Kb."

    # Parse downloaded XML and process data items
    xml = Nokogiri::XML(xml_body)
    items = xml.xpath("//noneed//item")
    logger.debug "-- found #{items.count} records in the file"

    items.each do |item|
      # `at` returns nil when the item has no <id> child; skip such items
      # instead of raising NoMethodError on `nil.content`.
      id = item.at("id")&.content
      next if id.nil?

      logger.info "Find images for (id:#{id})"
      images = RestClient::Request.execute({
        method: :post,
        url: image_search_base_url + id,
        cookies: cookie_jar
      })
      images_body = images.body
      download_size += images_body.bytesize
      requests += 1
      logger.debug "Download completed with code #{images.code} with a total size of #{images_body.bytesize}."

      image_json = JSON.parse(images_body)
      # Array() turns a missing/null "ids" entry into an empty list.
      Array(image_json['ids']).each do |img_id|
        logger.debug "-- downloading image #{img_id}"
        image = RestClient::Request.execute({
          method: :post,
          url: image_base_url + img_id.to_s,
          cookies: cookie_jar
        })
        image_bytes = image.body.bytesize
        download_size += image_bytes
        requests += 1
        logger.debug "---- download status: #{image.code} with a total size of #{image_bytes/1024.0/1024.0}Mb."
      end
    end
  end

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - time_start
  logger.info "Total download size: #{(download_size / 1024.0 / 1024.0).round(2)}Mb in #{elapsed} seconds through #{requests} requests."
end

report.pretty_print
logger.info "Completed!"
@mtrolle
Copy link
Author

mtrolle commented Nov 19, 2018

Setting ENV variable RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR to 1.3 halves the memory usage.
https://samsaffron.com/archive/2014/04/08/ruby-2-1-garbage-collection-ready-for-production

@wasifhossain
Copy link

wasifhossain commented Dec 1, 2018

https://stackoverflow.com/a/53574813/1977104

Using HTTP.rb results in:
Total download size: 96.92Mb through 118 unique requests.
Memory consumption:

Total allocated: 7107283 bytes (83437 objects)
Total retained:  44221 bytes (385 objects)

Code:

# Memory-profiling harness for an http.rb-based download job. It authenticates,
# fetches an XML feed, looks up the image ids for every <item> in the feed,
# downloads each image, and finally prints a MemoryProfiler report.
#
# Sizes are measured from the bytes actually read from each response body.
# `content_length` only reflects the Content-Length header: it is nil when the
# server omits that header, and it says nothing about whether the body was read.
require 'bundler/inline'
require 'json'
require 'logger'

gemfile do
  source 'https://rubygems.org'
  ruby '2.5.3'
  gem 'http', '~> 4.0.0'
  gem 'nokogiri', '~> 1.8.3'
  gem 'memory_profiler', '~> 0.9.11'
end

xml_base_url = 'http://noneed.dk/test.xml?date='
auth_end_point = 'http://noneed.dk/session.php'
# NOTE(review): the "user" value looks like a redaction placeholder - confirm before running.
auth_payload = {"user" => "[email protected]", "password" => "secret"}
image_search_base_url = 'http://noneed.dk/images.php?id='
image_base_url = 'http://noneed.dk/'


logger = Logger.new(STDOUT)
download_size = 0 # running total of response body bytes
# A monotonic clock is not affected by wall-clock adjustments while the job runs.
time_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
requests = 0

report = MemoryProfiler.report do
  # Authenticates
  response = HTTP.post(auth_end_point, form: auth_payload)
  cookie_jar = response.cookies
  logger.debug "Got a cookiejar - #{cookie_jar.cookies}"
  download_size += response.to_s.bytesize
  requests += 1

  # The cookie-carrying client does not change between requests; build it once.
  session = HTTP.cookies(cookie_jar)

  # Run X days of data
  1.times do |n|
    date = (Time.now + n * 86400).strftime("%F")
    logger.info "Downloads data for #{date}"

    response = HTTP.get(xml_base_url + date)
    xml_body = response.to_s
    download_size += xml_body.bytesize
    requests += 1
    logger.debug "Download completed with code #{response.code} with a total size of #{xml_body.bytesize/1024}Kb."

    # Parse downloaded XML and process data items
    xml = Nokogiri::XML(xml_body)
    items = xml.xpath("//noneed//item")
    logger.debug "-- found #{items.count} records in the file"

    items.each do |item|
      # `at` returns nil when the item has no <id> child; skip such items
      # instead of raising NoMethodError on `nil.content`.
      id = item.at("id")&.content
      next if id.nil?

      logger.info "Find images for (id:#{id})"

      images = session.post(image_search_base_url + id)
      images_body = images.to_s
      download_size += images_body.bytesize
      requests += 1
      logger.debug "Download completed with code #{images.code} with a total size of #{images_body.bytesize}."

      image_json = JSON.parse(images_body)
      # Array() turns a missing/null "ids" entry into an empty list.
      Array(image_json['ids']).each do |img_id|
        logger.debug "-- downloading image #{img_id}"

        image = session.post(image_base_url + img_id.to_s)
        # Stream the body chunk by chunk so the image is really downloaded
        # without holding the whole payload in memory at once.
        image_bytes = 0
        image.body.each { |chunk| image_bytes += chunk.bytesize }
        download_size += image_bytes
        requests += 1
        logger.debug "---- download status: #{image.code} with a total size of #{image_bytes/1024.0/1024.0}Mb."
      end
    end
  end

  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - time_start
  logger.info "Total download size: #{(download_size / 1024.0 / 1024.0).round(2)}Mb in #{elapsed} seconds through #{requests} requests."
end

report.pretty_print

logger.info "Completed!"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment