corona6 · July 7, 2015 05:53
diff --git a/app_net_scraping.rb b/app_net_scraping.rb
 require 'stringio'
 require 'capybara'
 require 'capybara/poltergeist'

 # Logs in to alpha.app.net, scrolls the trending page a few times and prints
 # the text and link of every post found.

 # create session
 # js_errors: false keeps page-side JavaScript errors from being raised in Ruby;
 # PhantomJS output goes to a throwaway StringIO and image loading is disabled.
 options = {
   js_errors: false,
   timeout: 180,
   phantomjs_logger: StringIO.new,
   logger: nil,
   phantomjs_options: ['--load-images=no', '--ignore-ssl-errors=yes']
 }
 Capybara.register_driver(:poltergeist) do |app|
   Capybara::Poltergeist::Driver.new(app, options)
 end
 Capybara.javascript_driver = :poltergeist
 session = Capybara::Session.new(:poltergeist)

 # access to alpha.app.net trending
 base_url = "https://alpha.app.net"
 session.visit("#{base_url}/browse/trending/")

 # login: credentials come from APP_NET_USERNAME / APP_NET_PASSWORD when set;
 # otherwise the USERNAME / PASSWORD placeholders are used (replace them)
 session.fill_in "id_username", with: ENV.fetch("APP_NET_USERNAME", "USERNAME")
 session.fill_in "id_password", with: ENV.fetch("APP_NET_PASSWORD", "PASSWORD")
 session.find(".btn.btn-primary").click

 # scroll down * 3, pausing before each scroll
 pause = 2
 3.times do
   sleep pause
   session.driver.scroll_to(0, 10000)
 end
 # pause once more so whatever the last scroll triggers has time to finish
 # before extraction (assumes the page loads more posts on scroll -- confirm)
 sleep pause

 # extract content: post text, then its link, then two blank lines
 session.all(".subpixel.h-entry.post-container").each do |post|
   puts post.find(".post-content.e-content").text
   puts base_url + post.find(".u-url.timestamp")[:href]
   2.times { puts }
 end
	require 'stringio'
	require 'capybara'
	require 'capybara/poltergeist'

	# Logs in to alpha.app.net, scrolls the trending page a few times and prints
	# the text and link of every post found.

	# create session
	# js_errors: false keeps page-side JavaScript errors from being raised in Ruby;
	# PhantomJS output goes to a throwaway StringIO and image loading is disabled.
	options = {
	  js_errors: false,
	  timeout: 180,
	  phantomjs_logger: StringIO.new,
	  logger: nil,
	  phantomjs_options: ['--load-images=no', '--ignore-ssl-errors=yes']
	}
	# block parameters use plain pipes; the backslash-escaped form is not valid Ruby
	Capybara.register_driver(:poltergeist) do |app|
	  Capybara::Poltergeist::Driver.new(app, options)
	end
	Capybara.javascript_driver = :poltergeist
	session = Capybara::Session.new(:poltergeist)

	# access to alpha.app.net trending
	base_url = "https://alpha.app.net"
	session.visit("#{base_url}/browse/trending/")

	# login: credentials come from APP_NET_USERNAME / APP_NET_PASSWORD when set;
	# otherwise the USERNAME / PASSWORD placeholders are used (replace them)
	session.fill_in "id_username", with: ENV.fetch("APP_NET_USERNAME", "USERNAME")
	session.fill_in "id_password", with: ENV.fetch("APP_NET_PASSWORD", "PASSWORD")
	session.find(".btn.btn-primary").click

	# scroll down * 3, pausing before each scroll
	pause = 2
	3.times do
	  sleep pause
	  session.driver.scroll_to(0, 10000)
	end
	# pause once more so whatever the last scroll triggers has time to finish
	# before extraction (assumes the page loads more posts on scroll -- confirm)
	sleep pause

	# extract content: post text, then its link, then two blank lines
	session.all(".subpixel.h-entry.post-container").each do |post|
	  puts post.find(".post-content.e-content").text
	  puts base_url + post.find(".u-url.timestamp")[:href]
	  2.times { puts }
	end
No results found