Skip to content

Instantly share code, notes, and snippets.

@p5k6
Last active August 29, 2015 13:57
Show Gist options
  • Select an option

  • Save p5k6/9395539 to your computer and use it in GitHub Desktop.

Select an option

Save p5k6/9395539 to your computer and use it in GitHub Desktop.
NHL trade-deadline scraper — still pretty basic.
require 'wombat'

# Disable Pry's pager so long scrape dumps print straight through.
Pry.config.pager = false

# First pass over the NHL trade-tracker article. Column indexes are taken
# from the page's table markup: td[3] holds the acquiring team's logo image,
# td[2] the traded players, td[5] the other team's logo image.
set1 = Wombat.crawl do
  base_url "http://www.nhl.com"
  path '/ice/news.htm?id=675589'
  to_team 'xpath=//*[@id="cmstable_7607"]/tbody[1]/tr/td[3]//img/@src', :list
  to_players 'xpath=//*[@id="cmstable_7607"]/tbody[1]/tr/td[2]', :list
  from_team 'xpath=//*[@id="cmstable_7607"]/tbody[1]/tr/td[5]//img/@src', :list
end
# Second pass over the same article with the team columns swapped
# (td[3] is now the sending team, td[5] the receiving team, td[6] the
# players) — presumably the table alternates direction per row; TODO confirm
# against the live page.
set2 = Wombat.crawl do
  base_url "http://www.nhl.com"
  path '/ice/news.htm?id=675589'
  from_team 'xpath=//*[@id="cmstable_7607"]/tbody[1]/tr/td[3]//img/@src', :list
  to_players 'xpath=//*[@id="cmstable_7607"]/tbody[1]/tr/td[6]', :list
  to_team 'xpath=//*[@id="cmstable_7607"]/tbody[1]/tr/td[5]//img/@src', :list
end
# Extracts the team slug from a logo image URL, e.g. ".../bruins_logo.png"
# or ".../bruins_dark.png" -> "bruins" (capture group 1).
team_pattern = /\/([a-z]+)_(logo|dark)/

trades1 = []
trades2 = []

# Both scraped sets are shaped the same way, so assemble each into its
# bucket with one shared routine: seed a hash per to_team entry, then
# fill in from_team and the raw player text by row index.
[[set1, trades1], [set2, trades2]].each do |set, bucket|
  set['to_team'].each do |src|
    bucket << { "to_team" => src[team_pattern, 1] }
  end
  set['from_team'].each_with_index do |src, idx|
    bucket[idx]['from_team'] = src[team_pattern, 1]
  end
  set['to_players'].each_with_index do |names, idx|
    bucket[idx]["players"] = names
  end
end

trades = trades1 + trades2

# Trades whose player cell holds a single line (one player moving).
my_trades = trades.select { |t| t['players'].split("\r\n").length == 1 }
# Explode multi-player trades (player cell contains several CRLF-separated
# lines) into one record per player; single-player trades are already
# covered by my_trades, so skip them here.
tmp_ar = []
trades.each do |trade|
  names = trade['players'].split("\r\n")
  next if names.length == 1
  names.each do |name|
    tmp_ar << { 'to_team' => trade['to_team'], 'from_team' => trade['from_team'], 'players' => name.strip }
  end
end
# Merge single-player trades with the exploded multi-player records
# (note: concat mutates my_trades in place) and normalize each to a
# one-player record: take the text before the first comma and strip an
# optional leading position letter ("F ", "G " or "D ") via capture group 2.
all_trades_one_way = my_trades.concat(tmp_ar).map do |trade|
  first_entry = trade['players'].split(",").first
  {
    'from_team' => trade['from_team'],
    'to_team' => trade['to_team'],
    'player' => first_entry[/(^[FGD] |^)(.*)/, 2]
  }
end
## Scrape capgeek.com player pages (sequential numeric ids) for the page
## title (player name) and the 2013-14 salary table row.
salaries_raw = []
# Range#each instead of `for` — `for` leaks its loop variable into the
# surrounding scope, which Ruby style guides flag.
(10..3325).each do |player_id|
  salaries_raw << Wombat.crawl do
    base_url "http://www.capgeek.com"
    path "/player/#{player_id}"
    player 'xpath=//title', :list
    salary 'xpath=//tr[@class="odd" and td = "2013-14"]', :list
  end.merge("id" => player_id)
  sleep 10 # throttle: be polite to the remote server
end

# Restore Pry's pager now that scraping is done.
Pry.config.pager = true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment