aninder · February 2, 2017 02:27
diff --git a/block_news_websites b/block_news_websites
 # Walks the paginated Alexa "News" top-sites listing, collects the text of every
 # site link, and prints hosts-file entries ("0.0.0.0 <host>" and
 # "0.0.0.0 www.<host>") for each distinct site.
 require 'mechanize'
 require 'uri'

 @all_links = []
 agent = Mechanize.new
 page = agent.get("http://www.alexa.com/topsites/category/News")

 loop do
   puts "current page url #{page.uri}"
   links = page.css('li.site-listing .desc-paragraph a').map(&:text)
   if links.empty?
     puts "no links found on current page #{page.uri}"
     break
   end
   @all_links += links

   # The pagination widget marks the page being shown; without it we cannot
   # work out which page number to request next.
   current_page = page.css('.pagination-current').text
   if current_page.empty?
     puts "wot page is this!!"
     break
   end

   next_number = current_page.to_i + 1
   puts "finding pagination link number #{next_number}"
   next_link = page.css('.pagination-page').find { |e| e.text.to_i == next_number }
   puts "next link ===> #{next_link}"
   # No pagination link carries the next number: this was the last page.
   break unless next_link

   puts "next link found"
   puts next_link.attr('href')
   # The href may be relative, so resolve it against the current page URL.
   next_url = page.uri.merge(next_link.attr('href'))
   puts "going to #{next_url}"
   sleep 2 # throttle: wait two seconds between page requests
   page = agent.get(next_url)
 end
 puts @all_links

 # Reduce every collected entry to its host name. Entries without an explicit
 # http:// or https:// scheme get one so that URI.parse can locate the host.
 parsed_hosts = @all_links.map do |url|
   url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
   begin
     URI.parse(url).host
   rescue URI::InvalidURIError
     warn "skipping unparsable entry #{url.inspect}"
     nil
   end
 end
 # Drop entries that failed to parse or had no host component.
 @links = parsed_hosts.compact

 # Strip a leading "www." label only; hosts that merely begin with the letters
 # "www" (e.g. "wwwfoo.com") are left intact.
 @final_links = @links.map { |host| host.sub(/\Awww\./i, '') }
 @final_links.uniq.each do |host|
   puts "0.0.0.0 #{host}"
   puts "0.0.0.0 www.#{host}"
 end
	# Walks the paginated Alexa "News" top-sites listing, collects the text of every
	# site link, and prints hosts-file entries ("0.0.0.0 <host>" and
	# "0.0.0.0 www.<host>") for each distinct site.
	require 'mechanize'
	require 'uri'

	@all_links = []
	agent = Mechanize.new
	page = agent.get("http://www.alexa.com/topsites/category/News")

	loop do
	  puts "current page url #{page.uri}"
	  links = page.css('li.site-listing .desc-paragraph a').map(&:text)
	  if links.empty?
	    puts "no links found on current page #{page.uri}"
	    break
	  end
	  @all_links += links

	  # The pagination widget marks the page being shown; without it we cannot
	  # work out which page number to request next.
	  current_page = page.css('.pagination-current').text
	  if current_page.empty?
	    puts "wot page is this!!"
	    break
	  end

	  next_number = current_page.to_i + 1
	  puts "finding pagination link number #{next_number}"
	  next_link = page.css('.pagination-page').find { |e| e.text.to_i == next_number }
	  puts "next link ===> #{next_link}"
	  # No pagination link carries the next number: this was the last page.
	  break unless next_link

	  puts "next link found"
	  puts next_link.attr('href')
	  # The href may be relative, so resolve it against the current page URL.
	  next_url = page.uri.merge(next_link.attr('href'))
	  puts "going to #{next_url}"
	  sleep 2 # throttle: wait two seconds between page requests
	  page = agent.get(next_url)
	end
	puts @all_links

	# Reduce every collected entry to its host name. Entries without an explicit
	# http:// or https:// scheme get one so that URI.parse can locate the host.
	parsed_hosts = @all_links.map do |url|
	  url = "http://#{url}" unless url =~ %r{\Ahttps?://}i
	  begin
	    URI.parse(url).host
	  rescue URI::InvalidURIError
	    warn "skipping unparsable entry #{url.inspect}"
	    nil
	  end
	end
	# Drop entries that failed to parse or had no host component.
	@links = parsed_hosts.compact

	# Strip a leading "www." label only; hosts that merely begin with the letters
	# "www" (e.g. "wwwfoo.com") are left intact.
	@final_links = @links.map { |host| host.sub(/\Awww\./i, '') }
	@final_links.uniq.each do |host|
	  puts "0.0.0.0 #{host}"
	  puts "0.0.0.0 www.#{host}"
	end
No results found