Skip to content

Instantly share code, notes, and snippets.

@itolosa
Created February 24, 2015 22:30
Show Gist options
  • Save itolosa/489742ff19054fce6beb to your computer and use it in GitHub Desktop.
Save itolosa/489742ff19054fce6beb to your computer and use it in GitHub Desktop.
Mulideal download-link fetcher
require 'selenium-webdriver'
require 'mechanize'
# Drives a Firefox WebDriver session to follow "skiplink" redirect pages
# (ad-gated URL shorteners) and recover the real destination URL.
class LinkCrawler
  def initialize
    @driver = nil
  end

  # Launches a Firefox WebDriver session and memoizes it in @driver.
  def start
    @driver = Selenium::WebDriver.for(:firefox)
  end

  # Returns the live driver, starting one lazily if none exists yet.
  def webdriver
    @driver ||= start
  end

  # Shuts the browser down. Clears the memoized driver so a later call
  # to #webdriver starts a fresh session (the original cached a dead
  # driver after quitting), and tolerates never having started one.
  def quit
    @driver&.quit
    @driver = nil
  end

  # Visits +spam_url+ and tries up to 10 times (1s apart) to click the
  # element with id "skiplink" and read its href.
  #
  # Returns the resolved href, or the browser's current URL if the page
  # navigated away from +spam_url+, or nil if nothing resolved in time.
  def getlink(spam_url)
    webdriver.get(spam_url)
    url = nil
    10.times do
      sleep 1
      begin
        webdriver.find_element(id: 'skiplink').click
        url = webdriver.find_element(id: 'skiplink').attribute('href')
      rescue StandardError
        # The skiplink is gone: either we were redirected to the real
        # target (return it) or the page broke (reload and retry).
        return webdriver.current_url unless webdriver.current_url == spam_url

        webdriver.get(spam_url)
      end
      # Placeholder links keep href ending in '#'; keep polling until a
      # real URL shows up.
      break if url && !url.end_with?('#')
    end
    url
  end
end
# Scrapes a blog post for gated download links (and a possible archive
# password hint), then resolves each gated link to its real URL with
# LinkCrawler.
class Scraper
  def initialize
    @mech_agent = build_agent
  end

  # Fetches +siteurl+, collects every link text containing "http" inside
  # the post body, and returns a Hash mapping each gated URL to the URL
  # LinkCrawler resolved it to (or nil when resolution failed).
  def getlinks(siteurl)
    site = @mech_agent.get(siteurl)
    content_div = site.search('.post-body.entry-content')

    # Span texts mentioning "pass" are likely archive passwords.
    possible_pass = content_div.search('//span')
                               .collect(&:text)
                               .select { |s| s.downcase.include?('pass') }
    # NOTE(review): computed but never returned or printed — dead value
    # in the original too; surface it to callers if it is wanted.
    possible_pass_txt = possible_pass.join(' or ')

    spam_download_urls = content_div.search('a')
                                    .collect(&:text)
                                    .select { |x| x.downcase.include?('http') }

    crawler = LinkCrawler.new
    crawler.start
    begin
      spam_download_urls.each_with_object({}) do |spam_url, links|
        links[spam_url] = crawler.getlink(spam_url)
      end
    ensure
      # Always close the browser, even if a fetch raises mid-loop
      # (the original leaked the Firefox session on error).
      crawler.quit
    end
  end

  # Discards the current Mechanize agent and builds a fresh one.
  def reload
    @mech_agent = build_agent
  end

  private

  # Single place to configure the Mechanize agent (this block was
  # duplicated verbatim in #initialize and #reload).
  def build_agent
    Mechanize.new do |agent|
      agent.user_agent_alias = 'Mac Safari'
    end
  end
end
# Resolve every gated download link on the given blog post and print
# the resulting {gated_url => real_url} hash.
scraper = Scraper.new
p scraper.getlinks('http://mulideal320.blogspot.com.es/2014/04/dd-discografia-oasis-320-kbps-mega.html')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment