Created
November 16, 2009 17:07
-
-
Save Maffsie/236140 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'net/http' | |
require 'sqlite3' | |
require 'hpricot' | |
require 'open-uri' | |
# Crawls the browse listing of each category on `domain`, follows every
# "detLink" entry to its detail page, downloads the linked .torrent file into
# `torrent_dir`, and records (url, filename, description) in torrents.sqlite.
category_ids = [100, 200, 300, 400, 600]
domain = 'thepiratebay.org'
url = '/browse/%s/%s/3'
torrent_dir = 'torrents'
# Prefix stripped from a download URL to obtain the bare file name.
download_prefix = %r{http://torrents\.thepiratebay\.org/[0-9]{1,}/}

# File.open does not create missing directories, so make sure the target exists.
Dir.mkdir(torrent_dir) unless File.directory?(torrent_dir)

db = SQLite3::Database.new('torrents.sqlite')
db.execute('CREATE TABLE IF NOT EXISTS `torrents` (`url` TEXT NOT NULL,`filename` TEXT NOT NULL,`description` TEXT NULL)')

category_ids.each do |category|
  page_num = 0

  loop do
    page = Net::HTTP.start(domain) { |http| http.get(url % [category, page_num]).body }

    # A body with a line ending in "Forbidden." is treated as the end-of-category
    # marker. The page is still scanned below before the loop stops.
    last_page = !/Forbidden.$/.match(page).nil?

    listing = Hpricot.parse(page)
    links = listing.search('//a[@class="detLink"]')

    # For each torrent on the page, download its detail page, then pull out the
    # description and the download link.
    links.each do |link|
      torrent_path = link.attributes['href'].to_s
      detail_html = Net::HTTP.start(domain) { |http| http.get(torrent_path).body }
      detail = Hpricot.parse(detail_html)

      # Reset per torrent so an entry without an .nfo block is stored with a
      # NULL description instead of inheriting the previous torrent's text.
      # If several .nfo blocks exist, the last one wins.
      description = nil
      detail.search('//div[@class="nfo"]').each { |nfo| description = nfo.inner_html.to_s }

      anchor = detail.search('//div[@class="download"]').at('a')
      unless anchor
        warn "No download link found on #{domain}#{torrent_path}; skipping."
        next
      end
      download = anchor['href'].to_s

      # The href comes from a remote page, so it is parsed as a URI and opened
      # through open-uri's URI#open rather than Kernel#open, which would also
      # accept local paths and "|command" strings.
      begin
        uri = URI.parse(download)
        unless uri.respond_to?(:open)
          warn "Unsupported download URL #{download.inspect}; skipping."
          next
        end
        torrent_data = uri.open('User-Agent' => 'Ruby-Wget').read
      rescue URI::InvalidURIError, OpenURI::HTTPError => e
        # Skip this torrent instead of writing an empty file and a DB row for it.
        warn "Could not download torrent file #{download.inspect}: #{e.message}"
        next
      end

      # basename guards against path separators / "../" in a scraped URL.
      tor_file = File.basename(download.gsub(download_prefix, ''))
      if tor_file.empty?
        warn "Could not derive a file name from #{download.inspect}; skipping."
        next
      end

      # .torrent payloads are binary, so write in binary mode.
      File.open(File.join(torrent_dir, tor_file), 'wb') { |f| f.write(torrent_data) }
      db.execute('INSERT INTO `torrents` (url, filename, description) VALUES (?, ?, ?)', download, tor_file, description)
    end

    # Also stop on a page that lists no torrents; otherwise the crawl would
    # never terminate if the "Forbidden." marker is never served.
    break if last_page || links.empty?

    page_num += 1
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment