Created
November 22, 2010 06:45
-
-
Save deoxxa/709617 to your computer and use it in GitHub Desktop.
SCARY STUFF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Scrape every tracker of a torrent over HTTP and store the resulting stats.
 *
 * Steps, in order:
 *  1. Load the torrent via torrent_get(); return if it does not exist.
 *  2. Return if the torrent was scraped less than 1800 seconds ago.
 *  3. Fire api_ping('torrent_scrape', ...); return if $config['no_scrape'] is set.
 *  4. Build one HTTP scrape request per tracker, resolve the hosts, open
 *     asynchronously-connecting TCP sockets and multiplex them with
 *     stream_select() for at most 15 seconds.
 *  5. Split each response into headers/body, hand the body to torrent_parse()
 *     and write the stats to `torrents_trackers`, then refresh the aggregate
 *     row in `torrents`.
 *
 * The original author flagged this hand-rolled HTTP client as not
 * production-grade; it only speaks plain HTTP and always defaults to port 80.
 * NOTE(review): the request is sent as HTTP/1.1, so a tracker may answer with
 * a chunked body, which this code does not decode - confirm against real trackers.
 * NOTE(review): mysql_query() belongs to the ext/mysql API that was removed in
 * PHP 7; it is kept because the connection handling lives outside this function.
 *
 * @param mixed $torrent_id ID of the torrent to scrape (formatted with %d in the SQL).
 * @return void
 */
function torrent_scrape($torrent_id) {
    // Without this import $config is an undefined local variable and the
    // 'no_scrape' opt-out below can never trigger.
    global $config;

    $times = array();
    $times['start'] = microtime(1);
    $times['last'] = microtime(1);

    $torrents = torrent_get(get_str_build(array('extended' => '', 'torrent_id' => $torrent_id)));

    $times['query'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Return if the torrent_id doesn't exist
    if (!isset($torrents) || !is_array($torrents) || !count($torrents)) { return; }

    $torrent = $torrents[0];

    // Only scrape torrents every 1800 seconds
    if (($torrent['scrape_time'] + 1800) > time()) { return; }

    api_ping('torrent_scrape', $torrent_id);

    // This will be set if the user has an external application that runs off the ping API doing scrapes.
    if (isset($config['no_scrape'])) {
        return;
    }

    // Treat a missing/invalid tracker list as "no trackers" instead of warning in every loop below.
    $trackers = (isset($torrent['trackers']) && is_array($torrent['trackers'])) ? $torrent['trackers'] : array();

    // Set this to 1 if you want some verbose output for testing
    $http_debug = 0;
    if ($http_debug) { header('Content-Type: text/plain'); }

    // $scrape holds the per-tracker state, $handles maps socket resource id => tracker id
    $scrape = array();
    $handles = array();

    foreach ($trackers as $tracker_id => $tracker) {
        // See -> http://wiki.theory.org/BitTorrentSpecification#Tracker_.27scrape.27_Convention
        $scrape_url = sprintf('%s?info_hash=%s', str_replace('announce', 'scrape', $tracker['tracker']), urlencode($torrent['info_hash']));

        // Discard URLs that cannot be parsed or that lack the host/path needed to build a request
        $url = parse_url($scrape_url);
        if (!is_array($url) || !isset($url['host']) || !isset($url['path'])) {
            continue;
        }

        // parse_url() omits absent components, so fill in the ones used below
        if (!isset($url['port'])) { $url['port'] = 80; }
        if (!isset($url['query'])) { $url['query'] = ''; }

        // Fold the query string back into the path to get the request target
        $url['path'] = $url['path'].(strlen($url['query']) ? '?'.$url['query'] : '');
        unset($url['query']);
        unset($url['scheme']);

        $scrape[$tracker_id] = array(
            'id'       => $tracker_id,
            'url'      => $url,
            'req'      => sprintf(
                "GET %s HTTP/1.1\r\nHost: %s:%s\r\nConnection: close\r\nUser-Agent: Jishaku/0.1\r\n\r\n",
                $url['path'], $url['host'], $url['port']
            ),
            'response' => '',
            'headers'  => '',
            'body'     => '',
            'array'    => array(),
        );
    }

    $times['setup'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Resolve DNS
    foreach ($trackers as $tracker_id => $tracker) {
        if (!isset($scrape[$tracker_id])) { continue; }

        if ($http_debug) { printf("init: getting ip for %s\n", $tracker['tracker']); flush(); }

        $host = $scrape[$tracker_id]['url']['host'];
        $ip = null;

        if (filter_var($host, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false) {
            // Host is already an IPv4 literal; an A-record lookup would find nothing for it
            $ip = $host;
        } else {
            $dns = dns_get_record($host, DNS_A);
            if (is_array($dns) && count($dns) && isset($dns[0]['ip'])) {
                $ip = $dns[0]['ip'];
            }
        }

        if ($ip !== null) {
            if ($http_debug) { printf("init: got ip for %s (%s)\n", $tracker['tracker'], $ip); flush(); }
            $scrape[$tracker_id]['url']['ip'] = $ip;
        } else {
            if ($http_debug) { printf("init: couldn't get ip for %s\n", $tracker['tracker']); flush(); }
            unset($scrape[$tracker_id]);
        }
    }

    $times['dns'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Open the sockets
    foreach ($trackers as $tracker_id => $tracker) {
        if (!isset($scrape[$tracker_id])) { continue; }

        if ($http_debug) { printf("init: opening socket for %s\n", $tracker['tracker']); flush(); }

        $fh = @stream_socket_client(
            sprintf("tcp://%s:%s", $scrape[$tracker_id]['url']['ip'], $scrape[$tracker_id]['url']['port']),
            $errno, $errstr, 0, STREAM_CLIENT_ASYNC_CONNECT
        );

        // A failed open returns false; drop the tracker rather than carrying a non-resource around
        if (!is_resource($fh)) {
            unset($scrape[$tracker_id]);
            continue;
        }

        $scrape[$tracker_id]['sent'] = 0;
        $scrape[$tracker_id]['read'] = 0;
        $scrape[$tracker_id]['error'] = 0;
        $scrape[$tracker_id]['fh'] = $fh;
        $handles[(int)$fh] = $tracker_id;
    }

    $times['open-sockets'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    $http_timeout = 15;
    $http_start = time();

    // Read stuff for $http_timeout seconds
    while (time() < ($http_start + $http_timeout)) {
        $read = array();
        $write = array();
        $error = array();

        // Sockets that have not sent their request wait for writability, the rest for readability
        foreach ($scrape as $tracker_id => $entry) {
            if (!is_resource($entry['fh'])) { continue; }

            if (!$entry['sent']) {
                $write[] = $entry['fh'];
            } elseif (!$entry['read']) {
                $read[] = $entry['fh'];
            }

            // TODO: This might be bad?
            // BUG:0000039
            $error[] = $entry['fh'];
        }

        // Every remaining tracker is either finished or closed
        if (!count($read) && !count($write)) { break; }

        // stream_select() rewrites the three arrays in place; false means the call itself failed
        if (stream_select($read, $write, $error, 1) === false) { break; }

        if ($http_debug) {
            printf("http: %s sockets ready for reading\n", count($read));
            printf("http: %s sockets ready for writing\n", count($write));
            printf("http: %s sockets with errors\n", count($error));
            flush();
        }

        foreach ($error as $fh) {
            if (!isset($handles[(int)$fh])) { continue; }
            $tracker_id = $handles[(int)$fh];
            $tracker = $trackers[$tracker_id];

            if ($http_debug) { printf("http: error on %s\n", $tracker['tracker']); flush(); }

            if (is_resource($fh)) { fclose($fh); }
            // Keep the entry: whatever was received so far still gets a chance to parse
            if (isset($scrape[$tracker_id])) { $scrape[$tracker_id]['error'] = 1; }
        }

        foreach ($read as $fh) {
            if (!isset($handles[(int)$fh])) { continue; }
            $tracker_id = $handles[(int)$fh];
            $tracker = $trackers[$tracker_id];

            // The handle may have been closed by the error loop just above
            if (!isset($scrape[$tracker_id]) || !is_resource($fh)) { continue; }

            if ($http_debug) { printf("http: activity on %s\n", $tracker['tracker']); flush(); }

            $buf = @fread($fh, 1024);

            // Compare by length: a loose comparison would mistake a "0" chunk for EOF
            if ($buf !== false && strlen($buf)) {
                if ($http_debug) { printf("http: read %s bytes from %s\n", strlen($buf), $tracker['tracker']); flush(); }
                $scrape[$tracker_id]['response'] .= $buf;
            } else {
                if ($http_debug) { printf("http: closed socket for %s\n", $tracker['tracker']); flush(); }
                @fclose($fh);
                $scrape[$tracker_id]['read'] = 1;
            }
        }

        foreach ($write as $fh) {
            if (!isset($handles[(int)$fh])) { continue; }
            $tracker_id = $handles[(int)$fh];
            $tracker = $trackers[$tracker_id];

            if (!isset($scrape[$tracker_id]) || !is_resource($fh)) { continue; }

            if ($http_debug) { printf("http: ready to write on %s\n", $tracker['tracker']); flush(); }

            if ($scrape[$tracker_id]['sent']) { continue; }

            if (@fwrite($fh, $scrape[$tracker_id]['req']) === false) {
                // Give up on this tracker and release its socket
                unset($scrape[$tracker_id]);
                unset($handles[(int)$fh]);
                @fclose($fh);
                continue;
            }

            $scrape[$tracker_id]['sent'] = 1;
        }
    }

    // Release any socket that was still open when the time budget ran out
    foreach ($scrape as $entry) {
        if (isset($entry['fh']) && is_resource($entry['fh'])) { @fclose($entry['fh']); }
    }

    $times['http'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    unset($handles);

    // Split the headers/body of the HTTP response
    foreach (array_keys($scrape) as $tracker_id) {
        $parts = explode("\r\n\r\n", $scrape[$tracker_id]['response'], 2);

        // No blank line means no complete header block was received; discard the tracker
        if (count($parts) < 2) {
            unset($scrape[$tracker_id]);
            continue;
        }

        list($scrape[$tracker_id]['headers'], $scrape[$tracker_id]['body']) = $parts;
    }

    $times['split'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);

    // Parse stuff
    foreach ($trackers as $tracker_id => $tracker) {
        // Only try to parse stuff that exists still
        if (isset($scrape[$tracker_id]) && isset($scrape[$tracker_id]['body']) && strlen($scrape[$tracker_id]['body'])) {
            $scrape[$tracker_id]['array'] = torrent_parse($scrape[$tracker_id]['body']);
        }

        // If it didn't parse properly, discard it
        if (!isset($scrape[$tracker_id]['array']) || !is_array($scrape[$tracker_id]['array']) || !count($scrape[$tracker_id]['array'])) {
            if ($http_debug) { printf("parse: couldn't parse data for %s\n", $tracker['tracker']); flush(); }
            unset($scrape[$tracker_id]);
        } elseif ($http_debug) {
            printf("parse: parsed data for %s\n", $tracker['tracker']); flush();
            // Keep the debug dump below readable
            $scrape[$tracker_id]['response'] = '<removed>';
        }
    }

    $times['parse'] = microtime(1) - $times['last'];
    $times['last'] = microtime(1);
    $times['total'] = microtime(1) - $times['start'];

    unset($times['last']);
    unset($times['start']);

    if ($http_debug) {
        var_dump($scrape);
        var_dump($times);
        exit();
    }

    // Store the per-tracker stats
    $updated = false;

    foreach (array_keys($scrape) as $tracker_id) {
        // Use the first entry of 'files'; a reply without one is stored as zeros
        $files = $scrape[$tracker_id]['array'];
        $files = (isset($files['files']) && is_array($files['files'])) ? $files['files'] : array();

        $stats = array_shift($files);
        if (!is_array($stats)) { $stats = array(); }
        $stats += array('complete' => 0, 'incomplete' => 0, 'downloaded' => 0);

        $torrents_trackers_update_query = sprintf(
            "UPDATE `torrents_trackers` SET `scrape_time` = %d, `seeds` = %d, `leechers` = %d, `completed` = %d WHERE `torrent_id` = %d AND `tracker_id` = %d LIMIT 1;",
            time(), $stats['complete'], $stats['incomplete'], $stats['downloaded'], $torrent['torrent_id'], $tracker_id
        );

        mysql_query($torrents_trackers_update_query);
        $updated = true;
    }

    // The aggregate only depends on the rows written above, so refresh it once rather than per tracker
    if ($updated) {
        $torrents_update_query = sprintf(
            "UPDATE `torrents` SET `scrape_time` = %d, `seeds` = (SELECT MAX(`seeds`) FROM `torrents_trackers` WHERE `torrent_id` = %d LIMIT 1), `leechers` = (SELECT MAX(`leechers`) FROM `torrents_trackers` WHERE `torrent_id` = %d LIMIT 1), `completed` = (SELECT MAX(`completed`) FROM `torrents_trackers` WHERE `torrent_id` = %d LIMIT 1) WHERE `torrent_id` = %d LIMIT 1;",
            time(), $torrent_id, $torrent_id, $torrent_id, $torrent_id
        );

        mysql_query($torrents_update_query);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment