Created
March 23, 2014 10:37
-
-
Save bzikarsky/9721378 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?hh | |
/**
 * Asynchronously waits until $stream shows activity for the requested mode.
 *
 * The stream is polled with a zero-timeout stream_select(); while nothing is
 * pending the coroutine suspends itself through SleepWaitHandle so that other
 * awaitables can make progress between polls.
 *
 * @param resource $stream stream to watch
 * @param string   $mode   "r" to wait for readability, "w" for writability
 * @param int      $usec   pause between two polls, in microseconds
 *
 * @throws InvalidArgumentException when $mode is neither "r" nor "w"
 * @throws RuntimeException         when stream_select() reports a failure
 */
async function stream_check(resource $stream, string $mode, int $usec): Awaitable<void>
{
    // Any other mode would leave both select sets empty and the loop below
    // would never observe activity, so reject it up front.
    if ($mode !== "r" && $mode !== "w") {
        throw new InvalidArgumentException(
            "stream_check(): mode must be \"r\" or \"w\", got \"$mode\""
        );
    }

    do {
        // stream_select() rewrites its arguments, so the sets are rebuilt
        // on every iteration.
        $r = $w = $e = null;
        if ($mode === "r") {
            $r = Vector{$stream};
        } else {
            $w = Vector{$stream};
        }

        $ready = stream_select($r, $w, $e, 0, 0);

        // false means the select call itself failed; a loose comparison with 0
        // would mistake that for "no activity yet" and poll forever.
        if ($ready === false) {
            throw new RuntimeException("stream_check(): stream_select() failed");
        }

        // on stream activity - return
        if ($ready !== 0) {
            return;
        }

        // wait given amount of usecs before next activity check
        await SleepWaitHandle::create($usec);
    } while (true);
}
/**
 * Fetches "/" from the host named in $url over plain TCP port 80 and returns
 * the raw response (status line, headers and body) as one string.
 *
 * Progress markers ("CON", "REQ", "RSP", "ERR") are written to STDERR.
 *
 * NOTE(review): the request is a bare "GET / HTTP/1.0" without a Host header,
 * and any path, port or scheme contained in $url is ignored.
 *
 * @param string $url URL whose host component is contacted
 *
 * @return Awaitable<?string> the raw response, or null when the host cannot be
 *                            determined, the connection fails, or reading fails
 */
async function fetch_url(string $url): Awaitable<?string>
{
    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === "") {
        fwrite(STDERR, "ERR $url: no host in URL\n");
        return null;
    }

    // init stream
    // stream_socket_client() reports failures through these by-reference
    // arguments, so they have to be real variables.
    $errno = null;
    $errstr = null;
    $stream = stream_socket_client(
        "tcp://" . $host . ':80',
        $errno,
        $errstr,
        30,
        /* ASYNC support is missing as of 2014-03-22 */
        STREAM_CLIENT_CONNECT | STREAM_CLIENT_ASYNC_CONNECT
    );
    if ($stream === false) {
        fwrite(STDERR, "ERR $url: $errstr ($errno)\n");
        return null;
    }
    stream_set_blocking($stream, 0);

    // Since the connect is blocking anyway, we don't need to wait
    // for the stream to become available
    //
    // await stream_check($stream, "w", 100);
    fwrite(STDERR, "CON $url\n");

    // send request
    fwrite($stream, "GET / HTTP/1.0\r\n\r\n");
    fwrite(STDERR, "REQ $url\n");

    $response = "";
    while (true) {
        // Yield to other awaitables until the socket has something to read.
        await stream_check($stream, "r", 20000);

        $chunk = fread($stream, 2048);
        if ($chunk !== false) {
            $response .= $chunk;
        }

        if (feof($stream)) {
            fwrite(STDERR, "RSP $url\n");
            fclose($stream);
            return $response;
        }

        // A failed read without EOF would otherwise keep this loop spinning.
        if ($chunk === false) {
            fwrite(STDERR, "ERR $url: read failed\n");
            fclose($stream);
            return null;
        }
    }
}
/**
 * Starts one fetch_url() awaitable per URL and blocks until all of them have
 * finished.
 *
 * @param Set<string> $urls URLs to fetch
 *
 * @return Map<string, string> fetch_url() result for each URL, keyed by URL
 */
function fetch_urls_async(Set<string> $urls): Map<string, string>
{
    // Kick off every request first; nothing is awaited inside this loop.
    $pending = Map{};
    foreach ($urls as $target) {
        $pending->set($target, fetch_url($target));
    }

    // Combine the per-URL awaitables and wait for the whole batch.
    $batch = GenMapWaitHandle::create($pending);

    return $batch->join();
}
/**
 * Fetches "/" from every URL's host one after another using blocking sockets
 * on TCP port 80.
 *
 * Progress markers ("CON", "REQ", "RES", "ERR") are written to STDERR.
 *
 * @param Set<string> $urls URLs to fetch
 *
 * @return Map<string, string> raw response per URL; a URL whose connection
 *                             could not be established maps to ""
 */
function fetch_urls_sync(Set<string> $urls): Map<string, string>
{
    $result = Map{};
    foreach ($urls as $url) {
        // Every requested URL gets an entry, even when the fetch fails.
        $result[$url] = "";

        $stream = stream_socket_client("tcp://" . parse_url($url)['host'] . ":80");
        if ($stream === false) {
            // Without this guard feof(false) never turns true and the read
            // loop below would never terminate.
            fwrite(STDERR, "ERR $url\n");
            continue;
        }
        fwrite(STDERR, "CON $url\n");

        fwrite($stream, "GET / HTTP/1.0\r\n\r\n");
        fwrite(STDERR, "REQ $url\n");

        $body = "";
        while (!feof($stream)) {
            $chunk = fread($stream, 8196);
            if ($chunk === false) {
                // Read error: keep what was received so far and stop.
                break;
            }
            $body .= $chunk;
        }
        $result[$url] = $body;

        fwrite(STDERR, "RES $url\n");
        fclose($stream);
    }
    return $result;
}
/**
 * Script entry point: downloads a fixed list of sites and prints the size of
 * each raw response.
 *
 * @param array<string> $argv command-line arguments; $argv[1] selects the
 *                            strategy - absent or "async" runs the async
 *                            variant, anything else the sequential one
 */
function main(array<string> $argv): void
{
    $urls = Set{
        "http://google.com",
        "http://github.com",
        "http://php.net",
        "http://facebook.com",
        "http://hhvm.com",
        "http://reddit.com",
        "http://wikipedia.com",
        "http://example.org",
        "http://www.iana.org",
        "http://netflix.com",
        "http://bing.com"
    };

    // Async is the default when no strategy argument is given.
    $async = true;
    if (isset($argv[1])) {
        $async = ($argv[1] == "async");
    }

    if ($async) {
        $label = "async";
        $pages = fetch_urls_async($urls);
    } else {
        $label = "sync";
        $pages = fetch_urls_sync($urls);
    }

    $sizes = $pages->map('strlen');

    echo "Results - " . $label . ":\n";
    foreach ($sizes as $url => $size) {
        echo " - $url: $size\n";
    }
}

main($argv);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Somehow GitHub didn't notify me about those comments. When I benchmarked this test script, I came to the conclusion that the blocking TCP/IP connect is the "biggest" performance concern.