Created
September 28, 2013 07:44
-
-
Save xyqfer/6739617 to your computer and use it in GitHub Desktop.
使用Sockets获取网页内容
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Returns the contents of a web page
 *
 * Opens a remote web document over a raw socket, sends an HTTP/1.0 GET
 * request and returns the response body. The raw header block of the last
 * response received is stored in the $headers variable.
 *
 * 301 and 302 redirects are followed, at most $maxRedirects times; any
 * status code other than 200, 301 or 302 makes the function return false.
 * A Location header holding an absolute URL, a scheme-relative URL
 * ("//host/path") or an absolute path ("/path") is supported.
 *
 * Port selection: a port written in the URL itself always wins. Otherwise
 * $port is used, except that an https URL combined with the default $port
 * of 80 connects to 443. https URLs are fetched through PHP's "ssl://"
 * socket transport; every other scheme is spoken to in plain text.
 *
 * @param string $url          The URL to the web page
 * @param string $headers      The HTTP headers will be stored in this variable
 * @param int    $port         Optional port number to use for grabbing the document
 * @param int    $timeout      The number of seconds to wait, both for connecting
 *                             and for reading the response, before giving up
 * @param int    $maxRedirects Maximum number of redirects to follow before giving up
 * @return mixed The document body as a string, or false on any failure
 */
function get_http_document($url, &$headers = null, $port = 80, $timeout = 8, $maxRedirects = 5) {
    $pURL = parse_url($url);
    // parse_url() returns false for seriously malformed URLs; empty() covers that too.
    if (empty($pURL['host'])) {
        return false;
    }

    $host = $pURL['host'];
    $isSecure = isset($pURL['scheme']) && strtolower($pURL['scheme']) === 'https';
    $defaultPort = $isSecure ? 443 : 80;

    if (isset($pURL['port'])) {
        $connectPort = (int) $pURL['port'];
    } else if ($isSecure && (int) $port === 80) {
        // The caller left $port at the plain-HTTP default; use the https default instead.
        $connectPort = 443;
    } else {
        $connectPort = (int) $port;
    }

    $remotePath = (isset($pURL['path']) && $pURL['path'] !== '') ? $pURL['path'] : '/';
    // Explicit comparison instead of empty() so that a query string of "0" is kept.
    $hasQuery = isset($pURL['query']) && $pURL['query'] !== '';
    $remoteDocument = $hasQuery ? $remotePath . '?' . $pURL['query'] : $remotePath;

    $fp = fsockopen(($isSecure ? 'ssl://' : '') . $host, $connectPort, $errno, $errstr, $timeout);
    if (!$fp) {
        return false;
    }

    // fsockopen()'s timeout only covers connecting; this one bounds each read as well.
    $seconds = (int) $timeout;
    $microseconds = (int) (($timeout - $seconds) * 1000000);
    stream_set_timeout($fp, $seconds, $microseconds);

    // The Host header carries the port only when it is not the scheme's default.
    $hostHeader = ($connectPort === $defaultPort) ? $host : $host . ':' . $connectPort;

    $out = "GET $remoteDocument HTTP/1.0\r\n";
    $out .= "Host: $hostHeader\r\n";
    $out .= "Connection: Close\r\n\r\n";
    if (fwrite($fp, $out) === false) {
        fclose($fp);
        return false;
    }
    unset($out);

    // Reads until the server closes the connection or the stream timeout expires.
    $received = stream_get_contents($fp);
    $meta = stream_get_meta_data($fp);
    fclose($fp);
    if ($received === false || !empty($meta['timed_out'])) {
        // A timed-out read means the document is incomplete; report failure.
        return false;
    }

    // Separate the headers from the content
    $parts = explode("\r\n\r\n", $received, 2);
    $headers = $parts[0];
    // A response without a blank line after the headers has no body part.
    $content = isset($parts[1]) ? $parts[1] : '';
    unset($parts, $received);

    $headerParts = explode("\r\n", $headers);
    if (!preg_match('~HTTP/1\.\d ([\d]+)~i', $headerParts[0], $matches)) {
        return false;
    }

    $statusCode = (int) $matches[1];
    if ($statusCode === 200) {
        return $content;
    }
    if ($statusCode !== 301 && $statusCode !== 302) {
        return false;
    }

    // Stop here rather than recursing forever on a redirect loop.
    if ($maxRedirects <= 0) {
        return false;
    }
    if (!preg_match('~^Location:(.*)$~im', $headers, $matches)) {
        return false;
    }

    $newLocation = trim($matches[1]);
    $scheme = $isSecure ? 'https' : 'http';
    if (strncmp($newLocation, '//', 2) === 0) {
        // Scheme-relative redirect: keep the current scheme.
        $newLocation = $scheme . ':' . $newLocation;
    } else if (strncmp($newLocation, '/', 1) === 0) {
        // Absolute-path redirect: stay on the current host and port.
        $newLocation = $scheme . '://' . $hostHeader . $newLocation;
    }

    return get_http_document($newLocation, $headers, $port, $timeout, $maxRedirects - 1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment