Last active
January 30, 2023 14:05
-
-
Save wesinator/97f97270c86c863b6eaf00ebae783ceb to your computer and use it in GitHub Desktop.
Remove line numbers from code snippets of old cnblogs.com posts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://web.archive.org/web/20150421052738/https://www.cnblogs.com/wz19860913/archive/2010/04/29/1723586.html
// Strip the line-number <span>s from code snippets on old cnblogs.com posts.
// A span is treated as a line number when its inline style attribute is
// exactly one of the two colour declarations below.
var lineNumberStyles = ["color: #008080", "color: rgba(0, 128, 128, 1)"];

// getElementsByTagName returns a *live* collection: removing a node while
// iterating it shifts the remaining items and the next span is skipped.
// Iterate over a static snapshot instead.
var spans = Array.from(document.getElementsByTagName('span'));
for (var span of spans) {
    // Read the raw attribute text; null when the span has no style attribute.
    var style = span.getAttribute('style');
    if (style !== null && lineNumberStyles.includes(style)) {
        span.remove();
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
https://web.archive.org/web/20150421052738/https://www.cnblogs.com/wz19860913/archive/2010/04/29/1723586.html | |
那片土在蓝天上 | |
燃烧的翅膀 | |
[转]利用Winsock实现HTTP的GET请求 | |
网络爬虫需要从指定的URL通过HTTP协议来获得HTML文件信息,以此从一个URL爬到另一个URL。在Windows平台上,这往往通过WinINet接口实现。 | |
但是,如果对HTTP协议熟悉的话,也可以通过Winsock接口实现。代码如下。 | |
*/ | |
#pragma warning (disable:4996) | |
#define DEFAULT_URL "http://www.google.com" | |
/*
 * Initialise Winsock by calling WSAStartup with the version word built from
 * MAKEWORD(highVer, lowVer).
 *
 * Returns TRUE when WSAStartup reports success (0), FALSE otherwise.
 * The WSADATA details filled in by WSAStartup are discarded.
 */
BOOL WinsockStartup(BYTE highVer, BYTE lowVer)
{
    WSADATA startupInfo;

    if (WSAStartup(MAKEWORD(highVer, lowVer), &startupInfo) != 0)
    {
        return FALSE;
    }
    return TRUE;
}
/*
 * Send the NUL-terminated string `data` (without its terminator) over
 * socket `s`.
 *
 * send() is allowed to transmit fewer bytes than requested, so keep calling
 * it until the whole string has gone out.
 *
 * Returns the number of bytes sent (the string length) on success, or
 * SOCKET_ERROR if send() fails before everything was transmitted.
 */
int SendData(SOCKET s, char * data)
{
    const int total = (int)strlen(data);
    int sent = 0;

    while (sent < total)
    {
        int n = send(s, data + sent, total - sent, 0);
        if (n <= 0)
        {
            // Error (or no progress): report failure instead of spinning.
            return SOCKET_ERROR;
        }
        sent += n;
    }
    return sent;
}
/*
 * Extract the host part of `pszURL` into `pszHostName`.
 *
 * A leading "http://" is skipped if present; the host is everything from
 * there up to (not including) the first '/', or to the end of the string
 * when there is no '/'. Anything else before the '/' (for example a
 * ":port" suffix) is copied as part of the host.
 *
 * The result is always NUL-terminated, so the caller does not have to
 * pre-zero the buffer. `pszHostName` must have room for the host plus the
 * terminator; a buffer at least as large as the URL buffer is sufficient.
 */
void ParseTheURL(char * pszURL, char * pszHostName)
{
    static const char szScheme[] = "http://";
    const size_t cchScheme = sizeof(szScheme) - 1;
    char * pHostStart = pszURL;
    char * pHostEnd;
    size_t cchHost;

    if (strncmp(pszURL, szScheme, cchScheme) == 0)
    {
        pHostStart += cchScheme;
    }

    pHostEnd = strchr(pHostStart, '/');
    if (pHostEnd)
    {
        cchHost = (size_t)(pHostEnd - pHostStart);
    }
    else
    {
        cchHost = strlen(pHostStart);
    }

    memcpy(pszHostName, pHostStart, cchHost);
    pszHostName[cchHost] = '\0';  // memcpy alone leaves the output unterminated
}
/*
 * Console entry point: fetch a URL with a hand-written HTTP/1.1 GET over
 * Winsock and print the response.
 *
 * Steps: start Winsock, read a URL from stdin (DEFAULT_URL when empty),
 * resolve the host on port 80, connect to the first address that accepts,
 * send the request, echo the response headers byte by byte, then echo the
 * body and print its size in bytes.
 *
 * Returns 0 on success, -1 when Winsock start-up, name resolution or
 * connecting fails. Each exit path waits for one more line of input so the
 * console window stays open.
 */
int _tmain()
{
    char szURL[256] = { 0 };        // URL typed by the user
    char szHostName[256] = { 0 };   // host name extracted from the URL
    char szPortName[] = "80";       // port number

    if (!WinsockStartup(2, 2))
    {
        _tcprintf(TEXT("初始化Windows Sockets失败!"));
        cin.getline(szURL, 255);
        return -1;
    }

    // Restrict resolution to IPv4 TCP endpoints.
    addrinfo aiHints = { 0 };
    addrinfo * aiList = NULL;
    aiHints.ai_family = AF_INET;
    aiHints.ai_socktype = SOCK_STREAM;
    aiHints.ai_protocol = IPPROTO_TCP;

    cout<<"输入URL:";
    cin.getline(szURL, 255);
    if (strcmp(szURL, "") == 0)
    {
        strcpy(szURL, DEFAULT_URL);
        cout<<DEFAULT_URL<<endl;
    }
    ParseTheURL(szURL, szHostName);

    // The hints must actually be passed in, otherwise they have no effect.
    if (getaddrinfo(szHostName, szPortName, &aiHints, &aiList) != 0)
    {
        _tcprintf_s(TEXT("getaddrinfo失败:%d"), WSAGetLastError());
        WSACleanup();
        cin.getline(szURL, 255);
        return -1;
    }

    // Try each resolved address in turn until one connects.
    SOCKET s = INVALID_SOCKET;
    for (addrinfo * aiPtr = aiList; aiPtr != NULL; aiPtr = aiPtr->ai_next)
    {
        // Create the socket from the entry being tried, not the list head.
        s = socket(aiPtr->ai_family, aiPtr->ai_socktype, aiPtr->ai_protocol);
        if (s == INVALID_SOCKET)
        {
            _tcprintf_s(TEXT("socket创建失败:%d"), WSAGetLastError());
            continue;
        }
        if (connect(s, aiPtr->ai_addr, (int)aiPtr->ai_addrlen) == SOCKET_ERROR)
        {
            // Read the error before closesocket() can overwrite it.
            int connectError = WSAGetLastError();
            closesocket(s);
            s = INVALID_SOCKET;
            _tcprintf_s(TEXT("connect失败:%d"), connectError);
            continue;
        }
        break;
    }
    freeaddrinfo(aiList);
    if (s == INVALID_SOCKET)
    {
        WSACleanup();
        cin.getline(szURL, 255);
        return -1;
    }

    // Request target: the path part of the URL, or "/" when there is none.
    // Sending the raw input would be malformed when the user omits "http://".
    const char * pszPath = szURL;
    if (strncmp(pszPath, "http://", 7) == 0)
    {
        pszPath += 7;
    }
    pszPath = strchr(pszPath, '/');
    if (pszPath == NULL)
    {
        pszPath = "/";
    }

    // Both formatted lines fit: the URL and host are at most 254 characters.
    char requestData[512] = { 0 };
    sprintf(requestData, "GET %s HTTP/1.1\r\n", pszPath);
    SendData(s, requestData);
    sprintf(requestData, "Host:%s\r\n", szHostName);
    SendData(s, requestData);
    SendData(s, "Accept: */*\r\n");
    SendData(s, "User-Agent: Mozilla/4.0(compatible; MSIE 5.00; Windows NT)\r\n");
    SendData(s, "Connection:Close\r\n");
    SendData(s, "\r\n");    // a single empty line ends the header section

    BOOL done = FALSE;
    char buffer[1024] = { 0 };
    int l, chars = 0;

    // Print the HTTP response headers one byte at a time; an empty line
    // (a '\n' with no other characters since the previous '\n') ends them.
    while (!done)
    {
        l = recv(s, buffer, 1, 0);
        if (l <= 0)
        {
            break;  // closed or failed: buffer holds no new byte to inspect
        }
        switch (*buffer)
        {
        case '\r':
            break;
        case '\n':
            if (chars == 0)
                done = TRUE;
            chars = 0;  // start of a new line
            break;
        default:
            ++chars;
            break;
        }
        printf("%c", *buffer);
    }

    // Receive and print the body. The data is written verbatim: it comes
    // from the network and must never be used as a printf format string.
    int sum = 0;
    while ((l = recv(s, buffer, (int)sizeof(buffer), 0)) > 0)
    {
        sum += l;
        fwrite(buffer, 1, (size_t)l, stdout);
    }

    // Print the body size. The original author observed that it equals the
    // Content-Length response header, which can be used to check that the
    // whole body was received.
    printf("\n\n大小 = %d字节\n", sum);

    closesocket(s);
    WSACleanup();
    cin.getline(szURL, 255);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment