类似天猫商品的网址以GET方法获取的是301。
(1)断开程序,截取其中的Location继续GET仍然是301,多次运行,多次截取上一次返回的Location最终会返回200 OK
(2)若采用自动截取,循环方法GET无法得到200 OK。
采用截取上次返回的location获取重定向后的网址思路对吗?为什么采用循环GET无法得到200OK?源码附上,谢谢指点。
#include<string.h>
#include<sys/types.h>
#include<sys/socket.h>
#include<netdb.h>
#include<netinet/in.h>
#include<stdlib.h>
#include<unistd.h>
#include<fcntl.h>
#include<sys/stat.h>
#define RECVBUF 102400
/*思想:Ubuntu 11.04
*GET方法返回http响应消息和html源码;
*根据协议,响应消息最后一行为空行标志响应信息结束
*寻找两相邻换行符间字符个数<=2表示找到该标志
*提取响应消息,截取Location
*/
char *find_linebreak(char *pstart)
{
char *p = pstart;
for(*p; *p != '\n'; p++);
return p;
}
void geturl(char *url)
{
int sockfd;
struct sockaddr_in addr;
struct hostent *pURL;
char myurl[BUFSIZ];
char *pHost = 0, *pGET = 0;
char host[BUFSIZ], GET[BUFSIZ];
char header[BUFSIZ] = "";
char text[RECVBUF];
char text_tmp[RECVBUF];
int i ;
int redirect_num;
char *linebreak1, *linebreak2; //两换行标志
char *find_redirect, *finded_redirect;
do
{
strcpy(myurl, url);
for (pHost = myurl; *pHost != '/' && *pHost != '\0'; ++pHost);
if ( (int)(pHost - myurl) == strlen(myurl) )
strcpy(GET, "/");
else
strcpy(GET, pHost);
*pHost = '\0';
strcpy(host, myurl);
if(-1 == (sockfd = socket(AF_INET,SOCK_STREAM,0)))
{
printf("create socket failed of client!\n");
exit(-1);
}
pURL = gethostbyname(host);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = *((unsigned long*)pURL->h_addr);
addr.sin_port = htons(80);
strcat(header, "GET ");
strcat(header, GET);
strcat(header, " HTTP/1.1\r\n");
strcat(header, "HOST: ");
strcat(header, host);
strcat(header, "\r\nConnection: Close\r\n\r\n");
int cc;
if(-1 == (cc = connect(sockfd,(struct sockaddr*)&addr,sizeof(addr))))
{
printf("connect failed of client!\n");
exit(1);
}
//向服务器发送url请求的request
int cs;
if(-1 == (cs = send(sockfd,header,strlen(header),0)))
{
printf("向服务器发送请求的request失败!\n");
exit(1);
}
FILE *fd;
fd= fopen("html.txt","w+");
recv(sockfd, text, RECVBUF, 0);
memcpy(text_tmp, text, RECVBUF);
/***截取http响应消息***/
linebreak2 = find_linebreak(text_tmp);
do{
linebreak1 = linebreak2;
linebreak2 = find_linebreak( linebreak1+1 );
}while(linebreak2 - linebreak1 > 2); //换行符为"\r\n"?
*linebreak2 = '\0';
/***提取重定向Location***/
find_redirect = strstr(text_tmp, "Location:");
if(NULL != find_redirect)
{
if(find_redirect = strstr(find_redirect,"://"))
{
find_redirect = find_redirect + 3;
finded_redirect = find_linebreak(find_redirect);
if ( *(--finded_redirect) == '\r' ) //不确定换行符是"\r\n"?
*finded_redirect = '\0';
else *(++finded_redirect) ='\0'; //还是'\n'?
url = find_redirect;
printf("\n\n***%d\n%s",redirect_num,url);//redirect_num 重定向次数
redirect_num ++;
continue;
}
}
while (recv(sockfd, text, RECVBUF, 0) > 0)
{
fprintf(fd, "%s", text);
memset(text, 0, RECVBUF);
}
fclose(fd);
printf("接受完毕。。。。\n");
close(sockfd);
break;
}while(redirect_num <= 10); //最多重定向次数
}
int main()
{
char url[2560];
printf("http://");
scanf("%s", url);
geturl(url);
return 0;
}