llldfs
风中的微笑y
2021-03-07 12:42

C++用openssl获取网页源码不全

#include<iostream>
#include<Winsock2.h>
#include<WS2tcpip.h>
#include<string>
#include<sstream>
#include<openssl/ssl.h>
#include<openssl/err.h>



#pragma comment(lib,"ws2_32.lib")
#pragma comment(lib,"libssl.lib")
#pragma comment(lib,"libcrypto.lib")

using namespace std;




string Utf8ToGbk(const char* utf8) {

	int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);

	wchar_t* wstr = new wchar_t[len + (int64_t)1];
	memset(wstr, 0, len + (int64_t)1);
	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
	len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
	char* str = new char[len + (int64_t)1];
	memset(str, 0, len + (int64_t)1);
	WideCharToMultiByte(CP_ACP, 0, wstr, -1, str, len, NULL, NULL);
	if (wstr) delete[] wstr;
	return str;
}


int main() {

	SSL_load_error_strings();
	SSLeay_add_ssl_algorithms();
	const SSL_METHOD* meth = SSLv23_client_method();

	SSL_CTX* ctx = SSL_CTX_new(meth);

	if (ctx == NULL) {

		ERR_print_errors_fp(stderr);
		cout << "ctx error!" << endl;
		return -1;
	}


	WSADATA wsadata;
	ADDRINFO* res, * cur;
	ADDRINFO hints;
	SOCKADDR_IN* addr;

	memset(&hints, 0, sizeof(ADDRINFO));

	if (WSAStartup(MAKEWORD(2, 2), &wsadata) != 0) {

		cout << "wsastartup error" << endl;

		return -1;
	}

	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_STREAM;

	getaddrinfo("www.baidu.com", "HTTP", &hints, &res);

	cur = res;

	addr = (SOCKADDR_IN*)res->ai_addr;

	addr->sin_port = htons(443);

	SOCKET sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	
	if (sock == INVALID_SOCKET) {

		cout << "socket error" << endl;
		
		return -1;
	}

	if (connect(sock, (SOCKADDR*)addr, sizeof(*addr)) == SOCKET_ERROR) {

		cout << "connect error" << endl;

		return -1;
	}

	int ret;

	SSL* ssl = SSL_new(ctx);
	
	if (ssl == NULL) {

		cout << "ssl error" << endl;

		return -1;
	}

	SSL_set_fd(ssl, sock);
	
	ret = SSL_connect(ssl);

	if (ret == -1) {

		cout << "ssl_connect error " << endl;

		return -1;
	}


	stringstream stream;

	stream << "GET / HTTP/1.1\r\n";
	stream << "Accept: */*\r\n";
	stream << "Accept-Language: zh-Hans-CN, zh-Hans; q=0.8, en-US; q=0.5, en; q=0.3\r\n";
	stream << "Connection: close\r\n";
	stream << "Host: www.baidu.com\r\n";
	stream << "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134\r\n";
	stream << "\r\n";

	string request = stream.str();

	ret = SSL_write(ssl, request.c_str(), request.size());

	if (ret == -1) {

		cout << "ssl_writ error" << endl;

		return -1;
	}

	char pageBuf[1024];

	string s;

	while (true) {

		if ((ret = SSL_read(ssl, pageBuf, 1023)) <= 0) {
			cout << ret;
			break;
		}

		s += Utf8ToGbk(pageBuf);
	}


	cout << s << endl;
	

	SSL_shutdown(ssl);
	SSL_free(ssl);
	SSL_CTX_free(ctx);
	closesocket(sock);
	freeaddrinfo(cur);
	WSACleanup();

}

当调用编码转换函数时候ssl_read最后一次返回值是0.但是获取的源码还缺几行,不调用的时候ssl_read最后一次返回值是-1.获取的源码只有大约三分之一。请问这是什么问题

  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 复制链接分享
  • 邀请回答