在改写C++过程中 遇到一些问题;
给出代码比较长,但是都是无错的,省略掉了资源建立销毁部分。
请详细阅读代码 就是一个阅读工作,本身代码应该bug问题不大。
请对比修改前后两个版本,指点一下为何第二版 没有推理结果输出。
yolo版本是 V5, 预训练权重是 yoloV5s.pt ,训练出的best.pt模型后来转的engine文件
不要照搬AI(照搬这一律举报,我只想进步,不要为难我)
第一版无错版本:就是截屏保存到本地 读取截屏文件 推理。代码无错且示例如下:
#pragma once
#include <time.h>
#include <string>
#include <filesystem>
#include <chrono>
#include <iomanip>
#include <sstream>
#include <iostream>
#include <d3d11.h>
#include <dxgi1_2.h>
#include <stdio.h>
#pragma comment(lib, "d3d11.lib")
#pragma comment(lib, "dxgi.lib")
#pragma warning(disable:4996)
// 结构体声明
struct ScreenCaptureContext
{
HWND hwnd = nullptr;
LPCWSTR windowClass = nullptr;
LPCWSTR windowTitle = nullptr;
int screenWidth = 0;
int screenHeight = 0;
ID3D11Device* pDX11Dev = nullptr;
ID3D11DeviceContext* pDX11DevCtx = nullptr;
IDXGIOutputDuplication* pDXGIOutputDup = nullptr;
ID3D11Texture2D* pCopyBuffer = nullptr;
ID3D11Texture2D* pDX11Texture = nullptr;
IDXGISurface* pCopySurface = nullptr;
unsigned char* p24bppData = nullptr;
bool useFirstBuffer = true;
};
ScreenCaptureContext context = {NULL,NULL,NULL, GetSystemMetrics(SM_CXSCREEN),GetSystemMetrics(SM_CYSCREEN)};
void RGBDataSaveAsBmpFile(const char* bmpFile, unsigned char* pRgbData, int width, int height, int biBitCount, bool flipvertical);
bool InitializeResources(ScreenCaptureContext& context, int bitDepth, int width, int height);
void CleanupResources(ScreenCaptureContext& context);
void Convert32To24bpp(unsigned char* p32bppData, unsigned char* p24bppData, int width, int height);
void CaptureAndSaveScreenshots(ScreenCaptureContext& context, int bitDepth, int CaptureStartX, int CaptureStartY, int CaptureWidth, int CaptureHeight);
// 将RGB格式图片转成bmp格式
void RGBDataSaveAsBmpFile(
const char* bmpFile, // BMP文件名称
unsigned char* pRgbData, // 图像数据
int width, // 图像宽度
int height, // 图像高度
int bitDepth, // 位图深度(24或32)
bool flipvertical) // 图像是否需要垂直翻转
{
int size = 0;
int bitsPerPixel = 0;
switch (bitDepth)
{
case 24:
bitsPerPixel = 3;
size = width * height * bitsPerPixel * sizeof(char); // 每个像素点3个字节
break;
case 32:
bitsPerPixel = 4;
size = width * height * bitsPerPixel * sizeof(char); // 每个像素点4个字节
break;
default:
return;
}
// 位图第一部分,文件信息
BITMAPFILEHEADER bfh;
bfh.bfType = (WORD)0x4d42; //图像格式 必须为'BM'格式
bfh.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);//真正的数据的位置
bfh.bfSize = size + bfh.bfOffBits;
bfh.bfReserved1 = 0;
bfh.bfReserved2 = 0;
BITMAPINFOHEADER bih;
bih.biSize = sizeof(BITMAPINFOHEADER);
bih.biWidth = width;
if (flipvertical)
bih.biHeight = -height;//BMP图片从最后一个点开始扫描,显示时图片是倒着的,所以用-height,这样图片就正了
else
bih.biHeight = height;
bih.biPlanes = 1;
bih.biBitCount = bitDepth; // 修改这里以匹配传入的位深
bih.biCompression = BI_RGB;
bih.biSizeImage = size;
bih.biXPelsPerMeter = 0;
bih.biYPelsPerMeter = 0;
bih.biClrUsed = 0;
bih.biClrImportant = 0;
FILE* fp = NULL;
fopen_s(&fp, bmpFile, "wb");
if (!fp)
return;
fwrite(&bfh, sizeof(BITMAPFILEHEADER), 1, fp);
fwrite(&bih, sizeof(BITMAPINFOHEADER), 1, fp);
fwrite(pRgbData, size, 1, fp);
fclose(fp);
}
// 将32位RGB+A 数据转换为24位RGB数据
void Convert32To24bpp(unsigned char* p32bppData, unsigned char* p24bppData, int width, int height)
{
if (!p32bppData || !p24bppData)
{
throw std::runtime_error("Source or destination buffer is null.");
}
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
size_t srcIndex = (y * width + x) * 4; // 32位数据,每个像素4字节
size_t dstIndex = (y * width + x) * 3; // 24位数据,每个像素3字节
// 获取Alpha值并进行预乘
unsigned char alpha = p32bppData[srcIndex + 3];
if (alpha != 0)
{
// 预乘RGB值
p24bppData[dstIndex + 0] = static_cast<unsigned char>((p32bppData[srcIndex + 2] * alpha) / 255); // 预乘后的Red
p24bppData[dstIndex + 1] = static_cast<unsigned char>((p32bppData[srcIndex + 1] * alpha) / 255); // 预乘后的Green
p24bppData[dstIndex + 2] = static_cast<unsigned char>((p32bppData[srcIndex + 0] * alpha) / 255); // 预乘后的Blue
}
else
{
// 如果Alpha为0,颜色无关紧要,因为像素是完全透明的,但这里我们简单设为黑色
p24bppData[dstIndex + 0]= p32bppData[srcIndex + 2]; // Red
p24bppData[dstIndex + 1] = p32bppData[srcIndex + 1]; // Green
p24bppData[dstIndex + 2] = p32bppData[srcIndex + 0]; // Blue
}
}
}
}
void CaptureAndSaveScreenshots(ScreenCaptureContext& context, int bitDepth, int CaptureStartX, int CaptureStartY, int CaptureWidth, int CaptureHeight)
{
IDXGIResource* desktopResource = nullptr;
DXGI_OUTDUPL_FRAME_INFO frameInfo;
HRESULT hr = context.pDXGIOutputDup->AcquireNextFrame(20, &frameInfo, &desktopResource);
if (FAILED(hr))
{
if (hr == DXGI_ERROR_WAIT_TIMEOUT)
{
if (desktopResource)
{
desktopResource->Release();
}
return;
}
else
{
return;
}
}
// 确保pDX11Texture和pCopySurface仅在需要时初始化
if (context.pDX11Texture == nullptr)
{
hr = desktopResource->QueryInterface(__uuidof(ID3D11Texture2D), reinterpret_cast<void**>(&context.pDX11Texture));
if (FAILED(hr)) return;
desktopResource->Release(); // 释放不再需要的桌面资源引用
hr = context.pCopyBuffer->QueryInterface(__uuidof(IDXGISurface), (void**)&context.pCopySurface);
if (FAILED(hr)) return;
}
D3D11_BOX srcBox;
srcBox.left = CaptureStartX;
srcBox.top = CaptureStartY;
srcBox.front = 0;
srcBox.right = CaptureStartX + CaptureWidth-1;
srcBox.bottom = CaptureStartY + CaptureHeight-1;
srcBox.back = 1;
context.pDX11DevCtx->CopySubresourceRegion(context.pCopyBuffer, 0, 0, 0, 0, context.pDX11Texture, 0, &srcBox);
DXGI_MAPPED_RECT MappedSurface;
hr = context.pCopySurface->Map(&MappedSurface, DXGI_MAP_READ);
if (FAILED(hr)) return;
// 生成时间戳并构建图片文件名
auto now = std::chrono::system_clock::now();
auto now_c = std::chrono::system_clock::to_time_t(now);
struct tm local_tm;
localtime_s(&local_tm, &now_c);
auto micros = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count() % 1000000;
std::ostringstream oss;
oss << std::put_time(&local_tm, "%H%M%S") << "_" << std::setw(6) << std::setfill('0') << micros;
std::string timestamp = oss.str();
std::string picNameB = "C:\\Users\\Administrator\\Desktop\\picture\\" + timestamp + "_Screen.jpg";
// 保存截图
if (bitDepth == 32)
{
RGBDataSaveAsBmpFile(picNameB.c_str(), static_cast<unsigned char*>(MappedSurface.pBits), CaptureWidth, CaptureHeight, 32, true);
}
else if (bitDepth == 24)
{
// 确保24位缓冲区已分配
if (context.p24bppData == nullptr)
throw std::runtime_error("p24bppData must be allocated before converting to 24bpp");
Convert32To24bpp(reinterpret_cast<unsigned char*>(MappedSurface.pBits), context.p24bppData, CaptureWidth, CaptureHeight);
RGBDataSaveAsBmpFile(picNameB.c_str(), context.p24bppData, CaptureWidth, CaptureHeight, 24, true);
}
context.pCopySurface->Unmap();
context.pDXGIOutputDup->ReleaseFrame();
Sleep(3); // 避免过度占用CPU
}
int main()
{
Sleep(5000);
InitializeResources(context, 32, 1080, 1080);
int i = 0;
while (true && i<20)
{
i++;
CaptureAndSaveScreenshots(context, 32, 0, 0, 1080, 1080);
Sleep(1000);
}
CleanupResources(context);
InitializeResources(context, 24, 1080, 1080);
int mi = 0;
while (true && mi < 20)
{
mi++;
CaptureAndSaveScreenshots(context, 24, 0, 0, 1080, 1080);
Sleep(1000);
}
CleanupResources(context);
}
推理代码如下:
//释放与建立资源代码一省略
void YOLOv5TRTDetector::detect(cv::Mat& frame, std::vector<DetectResult>& results) {
int64 start = cv::getTickCount();
// 图象预处理 - 格式化操作
int w = frame.cols;
int h = frame.rows;
int _max = std::max(h, w);
cv::Mat image = cv::Mat::zeros(cv::Size(_max, _max), CV_8UC3);
cv::Rect roi(0, 0, w, h);
frame.copyTo(image(roi));
// HWC => CHW
float x_factor = image.cols / static_cast<float>(this->input_w);
float y_factor = image.rows / static_cast<float>(this->input_h);
cv::Mat tensor = cv::dnn::blobFromImage(image, 1.0f / 225.f, cv::Size(input_w, input_h), cv::Scalar(), true);
// 内存到GPU显存
cudaMemcpyAsync(buffers[0], tensor.ptr<float>(), input_h * input_w * 3 * sizeof(float), cudaMemcpyHostToDevice, stream);
// 推理
context->enqueueV2(buffers, stream, nullptr);
// GPU显存到内存
cudaMemcpyAsync(prob.data(), buffers[1], output_h * output_w * sizeof(float), cudaMemcpyDeviceToHost, stream);
// 后处理
// 后处理, 1x25200x85
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
std::vector<float> confidences;
cv::Mat det_output(output_h, output_w, CV_32F, (float*)prob.data());
for (int i = 0; i < det_output.rows; i++) {
float confidence = det_output.at<float>(i, 4);
if (confidence < this->conf_thresholod) {
continue;
}
cv::Mat classes_scores = det_output.row(i).colRange(5, output_w);
cv::Point classIdPoint;
double score;
minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
// 置信度 0~1之间
if (score > this->score_thresholod)
{
float cx = det_output.at<float>(i, 0);
float cy = det_output.at<float>(i, 1);
float ow = det_output.at<float>(i, 2);
float oh = det_output.at<float>(i, 3);
int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
int width = static_cast<int>(ow * x_factor);
int height = static_cast<int>(oh * y_factor);
cv::Rect box;
box.x = x;
box.y = y;
box.width = width;
box.height = height;
boxes.push_back(box);
classIds.push_back(classIdPoint.x);
confidences.push_back(score);
}
}
// NMS省略
std::vector<int> indexes;
cv::dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
for (size_t i = 0; i < indexes.size(); i++) {
int idx = indexes[i];
int cid = classIds[idx];
DetectResult dr;
dr.classId = classIds[idx];
dr.conf = confidences[idx];
dr.box = boxes[idx];
results.emplace_back(dr);
}
}
读取图片进行推理代码省略,目前代码无错。
后续想省略叼图片的保存过程,直接将屏幕信息传入推理。 由于已经将尺寸设置为6406403的RGB格式,归一化流程改编为:
void YOLOv5TRTDetector::detect(const unsigned char* imageData, int width, int height, std::vector<DetectResult>& results)
{
int64 start = cv::getTickCount();
// 直接从图像数据创建cv::Mat,假设图像数据是连续的RGB格式
cv::Mat image(height, width, CV_8UC3, const_cast<unsigned char*>(imageData));
// 由于尺寸已匹配模型输入(640x640),不需要调整大小,直接进行归一化处理
cv::Mat inputBlob;
cv::dnn::blobFromImage(image, inputBlob, 1.0f / 255.0f, cv::Size(), cv::Scalar(), true, false)
// 后处理逻辑保持不变
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
std::vector<float> confidences;
// 假设prob已经填充,根据prob数据进行解析
for (size_t i = 0; i < prob.size(); i += 85) { // 假设每85个元素代表一个预测结果
float confidence = prob[i + 4];
if (confidence < this->conf_threshold) continue;
int classId = static_cast<int>(prob[i + 5]);
float score = confidence;
// 解析边界框坐标,假设是归一化的,需要还原到原图尺度
float x = prob[i] * width;
float y = prob[i + 1] * height;
float w = prob[i + 2] * width;
float h = prob[i + 3] * height;
cv::Rect box(x - w / 2, y - h / 2, w, h);
boxes.push_back(box);
classIds.push_back(classId);
confidences.push_back(score);
}
// 非最大抑制
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, this->conf_threshold, this->nms_threshold, indices);
// 构建最终结果
for (int idx : indices) {
DetectResult dr;
dr.classId = classIds[idx];
dr.conf = confidences[idx];
dr.box = boxes[idx];
results.push_back(dr);
}
}
这一部分该如何修改,我直接将截屏中的 context.p24bppData传入转换为浮点型后 可以运行 但是没有结果输出。