我在使用pytorch的C++接口的张量转置函数permute时,发现这个函数在转换到CPU时调用的时间太多,大概是10ms,我现在想把这个速度提升到5毫秒左右,我应该怎么做?
代码如下:
#include<torch/torch.h>
#include<torch/script.h>
#include<opencv2/opencv.hpp>
#include<iostream>
using namespace std;
void main(){
cv::Mat image = cv::imread("C:\\Users\\Desktop\\1.jpg"); // filePaths[0]
cv::resize(image, image, cv::Size(256, 256));
torch::Tensor x = torch::from_blob(image.data, { 1,3,256,256 }, torch::kByte); // [1, 3, 256, 256]
try
{
x = x.squeeze(0);
double start = clock();
torch::Tensor argmax_out = x.permute({ 1, 2, 0 }).to(torch::kCPU).detach().div(255.0).to(torch::kFloat32); // 3
cv::Mat resultImg(256, 256, CV_32FC3, argmax_out.data_ptr<float>());
double end = clock();
cout << "时间: " << double(end - start) << endl;
cv::imshow("resultImg", resultImg);
}
catch (const c10::Error & e)
{
cout << e.msg() << endl;
}
cv::waitKey(0);
system("pause");
}
以下是程序输出: