zzz~986 · 2024-03-03 20:34

Exporting an ONNX model with the ultralytics library: the exported model loses its prediction ability

Today I used the ultralytics library to export a yolov8n model to ONNX format. When I validated it, I found that the exported model's predictions are completely wrong compared with the .pt model. The export code is as follows:

from ultralytics import YOLO
import torch

# Load a model
model = YOLO('yolov8n.yaml')  # build a new model from YAML
model = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

# Train the model
# results = model.train(data='coco128_.yaml', epochs=100, imgsz=640)
# if __name__ == "__main__":
#     torch.onnx.export(model, torch.rand(1, 3, 640, 640), "yolov8n.onnx",
#                       dynamic_axes={'input.1' : {0 : 'batch_size'}, 'output' : {0 : 'batch_size'}})

model.export(format="onnx")
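
A quick way to isolate whether the export itself or my own pre/post-processing is at fault is to load the .onnx back through the same YOLO wrapper, which can run ONNX files directly (a minimal sketch, reusing the image path from the validation script below):

from ultralytics import YOLO

# The YOLO wrapper accepts .onnx files and applies its own pre/post-processing
onnx_model = YOLO("yolov8n.onnx")
results = onnx_model("../video_process/processed_photos/frame_0100.jpg")
print(results[0].boxes)  # boxes, confidences and class IDs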

I then verified this ONNX model with the following code:

import onnxruntime as ort
from PIL import Image
import numpy as np
from ultralytics.utils import ops
import torch


# Load the ONNX model
def load_model(model_path):
    sess = ort.InferenceSession(model_path)
    return sess


# Preprocess the image
def preprocess_image(image_path, target_size=(640, 640), if_mask=True, if_show=False):
    # Load the image and resize it; intended for 1920*1080 source images
    img = Image.open(image_path)
    size = img.size
    if size[0] > size[1] and if_mask:
        img = img.resize((target_size[0], target_size[1]*size[1]//size[0]))
    else:
        img = img.resize((target_size[0], target_size[1]))
    # Convert the image to a numpy array
    img_array = np.array(img)
    img_array = np.transpose(img_array, (2, 0, 1))
    if if_mask:
        ch, h, w = img_array.shape
        mask_ch = ch
        mask_h = (w - h)//2
        mask_w = w
        mask = np.zeros((mask_ch, mask_h, mask_w), dtype=np.float32)
        img_array = np.concatenate((mask, img_array, mask), axis=1)
    if if_show:
        img_array_ = img_array.copy()
        img_array_ = np.transpose(img_array_, (1, 2, 0))
        image = Image.fromarray(img_array_.astype(np.uint8))
        image.show()
    img_array = np.expand_dims(img_array, axis=0)
    # Convert to float32
    img_array = img_array.astype(np.float32)
    return img_array


# Run prediction on an image
def predict(model, image_path):
    # Load and preprocess the image
    img_array = preprocess_image(image_path)
    # Get the model input and output names
    input_name = model.get_inputs()[0].name
    output_name = model.get_outputs()[0].name
    # Run inference
    pred_onnx = model.run([output_name], {input_name: img_array})[0]
    return pred_onnx


# Main function
def main(model_path, image_path):
    # Load the model
    model = load_model(model_path)
    # Predict on the image
    predictions = predict(model, image_path)
    predictions = torch.tensor(predictions)
    preds = ops.non_max_suppression(predictions, conf_thres=0.5, iou_thres=0.25, nc=80, max_det=10)
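    # ops.non_max_suppression returns a list with one (n, 6) tensor per image;
    # each row is x1, y1, x2, y2, confidence, class_id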
    predictions_ = np.array(preds)
    print("Predictions:", predictions_)


# Set the model path and image path
model_path = "yolov8n.onnx"
image_path = "../video_process/processed_photos/frame_0100.jpg"
# Run the main function
main(model_path, image_path)

This is the sample image I used for prediction:

[Image: sample frame used for prediction]

This is the prediction result from the .pt file downloaded via ultralytics:

[Image: .pt model prediction result]

It looks completely normal.
However, after exporting it to .onnx with ultralytics, I tried two ways of preprocessing the image, both producing a (1, 3, 640, 640) tensor:

[Image: preprocessing method 1]

[Image: preprocessing method 2]

But in both cases the predictions look roughly like this:

[Image: ONNX model prediction output]

I took the 10 results with the highest confidence, but they are all class 45 or 49, even though there should be at least two people (class 0), and the predicted boxes are less than one pixel wide. The model has clearly stopped working. How can I fix this?

Accepted answer (1 of 17):
    Hi, based on your question I rewrote the code for you; please check whether the accuracy is right and whether it solves your problem. The most likely cause of your symptoms is normalization: the ONNX graph exported by ultralytics expects float input scaled to [0, 1] (the .pt inference pipeline divides by 255 internally), but your preprocess_image feeds it raw 0-255 pixel values.
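
    As a minimal check before swapping in the full script below, you could patch just the normalization in your own preprocess_image (replacing the np.array call):

    # Scale pixel values to [0, 1]; the exported graph expects normalized float input
    img_array = np.array(img).astype(np.float32) / 255.0

    The complete rewritten inference script: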

    # Ultralytics YOLO 🚀, AGPL-3.0 license
    
    import argparse
    
    import cv2
    import numpy as np
    import onnxruntime as ort
    import torch
    
    # from ultralytics.utils import ASSETS, yaml_load
    # from ultralytics.utils.checks import check_requirements, check_yaml
    from names import names  # local helper module mapping class IDs to COCO class names
    
    class YOLOv8:
        """YOLOv8 object detection model class for handling inference and visualization."""
    
        def __init__(self, onnx_model, input_image, confidence_thres, iou_thres, if_show=False):
            """
            Initializes an instance of the YOLOv8 class.
    
            Args:
                onnx_model: Path to the ONNX model.
                input_image: Path to the input image.
                confidence_thres: Confidence threshold for filtering detections.
                iou_thres: IoU (Intersection over Union) threshold for non-maximum suppression.
            """
            self.if_show = if_show
            self.onnx_model = onnx_model
            self.input_image = input_image
            self.confidence_thres = confidence_thres
            self.iou_thres = iou_thres
    
            # Load the class names from the COCO dataset
            self.classes = names
    
            # Generate a color palette for the classes
            self.color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
    
        def draw_detections(self, img, box, score, class_id):
            """
            Draws bounding boxes and labels on the input image based on the detected objects.
    
            Args:
                img: The input image to draw detections on.
                box: Detected bounding box.
                score: Corresponding detection score.
                class_id: Class ID for the detected object.
    
            Returns:
                None
            """
    
            # Extract the coordinates of the bounding box
            x1, y1, w, h = box
    
            # Retrieve the color for the class ID
            color = self.color_palette[class_id]
    
            # Draw the bounding box on the image
            cv2.rectangle(img, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)
    
            # Create the label text with class name and score
            label = f"{self.classes[class_id]}: {score:.2f}"
    
            # Calculate the dimensions of the label text
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    
            # Calculate the position of the label text
            label_x = x1
            label_y = y1 - 10 if y1 - 10 > label_height else y1 + 10
    
            # Draw a filled rectangle as the background for the label text
            cv2.rectangle(
                img, (label_x, label_y - label_height), (label_x + label_width, label_y + label_height), color, cv2.FILLED
            )
    
            # Draw the label text on the image
            cv2.putText(img, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
    
        def preprocess(self):
            """
            Preprocesses the input image before performing inference.
    
            Returns:
                image_data: Preprocessed image data ready for inference.
            """
            # Read the input image using OpenCV
            self.img = cv2.imread(self.input_image)
    
            # Get the height and width of the input image
            self.img_height, self.img_width = self.img.shape[:2]
    
            # Convert the image color space from BGR to RGB
            img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
    
            # Resize the image to match the input shape
            img = cv2.resize(img, (self.input_width, self.input_height))
    
            # Normalize the image data by dividing it by 255.0
            image_data = np.array(img) / 255.0
    
            # Transpose the image to have the channel dimension as the first dimension
            image_data = np.transpose(image_data, (2, 0, 1))  # Channel first
    
            # Expand the dimensions of the image data to match the expected input shape
            image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
    
            # Return the preprocessed image data
            return image_data
    
        def postprocess(self, input_image, output):
            """
            Performs post-processing on the model's output to extract bounding boxes, scores, and class IDs.
    
            Args:
                input_image (numpy.ndarray): The input image.
                output (numpy.ndarray): The output of the model.
    
            Returns:
                numpy.ndarray: The input image with detections drawn on it.
            """
    
            # Transpose and squeeze the output to match the expected shape
            outputs = np.transpose(np.squeeze(output[0]))
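            # For the 80-class COCO model the raw output is (1, 84, 8400):
            # 4 box values (cx, cy, w, h) plus 80 class scores for each of 8400 candidates,
            # so after squeeze + transpose every row of `outputs` is one candidate detection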
    
            # Get the number of rows in the outputs array
            rows = outputs.shape[0]
    
            # Lists to store the bounding boxes, scores, and class IDs of the detections
            boxes = []
            scores = []
            class_ids = []
    
            # Calculate the scaling factors for the bounding box coordinates
            x_factor = self.img_width / self.input_width
            y_factor = self.img_height / self.input_height
    
            # Iterate over each row in the outputs array
            for i in range(rows):
                # Extract the class scores from the current row
                classes_scores = outputs[i][4:]
    
                # Find the maximum score among the class scores
                max_score = np.amax(classes_scores)
    
                # If the maximum score is above the confidence threshold
                if max_score >= self.confidence_thres:
                    # Get the class ID with the highest score
                    class_id = np.argmax(classes_scores)
    
                    # Extract the bounding box coordinates from the current row
                    x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
    
                    # Calculate the scaled coordinates of the bounding box
                    left = int((x - w / 2) * x_factor)
                    top = int((y - h / 2) * y_factor)
                    width = int(w * x_factor)
                    height = int(h * y_factor)
    
                    # Add the class ID, score, and box coordinates to the respective lists
                    class_ids.append(class_id)
                    scores.append(max_score)
                    boxes.append([left, top, width, height])
    
            # Apply non-maximum suppression to filter out overlapping bounding boxes
            indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres)
    
            # Iterate over the selected indices after non-maximum suppression
            detected = []
            for i in indices:
                # Get the box, score, and class ID corresponding to the index
                box = boxes[i]
                score = scores[i]
                class_id = class_ids[i]
                detected.append({'box': box, 'score': score, 'class_id': class_id})
                # Draw the detection on the input image
                if self.if_show:
                    self.draw_detections(input_image, box, score, class_id)
    
            # Return the modified input image
            if self.if_show:
                return input_image
            else:
                return detected
    
        def main(self):
            """
            Performs inference using an ONNX model and returns the output image with drawn detections.
    
            Returns:
                output_img: The output image with drawn detections.
            """
            # Create an inference session using the ONNX model and specify execution providers
            # (fall back to CPU when CUDA is not available)
            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if torch.cuda.is_available() else ["CPUExecutionProvider"]
            session = ort.InferenceSession(self.onnx_model, providers=providers)
    
            # Get the model inputs
            model_inputs = session.get_inputs()
    
            # Store the shape of the input for later use (NCHW: batch, channels, height, width)
            input_shape = model_inputs[0].shape
            self.input_height = input_shape[2]
            self.input_width = input_shape[3]
    
            # Preprocess the image data
            img_data = self.preprocess()
    
            # Run inference using the preprocessed image data
            outputs = session.run(None, {model_inputs[0].name: img_data})
    
            # Perform post-processing on the outputs to obtain output image.
            return self.postprocess(self.img, outputs)  # output image
    
    
    if __name__ == "__main__":
        # Create an argument parser to handle command-line arguments
        parser = argparse.ArgumentParser()
        parser.add_argument("--model", type=str, default="yolov8n.onnx", help="Input your ONNX model.")
        parser.add_argument("--img", type=str, default="../video_process/processed_photos/frame_0100.jpg", help="Path to input image.")
        parser.add_argument("--conf-thres", type=float, default=0.5, help="Confidence threshold")
        parser.add_argument("--iou-thres", type=float, default=0.5, help="NMS IoU threshold")
        parser.add_argument("--if-show", type=bool, default=False, help="If show the output image.")
        args = parser.parse_args()
    
        # Check the requirements and select the appropriate backend (CPU or GPU)
        # check_requirements("onnxruntime-gpu" if torch.cuda.is_available() else "onnxruntime")
    
        # Create an instance of the YOLOv8 class with the specified arguments
        import time
        time_start = time.time()
        detection = YOLOv8(args.model, args.img, args.conf_thres, args.iou_thres, args.if_show)
    
        # Perform object detection and obtain the output image
        output = detection.main()
    
        if detection.if_show:
            # Display the output image in a window
            cv2.namedWindow("Output", cv2.WINDOW_NORMAL)
            cv2.imshow("Output", output)
    
            # Wait for a key press to exit
            cv2.waitKey(0)
        else:
            print(output)
        time_end = time.time()
        print("time cost: ", time_end - time_start)
    
    
    

Question events

  • Closed by the system on Mar 12
  • Answer accepted on Mar 4
  • Question created on Mar 3
