树莓派搭载yolov5-lite模型

我按照您的讲解，并根据您提供的包安装好了onnx和opencv 但是识别单个图片提示上面错误，通过树莓派摄像头识别，卡在video窗口识别不出东西
测试一张图片


import cv2
import numpy as np

import onnxruntime as ort
import math
import time

def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    description: Plots one bounding box on image img,
                 this function comes from YoLov5 project.
    param: 
        x:      a box likes [x1,y1,x2,y2]
        img:    a opencv image object
        color:  color to draw rectangle, such as (0,255,0)
        label:  str
        line_thickness: int
    return:
        no return
    """
    tl = (
        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    )  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )

def _make_grid( nx, ny):
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

def cal_outputs(outs,nl,na,model_w,model_h,anchor_grid,stride):
    
    row_ind = 0
    grid = [np.zeros(1)] * nl
    for i in range(nl):
        h, w = int(model_w/ stride[i]), int(model_h / stride[i])
        length = int(na * h * w)
        if grid[i].shape[2:4] != (h, w):
            grid[i] = _make_grid(w, h)

        outs[row_ind:row_ind + length, 0:2] = (outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 + np.tile(
            grid[i], (na, 1))) * int(stride[i])
        outs[row_ind:row_ind + length, 2:4] = (outs[row_ind:row_ind + length, 2:4] * 2) ** 2 * np.repeat(
            anchor_grid[i], h * w, axis=0)
        row_ind += length
    return outs



def post_process_opencv(outputs,model_h,model_w,img_h,img_w,thred_nms,thred_cond):
    conf = outputs[:,4].tolist()
    c_x = outputs[:,0]/model_w*img_w
    c_y = outputs[:,1]/model_h*img_h
    w  = outputs[:,2]/model_w*img_w
    h  = outputs[:,3]/model_h*img_h
    p_cls = outputs[:,5:]
    if len(p_cls.shape)==1:
        p_cls = np.expand_dims(p_cls,1)
    cls_id = np.argmax(p_cls,axis=1)

    p_x1 = np.expand_dims(c_x-w/2,-1)
    p_y1 = np.expand_dims(c_y-h/2,-1)
    p_x2 = np.expand_dims(c_x+w/2,-1)
    p_y2 = np.expand_dims(c_y+h/2,-1)
    areas = np.concatenate((p_x1,p_y1,p_x2,p_y2),axis=-1)
    
    areas = areas.tolist()
    ids = cv2.dnn.NMSBoxes(areas,conf,thred_cond,thred_nms) 
    return  np.array(areas)[ids],np.array(conf)[ids],cls_id[ids]

def infer_img(img0,net,model_h,model_w,nl,na,stride,anchor_grid,thred_nms=0.4,thred_cond=0.5):
    # 图像预处理
    img = cv2.resize(img0, [model_w,model_h], interpolation=cv2.INTER_AREA)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0
    blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)

    # 模型推理
    outs = net.run(None, {net.get_inputs()[0].name: blob})[0].squeeze(axis=0)

    # 输出坐标矫正
    outs = cal_outputs(outs,nl,na,model_w,model_h,anchor_grid,stride)

    # 检测框计算
    img_h,img_w,_ = np.shape(img0)
    boxes,confs,ids = post_process_opencv(outs,model_h,model_w,img_h,img_w,thred_nms,thred_cond)

    return  boxes,confs,ids




if __name__ == "__main__":

    # 模型加载
    model_pb_path = "num1-8.onnx"
    so = ort.SessionOptions()
    net = ort.InferenceSession(model_pb_path, so)

    # 标签字典
    dic_labels = {0: '1',
                  1: '2',
                  2: '3',
                  3: '4',
                  4: '5',
                  5: '6',
                  6: '7',
                  7: '8'}

    # 模型参数
    model_h = 320
    model_w = 320
    nl = 3
    na = 3
    stride=[8.,16.,32.]
    anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
    anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(nl, -1, 2)
    
    # 进行推理
    img0 = cv2.imread('3.jpg')
    t1 = time.time()
    det_boxes,scores,ids = infer_img(img0,net,model_h,model_w,nl,na,stride,anchor_grid,thred_nms=0.4,thred_cond=0.5)
    t2 = time.time()
    print("%.2f"%(t2-t1))
    # 结果绘图
    for box,score,id in zip(det_boxes,scores,ids):
        label = '%s:%.2f'%(dic_labels[id],score)
        
        plot_one_box(box.astype(np.int), img0, color=(255,0,0), label=label, line_thickness=None)
    cv2.imshow('img',img0)

    cv2.waitKey(0)
    





    # img = cv2.resize(img0, [320,320], interpolation=cv2.INTER_AREA)

    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # img = img.astype(np.float32) / 255.0
    # blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)

    # outs = net.run(None, {net.get_inputs()[0].name: blob})[0].squeeze(axis=0)

    # nl = 3
    # na = 3
    # stride=[8.,16.,32.]
    # anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
    # anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(nl, -1, 2)
    # model_w = 320
    # model_h = 320
    # outs = cal_outputs(outs,nl,na,model_w,model_h,anchor_grid,stride)

    # print(outs)
    # boxes,confs,ids = post_process_opencv(outs,model_h,model_w,img_h=480,img_w=640,thred_nms=0.4,thred_cond=0.5)
    # print(boxes)

测试视频


```python

import cv2
import numpy as np
import onnxruntime as ort
import time

def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    description: Plots one bounding box on image img,
                 this function comes from YoLov5 project.
    param: 
        x:      a box likes [x1,y1,x2,y2]
        img:    a opencv image object
        color:  color to draw rectangle, such as (0,255,0)
        label:  str
        line_thickness: int
    return:
        no return
    """
    tl = (
        line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    )  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(
            img,
            label,
            (c1[0], c1[1] - 2),
            0,
            tl / 3,
            [225, 255, 255],
            thickness=tf,
            lineType=cv2.LINE_AA,
        )

def _make_grid( nx, ny):
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

def cal_outputs(outs,nl,na,model_w,model_h,anchor_grid,stride):
    
    row_ind = 0
    grid = [np.zeros(1)] * nl
    for i in range(nl):
        h, w = int(model_w/ stride[i]), int(model_h / stride[i])
        length = int(na * h * w)
        if grid[i].shape[2:4] != (h, w):
            grid[i] = _make_grid(w, h)

        outs[row_ind:row_ind + length, 0:2] = (outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 + np.tile(
            grid[i], (na, 1))) * int(stride[i])
        outs[row_ind:row_ind + length, 2:4] = (outs[row_ind:row_ind + length, 2:4] * 2) ** 2 * np.repeat(
            anchor_grid[i], h * w, axis=0)
        row_ind += length
    return outs



def post_process_opencv(outputs,model_h,model_w,img_h,img_w,thred_nms,thred_cond):
    conf = outputs[:,4].tolist()
    c_x = outputs[:,0]/model_w*img_w
    c_y = outputs[:,1]/model_h*img_h
    w  = outputs[:,2]/model_w*img_w
    h  = outputs[:,3]/model_h*img_h
    p_cls = outputs[:,5:]
    if len(p_cls.shape)==1:
        p_cls = np.expand_dims(p_cls,1)
    cls_id = np.argmax(p_cls,axis=1)

    p_x1 = np.expand_dims(c_x-w/2,-1)
    p_y1 = np.expand_dims(c_y-h/2,-1)
    p_x2 = np.expand_dims(c_x+w/2,-1)
    p_y2 = np.expand_dims(c_y+h/2,-1)
    areas = np.concatenate((p_x1,p_y1,p_x2,p_y2),axis=-1)
    
    areas = areas.tolist()
    ids = cv2.dnn.NMSBoxes(areas,conf,thred_cond,thred_nms)
    if len(ids)>0:
        return  np.array(areas)[ids],np.array(conf)[ids],cls_id[ids]
    else:
        return [],[],[]
def infer_img(img0,net,model_h,model_w,nl,na,stride,anchor_grid,thred_nms=0.4,thred_cond=0.5):
    # 图像预处理
    img = cv2.resize(img0, [model_w,model_h], interpolation=cv2.INTER_AREA)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32) / 255.0
    blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)

    # 模型推理
    outs = net.run(None, {net.get_inputs()[0].name: blob})[0].squeeze(axis=0)

    # 输出坐标矫正
    outs = cal_outputs(outs,nl,na,model_w,model_h,anchor_grid,stride)

    # 检测框计算
    img_h,img_w,_ = np.shape(img0)
    boxes,confs,ids = post_process_opencv(outs,model_h,model_w,img_h,img_w,thred_nms,thred_cond)

    return  boxes,confs,ids




if __name__ == "__main__":

    # 模型加载
    model_pb_path = "num1-8.onnx"
    so = ort.SessionOptions()
    net = ort.InferenceSession(model_pb_path, so)
    
    # 标签字典
    dic_labels= {0:'1',
            1:'2',
            2:'3',
            3:'4',
            4:'5',
            5:'6',
            6:'7',
            7:'8'}
    
    # 模型参数
    model_h = 320
    model_w = 320
    nl = 3  #几层输出
    na = 3  #对于每一层有几种瞄框
    stride=[8.,16.,32.]
    anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
    anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(nl, -1, 2)
    
    video = 0
    cap = cv2.VideoCapture(video)
    flag_det = False
    while True:
        success, img0 = cap.read()
        if success:
            
            if flag_det:
                t1 = time.time()
                det_boxes,scores,ids = infer_img(img0,net,model_h,model_w,nl,na,stride,anchor_grid,thred_nms=0.4,thred_cond=0.5)
                t2 = time.time()
            
                
                for box,score,id in zip(det_boxes,scores,ids):
                    label = '%s:%.2f'%(dic_labels[id],score)
            
                    plot_one_box(box.astype(np.int16), img0, color=(255,0,0), label=label, line_thickness=None)
                    
                str_FPS = "FPS: %.2f"%(1./(t2-t1))
                
                cv2.putText(img0,str_FPS,(50,50),cv2.FONT_HERSHEY_COMPLEX,1,(0,255,0),3)
                
            
            cv2.imshow("video",img0)
        key=cv2.waitKey(1) & 0xFF    
        if key == ord('q'):
        
            break
        elif key & 0xFF == ord('s'):
            flag_det = not flag_det
            print(flag_det)
            
    cap.release() 
    
    
    
    
    # # 进行推理
    # img0 = cv2.imread('3.jpg')
    # t1 = time.time()
    # det_boxes,scores,ids = infer_img(img0,net,model_h,model_w,nl,na,stride,anchor_grid,thred_nms=0.4,thred_cond=0.5)
    # t2 = time.time()
    # print("%.2f"%(t2-t1))
    # 结果绘图
    # for box,score,id in zip(det_boxes,scores,ids):
        # label = '%s:%.2f'%(dic_labels[id],score)
        
        # plot_one_box(box.astype(np.int), img0, color=(255,0,0), label=label, line_thickness=None)
    # cv2.imshow('img',img0)

    # cv2.waitKey(0)
    





    # img = cv2.resize(img0, [320,320], interpolation=cv2.INTER_AREA)

    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # img = img.astype(np.float32) / 255.0
    # blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)

    # outs = net.run(None, {net.get_inputs()[0].name: blob})[0].squeeze(axis=0)

    # nl = 3
    # na = 3
    # stride=[8.,16.,32.]
    # anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
    # anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(nl, -1, 2)
    # model_w = 320
    # model_h = 320
    # outs = cal_outputs(outs,nl,na,model_w,model_h,anchor_grid,stride)

    # print(outs)
    # boxes,confs,ids = post_process_opencv(outs,model_h,model_w,img_h=480,img_w=640,thred_nms=0.4,thred_cond=0.5)
    # print(boxes)

```

写回答
好问题 0 提建议
追加酬金
关注问题
分享
邀请回答
编辑收藏删除
收藏举报

2条回答默认最新

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
阿里嘎多学长 2024-07-12 16:41
关注
以下内容由CHATGPT及阿里嘎多学长共同生成、有用望采纳：

您好！看起来您在使用树莓派搭载 YOLOv5-Lite 模型进行图像识别时遇到了一些问题。首先，让我们来解决您遇到的错误。

错误信息 ValueError: operands could not be broadcast together with shapes (3,2,40,13) (4800,2) 指出在执行 cal_outputs 函数时，两个数组的形状不兼容，无法进行元素级别的操作。具体来说，问题出现在以下这行代码：

outs[row_ind:row_ind + length, 0:2] = (outs[row_ind:row_ind + length, 0:2] * 2. - 0.5 + np.tile(grid[i], (na, 1))) * int(stride[i])

这里，np.tile(grid[i], (na, 1)) 应该将 grid[i] 复制 na 次，沿着第二个维度（索引为 1 的维度）。但是，看起来 grid[i] 的形状与 outs 数组的形状不匹配，导致无法正确地进行 tile 操作。

为了解决这个问题，您需要确保 grid[i] 的形状是 (h, w, 2)，其中 h 和 w 分别是输出特征图的高度和宽度。如果 grid[i] 的形状不正确，您需要重新生成它。以下是可能的解决方案：

确保 _make_grid 函数返回正确形状的网格。

检查 stride 是否正确设置，它应该能够整除 model_w 和 model_h。

在 cal_outputs 函数中，确保 grid[i] 的生成是正确的，并且它的形状与输出张量的形状兼容。

另外，关于树莓派摄像头识别卡在 video 窗口的问题，这可能是由于模型推理速度跟不上视频帧率，或者是模型没有正确加载或执行。以下是一些可能的优化和调试步骤：

确保模型文件 num1-8.onnx 是正确的，并且与您的代码兼容。

检查 InferenceSession 是否正确加载了模型。

使用较小的输入分辨率来降低模型的计算负担。

确保 infer_img 函数能够在单个图像上成功运行，然后再应用到视频流上。

使用打印语句或日志记录来确定代码的哪一部分出现问题。

如果可能，尝试在树莓派上运行一些简化的代码，比如只加载模型而不进行推理，以确保基本功能正常。

由于您提供的代码较长，我无法立即提供完整的解决方案，但上述建议应该能帮助您开始调试过程。如果您需要更具体的帮助，可以提供更详细的代码片段或错误信息。

至于参考链接，以下是一些可能会有帮助的资源：

YOLOv5 官方 GitHub - 尽管您的模型是 YOLOv5-Lite，但这里可能有通用的解决方案和讨论。
OpenCV 文档 - 特别是如果您需要在 Android 或树莓派上使用 OpenCV。
ONNX 运行时文档 - 用于了解如何加载和运行 ONNX 模型。

希望这些信息对您有所帮助！如果您有进一步的问题或需要更详细的指导，请随时告诉我。
解决
无用 1
评论打赏
分享
举报编辑记录

评论

按下Enter换行，Ctrl+Enter发表内容

报告相同问题？

关注问题

TensorFlow-Lite Object-Detection转成tflite文件，代转 tensorflow 人工智能目标检测
2021-06-22 21:25

回答 1 已采纳望采纳，谢谢 TensorFlowLite 需要tflite文件模型，这个模型可以由TensorFlow训练的模型转换而成。所以首先需要知道如何保存训练好的TensorFlow模型。一般有这几
用Go的xml包编组DIDL-Lite xml
2012-05-30 08:02

回答 1 已采纳 The example you gave is marshalled. I assume you mean to ask, "how would one define Go data types
yolov5中遇到这个'int' object has no attribute 'endswith' python 深度学习目标跟踪
2022-03-20 08:54

回答 3 已采纳 endwith是用来判断字符串的，但是你传入的source确实int整数，也就是对应的opt.source就传错了https://www.runoob.com/python/att-string-en
Yolov5移植树莓派实现目标检测
2022-12-24 13:45

爱睡觉的咋的博客 Hallo，大家好啊！之前写了几篇Yolov5相关项目的博客，然后学习...接下来，以项目为驱动，介绍一下整个流程，包括Yolov5数据集获取、模型的训练、识别效果、树莓派环境的搭建、移植树莓派、模型在树莓派上的检测效果！
树莓派4B，ssh连接进入raspi-config设置页面，vnc无法enable linux ssh
2021-11-11 15:08

回答 2 已采纳我又试了一个输出电流为2A的usb2.0接口的电源适配器，发现也能正常使用树莓派，我原来一直用的是一个输出电流为0.5A的USB电源适配器和手机type-c充电线，导致树莓派供电不足无法正常使用。而电
在Android上部署TensorFlowLite报错（官方的TensorFlowLite demo） android tensorflow 人工智能
2021-10-13 00:03

回答 1 已采纳你开墙只是电脑开墙吧？是不是没有另外设置android studio的代理或者设置Gradle的代理，没设置的话还是访问不通的
在Android上部署TensorFlowLite报错（The model is not a valid Flatbuffer buffer） tensorflow 人工智能
2021-10-13 23:21

回答 1 已采纳兄弟，没玩过这个额，你看看下是不是这个原因啊： ByteBuffer is not a valid flatbuffer model - 简书
0x-在64位树莓派上安装TensorFlow Lite 2
2023-07-20 20:42

Slothwolf的博客本篇博客主要是记录复现案例：安装TensorFlow Lite 2的过程和遇到的问题，以及最后运行的结果。
外部h5混合应用如何分享微信小程序到微信 html5 微信小程序微信开放平台
2022-08-09 09:28

回答 1 已采纳微信自己有外链跳转的schema，获取 URL Link | 微信开放文档微信开发者平台文档
关于安卓部署AI，现在手上只有一个torchscript android pytorch 人工智能
2022-04-25 16:48

回答 1 已采纳自问自答，目前进度是能使用模型处理图像。首先是关于模型：torchscript模型是可以通过pytorch.jit.load直接进行导入，可以通过netron.app查看模型的结构。Android提供
Liteide运行Go语言代码错误 golang ide
2022-08-08 13:07

回答 4 已采纳好了，我知道了，还要在自定义gopath添加go安装根目录
使用libtorch、opencv、mjpg-stream、yolov5基于C++的远程摄像头目标检测任务环境搭建
2023-05-10 08:39

时光未语花先去的博客任务概述我想基于yolov5模型完成一个目标检测任务，其中摄像头应该搭载在无人机上，我的机载电脑是树莓派，这意味着我有两种选择：将模型部署在树莓派上做推理，然后通过远程软件VNC等将视频信息同步到本地电脑。...
华为liteos系统创建线程后，线程正常退出，task查看线程状态变成了invalid c++ c语言人工智能华为
2020-03-03 18:37

回答 1 已采纳 https://blog.csdn.net/tiantao2012/article/details/86616569
yolo树莓派_YOLO 系目标检测算法家族全景图！
2020-12-16 14:37

weixin_39551993的博客点击上方“计算机视觉life”，选择...在原作者JosephRedmon博士手中YOLO经历了三代到YOLOv3，今年初JosephRedmon宣告退出计算机视觉研究界后，YOLOv4、YOLOv5相继而出，且不论谁是正统，这YOLO算法家族在创始人拂袖...
OrangePi AIpro 香橙派测评体验——YOLOv5水果识别项目
2024-07-16 11:17

深度学习实战项目的博客 OrangePi AIpro（20T）采用昇腾AI技术路线，具体为4核64位处理器+AI处理器，集成图形处理器，支持20TOPS AI算力，拥有12GB/24GB LPDDR4X，可以外接32GB/64GB/256GB eMMC模块，支持双4K高清输出。
没有解决我的问题, 去提问

问题事件

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
已结题（查看结题原因） 7月12日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
修改了问题 7月12日
关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
创建了问题 7月12日

悬赏问题

¥15 欧拉系统opt目录空间使用100%
¥15 ul做导航栏格式不对怎么改？
¥20 用户端如何上传图片到服务器和数据库里
¥15 现在研究生在烦开题，看了一些文献，但不知道自己要做什么，求指导。
¥30 vivado封装时总是显示缺少一个dcp文件
¥100 pxe uefi启动 tinycore
¥15 我pycharm运行jupyter时出现Jupyter server process exited with code 1,然后打开cmd显示如下
¥15 可否使用carsim-simulink进行四轮独立转向汽车的联合仿真，实现四轮独立转向汽车原地旋转、斜向形式、横移等动作，如果可以的话在carsim中如何进行相应设置
¥15 Caché 2016 在Java环境通过jdbc 执行sql报Parameter list mismatch错误，但是同样的sql使用连接工具可以查询出数据
¥15 疾病的获得与年龄是否有关

树莓派搭载yolov5-lite模型

2条回答 默认 最新

问题事件

悬赏问题

2条回答默认最新