我想要通过YOLOv5原本代码加上arduino二自由度云台实现目标检测加自动跟踪,于在YOLOv5原代码的基础上加入了自己做的arduino的上位机,串口和下位机代码。但是总是实现不了,老是报错,亦或者是检测到目标后云台无反应或者复位,希望能有大佬帮我检查下这个算法中的错误,求求了
Python端:
def detect_vid(self):
# pass
model = self.model
output_size = self.output_size
# source = self.img2predict # file/dir/URL/glob, 0 for webcam
imgsz = [640, 640] # inference size (pixels)
conf_thres = 0.25 # confidence threshold
iou_thres = 0.45 # NMS IOU threshold
max_det = 1000 # maximum detections per image
# device = self.device # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img = False # show results
save_txt = False # save results to *.txt
save_conf = False # save confidences in --save-txt labels
save_crop = False # save cropped prediction boxes
nosave = False # do not save images/videos
classes = None # filter by class: --class 0, or --class 0 2 3
agnostic_nms = False # class-agnostic NMS
augment = False # ugmented inference
visualize = False # visualize features
line_thickness = 3 # bounding box thickness (pixels)
hide_labels = False # hide labels
hide_conf = False # hide confidences
half = False # use FP16 half-precision inference
dnn = False # use OpenCV DNN for ONNX inference
source = str(self.vid_source)
webcam = self.webcam
device = select_device(self.device)
stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
imgsz = check_img_size(imgsz, s=stride) # check image size
save_img = not nosave and not source.endswith('.txt') # save inference images
# Dataloader
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
bs = len(dataset) # batch_size
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
bs = 1 # batch_size
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
if pt and device.type != 'cpu':
model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # warmup
dt, seen = [0.0, 0.0, 0.0], 0
for path, im, im0s, vid_cap, s in dataset:
t1 = time_sync()
im = torch.from_numpy(im).to(device)
im = im.half() if half else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
t2 = time_sync()
dt[0] += t2 - t1
# Inference
# visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred = model(im, augment=augment, visualize=visualize)
t3 = time_sync()
dt[1] += t3 - t2
# NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
dt[2] += time_sync() - t3
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f'{i}: '
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
p = Path(p) # to Path
# save_path = str(save_dir / p.name) # im.jpg
# txt_path = str(save_dir / 'labels' / p.stem) + (
# '' if dataset.mode == 'image' else f'_{frame}') # im.txt
s += '%gx%g ' % im.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
# Convert xyxy coordinates to corner points
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file,以TXT格式保存
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(
-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
# with open(txt_path + '.txt', 'a') as f:
# f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
annotator.box_label(xyxy, label, color=colors(c, True))
# if save_crop:
# save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg',
# BGR=True)
# Print time (inference-only)
LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')
# Stream results
# Save results (image with detections)
im0 = annotator.result()
frame = im0
resize_scale = output_size / frame.shape[0]
frame_resized = cv2.resize(frame, (0, 0), fx=resize_scale, fy=resize_scale)
cv2.imwrite("images/tmp/single_result_vid.jpg", frame_resized)
self.vid_img.setPixmap(QPixmap("images/tmp/single_result_vid.jpg"))
# self.vid_img
# if view_img:
# cv2.imshow(str(p), im0)
# self.vid_img.setPixmap(QPixmap("images/tmp/single_result_vid.jpg"))
# cv2.waitKey(1) # 1 millisecond
if cv2.waitKey(25) & self.stopEvent.is_set() == True:
self.stopEvent.clear()
self.webcam_detection_btn.setEnabled(True)
self.mp4_detection_btn.setEnabled(True)
self.reset_vid()
break
# self.reset_vid()
def xywh_to_xyxy(det):
# det is a N x 4 array where each row is (x, y, w, h)
x, y, w, h = det[:, 0], det[:, 1], det[:, 2], det[:, 3]
# Calculate the corner points
x1 = x - w / 2
y1 = y - h / 2
x2 = x + w / 2
y2 = y + h / 2
# Stack the coordinates for each corner
corners = np.vstack([
np.hstack([x1.reshape(-1, 1), y1.reshape(-1, 1)]),
np.hstack([x2.reshape(-1, 1), y1.reshape(-1, 1)]),
np.hstack([x2.reshape(-1, 1), y2.reshape(-1, 1)]),
np.hstack([x1.reshape(-1, 1), y2.reshape(-1, 1)])
]).T
return corners
# Example usage
det = np.array([[100, 100, 50, 60], [150, 150, 40, 50]]) # Example detection array
corners = xywh_to_xyxy(det)
print(corners)
camera = cv2.VideoCapture(0)
# Arduino端口
serialPort = "COM3"
baudRate = 9600
ser = serial.Serial(serialPort, baudRate, timeout=0.5)
# 判断在哪个象限
def JudgeQuadrant(axis_x, axis_y):
distanceX = axis_x - 320;
distanceY = axis_y - 240;
if (distanceX < 0 and distanceY < 0):
return 2
if (distanceX > 0 and distanceY < 0):
return 1
if (distanceX < 0 and distanceY > 0):
return 3
if (distanceX > 0 and distanceY > 0):
return 4
# 返回与x轴的夹角
def computeAngle(axis_x, axis_y):
dx = abs(axis_x - 320);
dy = abs(axis_y - 240)
if dx == 0:
return
tanA = dy / dx;
angle1 = math.atan(tanA)
return math.degrees(angle1)
while camera.isOpened(): # 摄像头打开条件下
(grabbed, frame) = camera.read()
# camera.read()开启电脑默认摄像头,参数grabbed为True 或者False,代表有没有读取到图片 第二个参数frame表示截取到一帧的图片
marker = corners
# 对这一帧图片建框
print(marker)
if marker.all() == 0: # 无检测成果
# cv2.imshow("captureR", frame)
# cv2.destroyWindow("captureR")
# sendMsg("dis:%.0f" % (99999))
continue
##inches = distance_to_camera(KNOWN_WIDTH ,focalLength, marker[1][0])
# inches英寸
# sendMsg("dis:%.0f" % (inches))
box = cv2.boxPoints(marker)
# 获取矩形四个顶点,浮点型
box = np.int64(box)
# 储存为64位整数
# 画出轮廓
cv2.drawContours(frame, [box], -1, (0, 255, 0), 2)
# 在图像中某一位置显示文字
##cv2.putText(frame, "%.2fcm" % (inches),
##(frame.shape[1] - 600, frame.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX,
##2.0, (0, 255, 0), 3)
# cv2.destroyWindow("captureR")
# 输出图像,并将图像命名为frame
cv2.imshow("capture", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# camera.release()
cv2.destroyWindow("capture")
# client.close()
# (grabbed, frame) = camera.read()
# marker = find_marker(frame) # 获取物体轮廓,具体参考我的第一篇博客
# continue
box = cv2.boxPoints(marker)
# 获取矩形四个顶点,浮点型
box = np.int0(box)
# 取整
# 此轮廓中心 marker[0][0]为 x轴 marker[0][1]为y轴
JudgeQuadrant(marker[0][0], marker[0][1])
# 象限
Quadrant = JudgeQuadrant(marker[0][0], marker[0][1]);
# 角度
if computeAngle(marker[0][0], marker[0][1]) == None:
continue;
Angle = int(computeAngle(marker[0][0], marker[0][1]));
# 下面的舵机,如果与x轴的角度相差超过20度,则根据所在象限控制左右转动
if 90 - Angle > 10:
Quadrant = JudgeQuadrant(marker[0][0], marker[0][1])
ser.write(str.encode(str(Quadrant))
)
# 上面的舵机
Quadrant = JudgeQuadrant(marker[0][0], marker[0][1])
if Angle > 10 and (Quadrant == 1 or Quadrant == 2):
# 若是在1、2象限则控制上面的舵机向下转动
ser.write(str.encode(str(6)))
elif Angle > 10 and (Quadrant == 3 or Quadrant == 4):
# 若是在3、4象限则控制上面的舵机向上转动
ser.write(str.encode(str(5)))
def reset_vid(self):
self.webcam_detection_btn.setEnabled(True)
self.mp4_detection_btn.setEnabled(True)
self.vid_img.setPixmap(QPixmap("images/UI/up.jpeg"))
self.vid_source = '0'
self.webcam = True
'''
### 视频重置事件 ###
'''
def close_vid(self):
self.stopEvent.set()
self.reset_vid()
if __name__ == "__main__":
app = QApplication(sys.argv)
mainWindow = MainWindow()
mainWindow.show()
sys.exit(app.exec_())
arduino端:
#include <Servo.h>
char Quadrant;
int pos=0;
int pos2=0;
Servo myservo1;
Servo myservo2;
void setup() {
pinMode(11, OUTPUT);
Serial.begin(9600);
myservo1.attach(5,500,2500);
myservo2.attach(6,500,2500);
setMiddle();
delay(1000);
// put your setup code here, to run once:
}
void setMiddle(){
for(pos=0;pos<=90;pos+=1){
pos2+=1;
myservo1.write(pos);
myservo2.write(pos2);
delay(20);
}
}
//向下转
void toDown(){
pos2-=2;
if(pos2<0)return;
myservo2.write(pos2);
delay(10);
}
//向上转
void toUp(){
pos2+=2;
if(pos2>180)return;
myservo2.write(pos2);
delay(10);
}
//向左转
void toLeft(){
pos-=2;
if(pos<0)return;
myservo1.write(pos);
// delay(30);
}
//向右转
void toRight(){
pos+=2;
if(pos>180)return;
myservo1.write(pos);
// delay(30);
}
void loop() {
// while(!Serial)//检测是否有串口接通
// {}
if(Serial.available()>0) //判断是否具有输入
{
Quadrant=Serial.read(); //接收数据
switch(Quadrant){
case '1':
toLeft(); break;
case '2':
toRight(); break;
case '3':
toRight(); break;
case '4':
toLeft(); break;
case '5':
toDown(); break;
case '6':
toUp(); break;
default :
break;
}
}
}