detect.py
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ python detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/Zgi9g1ksQHc' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s_openvino_model # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
yolov5s_paddle_model # PaddlePaddle
LoadScreenshots这个函数是用于屏幕截图的
utils/plots.py 坐标以及画图函数box
"""
import mouse
import keyboard
import argparse
import os
import platform
import sys
from pathlib import Path
import torch
import win32con
import win32gui
import pynput
from pynput import mouse
import mouse_test.test_mouse
import win32api
import pynput
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QPainter, QColor, QPen
from PyQt5.QtWidgets import QApplication, QWidget
# Alias for the project-local mouse-movement helper module
move_mouse=mouse_test.test_mouse
# Mouse controller; mouse_aim_controller reads the current cursor position from it
mouse_controller = pynput.mouse.Controller()
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=True,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run detection over ``source`` and show every annotated frame in a small preview window.

    For each frame the function preprocesses the image, runs the model and NMS,
    rescales the boxes to the original frame, and draws them. When ``save_txt``
    is set, every detection whose class name is not "head" is additionally
    handed to ``mouse_aim_controller``. The annotated frame is always shown,
    downscaled to fit 480x270, in a window titled "tes" that is kept on top
    through the Win32 API; pressing ESC in that window ends the program.

    Writing label files and video files is currently disabled (the writer
    calls are commented out); still images are saved when ``save_img`` is true.

    Raises:
        SystemExit: if the 'p' key is held when the function starts, or when
            ESC is pressed in the preview window.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # The original expression `'p' or 'P'` always evaluated to 'p'; say so directly.
    # NOTE(review): this is tested once, before the model loads - confirm whether a
    # per-frame check was intended.
    if keyboard.is_pressed('p'):
        raise SystemExit("程序已退出")

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(im, augment=augment, visualize=visualize)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                # s is the log line, e.g. "screen 0 (LTWH): 480,270,960,540: 384x640 1 body, 1 head,"

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # NOTE(review): the list is rebuilt for every detection, so the
                    # aim helper only ever sees a single candidate - confirm intent.
                    xywh_list = []
                    c = int(cls)  # names must be indexed with an int class id
                    print(names[c])
                    if save_txt:
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        # Class filter for aiming: detections named "head" are skipped
                        # entirely (this also skips drawing their box).
                        if names[c] == "head":
                            continue
                        print(xywh)
                        xywh_list.append(xywh)
                        mouse_aim_controller(xywh_list, mouse_controller)
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        # Label-file writing is disabled:
                        # with open(f'{txt_path}.txt', 'a') as f:
                        #     f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()

            # Always-on preview: shrink the frame to fit 480x270 while keeping its aspect ratio.
            original_height, original_width = im0.shape[0], im0.shape[1]
            target_width = 480
            target_height = 270
            # Use the smaller factor so the whole frame fits inside the target box.
            scale_factor = min(target_width / original_width, target_height / original_height)
            # Clamp to 1 pixel: cv2.resize rejects a zero-sized destination.
            scaled_width = max(1, int(original_width * scale_factor))
            scaled_height = max(1, int(original_height * scale_factor))
            scaled_image = cv2.resize(im0, (scaled_width, scaled_height))
            cv2.imshow("tes", scaled_image)

            # Keep the preview window above other windows. FindWindow returns 0
            # when the window cannot be found, so only call SetWindowPos on a real handle.
            hwnd = win32gui.FindWindow(None, "tes")
            if hwnd:
                win32gui.SetWindowPos(hwnd, win32con.HWND_TOPMOST, 0, 0, 0, 0,
                                      win32con.SWP_NOMOVE | win32con.SWP_NOSIZE)

            k = cv2.waitKey(1)  # 1 millisecond
            if k % 256 == 27:  # ESC
                cv2.destroyAllWindows()
                # The listener is created only when this file runs as a script, and
                # join() on a still-running listener never returns: stop it instead.
                active_listener = globals().get('listener')
                if active_listener is not None:
                    active_listener.stop()
                sys.exit('ESC ...')

            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        # Video output is disabled; uncomment the writer lines to save the video file.
                        # vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    # vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)
def mouse_aim_controller(xywh_list, mouse):
    """Move the cursor toward the box centre that is closest to it.

    Each entry of ``xywh_list`` is read as ``(x, y, w, h)`` where ``x`` and
    ``y`` are fractions of the capture region. They are scaled by the region's
    width/height and shifted by its left/top (the module globals ``width1``,
    ``height1``, ``left1``, ``top1``) to obtain screen coordinates. The offset
    from the current cursor position to the nearest such point is passed to
    ``move_mouse.mouse_xy``.

    Args:
        xywh_list: sequence of 4-item boxes; may be empty.
        mouse: object whose ``position`` attribute yields ``(x, y)``.

    Returns:
        None. Nothing is moved when the list is empty or the capture region
        has not been configured.
    """
    # Nothing to aim at: the original indexed into None here and crashed.
    if not xywh_list:
        return None

    left, top, width, height = left1, top1, width1, height1
    # The region globals stay None until a "screen l t w h" source was parsed.
    if None in (left, top, width, height):
        return None

    # Cursor position in screen coordinates.
    mouse_x, mouse_y = mouse.position

    def to_screen(box):
        # Region-relative fraction -> absolute screen coordinate.
        return box[0] * width + left, box[1] * height + top

    def distance_to_cursor(point):
        return ((point[0] - mouse_x) ** 2 + (point[1] - mouse_y) ** 2) ** .5

    # min() keeps the first of equally distant candidates, like the original loop.
    target_x, target_y = min((to_screen(box) for box in xywh_list), key=distance_to_cursor)
    move_mouse.mouse_xy(target_x - mouse_x, target_y - mouse_y)
    return None
def parse_opt():
    """Parse the command line and record the screen-capture region.

    Besides returning the parsed namespace, this stores the capture region
    taken from a ``--source "screen <left> <top> <width> <height>"`` argument
    in the module globals ``left1``, ``top1``, ``width1`` and ``height1``.
    The globals are left untouched when the source carries no region.

    Returns:
        argparse.Namespace with ``imgsz`` expanded to two values.
    """
    global left1
    global top1
    global width1
    global height1
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'CF.pt', help='model path or triton URL')
    parser.add_argument('--source', type=str, default=ROOT / 'screen', help='file/dir/URL/glob/screen/0(webcam)')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')

    # Parse once; the original parsed the same argv twice.
    opt = parser.parse_args()

    if opt.source:
        print(opt.source)
        # The default source is a Path (argparse applies `type` only to string
        # defaults), so convert before doing any string handling.
        region = _parse_screen_region(str(opt.source))
        if region is not None:
            left1, top1, width1, height1 = region
    else:
        print("未提供 source 参数")

    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def _parse_screen_region(source):
    """Return ``(left, top, width, height)`` as floats from a screen source string, else None."""
    parts = source.split()
    if not parts or not parts[0].lower().startswith('screen'):
        return None
    params = parts[1:]
    # assumes the 5-value form is "<screen index> <left> <top> <width> <height>"
    # - TODO confirm against LoadScreenshots in utils/dataloaders.py
    if len(params) == 5:
        params = params[1:]
    if len(params) != 4:
        return None
    try:
        return tuple(float(value) for value in params)
    except ValueError:
        return None
def main(opt):
    """Check the packages listed in requirements.txt, then run detection with ``opt``."""
    requirements_file = ROOT / 'requirements.txt'
    skipped_packages = ('tensorboard', 'thop')
    check_requirements(requirements_file, exclude=skipped_packages)
    # Every parsed option maps one-to-one onto a keyword argument of run().
    options = vars(opt)
    run(**options)
def on_click(x, y, button, pressed):
    """Mouse-listener callback: toggle the LOCK_AIM flag when side button x1 is pressed.

    Args:
        x, y: pointer coordinates supplied by the listener (unused).
        button: the button that changed state; matched by its ``name``.
        pressed: True on press, False on release.

    Returns:
        None in every case (a listener callback that returns False would stop
        the listener).
    """
    global LOCK_AIM
    # Compare by name: this avoids reaching one enum member through another
    # and does not fail where the button enum has no ``x1`` member.
    if not pressed or getattr(button, 'name', None) != 'x1':
        return
    # LOCK_AIM may not have been initialised yet; treat a missing flag as off.
    LOCK_AIM = not globals().get('LOCK_AIM', False)
    print('自瞄状态: ', f"[{'开' if LOCK_AIM else '关'}]")
if __name__ == '__main__':
    # on_click toggles this flag, so it has to exist before the listener starts.
    LOCK_AIM = False
    # Capture region (left, top, width, height); filled in by parse_opt().
    left1 = None
    top1 = None
    width1 = None
    height1 = None
    # Listen for mouse clicks in the background while detection runs.
    listener = mouse.Listener(on_click=on_click)
    listener.start()
    opt = parse_opt()
    main(opt)
ghub_mouve.py
import ctypes
import os
import time
import pyautogui
import pynput
import winsound
import threading
from pynput import mouse
# Button state shared with the on_click listener callback.
mouse_left_click = False
mouse_right_click = False
from ctypes import CDLL
mouses = mouse.Controller()
# Handle to the Logitech driver DLL. It stays None when loading fails, so the
# name always exists and callers can test it instead of hitting a NameError.
driver = None
try:
    print("正在加载罗技驱动")
    driver = ctypes.CDLL(r'Z:\\HFDesktop\\ZFQ\\014-奈斯网咖\\桌面\\YoloV5-CfAim-master\\test1\\GHUB_MOUVE\\Ghub64.dll')
    print("驱动加载完成,快去奔放")
except OSError:
    # OSError covers a missing file (FileNotFoundError) as well as a DLL that
    # exists but cannot be loaded.
    print("驱动调用失败,请检查原因")
# Alternative loader kept for reference: load mouse.dll from this file's
# directory and treat the driver as usable only if device_open() returns 1.
# try:
#     root = os.path.abspath(os.path.dirname(__file__))
#     driver = ctypes.CDLL(f'{root}/mouse.dll')
#     ok = driver.device_open() == 1
#     if not ok:
#         print('Error, GHUB or LGS driver not found')
# except FileNotFoundError:
#     print(f'Error, DLL file not found')
# Callback for mouse button events
def on_click(x, y, button, pressed):
    """Track left/right button state in the module-level flags.

    A press sets the flag of the matching button; a release of any button
    clears both flags.
    """
    global mouse_left_click, mouse_right_click
    if not pressed:
        # Release: both flags are reset regardless of which button it was.
        mouse_left_click = False
        mouse_right_click = False
        print("按键松开")
        return
    if button == mouse.Button.left:
        mouse_left_click = True
        print("左键按下")
    elif button == mouse.Button.right:
        mouse_right_click = True
        print("右键按下")
class mouse_test:
    """Mouse helpers that drive the cursor through the module-level ``driver`` DLL."""

    @staticmethod
    def release(key):
        """Keyboard on-release callback.

        Beeps and returns False when End is released, beeps when Home is
        released, and otherwise does nothing. Declared static because it takes
        no instance argument.
        """
        if key == pynput.keyboard.Key.end:  # End key: stop
            winsound.Beep(400, 200)
            return False
        elif key == pynput.keyboard.Key.home:  # Home key
            winsound.Beep(600, 200)

    @staticmethod
    def _split_steps(total, num_steps):
        """Split ``total`` into ``num_steps`` integer deltas that add up to ``round(total)``."""
        deltas = []
        covered = 0
        for i in range(1, num_steps + 1):
            # Round the cumulative position, not each step, so fractions are not lost.
            target = int(round(total * i / num_steps))
            deltas.append(target - covered)
            covered = target
        return deltas

    def linear_interpolation(self, x_end, y_end, num_steps, delay):
        """Move to the absolute point (x_end, y_end) in ``num_steps`` equal steps.

        More steps or a longer ``delay`` (seconds slept after each step) make
        the movement slower.
        """
        start_x, start_y = pyautogui.position()
        dx = (x_end - start_x) / num_steps
        dy = (y_end - start_y) / num_steps
        for i in range(1, num_steps + 1):
            driver.move_Abs(int(start_x + dx * i), int(start_y + dy * i))
            time.sleep(delay)

    def r_linear_interpolation(self, r_x, r_y, num_steps, delay):
        """Move by the relative offset (r_x, -r_y) in ``num_steps`` steps.

        The original truncated every step independently, so an offset smaller
        than ``num_steps`` produced no movement at all; the steps now add up to
        the rounded offset.
        """
        r_y = 0 - r_y  # the y offset is negated before it is sent to the driver
        steps_x = mouse_test._split_steps(r_x, num_steps)
        steps_y = mouse_test._split_steps(r_y, num_steps)
        for step_x, step_y in zip(steps_x, steps_y):
            driver.move_R(step_x, step_y)
            time.sleep(delay)

    def jiance(self):
        """Start a background mouse listener that reports clicks to ``on_click``."""
        listener = mouse.Listener(on_click=on_click)
        listener.start()

    @staticmethod
    def move(x, y):
        """Move the cursor by the relative offset (x, y) through the driver.

        Does nothing when the driver is unavailable or the offset truncates
        to zero.
        """
        # `ok` only exists with the alternative loader; fall back to "driver present".
        active_driver = globals().get('driver')
        if active_driver is None or not globals().get('ok', True):
            return
        # ctypes cannot pass Python floats without declared argtypes.
        x, y = int(x), int(y)
        if x == 0 and y == 0:
            return
        # Prefer the export the original called; otherwise use move_R, which the
        # rest of this class calls on the same driver.
        # NOTE(review): r_linear_interpolation negates y before move_R - confirm
        # which sign convention move_R expects.
        legacy_move = getattr(active_driver, 'moveR', None)
        if legacy_move is not None:
            legacy_move(x, y, True)
        else:
            active_driver.move_R(x, y)

    def mouse_aim_controller(self, xywh_list, left, top, width, height):
        """Move the cursor toward the box centre that is closest to it.

        Args:
            xywh_list: sequence of ``(x, y, w, h)`` boxes whose x/y are scaled
                by ``width``/``height`` and shifted by ``left``/``top``; may be empty.
            left, top, width, height: position and size of the capture region.
        """
        # Nothing to aim at: the original indexed into None here and crashed.
        if not xywh_list:
            return None
        # Read the position from the Controller instance; the pynput ``mouse``
        # module itself has no ``position`` attribute.
        mouse_x, mouse_y = mouses.position

        def to_screen(box):
            return box[0] * width + left, box[1] * height + top

        def distance_to_cursor(point):
            return ((point[0] - mouse_x) ** 2 + (point[1] - mouse_y) ** 2) ** .5

        target_x, target_y = min((to_screen(box) for box in xywh_list), key=distance_to_cursor)
        self.move(target_x - mouse_x, target_y - mouse_y)
        return None
# Module-level helper instance
test=mouse_test()
# # Create two threads to run the mouse-event detection function
# thread1 = threading.Thread(target=test.jiance)
# # thread2 = threading.Thread(target=test.linear_interpolation(30,30, num_steps=10, delay=0.01))
#
# # Start the two threads
# thread1.start()
# # thread2.start()
# test.jiance()
# test.linear_interpolation(30,30, num_steps=10, delay=0.01)
# driver.click_Right_down();
esp.py
import sys
import time
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QPainter, QColor, QPen
from PyQt5.QtWidgets import QApplication, QWidget
class OverlayWindow(QWidget):
    """Frameless, always-on-top, translucent widget that outlines a single rectangle."""

    def __init__(self):
        super().__init__()
        self.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.setAttribute(Qt.WA_TranslucentBackground)
        # Stored privately: assigning to self.x / self.y would replace the
        # QWidget.x() / QWidget.y() accessors on this instance.
        self._rect = (0, 0, 0, 0)

    def set_rect(self, x, y, w, h):
        """Store the rectangle to outline and schedule a repaint."""
        # drawRect is called with integers, so convert once here.
        self._rect = (int(x), int(y), int(w), int(h))
        self.update()

    def paintEvent(self, event):
        """Draw the stored rectangle with a 2-pixel green pen."""
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)
        pen = QPen()
        pen.setWidth(2)
        pen.setColor(QColor("#00FF00"))
        painter.setPen(pen)
        painter.drawRect(*self._rect)
        # Release the paint device explicitly rather than relying on garbage collection.
        painter.end()
if __name__ == "__main__":
    app = QApplication(sys.argv)
    # The overlay that draws the rectangle.
    overlay_window = OverlayWindow()

    def update_rect(x, y, w, h):
        """Set the rectangle on the overlay and make sure the overlay is visible."""
        overlay_window.set_rect(x, y, w, h)
        overlay_window.show()

    # Example: show a fixed 200x200 rectangle at (50, 50).
    x, y, w, h = 50, 50, 200, 200
    update_rect(x, y, w, h)
    exit_code = app.exec_()
    sys.exit(exit_code)
mouse_test.py
import csv
import os
import sys
import time
import pyautogui
from pathlib import Path
from pynput import mouse
# Button state flags written by the on_click callback defined in this file
mouse_left_click = False
mouse_right_click = False
# Mouse controller instance
mouses = mouse.Controller()
# mouses_test=mouses.click()
def xywh(box, offset_x=480, offset_y=270):
    """Move the cursor to the midpoint of ``box``, shifted by a screen offset.

    Args:
        box: four values that each expose ``.item()``. The midpoint is taken
            between ``box[0]``/``box[2]`` horizontally and ``box[1]``/``box[3]``
            vertically, i.e. the values are used as two corner points.
        offset_x: added to the horizontal midpoint (default 480, the value
            that was previously hard-coded).
        offset_y: added to the vertical midpoint (default 270, previously
            hard-coded).
            NOTE(review): the offsets look like the capture region's top-left
            corner - confirm against the caller.
    """
    # Convert the four entries to plain Python numbers.
    x_min = box[0].item()
    y_min = box[1].item()
    x_max = box[2].item()
    y_max = box[3].item()
    # Midpoint of the box plus the configured offset.
    target_x = x_min + (x_max - x_min) / 2 + offset_x
    target_y = y_min + (y_max - y_min) / 2 + offset_y
    # Disable pyautogui's fail-safe before moving.
    pyautogui.FAILSAFE = False
    pyautogui.moveTo(target_x, target_y)
# Mouse button press/release handling
def on_click(x, y, button, pressed):
    """Track left/right button state in the module-level flags.

    A press sets the flag of the matching button. A release of any button
    clears both flags and prints the current cursor position.
    """
    global mouse_left_click, mouse_right_click
    if not pressed:
        # Release of any button: reset both flags, then report the cursor position.
        mouse_left_click = False
        print("调用a1")
        mouse_right_click = False
        print("调用a2")
        print("鼠标当前位置:", pyautogui.position())
        return
    if button == mouse.Button.left:
        # Left button pressed
        mouse_left_click = True
        print("调用b1")
    elif button == mouse.Button.right:
        # Right button pressed
        mouse_right_click = True
        print("调用b2")
def on_move(self, x, y):
    """Print the coordinates the mouse moved to; ``self`` is accepted but unused."""
    message = f'鼠标移动至坐标:({x}, {y})'
    print(message)
# Runs at import time: call on_click three times with pressed=False (the
# release path), sleeping one second after each call.
for i in range(0,3):
    on_click(1, 1, mouse.Button.left, False)
    time.sleep(1)