weixin_54532612 2022-03-14 16:42 采纳率: 50%
浏览 141
已结题

mtcnn和facenet实现部分遮挡下的人脸识别

问题遇到的现象和发生背景
问题相关代码,请勿粘贴截图
运行结果及报错内容
我的解答思路和尝试过的方法
我想要达到的结果

对于这些代码我还不太清楚,希望可以帮我用中文详细注释一下代码,谢谢。
``

from facenet import MTCNN, InceptionResnetV1
from torchvision.transforms import functional as F
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from scipy.spatial.distance import canberra
from sklearn.preprocessing import Normalizer
import torch
import tkinter as tk
from tkinter.filedialog import askopenfilename
from PIL import Image, ImageTk

max_w_h = (700, 500)

def ShowImg(img, img_labels):
    rgba_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
    show_img = Image.fromarray(rgba_img)
    show_img = ImageTk.PhotoImage(show_img)
    img_labels.config(image=show_img)
    img_labels.image = show_img

def Resize(img):
    h, w, _ = img.shape
    ratio = h / w
    if max_w_h[0] * ratio > max_w_h[1]:
        return cv2.resize(img, (int(max_w_h[1] / ratio), int(max_w_h[1])))
    else:
        return cv2.resize(img, (int(max_w_h[0]), int(max_w_h[0] * ratio)))

gpu = False
device = torch.device('cuda:0' if gpu & torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

# loading the facenet model
facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Loading the mtcnn model
mtcnn = MTCNN(margin=30, device=device)
l2_encoder = Normalizer(norm='l2')

# load trained faces
data = np.load('data.npz')
trainx_embed, trainy = data['a'], data['b']

def calculate_distance(embedding, known_faces, known_labels):
    store = dict()
    for i in known_labels:
        if i not in store:
            store[i] = []
    for i in range(known_faces.shape[0]):
        store[known_labels[i]].append(canberra(embedding, known_faces[i]))
    for i in store.keys():
        store[i] = sum(store[i]) / len(store[i])
    dist = min(store.values())
    for i in store:
        if store[i] == dist:
            return (dist, i)

def run(imgp):
    try:
        img = Image.open(imgp)
        _, img_cropped, box = mtcnn(img)
        face_array = np.array(img_cropped)
        face_pixels = F.to_tensor(np.float32(face_array))
        face_pixels = (face_pixels - 127.5) / 128.0
        feas = facerec(face_pixels.unsqueeze(0).to(device))
        feas = feas.cpu().detach().numpy()
        distance, label = calculate_distance(feas, trainx_embed, trainy)
        print(label)
        # plot
        image = cv2.imread(imgp)
        if distance > 350:  # 368
            label = "UNKNOWN"
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
        cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness, lineType=cv2.LINE_AA)
        tf = max(thickness - 1, 1)  # font thickness
        w, h = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=tf)[0]  # text width, height
        outside = p1[1] - h - 3 >= 0  # label fits outside box
        cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, thickness / 3, (0, 255, 255),
                    thickness=tf, lineType=cv2.LINE_AA)
        data_img = Resize(image)
        ShowImg(data_img, img_l)
        img_l.update()
    except:
        print('Can not detect face!')

def select_img():
    path = askopenfilename()
    try:
        run(path)
    except:
        pass

if __name__ == '__main__':
    root = tk.Tk()
    root.title("Face Detection Demo")
    sc_width = root.winfo_screenwidth()
    sc_height = root.winfo_screenheight()
    WID = 600  # 窗口宽
    HEI = 600  # 窗口高
    root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))
    type = ('宋体', "12", 'normal')

    up_frame = tk.Frame(root)
    tk.Button(up_frame, text='加载图片', command=select_img, font=type).pack(padx=1, side=tk.LEFT)
    tk.Button(up_frame, text='退出系统', command=root.destroy, font=type).pack(padx=1, side=tk.LEFT)
    up_frame.pack(pady=1, anchor='n')

    img_frame = tk.Frame(root)
    img_l = tk.Label(img_frame)
    img_l.pack()
    img_frame.pack(pady=20)
    root.mainloop()



  • 写回答

2条回答 默认 最新

  • 关注

    对应代码块后面#写了注释

    from facenet import MTCNN, InceptionResnetV1             #导入各种需要用到的包
    from torchvision.transforms import functional as F
    import numpy as np
    import matplotlib.pyplot as plt
    import cv2
    from PIL import Image
    from scipy.spatial.distance import canberra
    from sklearn.preprocessing import Normalizer
    import torch
    import tkinter as tk
    from tkinter.filedialog import askopenfilename
    from PIL import Image, ImageTk
     
    max_w_h = (700, 500)
     
    def ShowImg(img, img_labels):      #定义展示图片的函数
        rgba_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
        show_img = Image.fromarray(rgba_img)
        show_img = ImageTk.PhotoImage(show_img)
        img_labels.config(image=show_img)
        img_labels.image = show_img
     
    def Resize(img):                    #定义图片改变大小的函数
        h, w, _ = img.shape
        ratio = h / w
        if max_w_h[0] * ratio > max_w_h[1]:
            return cv2.resize(img, (int(max_w_h[1] / ratio), int(max_w_h[1])))
        else:
            return cv2.resize(img, (int(max_w_h[0]), int(max_w_h[0] * ratio)))
     
    gpu = False
    device = torch.device('cuda:0' if gpu & torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))     #如果gpu可用就用gpu,否则使用CPU
     
    # loading the facenet model
    facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)    #加载facenet模型
     
    # Loading the mtcnn model
    mtcnn = MTCNN(margin=30, device=device)     #加载mtcnn模型
    l2_encoder = Normalizer(norm='l2')
     
    # load trained faces
    data = np.load('data.npz')      #加载训练的数据
    trainx_embed, trainy = data['a'], data['b']
     
    def calculate_distance(embedding, known_faces, known_labels):  #定义计算距离的函数
        store = dict()
        for i in known_labels:
            if i not in store:
                store[i] = []
        for i in range(known_faces.shape[0]):
            store[known_labels[i]].append(canberra(embedding, known_faces[i]))
        for i in store.keys():
            store[i] = sum(store[i]) / len(store[i])
        dist = min(store.values())
        for i in store:
            if store[i] == dist:
                return (dist, i)
     
    def run(imgp):        #定义运行的过程
        try:
            img = Image.open(imgp)
            _, img_cropped, box = mtcnn(img)
            face_array = np.array(img_cropped)
            face_pixels = F.to_tensor(np.float32(face_array))
            face_pixels = (face_pixels - 127.5) / 128.0
            feas = facerec(face_pixels.unsqueeze(0).to(device))
            feas = feas.cpu().detach().numpy()
            distance, label = calculate_distance(feas, trainx_embed, trainy)
            print(label)
            # plot
            image = cv2.imread(imgp)
            if distance > 350:  # 368
                label = "UNKNOWN"
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
            cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness, lineType=cv2.LINE_AA)
            tf = max(thickness - 1, 1)  # font thickness
            w, h = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=tf)[0]  # text width, height
            outside = p1[1] - h - 3 >= 0  # label fits outside box
            cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, thickness / 3, (0, 255, 255),
                        thickness=tf, lineType=cv2.LINE_AA)
            data_img = Resize(image)
            ShowImg(data_img, img_l)
            img_l.update()
        except:
            print('Can not detect face!')
     
    def select_img():     #定义选择图片的函数
        path = askopenfilename()
        try:
            run(path)
        except:
            pass
     
    if __name__ == '__main__':#主函数,程序入口
        root = tk.Tk()#创建tk窗体
        root.title("Face Detection Demo")#写好标题
        sc_width = root.winfo_screenwidth()#宽
        sc_height = root.winfo_screenheight()#高
        WID = 600  # 窗口宽
        HEI = 600  # 窗口高
        root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))
        type = ('宋体', "12", 'normal')#字体
     
        up_frame = tk.Frame(root)
        tk.Button(up_frame, text='加载图片', command=select_img, font=type).pack(padx=1, side=tk.LEFT)#创建按钮
        tk.Button(up_frame, text='退出系统', command=root.destroy, font=type).pack(padx=1, side=tk.LEFT)
        up_frame.pack(pady=1, anchor='n')
     
        img_frame = tk.Frame(root)
        img_l = tk.Label(img_frame)
        img_l.pack()
        img_frame.pack(pady=20)
        root.mainloop()
     
     
    
    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 3月21日
  • 已采纳回答 3月15日
  • 创建了问题 3月14日

悬赏问题

  • ¥15 对于这个复杂问题的解释说明
  • ¥50 三种调度算法报错 有实例
  • ¥15 关于#python#的问题,请各位专家解答!
  • ¥200 询问:python实现大地主题正反算的程序设计,有偿
  • ¥15 smptlib使用465端口发送邮件失败
  • ¥200 总是报错,能帮助用python实现程序实现高斯正反算吗?有偿
  • ¥15 对于squad数据集的基于bert模型的微调
  • ¥15 为什么我运行这个网络会出现以下报错?CRNN神经网络
  • ¥20 steam下载游戏占用内存
  • ¥15 CST保存项目时失败