weixin_54532612 2022-03-14 08:42 采纳率: 50%
浏览 146
已结题

mtcnn和facenet实现部分遮挡下的人脸识别

问题遇到的现象和发生背景
问题相关代码,请勿粘贴截图
运行结果及报错内容
我的解答思路和尝试过的方法
我想要达到的结果

对于这些代码我还不太清楚,希望可以帮我用中文详细注释一下代码,谢谢。
``

from facenet import MTCNN, InceptionResnetV1
from torchvision.transforms import functional as F
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from scipy.spatial.distance import canberra
from sklearn.preprocessing import Normalizer
import torch
import tkinter as tk
from tkinter.filedialog import askopenfilename
from PIL import Image, ImageTk

# Bounding box (width, height), in pixels, that Resize() scales images to fit.
max_w_h = (700, 500)

def ShowImg(img, img_labels):
    """Display an OpenCV image on a Tkinter label.

    Args:
        img: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGBA`` before display).
        img_labels: Tkinter widget whose ``image`` option receives the picture.
    """
    photo = ImageTk.PhotoImage(
        Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGBA))
    )
    img_labels.config(image=photo)
    # Also store the PhotoImage on the widget so a Python reference to it
    # outlives this function call.
    img_labels.image = photo

def Resize(img):
    """Scale an image to fit inside ``max_w_h`` while keeping its aspect ratio.

    Args:
        img: Image array whose first two dimensions are (height, width).
            Both colour (3-D) and grayscale (2-D) arrays are accepted.

    Returns:
        The output of ``cv2.resize``: the image scaled (up or down) so that
        one side matches the ``max_w_h`` bounding box and the other fits
        inside it.
    """
    max_w, max_h = max_w_h
    # Slice instead of a three-way unpack so 2-D arrays work as well.
    h, w = img.shape[:2]
    ratio = h / w
    if max_w * ratio > max_h:
        # At full width the image would be too tall: height is the limit.
        new_w, new_h = int(max_h / ratio), int(max_h)
    else:
        new_w, new_h = int(max_w), int(max_w * ratio)
    # Extreme aspect ratios can truncate one side to 0, which cv2.resize
    # rejects; keep both sides at least one pixel.
    return cv2.resize(img, (max(new_w, 1), max(new_h, 1)))

# Set to True to use the first CUDA device when one is available.
gpu = False
# Logical `and` (not bitwise `&`): the CUDA probe is skipped when gpu is False.
device = torch.device('cuda:0' if gpu and torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

# Load the FaceNet embedding network with the 'vggface2' weights, in eval mode.
facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Load the MTCNN model used to locate and crop the face.
mtcnn = MTCNN(margin=30, device=device)
# NOTE(review): l2_encoder is not referenced anywhere in the visible code.
l2_encoder = Normalizer(norm='l2')

# Load the reference data: array 'a' is used as the known face embeddings
# and array 'b' as their labels. The context manager closes the .npz archive
# once both arrays have been read into memory.
with np.load('data.npz') as data:
    trainx_embed, trainy = data['a'], data['b']

def calculate_distance(embedding, known_faces, known_labels):
    """Find the known label whose embeddings are closest to ``embedding``.

    Reference embeddings are grouped by label, the Canberra distance from
    ``embedding`` to every member of a group is averaged, and the group with
    the smallest average is returned.

    Args:
        embedding: Query embedding. It is flattened first, so a ``(1, D)``
            batch of one works as well as a ``(D,)`` vector.
        known_faces: Sequence of reference embeddings, one per entry.
        known_labels: Hashable labels, paired entry by entry with
            ``known_faces``.

    Returns:
        Tuple ``(distance, label)``: the smallest per-label mean distance and
        the label it belongs to. On a tie the label encountered first wins.

    Raises:
        ValueError: If there are no reference embeddings to compare against.
    """
    # canberra() rejects non-1-D input in current SciPy releases, and the
    # network output passed in by run() has a leading batch axis.
    query = np.ravel(embedding)

    per_label = {}
    for face, label in zip(known_faces, known_labels):
        per_label.setdefault(label, []).append(canberra(query, np.ravel(face)))
    if not per_label:
        raise ValueError('no known faces to compare against')

    mean_dist = {
        label: sum(dists) / len(dists) for label, dists in per_label.items()
    }
    best = min(mean_dist, key=mean_dist.get)
    return (mean_dist[best], best)

def run(imgp, threshold=350):
    """Recognise the face in an image file and show the annotated picture.

    The face is cropped with ``mtcnn``, embedded with ``facerec``, matched
    against ``trainx_embed``/``trainy`` via ``calculate_distance``, and the
    image is then drawn with a box and label onto the module-level ``img_l``
    widget.

    Args:
        imgp: Path of the image file to analyse.
        threshold: Largest distance still accepted as a match; a larger
            distance is labelled ``"UNKNOWN"``.

    Any failure is reported on the console instead of being raised, so the
    GUI keeps running.
    """
    import traceback

    try:
        # The context manager closes the image file once the crop is copied.
        # assumes mtcnn returns a 3-tuple whose last two items are the
        # cropped face and its (x1, y1, x2, y2) box - TODO confirm
        with Image.open(imgp) as img:
            _, img_cropped, box = mtcnn(img)
            face_array = np.array(img_cropped)

        face_pixels = F.to_tensor(np.float32(face_array))
        face_pixels = (face_pixels - 127.5) / 128.0
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            feas = facerec(face_pixels.unsqueeze(0).to(device))
        feas = feas.cpu().detach().numpy()

        distance, label = calculate_distance(feas, trainx_embed, trainy)
        print(label)
        # OpenCV's text functions need a plain string.
        label = str(label)
        if distance > threshold:  # the original inline note mentioned 368
            label = "UNKNOWN"

        # plot
        image = cv2.imread(imgp)
        if image is None:
            # imread signals an unreadable file by returning None.
            raise IOError('OpenCV could not read {!r}'.format(imgp))
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        # Line width grows with the image size, but is never below 2.
        thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
        cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness,
                      lineType=cv2.LINE_AA)
        tf = max(thickness - 1, 1)  # font thickness
        w, h = cv2.getTextSize(label, 0, fontScale=thickness / 3,
                               thickness=tf)[0]  # text width, height
        outside = p1[1] - h - 3 >= 0  # label fits above the box
        text_y = p1[1] - 2 if outside else p1[1] + h + 2
        cv2.putText(image, label, (p1[0], text_y), 0, thickness / 3,
                    (0, 255, 255), thickness=tf, lineType=cv2.LINE_AA)

        data_img = Resize(image)
        ShowImg(data_img, img_l)
        img_l.update()
    except Exception:
        print('Can not detect face!')
        # Show the real cause too; the message above alone hides I/O and
        # programming errors.
        traceback.print_exc()

def select_img():
    """Ask the user for an image file and run recognition on it.

    Does nothing when the file dialog is cancelled.
    """
    path = askopenfilename()
    # A cancelled dialog yields an empty value; there is nothing to analyse.
    if not path:
        return
    # run() reports its own failures, so no blanket except is needed here.
    run(path)

if __name__ == '__main__':
    # Build the demo window: a button bar on top and the preview label below.
    root = tk.Tk()
    root.title("Face Detection Demo")
    sc_width = root.winfo_screenwidth()
    sc_height = root.winfo_screenheight()
    # NOTE(review): max_w_h allows previews up to 700 px wide, which is wider
    # than this 600 px window.
    WID = 600  # window width
    HEI = 600  # window height
    # Centre the window horizontally; vertically it sits 30 px above centre.
    root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))
    # Named btn_font rather than `type` so the builtin is not shadowed.
    btn_font = ('宋体', "12", 'normal')

    up_frame = tk.Frame(root)
    tk.Button(up_frame, text='加载图片', command=select_img, font=btn_font).pack(padx=1, side=tk.LEFT)
    tk.Button(up_frame, text='退出系统', command=root.destroy, font=btn_font).pack(padx=1, side=tk.LEFT)
    up_frame.pack(pady=1, anchor='n')

    img_frame = tk.Frame(root)
    # img_l is read as a module-level name by run() to show the result.
    img_l = tk.Label(img_frame)
    img_l.pack()
    img_frame.pack(pady=20)
    root.mainloop()



展开全部

  • 写回答

2条回答 默认 最新

  • 关注

    已在对应代码行的后面用 # 添加了注释:

    from facenet import MTCNN, InceptionResnetV1             #导入各种需要用到的包
    from torchvision.transforms import functional as F
    import numpy as np
    import matplotlib.pyplot as plt
    import cv2
    from PIL import Image
    from scipy.spatial.distance import canberra
    from sklearn.preprocessing import Normalizer
    import torch
    import tkinter as tk
    from tkinter.filedialog import askopenfilename
    from PIL import Image, ImageTk
     
    # Bounding box (width, height), in pixels, that Resize() scales images to fit.
    max_w_h = (700, 500)
     
    def ShowImg(img, img_labels):
        """Display an OpenCV image on a Tkinter label.

        Args:
            img: Image array in BGR channel order (it is converted with
                ``cv2.COLOR_BGR2RGBA`` before display).
            img_labels: Tkinter widget whose ``image`` option receives the
                picture.
        """
        photo = ImageTk.PhotoImage(
            Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGBA))
        )
        img_labels.config(image=photo)
        # Also store the PhotoImage on the widget so a Python reference to it
        # outlives this function call.
        img_labels.image = photo
     
    def Resize(img):
        """Scale an image to fit inside ``max_w_h``, keeping its aspect ratio.

        Args:
            img: Image array whose first two dimensions are (height, width).
                Both colour (3-D) and grayscale (2-D) arrays are accepted.

        Returns:
            The output of ``cv2.resize``: the image scaled (up or down) so
            that one side matches the ``max_w_h`` bounding box and the other
            fits inside it.
        """
        max_w, max_h = max_w_h
        # Slice instead of a three-way unpack so 2-D arrays work as well.
        h, w = img.shape[:2]
        ratio = h / w
        if max_w * ratio > max_h:
            # At full width the image would be too tall: height is the limit.
            new_w, new_h = int(max_h / ratio), int(max_h)
        else:
            new_w, new_h = int(max_w), int(max_w * ratio)
        # Extreme aspect ratios can truncate one side to 0, which cv2.resize
        # rejects; keep both sides at least one pixel.
        return cv2.resize(img, (max(new_w, 1), max(new_h, 1)))
     
    # Set to True to use the first CUDA device when one is available.
    gpu = False
    # Logical `and` (not bitwise `&`): the CUDA probe is skipped when gpu is False.
    device = torch.device('cuda:0' if gpu and torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # Load the FaceNet embedding network with the 'vggface2' weights, in eval mode.
    facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # Load the MTCNN model used to locate and crop the face.
    mtcnn = MTCNN(margin=30, device=device)
    # NOTE(review): l2_encoder is not referenced anywhere in the visible code.
    l2_encoder = Normalizer(norm='l2')

    # Load the reference data: array 'a' is used as the known face embeddings
    # and array 'b' as their labels. The context manager closes the .npz
    # archive once both arrays have been read into memory.
    with np.load('data.npz') as data:
        trainx_embed, trainy = data['a'], data['b']
     
    def calculate_distance(embedding, known_faces, known_labels):
        """Find the known label whose embeddings are closest to ``embedding``.

        Reference embeddings are grouped by label, the Canberra distance from
        ``embedding`` to every member of a group is averaged, and the group
        with the smallest average is returned.

        Args:
            embedding: Query embedding. It is flattened first, so a ``(1, D)``
                batch of one works as well as a ``(D,)`` vector.
            known_faces: Sequence of reference embeddings, one per entry.
            known_labels: Hashable labels, paired entry by entry with
                ``known_faces``.

        Returns:
            Tuple ``(distance, label)``: the smallest per-label mean distance
            and the label it belongs to. On a tie the label encountered first
            wins.

        Raises:
            ValueError: If there are no reference embeddings to compare
                against.
        """
        # canberra() rejects non-1-D input in current SciPy releases, and the
        # network output passed in by run() has a leading batch axis.
        query = np.ravel(embedding)

        per_label = {}
        for face, label in zip(known_faces, known_labels):
            per_label.setdefault(label, []).append(canberra(query, np.ravel(face)))
        if not per_label:
            raise ValueError('no known faces to compare against')

        mean_dist = {
            label: sum(dists) / len(dists) for label, dists in per_label.items()
        }
        best = min(mean_dist, key=mean_dist.get)
        return (mean_dist[best], best)
     
    def run(imgp, threshold=350):
        """Recognise the face in an image file and show the annotated picture.

        The face is cropped with ``mtcnn``, embedded with ``facerec``, matched
        against ``trainx_embed``/``trainy`` via ``calculate_distance``, and
        the image is then drawn with a box and label onto the module-level
        ``img_l`` widget.

        Args:
            imgp: Path of the image file to analyse.
            threshold: Largest distance still accepted as a match; a larger
                distance is labelled ``"UNKNOWN"``.

        Any failure is reported on the console instead of being raised, so
        the GUI keeps running.
        """
        import traceback

        try:
            # The context manager closes the image file once the crop is
            # copied.
            # assumes mtcnn returns a 3-tuple whose last two items are the
            # cropped face and its (x1, y1, x2, y2) box - TODO confirm
            with Image.open(imgp) as img:
                _, img_cropped, box = mtcnn(img)
                face_array = np.array(img_cropped)

            face_pixels = F.to_tensor(np.float32(face_array))
            face_pixels = (face_pixels - 127.5) / 128.0
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                feas = facerec(face_pixels.unsqueeze(0).to(device))
            feas = feas.cpu().detach().numpy()

            distance, label = calculate_distance(feas, trainx_embed, trainy)
            print(label)
            # OpenCV's text functions need a plain string.
            label = str(label)
            if distance > threshold:  # the original inline note mentioned 368
                label = "UNKNOWN"

            # plot
            image = cv2.imread(imgp)
            if image is None:
                # imread signals an unreadable file by returning None.
                raise IOError('OpenCV could not read {!r}'.format(imgp))
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            # Line width grows with the image size, but is never below 2.
            thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
            cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness,
                          lineType=cv2.LINE_AA)
            tf = max(thickness - 1, 1)  # font thickness
            w, h = cv2.getTextSize(label, 0, fontScale=thickness / 3,
                                   thickness=tf)[0]  # text width, height
            outside = p1[1] - h - 3 >= 0  # label fits above the box
            text_y = p1[1] - 2 if outside else p1[1] + h + 2
            cv2.putText(image, label, (p1[0], text_y), 0, thickness / 3,
                        (0, 255, 255), thickness=tf, lineType=cv2.LINE_AA)

            data_img = Resize(image)
            ShowImg(data_img, img_l)
            img_l.update()
        except Exception:
            print('Can not detect face!')
            # Show the real cause too; the message above alone hides I/O and
            # programming errors.
            traceback.print_exc()
     
    def select_img():
        """Ask the user for an image file and run recognition on it.

        Does nothing when the file dialog is cancelled.
        """
        path = askopenfilename()
        # A cancelled dialog yields an empty value; there is nothing to analyse.
        if not path:
            return
        # run() reports its own failures, so no blanket except is needed here.
        run(path)
     
    if __name__ == '__main__':  # script entry point
        # Build the demo window: a button bar on top, the preview label below.
        root = tk.Tk()  # create the Tk window
        root.title("Face Detection Demo")  # window title
        sc_width = root.winfo_screenwidth()  # screen width
        sc_height = root.winfo_screenheight()  # screen height
        # NOTE(review): max_w_h allows previews up to 700 px wide, which is
        # wider than this 600 px window.
        WID = 600  # window width
        HEI = 600  # window height
        # Centre the window horizontally; vertically it sits 30 px above centre.
        root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))
        # Named btn_font rather than `type` so the builtin is not shadowed.
        btn_font = ('宋体', "12", 'normal')  # button font

        up_frame = tk.Frame(root)
        # Create the two buttons.
        tk.Button(up_frame, text='加载图片', command=select_img, font=btn_font).pack(padx=1, side=tk.LEFT)
        tk.Button(up_frame, text='退出系统', command=root.destroy, font=btn_font).pack(padx=1, side=tk.LEFT)
        up_frame.pack(pady=1, anchor='n')

        img_frame = tk.Frame(root)
        # img_l is read as a module-level name by run() to show the result.
        img_l = tk.Label(img_frame)
        img_l.pack()
        img_frame.pack(pady=20)
        root.mainloop()
     
     
    

    展开全部

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

    如有帮助,请点击我评论上方【采纳该答案】按钮支持一下,谢谢!

    回复
    CSDN专家-深度学习进阶 回复 weixin_54532612 2022-03-15 01:30

    这里好像只是单纯计算,没看到具体的阈值

    回复
    CSDN专家-深度学习进阶 回复 weixin_54532612 2022-03-15 06:29

    建议你先从出处看看作者的介绍和相关的知识

    回复
    展开全部4条评论
查看更多回答(1条)
编辑
预览

报告相同问题?

问题事件

  • 已结题 (查看结题原因) 3月21日
  • 已采纳回答 3月15日
  • 创建了问题 3月14日
手机看
程序员都在用的中文IT技术交流社区

程序员都在用的中文IT技术交流社区

专业的中文 IT 技术社区,与千万技术人共成长

专业的中文 IT 技术社区,与千万技术人共成长

关注【CSDN】视频号,行业资讯、技术分享精彩不断,直播好礼送不停!

关注【CSDN】视频号,行业资讯、技术分享精彩不断,直播好礼送不停!

客服 返回
顶部