问题遇到的现象和发生背景
问题相关代码,请勿粘贴截图
运行结果及报错内容
我的解答思路和尝试过的方法
我想要达到的结果
对于这些代码我还不太清楚,希望可以帮我用中文详细注释一下代码,谢谢。
```python
from facenet import MTCNN, InceptionResnetV1
from torchvision.transforms import functional as F
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from scipy.spatial.distance import canberra
from sklearn.preprocessing import Normalizer
import torch
import tkinter as tk
from tkinter.filedialog import askopenfilename
from PIL import Image, ImageTk
# Maximum (width, height), in pixels, of the image shown in the GUI.
# Read by Resize(): index 0 is the width bound, index 1 is the height bound.
max_w_h = (700, 500)
def ShowImg(img, img_labels):
    """Show an OpenCV image on a Tk label widget.

    Args:
        img: Image array in OpenCV BGR channel order.
        img_labels: Widget whose ``image`` option is set (a ``tk.Label`` in
            this script).
    """
    # BGR -> RGBA array -> PIL image -> Tk-compatible photo, in one chain.
    photo = ImageTk.PhotoImage(
        Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGBA))
    )
    img_labels.config(image=photo)
    # Store the photo on the widget as well; presumably this keeps the
    # PhotoImage referenced so it is not garbage-collected while displayed.
    img_labels.image = photo
def Resize(img):
    """Scale ``img`` to fit inside ``max_w_h`` while keeping its aspect ratio.

    The image is always rescaled so that one side equals the corresponding
    bound in ``max_w_h`` (so small images are enlarged, large ones shrunk).

    Args:
        img: Image array whose first two dimensions are (height, width).
            Both 2-D (grayscale) and 3-D (with channels) arrays are accepted.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    # Only the first two dimensions are needed; slicing avoids failing on
    # 2-D arrays that have no channel axis.
    h, w = img.shape[:2]
    max_w, max_h = max_w_h
    ratio = h / w

    if max_w * ratio > max_h:
        # At full width the image would be too tall: height is the limit.
        new_w, new_h = int(max_h / ratio), int(max_h)
    else:
        # Otherwise the width is the limiting side.
        new_w, new_h = int(max_w), int(max_w * ratio)

    # Extreme aspect ratios can truncate a side to 0, which cv2.resize
    # rejects; keep every side at least one pixel.
    return cv2.resize(img, (max(new_w, 1), max(new_h, 1)))
# Set to True to run the models on the first CUDA GPU when one is available.
gpu = False
# Logical `and` (not bitwise `&`): CUDA is only used when it is both
# requested and actually available; otherwise everything runs on the CPU.
device = torch.device('cuda:0' if gpu and torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))
# Load the face-embedding network with the 'vggface2' pretrained weights,
# switch it to evaluation mode and move it to the selected device.
facerec = InceptionResnetV1(pretrained='vggface2').eval().to(device)
# Load the MTCNN model; run() calls it on the opened image to get the
# cropped face and its bounding box.
mtcnn = MTCNN(margin=30, device=device)
# L2 normalizer; not referenced anywhere else in the visible code.
l2_encoder = Normalizer(norm='l2')
# Load the trained faces: key 'a' is used as the known embeddings and key
# 'b' as their labels (see the calculate_distance() call in run()).
# NOTE(review): the object returned by np.load is never closed explicitly.
data = np.load('data.npz')
trainx_embed, trainy = data['a'], data['b']
def calculate_distance(embedding, known_faces, known_labels):
    """Find the known label whose faces are, on average, closest to ``embedding``.

    For every label, the Canberra distance between ``embedding`` and each of
    that label's known faces is computed and averaged; the label with the
    smallest average wins. Ties go to the label that appears first.

    Args:
        embedding: Query embedding. Any shape is accepted and flattened to
            1-D, so a single-row batch such as ``(1, N)`` works.
        known_faces: Sequence/array of known embeddings, one per row.
        known_labels: Sequence/array of labels, parallel to ``known_faces``.

    Returns:
        A ``(distance, label)`` tuple: the smallest per-label mean distance
        and the label it belongs to.

    Raises:
        ValueError: If there are no known faces to compare against, or if
            the flattened vectors have different lengths (raised by SciPy).
    """
    # canberra() requires 1-D vectors; flatten so a (1, N) model output is
    # accepted instead of being rejected.
    query = np.asarray(embedding).ravel()

    # Group the distances by label, preserving first-appearance order.
    per_label = {}
    for face, label in zip(known_faces, known_labels):
        distance = canberra(query, np.asarray(face).ravel())
        per_label.setdefault(label, []).append(distance)

    if not per_label:
        raise ValueError('known_faces and known_labels must not be empty')

    means = {
        label: sum(distances) / len(distances)
        for label, distances in per_label.items()
    }
    # min() over the keys always yields a label, unlike an equality scan,
    # which can fall through (e.g. when a mean is NaN).
    best = min(means, key=means.get)
    return (means[best], best)
def _draw_label(image, box, label):
    """Draw the bounding box ``box`` and the text ``label`` onto ``image`` in place."""
    p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
    # Line thickness scales with the image size, but is never below 2.
    thickness = max(round(sum(image.shape) / 2 * 0.003), 2)
    cv2.rectangle(image, p1, p2, (0, 255, 255), thickness=thickness, lineType=cv2.LINE_AA)
    tf = max(thickness - 1, 1)  # font thickness
    # Only the text height is needed to decide where the label goes.
    _, h = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=tf)[0]
    outside = p1[1] - h - 3 >= 0  # label fits above the box
    text_y = p1[1] - 2 if outside else p1[1] + h + 2
    cv2.putText(image, label, (p1[0], text_y), 0, thickness / 3, (0, 255, 255),
                thickness=tf, lineType=cv2.LINE_AA)


def run(imgp, unknown_threshold=350):
    """Recognise the face in an image file and show the annotated result.

    The image is passed to ``mtcnn`` to obtain a cropped face and its box,
    the crop is embedded with ``facerec``, and the embedding is matched
    against the trained faces with ``calculate_distance``. The box and the
    label are drawn on the image, which is then resized and displayed on the
    module-level ``img_l`` label.

    Args:
        imgp: Path of the image file to process.
        unknown_threshold: Distances above this value are labelled
            ``"UNKNOWN"``. Defaults to 350 (the original code also noted
            368, presumably an earlier candidate value).

    Any error is reported on stdout instead of being raised, so a failure
    does not propagate into the GUI callback.
    """
    try:
        img = Image.open(imgp)
        _, img_cropped, box = mtcnn(img)
        face_pixels = F.to_tensor(np.float32(np.array(img_cropped)))
        # Shift/scale the pixel values (about [-1, 1] assuming 0-255 input).
        face_pixels = (face_pixels - 127.5) / 128.0
        # Inference only: no_grad avoids building an autograd graph.
        with torch.no_grad():
            feas = facerec(face_pixels.unsqueeze(0).to(device))
        feas = feas.cpu().detach().numpy()
        # The model was fed a batch of one; flatten to the 1-D vector the
        # distance function needs.
        distance, label = calculate_distance(feas.ravel(), trainx_embed, trainy)
        print(label)
        # plot
        image = cv2.imread(imgp)
        if image is None:
            # cv2.imread returns None instead of raising when it cannot read
            # the file; fall back to the image PIL already opened.
            image = cv2.cvtColor(np.array(img.convert('RGB')), cv2.COLOR_RGB2BGR)
        if distance > unknown_threshold:
            label = "UNKNOWN"
        _draw_label(image, box, label)
        data_img = Resize(image)
        ShowImg(data_img, img_l)
        img_l.update()
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # through; the cause is printed so real errors are not mislabelled.
        print('Can not detect face!')
        print('Reason: {!r}'.format(exc))
def select_img():
    """Ask the user for an image file and run face recognition on it.

    Used as the command of the load-image button. Does nothing when the
    file dialog is cancelled.
    """
    path = askopenfilename()
    if not path:
        # The dialog was cancelled (empty result): nothing to process.
        return
    try:
        run(path)
    except Exception as exc:
        # Keep the GUI alive on failure, but report the error instead of
        # silently discarding it.
        print('Failed to process {!r}: {!r}'.format(path, exc))
if __name__ == '__main__':
    # Build the main window and centre it on the screen (shifted up 30 px).
    root = tk.Tk()
    root.title("Face Detection Demo")
    sc_width = root.winfo_screenwidth()
    sc_height = root.winfo_screenheight()
    WID = 600  # window width
    HEI = 600  # window height
    root.geometry('%dx%d+%d+%d' % (WID, HEI, (sc_width - WID) / 2, (sc_height - HEI) / 2 - 30))

    # Named `btn_font` rather than `type` so the builtin `type` is not
    # shadowed at module level.
    btn_font = ('宋体', "12", 'normal')

    # Top row: a button to load an image and a button to quit.
    up_frame = tk.Frame(root)
    tk.Button(up_frame, text='加载图片', command=select_img, font=btn_font).pack(padx=1, side=tk.LEFT)
    tk.Button(up_frame, text='退出系统', command=root.destroy, font=btn_font).pack(padx=1, side=tk.LEFT)
    up_frame.pack(pady=1, anchor='n')

    # Image area: `img_l` is a module-level name that run() uses to display
    # the annotated result.
    img_frame = tk.Frame(root)
    img_l = tk.Label(img_frame)
    img_l.pack()
    img_frame.pack(pady=20)

    root.mainloop()