利用 MTCNN 对人脸图像进行预处理,从而得到人脸图像的 Landmark 文件。其中,我们选择五个人脸关键点,即“左眼、右眼、鼻尖、左嘴角、右嘴角”。
这一步在很多论文里都有提到,但具体是如何处理并生成的呢?
请参考以下代码:
import sys
sys.path.append('D:\\Anaconda2\\libs') #导入系统路径,以便cv2模块的导入
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU
# Side length, in pixels, that the size-dependent output paths are named after.
stdsize = 48

# Input locations: the annotation text file and the CelebA source image folder.
anno_file = "E:/face_alignment/data/CelebA/Anno/celebA_bbox_landmark.txt"
im_dir = "E:/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"

# Output locations: the per-size root folder and the landmark-sample folder
# inside it (both are named after stdsize).
save_dir = "".join(("./", str(stdsize)))
pos_save_dir = "".join((str(stdsize), "/landmark"))
# Directory-creation helper.
def mkr(dr):
    """Create directory ``dr`` (including missing parents) if it is absent.

    Calling it on a directory that already exists is a no-op.

    Args:
        dr: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, or if ``dr`` exists
            but is not a directory.
    """
    try:
        os.makedirs(dr)
    except OSError:
        # The directory may already exist (possibly created concurrently);
        # only propagate the error when the path is still not a directory.
        if not os.path.isdir(dr):
            raise
# ---------------------------------------------------------------------------
# Landmark sample generation.
#
# Each annotation line is read as: image name, then one or more boxes
# (4 values each: x_left, y_top, x_right, y_bottom), then 10 landmark values
# stored as x/y pairs.  For every usable box, 10 randomly perturbed square
# crops are tried; a crop is kept only if it lies inside the image and
# contains all five landmarks.  Kept crops are resized to stdsize x stdsize,
# saved as JPEG, and described by one line in the label file.
#
# The code is written to run unchanged on Python 2 and Python 3.
# ---------------------------------------------------------------------------

# Make sure the output directories exist before any sample is written.
mkr(save_dir)
mkr(pos_save_dir)

# Load every annotation line up front so the total can be reported.
with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # landmark samples written so far (also the next image file name)
d_idx = 0  # "don't care" counter; never incremented here, only reported
idx = 0  # annotation lines processed
box_idx = 0  # boxes for which augmentation was attempted

# `with` guarantees the label file is flushed and closed even if an
# exception interrupts the loop.
with open(os.path.join(save_dir, 'landmark_' + str(stdsize) + '.txt'), 'w') as f1:
    for annotation in annotations:
        # split() without arguments tolerates repeated spaces and tabs.
        annotation = annotation.split()
        if not annotation:
            continue  # blank line
        im_path = annotation[0]  # image file name
        bbox = [float(v) for v in annotation[1:-10]]  # box coordinates
        landmarks = [float(v) for v in annotation[-10:]]  # x0, y0, ..., x4, y4
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)  # one row per box
        im_path = im_dir + im_path  # full image path
        img = cv2.imread(im_path)
        idx += 1
        if idx % 100 == 0:
            print("%d images done" % idx)
        if img is None:
            # cv2.imread returns None for a missing or undecodable file.
            print("skipping unreadable image: %s" % im_path)
            continue
        height, width = img.shape[:2]
        xs = landmarks[0::2]
        ys = landmarks[1::2]

        for box in boxes:
            # box is (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            # Ignore small faces (their ground-truth boxes may be inaccurate),
            # boxes starting outside the image, and degenerate boxes.
            if max(w, h) < 12 or x1 < 0 or y1 < 0 or min(w, h) <= 0:
                continue

            # Integer bounds for the random perturbations.  They are computed
            # explicitly so randint never receives floats or an empty range.
            size_low = max(int(min(w, h) * 0.9), 1)  # at least 1: no empty crop
            size_high = int(np.ceil(1.1 * max(w, h)))
            max_dx = int(w * 0.1)
            max_dy = int(h * 0.1)

            # Perturb scale (about 0.9 to 1.1) and centre offset (about
            # -0.1 to 0.1 of the box side) to augment the samples.
            for i in range(10):
                size = npr.randint(size_low, size_high)
                # delta is the offset of the box centre; a side shorter than
                # 10 px leaves no room to shift, so the offset is 0.
                delta_x = npr.randint(-max_dx, max_dx) if max_dx > 0 else 0
                delta_y = npr.randint(-max_dy, max_dy) if max_dy > 0 else 0
                # `size // 2` keeps the integer division the original
                # Python 2 code performed.
                nx1 = max(x1 + w / 2 + delta_x - size // 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size // 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size
                if nx2 > width or ny2 > height:
                    continue
                # Keep the crop only when all five landmarks fall inside it.
                if any(x < nx1 or x > nx2 for x in xs):
                    continue
                if any(y < ny1 or y > ny2 for y in ys):
                    continue
                # Normalise landmarks relative to the crop's top-left corner.
                # Output order is all x values followed by all y values.
                norm_pts = [(x - nx1) / float(size) for x in xs]
                norm_pts += [(y - ny1) / float(size) for y in ys]
                # Crop the region from the source image and resize to stdsize.
                cropped_im = img[int(ny1):int(ny2), int(nx1):int(nx2), :]
                resized_im = cv2.resize(cropped_im, (stdsize, stdsize),
                                        interpolation=cv2.INTER_LINEAR)
                # Label line: image path, five -1 fields (presumably the class
                # label and four box offsets; confirm against the reader of
                # this file), then the ten normalised landmark values.
                save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                f1.write(str(stdsize) + "/landmark/%s.jpg" % p_idx + ' -1 -1 -1 -1 -1')
                f1.write("".join(" %f" % v for v in norm_pts))
                f1.write("\n")
                cv2.imwrite(save_file, resized_im)  # save the sample image
                p_idx += 1
            box_idx += 1
            print("%s images done, pos: %s part: %s " % (idx, p_idx, d_idx))