Commit a24be2ea authored by Amir MOHAMMADI's avatar Amir MOHAMMADI
Browse files

Clean-up

parent 7d5ee056
Pipeline #14162 passed with stages
in 15 minutes and 15 seconds
......@@ -17,11 +17,16 @@ from bob.ip.facedetect import BoundingBox
from .legacy import detect_face
from .utils import bob_to_dlib_image_convertion, rectangle2bounding_box2
class FaceDetector(object):
"""
Detects face and 5 landmarks using the MTCNN (https://github.com/kpzhang93/MTCNN_face_detection_alignment) from the paper.
Detects face and 5 landmarks using the MTCNN
(https://github.com/kpzhang93/MTCNN_face_detection_alignment) from the
paper.
Zhang, Kaipeng, et al. "Joint face detection and alignment using multitask cascaded convolutional networks." IEEE Signal Processing Letters 23.10 (2016): 1499-1503.
Zhang, Kaipeng, et al. "Joint face detection and alignment using multitask
cascaded convolutional networks." IEEE Signal Processing Letters 23.10
(2016): 1499-1503.
"""
......@@ -40,10 +45,15 @@ class FaceDetector(object):
# Loading the models
caffe.set_mode_cpu()
self.p_net = caffe.Net(os.path.join(caffe_base_path, "det1.prototxt"), os.path.join(caffe_base_path, "det1.caffemodel"), caffe.TEST)
self.r_net = caffe.Net(os.path.join(caffe_base_path, "det2.prototxt"), os.path.join(caffe_base_path, "det2.caffemodel"), caffe.TEST)
self.o_net = caffe.Net(os.path.join(caffe_base_path, "det3.prototxt"), os.path.join(caffe_base_path, "det3.caffemodel"), caffe.TEST)
self.p_net = caffe.Net(
os.path.join(caffe_base_path, "det1.prototxt"),
os.path.join(caffe_base_path, "det1.caffemodel"), caffe.TEST)
self.r_net = caffe.Net(
os.path.join(caffe_base_path, "det2.prototxt"),
os.path.join(caffe_base_path, "det2.caffemodel"), caffe.TEST)
self.o_net = caffe.Net(
os.path.join(caffe_base_path, "det3.prototxt"),
os.path.join(caffe_base_path, "det3.caffemodel"), caffe.TEST)
def _convert_list_to_landmarks(self, points):
"""
......@@ -51,38 +61,51 @@ class FaceDetector(object):
"""
landmarks = []
possible_landmarks = ['reye', 'leye', 'nose', 'mouthleft', 'mouthright']
possible_landmarks = ['reye', 'leye',
'nose', 'mouthleft', 'mouthright']
for i in range(points.shape[0]):
l = dict()
for offset, p in zip(range(len(possible_landmarks)), possible_landmarks):
l[p] = ( int(points[i][offset+5]), int(points[i][offset]) )
landmarks.append(l)
landmark = dict()
for offset, p in enumerate(possible_landmarks):
landmark[p] = (int(points[i][offset + 5]),
int(points[i][offset]))
landmarks.append(landmark)
return landmarks
def detect_all_faces(self, image, return_bob_bb = True):
def detect_all_faces(self, image, return_bob_bb=True):
"""
Detect all the faces with its respective landmarks, if any, in a COLORED image
**Parameters**
image: numpy array with color image [c, w, h]
return_bob_bb: if true will return the faces wrapped using py:class:`bob.ip.facedetect.BoundingBox`
**Returns**
Returns two lists; the first on contains the bounding boxes with the detected faces and the second one
contains list with the faces landmarks. The CNN returns 5 facial landmarks
(leye, reye, nose, mouthleft, mouthright). If there's no face, `None` will be returned
Detect all the faces with its respective landmarks, if any, in a
COLORED image
Parameters
----------
image : numpy.array
The color image [c, w, h]
return_bob_bb : bool, optional
If true, will return faces wrapped using
:any:`bob.ip.facedetect.BoundingBox`.
Returns
-------
object
Returns two lists; the first on contains the bounding boxes with
the detected faces and the second one contains list with the faces
landmarks. The CNN returns 5 facial landmarks (leye, reye, nose,
mouthleft, mouthright). If there's no face, `None` will be returned
Raises
------
ValueError
When image.ndim is not 3.
"""
assert image is not None
if len(image.shape) !=3:
if len(image.shape) != 3:
raise ValueError("Only color images is supported")
bb, landmarks = detect_face(bob_to_dlib_image_convertion(image), self.minsize, self.p_net, self.r_net, self.o_net, self.threshold, self.fastresize, self.factor)
bb, landmarks = detect_face(bob_to_dlib_image_convertion(
image), self.minsize, self.p_net, self.r_net, self.o_net, self.threshold, self.fastresize, self.factor)
# If there's no face, return none
if len(bb) == 0:
......@@ -93,18 +116,19 @@ class FaceDetector(object):
return bb, self._convert_list_to_landmarks(landmarks)
def detect_single_face(self, image):
"""
Returns the biggest face in a COLORED image, if any.
**Parameters**
image: numpy array with color image [c, w, h]
return_bob_bb: if true will return the faces wrapped using py:class:`bob.ip.facedetect.BoundingBox`
Parameters
----------
image : numpy.array
numpy array with color image [c, w, h]
**Returns**
The face bounding box and its respective 5 landmarks (leye, reye, nose, mouthleft, mouthright).
If there's no face, `None` will be returned
Returns
-------
The face bounding box and its respective 5 landmarks (leye, reye, nose,
mouthleft, mouthright). If there's no face, `None` will be returned
"""
......@@ -113,11 +137,12 @@ class FaceDetector(object):
if faces is None:
return None, None
index = numpy.argmax([(f.bottomright[0] - f.topleft[0]) * (f.bottomright[1] - f.topleft[1]) for f in faces])
index = numpy.argmax([(f.bottomright[0] - f.topleft[0])
* (f.bottomright[1] - f.topleft[1]) for f in faces])
return faces[index], landmarks[index]
def detect_crop_align(self, image, final_image_size=(160, 160) ):
def detect_crop_align(self, image, final_image_size=(160, 160)):
"""
Detects the biggest face and crop it based in the eyes location
using py:class:`bob.ip.base.FaceEyesNorm`.
......@@ -142,10 +167,13 @@ class FaceDetector(object):
CROPPED_IMAGE_HEIGHT = final_image_size[1]
# final image position w.r.t the image size
RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44, CROPPED_IMAGE_WIDTH / 3.02)
LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44 , CROPPED_IMAGE_WIDTH / 1.49)
RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44,
CROPPED_IMAGE_WIDTH / 3.02)
LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44,
CROPPED_IMAGE_WIDTH / 1.49)
extractor = bob.ip.base.FaceEyesNorm((CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), RIGHT_EYE_POS, LEFT_EYE_POS)
extractor = bob.ip.base.FaceEyesNorm(
(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), RIGHT_EYE_POS, LEFT_EYE_POS)
return extractor(image, landmark['reye'], landmark['leye'])
def detect_crop(self, image, final_image_size=(182, 182), margin=44):
......@@ -166,11 +194,13 @@ class FaceDetector(object):
if face is None:
return None
top = numpy.uint(numpy.maximum(face.top - margin/2, 0))
left = numpy.uint(numpy.maximum(face.left - margin/2, 0))
top = numpy.uint(numpy.maximum(face.top - margin / 2, 0))
left = numpy.uint(numpy.maximum(face.left - margin / 2, 0))
bottom = numpy.uint(numpy.minimum(face.bottom + margin/2, image.shape[1]))
right = numpy.uint(numpy.minimum(face.right + margin/2, image.shape[2]))
bottom = numpy.uint(numpy.minimum(
face.bottom + margin / 2, image.shape[1]))
right = numpy.uint(numpy.minimum(
face.right + margin / 2, image.shape[2]))
cropped = image[:, top:bottom, left:right]
......
......@@ -2,57 +2,56 @@
# Legacy code extracted from: https://github.com/DuinoDu/mtcnn
#######
from time import time
import caffe
import cv2
import numpy as np
import os
def bbreg(boundingbox, reg):
    """Calibrate bounding boxes with the CNN regression offsets.

    Parameters
    ----------
    boundingbox : numpy.ndarray
        (N, >=5) array whose columns 0-3 are [x1, y1, x2, y2].
        Modified in place.
    reg : numpy.ndarray
        (4, N) regression offsets, one column per box (transposed below
        to one row per box).

    Returns
    -------
    numpy.ndarray
        The same ``boundingbox`` array with columns 0-3 calibrated.
    """
    reg = reg.T

    # calibrate bounding boxes
    if reg.shape[1] == 1:
        pass  # reshape of reg

    # +1 keeps the original MATLAB-style inclusive width/height.
    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    # Offsets are fractions of the box size.
    bb0 = boundingbox[:, 0] + reg[:, 0] * w
    bb1 = boundingbox[:, 1] + reg[:, 1] * h
    bb2 = boundingbox[:, 2] + reg[:, 2] * w
    bb3 = boundingbox[:, 3] + reg[:, 3] * h

    boundingbox[:, 0:4] = np.array([bb0, bb1, bb2, bb3]).T
    # print "bb", boundingbox
    return boundingbox
def pad(boxesA, w, h):
boxes = boxesA.copy() # shit, value parameter!!!
tmph = boxes[:,3] - boxes[:,1] + 1
tmpw = boxes[:,2] - boxes[:,0] + 1
boxes = boxesA.copy() # shit, value parameter!!!
tmph = boxes[:, 3] - boxes[:, 1] + 1
tmpw = boxes[:, 2] - boxes[:, 0] + 1
numbox = boxes.shape[0]
dx = np.ones(numbox)
dy = np.ones(numbox)
edx = tmpw
edx = tmpw
edy = tmph
x = boxes[:,0:1][:,0]
y = boxes[:,1:2][:,0]
ex = boxes[:,2:3][:,0]
ey = boxes[:,3:4][:,0]
x = boxes[:, 0:1][:, 0]
y = boxes[:, 1:2][:, 0]
ex = boxes[:, 2:3][:, 0]
ey = boxes[:, 3:4][:, 0]
tmp = np.where(ex > w)[0]
if tmp.shape[0] != 0:
edx[tmp] = -ex[tmp] + w-1 + tmpw[tmp]
ex[tmp] = w-1
edx[tmp] = -ex[tmp] + w - 1 + tmpw[tmp]
ex[tmp] = w - 1
tmp = np.where(ey > h)[0]
if tmp.shape[0] != 0:
edy[tmp] = -ey[tmp] + h-1 + tmph[tmp]
ey[tmp] = h-1
edy[tmp] = -ey[tmp] + h - 1 + tmph[tmp]
ey[tmp] = h - 1
tmp = np.where(x < 1)[0]
if tmp.shape[0] != 0:
......@@ -63,30 +62,29 @@ def pad(boxesA, w, h):
if tmp.shape[0] != 0:
dy[tmp] = 2 - y[tmp]
y[tmp] = np.ones_like(y[tmp])
# for python index from 0, while matlab from 1
dy = np.maximum(0, dy-1)
dx = np.maximum(0, dx-1)
y = np.maximum(0, y-1)
x = np.maximum(0, x-1)
edy = np.maximum(0, edy-1)
edx = np.maximum(0, edx-1)
ey = np.maximum(0, ey-1)
ex = np.maximum(0, ex-1)
return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
dy = np.maximum(0, dy - 1)
dx = np.maximum(0, dx - 1)
y = np.maximum(0, y - 1)
x = np.maximum(0, x - 1)
edy = np.maximum(0, edy - 1)
edx = np.maximum(0, edx - 1)
ey = np.maximum(0, ey - 1)
ex = np.maximum(0, ex - 1)
return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
def rerec(bboxA):
    """Convert rectangular boxes to squares, keeping each box centered.

    Parameters
    ----------
    bboxA : numpy.ndarray
        (N, >=4) array whose columns 0-3 are [x1, y1, x2, y2].
        Modified in place.

    Returns
    -------
    numpy.ndarray
        The same array with columns 0-3 replaced by the enclosing
        square of side max(width, height), centered on the original box.
    """
    w = bboxA[:, 2] - bboxA[:, 0]
    h = bboxA[:, 3] - bboxA[:, 1]
    max_length = np.maximum(w, h).T

    # Shift the top-left corner so the square shares the box center,
    # then place the bottom-right corner one side-length away.
    bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - max_length * 0.5
    bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - max_length * 0.5
    bboxA[:, 2:4] = bboxA[:, 0:2] + np.repeat([max_length], 2, axis=0).T
    return bboxA
......@@ -99,15 +97,15 @@ def nms(boxes, threshold, type):
"""
if boxes.shape[0] == 0:
return np.array([])
x1 = boxes[:,0]
y1 = boxes[:,1]
x2 = boxes[:,2]
y2 = boxes[:,3]
s = boxes[:,4]
area = np.multiply(x2-x1+1, y2-y1+1)
I = np.array(s.argsort()) # read s using I
pick = [];
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
s = boxes[:, 4]
area = np.multiply(x2 - x1 + 1, y2 - y1 + 1)
I = np.array(s.argsort()) # read s using I
pick = []
while len(I) > 0:
xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]])
yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]])
......@@ -121,7 +119,7 @@ def nms(boxes, threshold, type):
else:
o = inter / (area[I[-1]] + area[I[0:-1]] - inter)
pick.append(I[-1])
I = I[np.where( o <= threshold)[0]]
I = I[np.where(o <= threshold)[0]]
return pick
......@@ -129,96 +127,101 @@ def generateBoundingBox(map, reg, scale, t):
stride = 2
cellsize = 12
map = map.T
dx1 = reg[0,:,:].T
dy1 = reg[1,:,:].T
dx2 = reg[2,:,:].T
dy2 = reg[3,:,:].T
dx1 = reg[0, :, :].T
dy1 = reg[1, :, :].T
dx2 = reg[2, :, :].T
dy2 = reg[3, :, :].T
(x, y) = np.where(map >= t)
yy = y
xx = x
score = map[x,y]
reg = np.array([dx1[x,y], dy1[x,y], dx2[x,y], dy2[x,y]])
score = map[x, y]
reg = np.array([dx1[x, y], dy1[x, y], dx2[x, y], dy2[x, y]])
if reg.shape[0] == 0:
pass
boundingbox = np.array([yy, xx]).T
bb1 = np.fix((stride * (boundingbox) + 1) / scale).T # matlab index from 1, so with "boundingbox-1"
bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) / scale).T # while python don't have to
# matlab index from 1, so with "boundingbox-1"
bb1 = np.fix((stride * (boundingbox) + 1) / scale).T
bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) /
scale).T # while python don't have to
score = np.array([score])
boundingbox_out = np.concatenate((bb1, bb2, score, reg), axis=0)
return boundingbox_out.T
def drawBoxes(im, boxes):
    """Draw one green 1-px rectangle per detection box on ``im``.

    Parameters
    ----------
    im : numpy.ndarray
        Image to draw on (modified in place by OpenCV).
    boxes : numpy.ndarray
        (N, >=4) array whose columns 0-3 are [x1, y1, x2, y2].

    Returns
    -------
    numpy.ndarray
        The annotated image (same object as ``im``).
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    for i in range(x1.shape[0]):
        # cv2.rectangle needs plain int corner tuples; (0, 255, 0) is
        # green in OpenCV's BGR order.
        cv2.rectangle(im, (int(x1[i]), int(y1[i])),
                      (int(x2[i]), int(y2[i])), (0, 255, 0), 1)
    return im
from time import time

# Stack of start timestamps used by the tic()/toc() timing helpers.
_tstart_stack = []


def tic():
    """Push the current wall-clock time onto the timer stack."""
    _tstart_stack.append(time())


def toc(fmt="Elapsed: %s s"):
    """Pop the most recent tic() timestamp and print the elapsed seconds.

    Parameters
    ----------
    fmt : str, optional
        printf-style format string with one ``%s`` placeholder for the
        elapsed time in seconds.
    """
    print(fmt % (time() - _tstart_stack.pop()))
def detect_face(img, minsize, PNet, RNet, ONet, threshold, fastresize, factor):
img2 = img.copy()
factor_count = 0
total_boxes = np.zeros((0,9), np.float)
total_boxes = np.zeros((0, 9), np.float)
points = []
h = img.shape[0]
w = img.shape[1]
minl = min(h, w)
img = img.astype(float)
m = 12.0/minsize
minl = minl*m
m = 12.0 / minsize
minl = minl * m
# create scale pyramid
scales = []
while minl >= 12:
scales.append(m * pow(factor, factor_count))
minl *= factor
factor_count += 1
# first stage
#print("#############################")
# print("#############################")
for scale in scales:
hs = int(np.ceil(h*scale))
ws = int(np.ceil(w*scale))
hs = int(np.ceil(h * scale))
ws = int(np.ceil(w * scale))
if fastresize:
im_data = (img-127.5)*0.0078125 # [0,255] -> [-1,1]
im_data = cv2.resize(im_data, (ws,hs)) # default is bilinear
else:
im_data = cv2.resize(img, (ws,hs)) # default is bilinear
im_data = (im_data-127.5)*0.0078125 # [0,255] -> [-1,1]
#im_data = imResample(img, hs, ws); print "scale:", scale
im_data = (img - 127.5) * 0.0078125 # [0,255] -> [-1,1]
im_data = cv2.resize(im_data, (ws, hs)) # default is bilinear
else:
im_data = cv2.resize(img, (ws, hs)) # default is bilinear
im_data = (im_data - 127.5) * 0.0078125 # [0,255] -> [-1,1]
# im_data = imResample(img, hs, ws); print "scale:", scale
im_data = np.swapaxes(im_data, 0, 2)
im_data = np.array([im_data], dtype = np.float)
#print(im_data.shape)
im_data = np.array([im_data], dtype=np.float)
# print(im_data.shape)
PNet.blobs['data'].reshape(1, 3, ws, hs)
PNet.blobs['data'].data[...] = im_data
out = PNet.forward()
boxes = generateBoundingBox(out['prob1'][0,1,:,:], out['conv4-2'][0], scale, threshold[0])
boxes = generateBoundingBox(
out['prob1'][0, 1, :, :], out['conv4-2'][0], scale, threshold[0])
if boxes.shape[0] != 0:
pick = nms(boxes, 0.5, 'Union')
if len(pick) > 0 :
if len(pick) > 0:
boxes = boxes[pick, :]
if boxes.shape[0] != 0:
......@@ -229,19 +232,19 @@ def detect_face(img, minsize, PNet, RNet, ONet, threshold, fastresize, factor):
# nms
pick = nms(total_boxes, 0.7, 'Union')
total_boxes = total_boxes[pick, :]
# revise and convert to square
regh = total_boxes[:,3] - total_boxes[:,1]
regw = total_boxes[:,2] - total_boxes[:,0]
t1 = total_boxes[:,0] + total_boxes[:,5]*regw
t2 = total_boxes[:,1] + total_boxes[:,6]*regh
t3 = total_boxes[:,2] + total_boxes[:,7]*regw
t4 = total_boxes[:,3] + total_boxes[:,8]*regh
t5 = total_boxes[:,4]
total_boxes = np.array([t1,t2,t3,t4,t5]).T
total_boxes = rerec(total_boxes) # convert box to square
total_boxes[:,0:4] = np.fix(total_boxes[:,0:4])
regh = total_boxes[:, 3] - total_boxes[:, 1]
regw = total_boxes[:, 2] - total_boxes[:, 0]
t1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
t2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
t3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
t4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
t5 = total_boxes[:, 4]
total_boxes = np.array([t1, t2, t3, t4, t5]).T
total_boxes = rerec(total_boxes) # convert box to square
total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4])
[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)
......@@ -250,92 +253,105 @@ def detect_face(img, minsize, PNet, RNet, ONet, threshold, fastresize, factor):
# second stage
# construct input for RNet
tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox)
tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox)
for k in range(numbox):
tmp = np.zeros((int(tmph[k]) +1, int(tmpw[k]) + 1,3))
tmp[int(dy[k]):int(edy[k])+1, int(dx[k]):int(edx[k])+1] = img[int(y[k]):int(ey[k])+1, int(x[k]):int(ex[k])+1]
tempimg[k,:,:,:] = cv2.resize(tmp, (24, 24))
tempimg = (tempimg-127.5)*0.0078125 # done in imResample function wrapped by python
tmp = np.zeros((int(tmph[k]) + 1, int(tmpw[k]) + 1, 3))
tmp[int(dy[k]):int(edy[k]) + 1, int(dx[k]):int(edx[k]) +
1] = img[int(y[k]):int(ey[k]) + 1, int(x[k]):int(ex[k]) + 1]
tempimg[k, :, :, :] = cv2.resize(tmp, (24, 24))
# done in imResample function wrapped by python
tempimg = (tempimg - 127.5) * 0.0078125
# RNet
tempimg = np.swapaxes(tempimg, 1, 3)
RNet.blobs['data'].reshape(numbox, 3, 24, 24)
RNet.blobs['data'].data[...] = tempimg
out = RNet.forward()
score = out['prob1'][:,1]
pass_t = np.where(score>threshold[1])[0]
score = np.array([score[pass_t]]).T
total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis = 1)
score = out['prob1'][:, 1]
pass_t = np.where(score > threshold[1])[0]
score = np.array([score[pass_t]]).T
total_boxes = np.concatenate((total_boxes[pass_t, 0:4], score), axis=1)
mv = out['conv5-2'][pass_t, :].T
if total_boxes.shape[0] > 0:
pick = nms(total_boxes, 0.7, 'Union')
if len(pick) > 0 :
if len(pick) > 0:
total_boxes = total_boxes[pick, :]
total_boxes = bbreg(total_boxes, mv[:, pick])
total_boxes = rerec(total_boxes)
numbox = total_boxes.shape[0]
if numbox > 0:
# third stage
total_boxes = np.fix(total_boxes)
[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)
[dy, edy, dx, edx, y, ey, x, ex, tmpw,
tmph] = pad(total_boxes, w, h)
tempimg = np.zeros((numbox, 48, 48, 3))
for k in range(numbox):
tmp = np.zeros((int(tmph[k]), int(tmpw[k]),3))