diff --git a/bob/ip/tensorflow_extractor/MTCNN.py b/bob/ip/tensorflow_extractor/MTCNN.py
index 1e21778714e17bf259c8b9180b3d5f24eec339fe..02fb3423bf3e65b34125b599826fdab3625d0eff 100644
--- a/bob/ip/tensorflow_extractor/MTCNN.py
+++ b/bob/ip/tensorflow_extractor/MTCNN.py
@@ -3,12 +3,15 @@ import pkg_resources
 import tensorflow as tf
 import multiprocessing
 import bob.io.image
+import bob.ip.base
+from bob.ip.facedetect import BoundingBox
+import numpy
 
 MODEL_PATH = pkg_resources.resource_filename(__name__, "data/mtcnn/mtcnn.pb")
 
 
-class MTCNN:
+class FaceDetector:
     """MTCNN v1 wrapper. See
     https://kpzhang93.github.io/MTCNN_face_detection_alignment/index.html for more
@@ -48,6 +51,117 @@ class MTCNN:
         )
         self.sess = tf.Session(graph=graph, config=config)
 
+    def rectangle2bounding_box2(self, raw_bounding_boxes):
+        """
+        Converts raw bounding boxes from :any:`detect` to
+        :any:`bob.ip.facedetect.BoundingBox` objects.
+        """
+
+        def convert_one(bb):
+            # detect() returns the probabilities separately, so only the
+            # four box coordinates remain here
+            assert len(bb) == 4
+            # clamp negative coordinates to the image border
+            topleft = (numpy.max([0, bb[0]]), numpy.max([0, bb[1]]))
+            size = (bb[2] - bb[0], bb[3] - bb[1])
+            return BoundingBox(topleft, size)
+
+        # a single bounding box comes as a 1D array
+        if raw_bounding_boxes.ndim == 1:
+            return convert_one(raw_bounding_boxes)
+
+        return [convert_one(b) for b in raw_bounding_boxes]
+
+    def _convert_list_to_landmarks(self, points):
+        """
+        Converts each raw array of 10 values to a dictionary with one
+        (y, x) point per landmark name.
+        """
+
+        landmarks = []
+        possible_landmarks = ['reye', 'leye',
+                              'nose', 'mouthleft', 'mouthright']
+        for i in range(points.shape[0]):
+            # each row holds the 5 x-coordinates followed by the 5
+            # y-coordinates; bob uses (y, x) ordering
+            landmark = dict()
+            for offset, p in enumerate(possible_landmarks):
+                landmark[p] = (int(points[i][offset + 5]),
+                               int(points[i][offset]))
+            landmarks.append(landmark)
+
+        return landmarks
+
+    def detect_all_faces(self, img, return_bob_bb=True):
+        """
+        Detects all faces and their respective landmarks, if any, in a
+        color image.
+
+        Parameters
+        ----------
+        img : numpy.array
+            The color image [c, h, w]
+        return_bob_bb : bool, optional
+            If True, the faces are wrapped using
+            :any:`bob.ip.facedetect.BoundingBox`.
+
+        Returns
+        -------
+        object
+            Two lists; the first one contains the bounding boxes of the
+            detected faces and the second one contains the corresponding
+            landmark dictionaries. The CNN returns 5 facial landmarks
+            (leye, reye, nose, mouthleft, mouthright). If there is no
+            face, ``(None, None)`` is returned.
+
+        Raises
+        ------
+        ValueError
+            When img.ndim is not 3.
+
+        """
+        array_boxes, array_probs, array_landmarks = self.detect(img)
+
+        if len(array_boxes) == 0:
+            return None, None
+
+        # from bob.ip.mtcnn
+        bb = array_boxes.copy()
+        if return_bob_bb:
+            bb = self.rectangle2bounding_box2(bb)
+
+        return bb, self._convert_list_to_landmarks(array_landmarks)
+
+    def detect_single_face(self, image):
+        """
+        Returns the biggest face in a color image, if any.
+
+        Parameters
+        ----------
+        image : numpy.array
+            numpy array with a color image [c, h, w]
+
+        Returns
+        -------
+        The face bounding box and its respective 5 landmarks (leye, reye,
+        nose, mouthleft, mouthright).
+        If there is no face, ``(None, None)`` is returned.
+
+        """
+
+        faces, landmarks = self.detect_all_faces(image)
+
+        # no face was found
+        if faces is None:
+            return None, None
+
+        # keep the detection with the largest bounding-box area
+        index = numpy.argmax([(f.bottomright[0] - f.topleft[0])
+                              * (f.bottomright[1] - f.topleft[1])
+                              for f in faces])
+
+        return faces[index], landmarks[index]
+
     def detect(self, img):
         """Detects all faces in the image.
@@ -80,6 +194,87 @@ class MTCNN:
         prob, landmarks, box = self.sess.run(fetches, feeds)
         return box, prob, landmarks
 
+    def detect_crop_align(self, image, final_image_size=(160, 160)):
+        """
+        Detects the biggest face and crops it based on the eye locations,
+        using :py:class:`bob.ip.base.FaceEyesNorm`.
+
+        The final eye locations were inspired by:
+        https://gitlab.idiap.ch/bob/bob.bio.caffe_face/blob/master/bob/bio/caffe_face/config/preprocessor/vgg_preprocessor.py
+
+        Parameters
+        ----------
+        image : numpy.array
+            numpy array with a color image [c, h, w]
+        final_image_size : tuple
+            The final image dimensions [w, h]
+
+        Returns
+        -------
+        The cropped image. If there is no face, ``None`` is returned.
+
+        """
+
+        face, landmark = self.detect_single_face(image)
+
+        if face is None:
+            return None
+
+        CROPPED_IMAGE_WIDTH = final_image_size[0]
+        CROPPED_IMAGE_HEIGHT = final_image_size[1]
+
+        # final eye positions w.r.t. the cropped image size
+        RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44,
+                         CROPPED_IMAGE_WIDTH / 3.02)
+        LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT / 3.44,
+                        CROPPED_IMAGE_WIDTH / 1.49)
+
+        extractor = bob.ip.base.FaceEyesNorm(
+            (CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),
+            RIGHT_EYE_POS, LEFT_EYE_POS)
+        return extractor(image, landmark['reye'], landmark['leye'])
+
+    def detect_crop(self, image, final_image_size=(182, 182), margin=44):
+        """
+        Detects the biggest face and crops it with a margin.
+
+        Parameters
+        ----------
+        image : numpy.array
+            numpy array with a color image [c, h, w]
+        final_image_size : tuple
+            The final image dimensions [w, h]
+        margin : int
+            Margin (in pixels) added around the detected bounding box
+            before cropping
+
+        Returns
+        -------
+        The cropped image.
+        If there is no face, ``None`` is returned.
+
+        """
+
+        face, landmark = self.detect_single_face(image)
+
+        if face is None:
+            return None
+
+        # crop with a margin, clipped to the image borders
+        top = numpy.uint(numpy.maximum(face.top - margin / 2, 0))
+        left = numpy.uint(numpy.maximum(face.left - margin / 2, 0))
+        bottom = numpy.uint(numpy.minimum(
+            face.bottom + margin / 2, image.shape[1]))
+        right = numpy.uint(numpy.minimum(
+            face.right + margin / 2, image.shape[2]))
+
+        cropped = image[:, top:bottom, left:right]
+
+        # rescale the crop to the final size
+        dst = numpy.zeros(shape=(3, final_image_size[0], final_image_size[1]))
+        bob.ip.base.scale(cropped, dst)
+        return dst
+
     def annotations(self, img):
         """Detects all faces in the image
diff --git a/bob/ip/tensorflow_extractor/__init__.py b/bob/ip/tensorflow_extractor/__init__.py
index 3899dd2eb915bedb2ac434089f95d739ea96f276..d6fa7d0ceb5a5a6c47ee7f8d0a5528bf9cc793fa 100755
--- a/bob/ip/tensorflow_extractor/__init__.py
+++ b/bob/ip/tensorflow_extractor/__init__.py
@@ -37,7 +37,7 @@ from .Extractor import Extractor
 from .FaceNet import FaceNet
 from .DrGanMSU import DrGanMSUExtractor
 from .Vgg16 import VGGFace, vgg_16
-from .MTCNN import MTCNN
+from .MTCNN import FaceDetector
 
 
 # gets sphinx autodoc done right - don't remove it
@@ -61,7 +61,7 @@ __appropriate__(
     FaceNet,
     DrGanMSUExtractor,
     VGGFace,
-    MTCNN,
+    FaceDetector,
 )
 
 # gets sphinx autodoc done right - don't remove it
diff --git a/bob/ip/tensorflow_extractor/data/image_r10.hdf5 b/bob/ip/tensorflow_extractor/data/image_r10.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..9b2779770c9656f731e98f9024c5eb525985dc53
Binary files /dev/null and b/bob/ip/tensorflow_extractor/data/image_r10.hdf5 differ
diff --git a/bob/ip/tensorflow_extractor/data/jeep.jpg b/bob/ip/tensorflow_extractor/data/jeep.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..734983ba9e620294dbbd1cfffe798aab96434e76
Binary files /dev/null and b/bob/ip/tensorflow_extractor/data/jeep.jpg differ
diff --git a/bob/ip/tensorflow_extractor/data/multiple-faces.jpg b/bob/ip/tensorflow_extractor/data/multiple-faces.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e39305e1931e63dbb11926ae5b16d96c9a6b6e39
Binary files /dev/null and b/bob/ip/tensorflow_extractor/data/multiple-faces.jpg differ
diff --git a/bob/ip/tensorflow_extractor/data/test_image.hdf5 b/bob/ip/tensorflow_extractor/data/test_image.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..b12025fd9a3b481b52c6c00b617b61ea5a5868d7
Binary files /dev/null and b/bob/ip/tensorflow_extractor/data/test_image.hdf5 differ
diff --git a/bob/ip/tensorflow_extractor/test.py b/bob/ip/tensorflow_extractor/test.py
index dff72a5ba1faa6372228f12bdcc6454a5a395aa0..305e119138b95e9a40ab69b576860b363e003ad6 100644
--- a/bob/ip/tensorflow_extractor/test.py
+++ b/bob/ip/tensorflow_extractor/test.py
@@ -9,11 +9,15 @@ import numpy
 import json
 import os
 
+from bob.ip.facedetect import BoundingBox
+
 numpy.random.seed(10)
 slim = tf.contrib.slim
 
 from . import scratch_network
 
+detector = bob.ip.tensorflow_extractor.FaceDetector()
+
 
 def test_output():
@@ -74,9 +78,9 @@ def test_mtcnn():
     test_image = datafile("mtcnn/test_image.png", __name__)
     ref_numbers = datafile("mtcnn/mtcnn.hdf5", __name__)
     ref_annots = datafile("mtcnn/mtcnn.json", __name__)
-    from bob.ip.tensorflow_extractor import MTCNN
+    from bob.ip.tensorflow_extractor import FaceDetector
 
-    mtcnn = MTCNN()
+    mtcnn = FaceDetector()
     img = bob.io.base.load(test_image)
     bbox, prob, landmarks = mtcnn.detect(img)
     with bob.io.base.HDF5File(ref_numbers, "r") as f:
@@ -95,3 +99,61 @@ def test_mtcnn():
     for k, v in a.items():
         vref = aref[k]
         assert numpy.allclose(v, vref)
+
+
+def test_face_detection():
+
+    ### Testing multiple detections
+    color_image = bob.io.base.load(pkg_resources.resource_filename(
+        'bob.ip.tensorflow_extractor', 'data/multiple-faces.jpg'))
+    faces, landmarks = detector.detect_all_faces(color_image)
+    assert len(faces) == 18
+    assert len(landmarks) == 18
+
+    possible_landmarks = ['reye', 'leye', 'nose', 'mouthleft', 'mouthright']
+    for p in possible_landmarks:
+        assert p in landmarks[0]
+
+    ### Testing single detection
+    color_image = bob.io.base.load(pkg_resources.resource_filename(
+        'bob.ip.facedetect', 'data/testimage.jpg'))
+    face, landmarks = detector.detect_single_face(color_image)
+    assert isinstance(face, BoundingBox)
+
+    ### Testing no detection
+    color_image = bob.io.base.load(pkg_resources.resource_filename(
+        'bob.ip.tensorflow_extractor', 'data/jeep.jpg'))
+    face, landmarks = detector.detect_single_face(color_image)
+    assert face is None
+    assert landmarks is None
+
+    ### Testing synthetic arrays without any face
+    image = numpy.zeros((3, 100, 100))
+    result = detector.detect_single_face(image)
+    assert result == (None, None)
+
+    image = numpy.ones((3, 100, 100))
+    result = detector.detect_single_face(image)
+    assert result == (None, None)
+
+    ### Testing on an actual image stored as integers
+    test_file = pkg_resources.resource_filename(
+        'bob.ip.tensorflow_extractor', 'data/test_image.hdf5')
+    with bob.io.base.HDF5File(test_file) as f:
+        image = f.read('image')
+
+    result = detector.detect_single_face(image)
+    # TODO: investigate whether x and y are swapped before enabling these
+    # asserts; observed topleft=(75, 0) and bottomright=(239, 225):
+    # assert result[0].topleft == (0, 58)
+    # assert result[0].bottomright == (228, 232)
+
+
+def test_crop():
+
+    ### Testing detection, crop and alignment
+    color_image = bob.io.base.load(pkg_resources.resource_filename(
+        'bob.ip.facedetect', 'data/testimage.jpg'))
+    cropped = detector.detect_crop_align(color_image, final_image_size=(224, 224))
+    assert cropped.shape == (3, 224, 224)
+
diff --git a/conda/meta.yaml b/conda/meta.yaml
index f4d97e9f0407079f3652d5914c4b871f9a1c06a1..b8a81f22bf0c3474891f4425effc92333ac215d6 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -24,6 +24,8 @@ requirements:
   - bob.io.image
   - bob.db.mnist
   - bob.ip.color
+  - bob.ip.base
+  - bob.ip.facedetect
   - six {{ six }}
   - tensorflow {{ tensorflow }}
   - numpy {{ numpy }}
diff --git a/requirements.txt b/requirements.txt
index f2c95a248699d5901e69443f861d1c9b3cb7a3c4..e6a3676995ad3af989790e9e7ea6dc5e03bf8024 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,6 @@ bob.io.base
 bob.io.image
 bob.db.mnist
 bob.ip.color
+bob.ip.base
+bob.ip.facedetect
 numpy
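
Usage sketch (reviewer note, not part of the patch): a minimal example of how
the FaceDetector entry points added above are expected to be called. The image
path is a placeholder and the snippet is untested; it only assumes the API
defined in MTCNN.py (detect_all_faces, detect_single_face, detect_crop_align)
and bob's [c, h, w] image layout.

    import bob.io.base
    import bob.ip.tensorflow_extractor

    # placeholder path; any color image works
    image = bob.io.base.load('some-image.jpg')

    detector = bob.ip.tensorflow_extractor.FaceDetector()

    # all faces: a list of bob.ip.facedetect.BoundingBox objects plus one
    # landmark dictionary per face; (None, None) when nothing is detected
    faces, landmarks = detector.detect_all_faces(image)
    if faces is not None:
        for face, lm in zip(faces, landmarks):
            print(face.topleft, face.bottomright, lm['reye'], lm['leye'])

    # only the biggest face, and a 224x224 eye-aligned crop of it
    face, lm = detector.detect_single_face(image)
    crop = detector.detect_crop_align(image, final_image_size=(224, 224))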