From 8129781dc508ae99578b59c67eb68100573c5072 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Mon, 23 Oct 2017 09:53:06 +0200
Subject: [PATCH] Created a dummy db that dumps some fake missing files

Removed dummy-missingfiles database

Created a mechanism to return None with a certain probability
---
 bob/bio/base/test/dummy/preprocessor.py | 8 ++++++--
 bob/bio/base/tools/preprocessor.py      | 9 +++++++++
 setup.py                                | 2 +-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/bob/bio/base/test/dummy/preprocessor.py b/bob/bio/base/test/dummy/preprocessor.py
index 7ccfcdcb..3440e953 100644
--- a/bob/bio/base/test/dummy/preprocessor.py
+++ b/bob/bio/base/test/dummy/preprocessor.py
@@ -1,15 +1,19 @@
 from bob.bio.base.preprocessor import Preprocessor
+import numpy
+numpy.random.seed(10)
 
 
 class DummyPreprocessor (Preprocessor):
-  def __init__(self, return_none=False, **kwargs):
+  def __init__(self, return_none=False, probability_of_none=1,  **kwargs):
     Preprocessor.__init__(self)
     self.return_none = return_none
+    self.probability_of_none = probability_of_none
 
   def __call__(self, data, annotation):
     """Does nothing, simply converts the data type of the data, ignoring any annotation."""
     if self.return_none:
-      return None
+      return numpy.random.choice([None, data], p=[self.probability_of_none, 1-self.probability_of_none])
+      
     return data
 
 
diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py
index 045d1e71..92f8c387 100644
--- a/bob/bio/base/tools/preprocessor.py
+++ b/bob/bio/base/tools/preprocessor.py
@@ -68,6 +68,15 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files
     if not utils.check_file(preprocessed_data_file, force,
                             preprocessor.min_preprocessed_file_size):
       logger.debug("... Processing original data file '%s'", file_name)
+
+      # The original data file may be missing from the database
+      if not os.path.exists(file_name):
+        if allow_missing_files:
+          logger.debug("... Original data file is missing '%s' and will be skipped", file_name)
+          continue
+        else:
+          raise RuntimeError("Original data file is missing '%s' " % file_name)
+      
       data = preprocessor.read_original_data(file_object, original_directory, original_extension)
       # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. somewhere)
       bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file))
diff --git a/setup.py b/setup.py
index f1a05cce..4d92b64f 100644
--- a/setup.py
+++ b/setup.py
@@ -89,7 +89,7 @@ setup(
       ],
 
       'bob.bio.database': [
-        'dummy             = bob.bio.base.test.dummy.database:database', # for test purposes only
+        'dummy              = bob.bio.base.test.dummy.database:database', # for test purposes only
       ],
 
       'bob.bio.preprocessor': [
-- 
GitLab