From 1e0796e6d0c33db42d12fa94ef7311abc37c52d3 Mon Sep 17 00:00:00 2001
From: Amir MOHAMMADI <amir.mohammadi@idiap.ch>
Date: Tue, 13 Jun 2017 11:07:35 +0200
Subject: [PATCH] return dtype, shape as a tuple

---
 bob/bio/base/utils/io.py | 75 +++++-----------------------------------
 1 file changed, 9 insertions(+), 66 deletions(-)

diff --git a/bob/bio/base/utils/io.py b/bob/bio/base/utils/io.py
index e7fefe8e..b581ec45 100644
--- a/bob/bio/base/utils/io.py
+++ b/bob/bio/base/utils/io.py
@@ -5,7 +5,7 @@ import collections  # this is needed for the sphinx documentation
 import functools  # this is needed for the sphinx documentation
 import numpy
 import logging
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("bob.bio.base")
 
 from .. import database
 import bob.io.base
@@ -97,8 +97,7 @@ def save(data, file, compression=0):
   """Saves the data to file using HDF5. The given file might be an HDF5 file open for writing, or a string.
   If the given data contains a ``save`` method, this method is called with the given HDF5 file.
   Otherwise the data is written to the HDF5 file using the given compression."""
-  f = file if isinstance(
-      file, bob.io.base.HDF5File) else bob.io.base.HDF5File(file, 'w')
+  f = file if isinstance(file, bob.io.base.HDF5File) else bob.io.base.HDF5File(file, 'w')
   if hasattr(data, 'save'):
     data.save(f)
   else:
@@ -190,64 +189,9 @@ def _generate_features(reader, paths):
   Yields
   ------
   object
-      The first object returned is the :py:class:`numpy.dtype` of features. The
-      second objects returned is the shape of the first feature. The rest of
-      objects are the actual values in features. The features are returned in
-      C order.
-
-  Examples
-  --------
-  This function can be used to with :py:func:`numpy.fromiter`:
-
-  >>> def reader(path):
-  ...     # in each file, there are 5 samples and features are 2 dimensional.
-  ...     return numpy.arange(10).reshape(5,2)
-  >>> paths = ['path1', 'path2']
-  >>> iterator = _generate_features(reader, paths)
-  >>> dtype = next(iterator)
-  >>> dtype
-  dtype('int64')
-  >>> first_feature_shape = next(iterator)
-  >>> first_feature_shape
-  (5, 2)
-  >>> all_features_flat = numpy.fromiter(iterator, dtype)
-  >>> all_features_flat
-  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
-  >>> all_features = all_features_flat.reshape(-1, first_feature_shape[1])
-  >>> all_features
-  array([[0, 1],
-         [2, 3],
-         [4, 5],
-         [6, 7],
-         [8, 9],
-         [0, 1],
-         [2, 3],
-         [4, 5],
-         [6, 7],
-         [8, 9]])
-  >>> all_features_with_more_memory = numpy.vstack([reader(p) for p in paths])
-  >>> assert numpy.allclose(all_features == all_features_with_more_memory)
-
-  You can allocate the array at once to improve the performance if you know
-  that all features in paths have the same shape and you know the total number
-  of the paths:
-  >>> iterator = _generate_features(reader, paths)
-  >>> dtype = next(iterator)
-  >>> first_feature_shape = next(iterator)
-  >>> total_size = len(paths) * numpy.prod(first_feature_shape)
-  >>> all_features_flat = numpy.fromiter(iterator, dtype, total_size)
-  >>> all_features = all_features_flat.reshape(-1, first_feature_shape[1])
-  >>> all_features
-  array([[0, 1],
-         [2, 3],
-         [4, 5],
-         [6, 7],
-         [8, 9],
-         [0, 1],
-         [2, 3],
-         [4, 5],
-         [6, 7],
-         [8, 9]])
+      The first object yielded is a ``(dtype, shape)`` tuple: the
+      :py:class:`numpy.dtype` of the features and the shape (a list) of the
+      first feature. The remaining objects are the feature values in C order.
   """
   for i, path in enumerate(paths):
     feature = numpy.atleast_2d(reader(path))
@@ -255,11 +199,11 @@ def _generate_features(reader, paths):
     if i == 0:
       dtype = feature.dtype
       shape = list(feature.shape)
-      yield dtype
-      yield shape
+      yield (dtype, shape)
     else:
-      # make sure all features have the same shape[1:]
+      # make sure all features have the same shape[1:] and dtype
       assert shape[1:] == list(feature.shape[1:])
+      assert dtype == feature.dtype
 
     for value in feature.flat:
       yield value
@@ -334,8 +278,7 @@ def vstack_features(reader, paths, same_size=False):
          [8, 9]])
   """
   iterable = _generate_features(reader, paths)
-  dtype = next(iterable)
-  shape = next(iterable)
+  dtype, shape = next(iterable)
   if same_size:
     total_size = int(len(paths) * numpy.prod(shape))
     all_features = numpy.fromiter(iterable, dtype, total_size)
-- 
GitLab