diff --git a/bob/learn/em/data/gmm_MAP.hdf5 b/bob/learn/em/data/gmm_MAP.hdf5 index 0f57a2bde913bba76c2d9ab5a07cf81b07346d0f..8106590f1721a6c6084c63bc9637b52a5b39b24e 100644 Binary files a/bob/learn/em/data/gmm_MAP.hdf5 and b/bob/learn/em/data/gmm_MAP.hdf5 differ diff --git a/bob/learn/em/data/gmm_ML.hdf5 b/bob/learn/em/data/gmm_ML.hdf5 index 20362881b1661a826a8773d1658a8df559099d46..5e667e2498f69cf59303fc30c675c9f6f7b1fcc3 100644 Binary files a/bob/learn/em/data/gmm_ML.hdf5 and b/bob/learn/em/data/gmm_ML.hdf5 differ diff --git a/bob/learn/em/mixture/gmm.py b/bob/learn/em/mixture/gmm.py index 0be5a1772bc19184cd8534312f8901192f96c575..63171cf48c610a86e0cfaf73aecb0b2468c337a5 100644 --- a/bob/learn/em/mixture/gmm.py +++ b/bob/learn/em/mixture/gmm.py @@ -80,42 +80,42 @@ class GMMStats: if isinstance(hdf5, str): hdf5 = HDF5File(hdf5, "r") try: - version_major, version_minor = hdf5.get("meta_file_version")[()].split(".") + version_major, version_minor = hdf5.attrs["file_version"].split(".") logger.debug( f"Reading a GMMStats HDF5 file of version {version_major}.{version_minor}" ) - except TypeError: + except (KeyError, RuntimeError): version_major, version_minor = 0, 0 if int(version_major) >= 1: - if hdf5["meta_writer_class"][()] != str(cls): - logger.warning(f"{hdf5['meta_writer_class'][()]} is not {cls}.") + if hdf5.attrs["writer_class"] != str(cls): + logger.warning(f"{hdf5.attrs['writer_class']} is not {cls}.") self = cls( n_gaussians=hdf5["n_gaussians"][()], n_features=hdf5["n_features"][()] ) self.log_likelihood = hdf5["log_likelihood"][()] self.t = hdf5["T"][()] - self.n = hdf5["n"][()] - self.sum_px = hdf5["sumPx"][()] - self.sum_pxx = hdf5["sumPxx"][()] + self.n = hdf5["n"][...] + self.sum_px = hdf5["sumPx"][...] + self.sum_pxx = hdf5["sumPxx"][...] else: # Legacy file version logger.info("Loading a legacy HDF5 stats file.") self = cls( n_gaussians=int(hdf5["n_gaussians"][()]), n_features=int(hdf5["n_inputs"][()]), ) - self.log_likelihood = float(hdf5["log_liklihood"][()]) + self.log_likelihood = hdf5["log_liklihood"][()] self.t = int(hdf5["T"][()]) - self.n = hdf5["n"][()].reshape((self.n_gaussians,)) - self.sum_px = hdf5["sumPx"][()].reshape(self.shape) - self.sum_pxx = hdf5["sumPxx"][()].reshape(self.shape) + self.n = np.reshape(hdf5["n"], (self.n_gaussians,)) + self.sum_px = np.reshape(hdf5["sumPx"], (self.shape)) + self.sum_pxx = np.reshape(hdf5["sumPxx"], (self.shape)) return self def save(self, hdf5): """Saves the current statistsics in an `HDF5File` object.""" if isinstance(hdf5, str): hdf5 = HDF5File(hdf5, "w") - hdf5["meta_file_version"] = "1.0" - hdf5["meta_writer_class"] = str(self.__class__) + hdf5.attrs["file_version"] = "1.0" + hdf5.attrs["writer_class"] = str(self.__class__) hdf5["n_gaussians"] = self.n_gaussians hdf5["n_features"] = self.n_features hdf5["log_likelihood"] = float(self.log_likelihood) @@ -438,16 +438,16 @@ class GMMMachine(BaseEstimator): if isinstance(hdf5, str): hdf5 = HDF5File(hdf5, "r") try: - version_major, version_minor = hdf5.get("meta_file_version")[()].split(".") + version_major, version_minor = hdf5.attrs["file_version"].split(".") logger.debug( f"Reading a GMMMachine HDF5 file of version {version_major}.{version_minor}" ) - except (TypeError, RuntimeError): + except (KeyError, RuntimeError): version_major, version_minor = 0, 0 if int(version_major) >= 1: - if hdf5["meta_writer_class"][()] != str(cls): - logger.warning(f"{hdf5['meta_writer_class'][()]} is not {cls}.") - if hdf5["trainer"][()] == "map" and ubm is None: + if hdf5.attrs["writer_class"] != str(cls): + logger.warning(f"{hdf5.attrs['writer_class']} is not {cls}.") + if hdf5["trainer"] == "map" and ubm is None: raise ValueError("The UBM is needed when loading a MAP machine.") self = cls( n_gaussians=hdf5["n_gaussians"][()], @@ -455,30 +455,30 @@ class GMMMachine(BaseEstimator): ubm=ubm, convergence_threshold=1e-5, max_fitting_steps=hdf5["max_fitting_steps"][()], - weights=hdf5["weights"][()], + weights=hdf5["weights"][...], k_means_trainer=None, update_means=hdf5["update_means"][()], update_variances=hdf5["update_variances"][()], update_weights=hdf5["update_weights"][()], ) gaussians_group = hdf5["gaussians"] - self.means = gaussians_group["means"][()] - self.variances = gaussians_group["variances"][()] - self.variance_thresholds = gaussians_group["variance_thresholds"][()] + self.means = gaussians_group["means"][...] + self.variances = gaussians_group["variances"][...] + self.variance_thresholds = gaussians_group["variance_thresholds"][...] else: # Legacy file version logger.info("Loading a legacy HDF5 machine file.") - n_gaussians = int(hdf5["m_n_gaussians"][()]) + n_gaussians = hdf5["m_n_gaussians"][()] g_means = [] g_variances = [] g_variance_thresholds = [] for i in range(n_gaussians): gaussian_group = hdf5[f"m_gaussians{i}"] - g_means.append(gaussian_group["m_mean"][()]) - g_variances.append(gaussian_group["m_variance"][()]) + g_means.append(gaussian_group["m_mean"][...]) + g_variances.append(gaussian_group["m_variance"][...]) g_variance_thresholds.append( - gaussian_group["m_variance_thresholds"][()] + gaussian_group["m_variance_thresholds"][...] ) - weights = hdf5["m_weights"][()].reshape(n_gaussians) + weights = np.reshape(hdf5["m_weights"], (n_gaussians,)) self = cls(n_gaussians=n_gaussians, ubm=ubm, weights=weights) self.means = np.array(g_means).reshape(n_gaussians, -1) self.variances = np.array(g_variances).reshape(n_gaussians, -1) @@ -491,8 +491,8 @@ class GMMMachine(BaseEstimator): """Saves the current statistics in an `HDF5File` object.""" if isinstance(hdf5, str): hdf5 = HDF5File(hdf5, "w") - hdf5["meta_file_version"] = "1.0" - hdf5["meta_writer_class"] = str(self.__class__) + hdf5.attrs["file_version"] = "1.0" + hdf5.attrs["writer_class"] = str(self.__class__) hdf5["n_gaussians"] = self.n_gaussians hdf5["trainer"] = self.trainer hdf5["convergence_threshold"] = self.convergence_threshold diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py index 785b0ae760049013d889a3e8723ba099b73e35d4..e99e1601fb5cc0a9e1ef3b89b6ce1c3c0bcd5d3a 100644 --- a/bob/learn/em/test/test_gmm.py +++ b/bob/learn/em/test/test_gmm.py @@ -55,6 +55,10 @@ def test_GMMStats(): assert (gs != gs_loaded ) is False assert gs.is_similar_to(gs_loaded) + assert type(gs_loaded.n_gaussians) is np.int64 + assert type(gs_loaded.n_features) is np.int64 + assert type(gs_loaded.log_likelihood) is np.float64 + # Saves and load from file using `load` filename = str(tempfile.mkstemp(".hdf5")[1]) gs.save(hdf5=HDF5File(filename, "w")) @@ -183,14 +187,22 @@ def test_GMMMachine(): assert gmm.is_similar_to(gmm6) is False # Saving and loading - filename = tempfile.mkstemp(suffix=".hdf5")[1] - gmm.save(HDF5File(filename, "w")) - gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r")) - assert gmm == gmm1 - gmm.save(filename) - gmm1 = GMMMachine.from_hdf5(filename) - assert gmm == gmm1 - os.unlink(filename) + with tempfile.NamedTemporaryFile(suffix=".hdf5") as f: + filename = f.name + gmm.save(HDF5File(filename, "w")) + gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r")) + assert type(gmm1.n_gaussians) is np.int64 + assert type(gmm1.update_means) is np.bool_ + assert type(gmm1.update_variances) is np.bool_ + assert type(gmm1.update_weights) is np.bool_ + assert type(gmm1.trainer) is str + assert gmm1.ubm is None + assert gmm == gmm1 + with tempfile.NamedTemporaryFile(suffix=".hdf5") as f: + filename = f.name + gmm.save(filename) + gmm1 = GMMMachine.from_hdf5(filename) + assert gmm == gmm1 # Weights n_gaussians = 5 diff --git a/conda/meta.yaml b/conda/meta.yaml index 37bcc615cf5ef6b85465592622c7152a62b01dbc..8ba3b142709c9ef386eddd185f79ca6442567161 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -35,6 +35,7 @@ requirements: - dask {{ dask }} - dask-ml {{ dask_ml }} - h5py {{ h5py }} + - h5py >=3 run: - python - setuptools diff --git a/requirements.txt b/requirements.txt index 8a86a69f50e30281292a5f1c6a71ecd22fb71f01..0a01048ef7e4c6adf624b636f594fd27df5ac9d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ bob.learn.activation bob.learn.linear dask dask-ml -h5py +h5py >= 3