From 20eedc46418470c42ff2069c1da98df0176bda0c Mon Sep 17 00:00:00 2001
From: Yannick DAYER <yannick.dayer@idiap.ch>
Date: Thu, 10 Nov 2022 12:57:04 +0100
Subject: [PATCH] [fix] Specify the chunk size of arrays for k_init.

The "auto" chunk size somehow breaks k_init for large numpy arrays.
---
 src/bob/learn/em/kmeans.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/bob/learn/em/kmeans.py b/src/bob/learn/em/kmeans.py
index 8f968eb..bc2664f 100644
--- a/src/bob/learn/em/kmeans.py
+++ b/src/bob/learn/em/kmeans.py
@@ -308,6 +308,7 @@ class KMeansMachine(BaseEstimator):
         # k_init requires da.Array as input.
         logger.debug("Transform k-means data to dask array")
         data = da.array(data)
+        data = data.rechunk({1: -1})  # One chunk on axis 1 for large arrays.
         logger.debug("Get k-means centroids")
         self.centroids_ = k_init(
             X=data,
-- 
GitLab