Skip to content
Snippets Groups Projects
Verified Commit dbe31f76 authored by Yannick DAYER
Browse files

fix(kmeans): initialization depends on the chunks.

Rechunk the data array so that k_init returns the same results regardless of how the input was chunked.
parent 8d701056
No related branches found
No related tags found
No related merge requests found
...@@ -307,11 +307,11 @@ class KMeansMachine(BaseEstimator): ...@@ -307,11 +307,11 @@ class KMeansMachine(BaseEstimator):
logger.debug(f"Initializing k-means means with '{self.init_method}'.") logger.debug(f"Initializing k-means means with '{self.init_method}'.")
# k_init requires da.Array as input. # k_init requires da.Array as input.
logger.debug("Transform k-means data to dask array") logger.debug("Transform k-means data to dask array")
data = da.array(data) init_data = da.array(data)
data.rechunk(1, data.shape[-1]) # Prevents issue with large arrays. init_data = init_data.rechunk({0: data.shape[0], -1: data.shape[-1]})
logger.debug("Get k-means centroids") logger.debug("Get k-means centroids")
self.centroids_ = k_init( self.centroids_ = k_init(
X=data, X=init_data,
n_clusters=self.n_clusters, n_clusters=self.n_clusters,
init=self.init_method, init=self.init_method,
random_state=self.random_state, random_state=self.random_state,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment