Skip to content

The nightly builds are failing because of this package.

See the failing CI jobs:

https://gitlab.idiap.ch/bob/nightlies/-/jobs/250661

and

https://gitlab.idiap.ch/bob/bob.pipelines/-/jobs/250818

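This is blocking development of the packages higher up the stack (those that depend on this one).

The failing test is `test_dataset_pipeline_with_dask_ml` (full traceback below): during `fit`, the wrapped `SGDClassifier.partial_fit` receives the tuple `('pca.transform-98eb05bfe3c4e482e6896d5f42ca3d48', 1, 0)` instead of an array (this looks like an unevaluated dask task key), so scikit-learn's `check_array` raises `ValueError: could not convert string to float`.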

=================================== FAILURES ===================================
 ______________________ test_dataset_pipeline_with_dask_ml ______________________
     def test_dataset_pipeline_with_dask_ml():
     
         scaler = dask_ml.preprocessing.StandardScaler()
         pca = dask_ml.decomposition.PCA(n_components=3, random_state=0)
         clf = SGDClassifier(random_state=0, loss="log", penalty="l2", tol=1e-3)
         clf = dask_ml.wrappers.Incremental(clf, scoring="accuracy")
     
         iris_ds = _build_iris_dataset(shuffle=True)
     
         estimator = mario.xr.DatasetPipeline(
             [
                 dict(
                     estimator=scaler,
                     output_dims=[("feature", None)],
                     input_dask_array=True,
                 ),
                 dict(
                     estimator=pca,
                     output_dims=[("pca_features", 3)],
                     input_dask_array=True,
                 ),
                 dict(
                     estimator=clf,
                     fit_input=["data", "target"],
                     output_dims=[],
                     input_dask_array=True,
                     fit_kwargs=dict(classes=range(3)),
                 ),
             ]
         )
     
         with dask.config.set(scheduler="synchronous"):
 >           estimator = estimator.fit(iris_ds)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/bob/pipelines/tests/test_xarray.py:260: 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/bob/pipelines/xarray.py:551: in fit
     self._transform(ds, do_fit=True)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/bob/pipelines/xarray.py:510: in _transform
     block.estimator_ = _fit(*args, block=block)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/bob/pipelines/xarray.py:243: in _fit
     block.estimator.fit(*args, **block.fit_kwargs)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask_ml/wrappers.py:495: in fit
     self._fit_for_estimator(estimator, X, y, **fit_kwargs)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask_ml/wrappers.py:479: in _fit_for_estimator
     result = fit(
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask_ml/_partial.py:139: in fit
     return value.compute()
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/base.py:288: in compute
     (result,) = compute(self, traverse=False, **kwargs)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/base.py:571: in compute
     results = schedule(dsk, keys, **kwargs)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:553: in get_sync
     return get_async(
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:496: in get_async
     for key, res_info, failed in queue_get(queue).result():
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/concurrent/futures/_base.py:437: in result
     return self.__get_result()
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/concurrent/futures/_base.py:389: in __get_result
     raise self._exception
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:538: in submit
     fut.set_result(fn(*args, **kwargs))
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:234: in batch_execute_tasks
     return [execute_task(*a) for a in it]
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:234: in <listcomp>
     return [execute_task(*a) for a in it]
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:225: in execute_task
     result = pack_exception(e, dumps)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/local.py:220: in execute_task
     result = _execute_task(task, data)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask/core.py:119: in _execute_task
     return func(*(_execute_task(a, cache) for a in args))
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/dask_ml/_partial.py:17: in _partial_fit
     model.partial_fit(x, y, **kwargs)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/sklearn/linear_model/_stochastic_gradient.py:841: in partial_fit
     return self._partial_fit(
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/sklearn/linear_model/_stochastic_gradient.py:572: in _partial_fit
     X, y = self._validate_data(
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/sklearn/base.py:576: in _validate_data
     X, y = check_X_y(X, y, **check_params)
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/sklearn/utils/validation.py:956: in check_X_y
     X = check_array(
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
 array = ('pca.transform-98eb05bfe3c4e482e6896d5f42ca3d48', 1, 0)
 accept_sparse = 'csr'
     def check_array(
         array,
         accept_sparse=False,
         *,
         accept_large_sparse=True,
         dtype="numeric",
         order=None,
         copy=False,
         force_all_finite=True,
         ensure_2d=True,
         allow_nd=False,
         ensure_min_samples=1,
         ensure_min_features=1,
         estimator=None,
     ):
     
         """Input validation on an array, list, sparse matrix or similar.
     
         By default, the input is checked to be a non-empty 2D array containing
         only finite values. If the dtype of the array is object, attempt
         converting to float, raising on failure.
     
         Parameters
         ----------
         array : object
             Input object to check / convert.
     
         accept_sparse : str, bool or list/tuple of str, default=False
             String[s] representing allowed sparse matrix formats, such as 'csc',
             'csr', etc. If the input is sparse but not in the allowed format,
             it will be converted to the first listed format. True allows the input
             to be any format. False means that a sparse matrix input will
             raise an error.
     
         accept_large_sparse : bool, default=True
             If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by
             accept_sparse, accept_large_sparse=False will cause it to be accepted
             only if its indices are stored with a 32-bit dtype.
     
             .. versionadded:: 0.20
     
         dtype : 'numeric', type, list of type or None, default='numeric'
             Data type of result. If None, the dtype of the input is preserved.
             If "numeric", dtype is preserved unless array.dtype is object.
             If dtype is a list of types, conversion on the first type is only
             performed if the dtype of the input is not in the list.
     
         order : {'F', 'C'} or None, default=None
             Whether an array will be forced to be fortran or c-style.
             When order is None (default), then if copy=False, nothing is ensured
             about the memory layout of the output array; otherwise (copy=True)
             the memory layout of the returned array is kept as close as possible
             to the original array.
     
         copy : bool, default=False
             Whether a forced copy will be triggered. If copy=False, a copy might
             be triggered by a conversion.
     
         force_all_finite : bool or 'allow-nan', default=True
             Whether to raise an error on np.inf, np.nan, pd.NA in array. The
             possibilities are:
     
             - True: Force all values of array to be finite.
             - False: accepts np.inf, np.nan, pd.NA in array.
             - 'allow-nan': accepts only np.nan and pd.NA values in array. Values
               cannot be infinite.
     
             .. versionadded:: 0.20
                ``force_all_finite`` accepts the string ``'allow-nan'``.
     
             .. versionchanged:: 0.23
                Accepts `pd.NA` and converts it into `np.nan`
     
         ensure_2d : bool, default=True
             Whether to raise a value error if array is not 2D.
     
         allow_nd : bool, default=False
             Whether to allow array.ndim > 2.
     
         ensure_min_samples : int, default=1
             Make sure that the array has a minimum number of samples in its first
             axis (rows for a 2D array). Setting to 0 disables this check.
     
         ensure_min_features : int, default=1
             Make sure that the 2D array has some minimum number of features
             (columns). The default value of 1 rejects empty datasets.
             This check is only enforced when the input data has effectively 2
             dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0
             disables this check.
     
         estimator : str or estimator instance, default=None
             If passed, include the name of the estimator in warning messages.
     
         Returns
         -------
         array_converted : object
             The converted and validated array.
         """
         if isinstance(array, np.matrix):
             warnings.warn(
                 "np.matrix usage is deprecated in 1.0 and will raise a TypeError "
                 "in 1.2. Please convert to a numpy array with np.asarray. For "
                 "more information see: "
                 "https://numpy.org/doc/stable/reference/generated/numpy.matrix.html",  # noqa
                 FutureWarning,
             )
     
         # store reference to original array to check if copy is needed when
         # function returns
         array_orig = array
     
         # store whether originally we wanted numeric dtype
         dtype_numeric = isinstance(dtype, str) and dtype == "numeric"
     
         dtype_orig = getattr(array, "dtype", None)
         if not hasattr(dtype_orig, "kind"):
             # not a data type (e.g. a column named dtype in a pandas DataFrame)
             dtype_orig = None
     
         # check if the object contains several dtypes (typically a pandas
         # DataFrame), and store them. If not, store None.
         dtypes_orig = None
         has_pd_integer_array = False
         if hasattr(array, "dtypes") and hasattr(array.dtypes, "__array__"):
             # throw warning if columns are sparse. If all columns are sparse, then
             # array.sparse exists and sparsity will be preserved (later).
             with suppress(ImportError):
                 from pandas.api.types import is_sparse
     
                 if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
                     warnings.warn(
                         "pandas.DataFrame with sparse columns found."
                         "It will be converted to a dense numpy array."
                     )
     
             dtypes_orig = list(array.dtypes)
             # pandas boolean dtype __array__ interface coerces bools to objects
             for i, dtype_iter in enumerate(dtypes_orig):
                 if dtype_iter.kind == "b":
                     dtypes_orig[i] = np.dtype(object)
                 elif dtype_iter.name.startswith(("Int", "UInt")):
                     # name looks like an Integer Extension Array, now check for
                     # the dtype
                     with suppress(ImportError):
                         from pandas import (
                             Int8Dtype,
                             Int16Dtype,
                             Int32Dtype,
                             Int64Dtype,
                             UInt8Dtype,
                             UInt16Dtype,
                             UInt32Dtype,
                             UInt64Dtype,
                         )
     
                         if isinstance(
                             dtype_iter,
                             (
                                 Int8Dtype,
                                 Int16Dtype,
                                 Int32Dtype,
                                 Int64Dtype,
                                 UInt8Dtype,
                                 UInt16Dtype,
                                 UInt32Dtype,
                                 UInt64Dtype,
                             ),
                         ):
                             has_pd_integer_array = True
     
             if all(isinstance(dtype, np.dtype) for dtype in dtypes_orig):
                 dtype_orig = np.result_type(*dtypes_orig)
     
         if dtype_numeric:
             if dtype_orig is not None and dtype_orig.kind == "O":
                 # if input is object, convert to float.
                 dtype = np.float64
             else:
                 dtype = None
     
         if isinstance(dtype, (list, tuple)):
             if dtype_orig is not None and dtype_orig in dtype:
                 # no dtype conversion required
                 dtype = None
             else:
                 # dtype conversion required. Let's select the first element of the
                 # list of accepted types.
                 dtype = dtype[0]
     
         if has_pd_integer_array:
             # If there are any pandas integer extension arrays,
             array = array.astype(dtype)
     
         if force_all_finite not in (True, False, "allow-nan"):
             raise ValueError(
                 'force_all_finite should be a bool or "allow-nan". Got {!r} instead'.format(
                     force_all_finite
                 )
             )
     
         if estimator is not None:
             if isinstance(estimator, str):
                 estimator_name = estimator
             else:
                 estimator_name = estimator.__class__.__name__
         else:
             estimator_name = "Estimator"
         context = " by %s" % estimator_name if estimator is not None else ""
     
         # When all dataframe columns are sparse, convert to a sparse array
         if hasattr(array, "sparse") and array.ndim > 1:
             # DataFrame.sparse only supports `to_coo`
             array = array.sparse.to_coo()
             if array.dtype == np.dtype("object"):
                 unique_dtypes = set([dt.subtype.name for dt in array_orig.dtypes])
                 if len(unique_dtypes) > 1:
                     raise ValueError(
                         "Pandas DataFrame with mixed sparse extension arrays "
                         "generated a sparse matrix with object dtype which "
                         "can not be converted to a scipy sparse matrix."
                         "Sparse extension arrays should all have the same "
                         "numeric type."
                     )
     
         if sp.issparse(array):
             _ensure_no_complex_data(array)
             array = _ensure_sparse_format(
                 array,
                 accept_sparse=accept_sparse,
                 dtype=dtype,
                 copy=copy,
                 force_all_finite=force_all_finite,
                 accept_large_sparse=accept_large_sparse,
             )
         else:
             # If np.array(..) gives ComplexWarning, then we convert the warning
             # to an error. This is needed because specifying a non complex
             # dtype to the function converts complex to real dtype,
             # thereby passing the test made in the lines following the scope
             # of warnings context manager.
             with warnings.catch_warnings():
                 try:
                     warnings.simplefilter("error", ComplexWarning)
                     if dtype is not None and np.dtype(dtype).kind in "iu":
                         # Conversion float -> int should not contain NaN or
                         # inf (numpy#14412). We cannot use casting='safe' because
                         # then conversion float -> int would be disallowed.
                         array = np.asarray(array, order=order)
                         if array.dtype.kind == "f":
                             _assert_all_finite(array, allow_nan=False, msg_dtype=dtype)
                         array = array.astype(dtype, casting="unsafe", copy=False)
                     else:
 >                       array = np.asarray(array, order=order, dtype=dtype)
 E                       ValueError: could not convert string to float: 'pca.transform-98eb05bfe3c4e482e6896d5f42ca3d48'
 ../_test_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_place/lib/python3.8/site-packages/sklearn/utils/validation.py:738: ValueError
Edited by Tiago de Freitas Pereira