library packages

2024-09-28 22:56:00 -07:00
parent 64d9b78b3a
commit 1973934e95
4893 changed files with 1184173 additions and 31 deletions

@@ -0,0 +1,16 @@
import pytest
from numpy.testing import assert_allclose
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0
@pytest.mark.parametrize("seed", range(100))
def test_init_arpack_v0(seed):
    # check that the initialization is a sampling from a uniform distribution
    # where we can fix the random state
size = 1000
v0 = _init_arpack_v0(size, seed)
rng = check_random_state(seed)
assert_allclose(v0, rng.uniform(-1, 1, size=size))
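
For context, a minimal sketch of the helper under test, inferred from the assertion above (an illustrative re-implementation, not the actual source of sklearn.utils._arpack): it draws a reproducible ARPACK starting vector uniformly from [-1, 1).

from sklearn.utils import check_random_state

def init_arpack_v0_sketch(size, random_state):
    # Hypothetical mirror of _init_arpack_v0 for illustration: a seeded
    # uniform sample on [-1, 1), exactly what the test asserts.
    rng = check_random_state(random_state)
    return rng.uniform(-1, 1, size=size)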


@@ -0,0 +1,580 @@
import re
from functools import partial
import numpy
import pytest
from numpy.testing import assert_allclose
from sklearn._config import config_context
from sklearn.base import BaseEstimator
from sklearn.utils._array_api import (
_ArrayAPIWrapper,
_asarray_with_order,
_atol_for_type,
_average,
_convert_to_numpy,
_count_nonzero,
_estimator_with_converted_arrays,
_is_numpy_namespace,
_nanmax,
_nanmin,
_NumPyAPIWrapper,
_ravel,
device,
get_namespace,
get_namespace_and_device,
indexing_dtype,
supported_float_dtypes,
yield_namespace_device_dtype_combinations,
)
from sklearn.utils._testing import (
_array_api_for_tests,
skip_if_array_api_compat_not_configured,
)
from sklearn.utils.fixes import _IS_32BIT, CSR_CONTAINERS, np_version, parse_version
@pytest.mark.parametrize("X", [numpy.asarray([1, 2, 3]), [1, 2, 3]])
def test_get_namespace_ndarray_default(X):
"""Check that get_namespace returns NumPy wrapper"""
xp_out, is_array_api_compliant = get_namespace(X)
assert isinstance(xp_out, _NumPyAPIWrapper)
assert not is_array_api_compliant
def test_get_namespace_ndarray_creation_device():
"""Check expected behavior with device and creation functions."""
X = numpy.asarray([1, 2, 3])
xp_out, _ = get_namespace(X)
full_array = xp_out.full(10, fill_value=2.0, device="cpu")
assert_allclose(full_array, [2.0] * 10)
with pytest.raises(ValueError, match="Unsupported device"):
xp_out.zeros(10, device="cuda")
@skip_if_array_api_compat_not_configured
def test_get_namespace_ndarray_with_dispatch():
"""Test get_namespace on NumPy ndarrays."""
array_api_compat = pytest.importorskip("array_api_compat")
X_np = numpy.asarray([[1, 2, 3]])
with config_context(array_api_dispatch=True):
xp_out, is_array_api_compliant = get_namespace(X_np)
assert is_array_api_compliant
if np_version >= parse_version("2.0.0"):
# NumPy 2.0+ is an array API compliant library.
assert xp_out is numpy
else:
# Older NumPy versions require the compatibility layer.
assert xp_out is array_api_compat.numpy
@skip_if_array_api_compat_not_configured
def test_get_namespace_array_api():
"""Test get_namespace for ArrayAPI arrays."""
xp = pytest.importorskip("array_api_strict")
X_np = numpy.asarray([[1, 2, 3]])
X_xp = xp.asarray(X_np)
with config_context(array_api_dispatch=True):
xp_out, is_array_api_compliant = get_namespace(X_xp)
assert is_array_api_compliant
with pytest.raises(TypeError):
xp_out, is_array_api_compliant = get_namespace(X_xp, X_np)
class _AdjustableNameAPITestWrapper(_ArrayAPIWrapper):
"""API wrapper that has an adjustable name. Used for testing."""
def __init__(self, array_namespace, name):
super().__init__(array_namespace=array_namespace)
self.__name__ = name
def test_array_api_wrapper_astype():
"""Test _ArrayAPIWrapper for ArrayAPIs that is not NumPy."""
array_api_strict = pytest.importorskip("array_api_strict")
xp_ = _AdjustableNameAPITestWrapper(array_api_strict, "array_api_strict")
xp = _ArrayAPIWrapper(xp_)
X = xp.asarray(([[1, 2, 3], [3, 4, 5]]), dtype=xp.float64)
X_converted = xp.astype(X, xp.float32)
assert X_converted.dtype == xp.float32
X_converted = xp.asarray(X, dtype=xp.float32)
assert X_converted.dtype == xp.float32
@pytest.mark.parametrize("array_api", ["numpy", "array_api_strict"])
def test_asarray_with_order(array_api):
"""Test _asarray_with_order passes along order for NumPy arrays."""
xp = pytest.importorskip(array_api)
X = xp.asarray([1.2, 3.4, 5.1])
X_new = _asarray_with_order(X, order="F", xp=xp)
X_new_np = numpy.asarray(X_new)
assert X_new_np.flags["F_CONTIGUOUS"]
def test_asarray_with_order_ignored():
"""Test _asarray_with_order ignores order for Generic ArrayAPI."""
xp = pytest.importorskip("array_api_strict")
xp_ = _AdjustableNameAPITestWrapper(xp, "array_api_strict")
X = numpy.asarray([[1.2, 3.4, 5.1], [3.4, 5.5, 1.2]], order="C")
X = xp_.asarray(X)
X_new = _asarray_with_order(X, order="F", xp=xp_)
X_new_np = numpy.asarray(X_new)
assert X_new_np.flags["C_CONTIGUOUS"]
assert not X_new_np.flags["F_CONTIGUOUS"]
@pytest.mark.parametrize(
"array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations()
)
@pytest.mark.parametrize(
"weights, axis, normalize, expected",
[
# normalize = True
(None, None, True, 3.5),
(None, 0, True, [2.5, 3.5, 4.5]),
(None, 1, True, [2, 5]),
([True, False], 0, True, [1, 2, 3]), # boolean weights
([True, True, False], 1, True, [1.5, 4.5]), # boolean weights
([0.4, 0.1], 0, True, [1.6, 2.6, 3.6]),
([0.4, 0.2, 0.2], 1, True, [1.75, 4.75]),
([1, 2], 0, True, [3, 4, 5]),
([1, 1, 2], 1, True, [2.25, 5.25]),
([[1, 2, 3], [1, 2, 3]], 0, True, [2.5, 3.5, 4.5]),
([[1, 2, 1], [2, 2, 2]], 1, True, [2, 5]),
# normalize = False
(None, None, False, 21),
(None, 0, False, [5, 7, 9]),
(None, 1, False, [6, 15]),
([True, False], 0, False, [1, 2, 3]), # boolean weights
([True, True, False], 1, False, [3, 9]), # boolean weights
([0.4, 0.1], 0, False, [0.8, 1.3, 1.8]),
([0.4, 0.2, 0.2], 1, False, [1.4, 3.8]),
([1, 2], 0, False, [9, 12, 15]),
([1, 1, 2], 1, False, [9, 21]),
([[1, 2, 3], [1, 2, 3]], 0, False, [5, 14, 27]),
([[1, 2, 1], [2, 2, 2]], 1, False, [8, 30]),
],
)
def test_average(
array_namespace, device_, dtype_name, weights, axis, normalize, expected
):
xp = _array_api_for_tests(array_namespace, device_)
array_in = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype_name)
array_in = xp.asarray(array_in, device=device_)
if weights is not None:
weights = numpy.asarray(weights, dtype=dtype_name)
weights = xp.asarray(weights, device=device_)
with config_context(array_api_dispatch=True):
result = _average(array_in, axis=axis, weights=weights, normalize=normalize)
if np_version < parse_version("2.0.0") or np_version >= parse_version("2.1.0"):
# NumPy 2.0 has a problem with the device attribute of scalar arrays:
# https://github.com/numpy/numpy/issues/26850
assert device(array_in) == device(result)
result = _convert_to_numpy(result, xp)
assert_allclose(result, expected, atol=_atol_for_type(dtype_name))
@pytest.mark.parametrize(
"array_namespace, device, dtype_name",
yield_namespace_device_dtype_combinations(include_numpy_namespaces=False),
)
def test_average_raises_with_wrong_dtype(array_namespace, device, dtype_name):
xp = _array_api_for_tests(array_namespace, device)
array_in = numpy.asarray([2, 0], dtype=dtype_name) + 1j * numpy.asarray(
[4, 3], dtype=dtype_name
)
complex_type_name = array_in.dtype.name
if not hasattr(xp, complex_type_name):
# This is the case for cupy as of March 2024 for instance.
pytest.skip(f"{array_namespace} does not support {complex_type_name}")
array_in = xp.asarray(array_in, device=device)
err_msg = "Complex floating point values are not supported by average."
with (
config_context(array_api_dispatch=True),
pytest.raises(NotImplementedError, match=err_msg),
):
_average(array_in)
@pytest.mark.parametrize(
"array_namespace, device, dtype_name",
yield_namespace_device_dtype_combinations(include_numpy_namespaces=True),
)
@pytest.mark.parametrize(
"axis, weights, error, error_msg",
(
(
None,
[1, 2],
TypeError,
"Axis must be specified",
),
(
0,
[[1, 2]],
# NumPy 2 raises ValueError, NumPy 1 raises TypeError
(ValueError, TypeError),
"weights", # the message is different for NumPy 1 and 2...
),
(
0,
[1, 2, 3, 4],
ValueError,
"weights",
),
(0, [-1, 1], ZeroDivisionError, "Weights sum to zero, can't be normalized"),
),
)
def test_average_raises_with_invalid_parameters(
array_namespace, device, dtype_name, axis, weights, error, error_msg
):
xp = _array_api_for_tests(array_namespace, device)
array_in = numpy.asarray([[1, 2, 3], [4, 5, 6]], dtype=dtype_name)
array_in = xp.asarray(array_in, device=device)
weights = numpy.asarray(weights, dtype=dtype_name)
weights = xp.asarray(weights, device=device)
with config_context(array_api_dispatch=True), pytest.raises(error, match=error_msg):
_average(array_in, axis=axis, weights=weights)
def test_device_raises_if_no_input():
err_msg = re.escape(
"At least one input array expected after filtering with remove_none=True, "
"remove_types=[str]. Got none. Original types: []."
)
with pytest.raises(ValueError, match=err_msg):
device()
err_msg = re.escape(
"At least one input array expected after filtering with remove_none=True, "
"remove_types=[str]. Got none. Original types: [NoneType, str]."
)
with pytest.raises(ValueError, match=err_msg):
device(None, "name")
def test_device_inspection():
class Device:
def __init__(self, name):
self.name = name
def __eq__(self, device):
return self.name == device.name
def __hash__(self):
raise TypeError("Device object is not hashable")
def __str__(self):
return self.name
class Array:
def __init__(self, device_name):
self.device = Device(device_name)
    # Sanity check: ensure our Device mock class is non-hashable, to
    # accurately account for non-hashable device objects in some array
    # libraries, because of which the `device` inspection function shouldn't
    # make use of hash lookup tables (in particular, should not use `set`)
with pytest.raises(TypeError):
hash(Array("device").device)
# Test raise if on different devices
err_msg = "Input arrays use different devices: cpu, mygpu"
with pytest.raises(ValueError, match=err_msg):
device(Array("cpu"), Array("mygpu"))
# Test expected value is returned otherwise
array1 = Array("device")
array2 = Array("device")
assert array1.device == device(array1)
assert array1.device == device(array1, array2)
assert array1.device == device(array1, array1, array2)
# TODO: add cupy and cupy.array_api to the list of libraries once the
# the following upstream issue has been fixed:
# https://github.com/cupy/cupy/issues/8180
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["numpy", "array_api_strict", "torch"])
@pytest.mark.parametrize(
"X,reduction,expected",
[
([1, 2, numpy.nan], _nanmin, 1),
([1, -2, -numpy.nan], _nanmin, -2),
([numpy.inf, numpy.inf], _nanmin, numpy.inf),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmin, axis=0),
[1.0, 2.0, 3.0],
),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmin, axis=1),
[1.0, numpy.nan, 4.0],
),
([1, 2, numpy.nan], _nanmax, 2),
        ([1, -2, -numpy.nan], _nanmax, 1),
([-numpy.inf, -numpy.inf], _nanmax, -numpy.inf),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmax, axis=0),
[4.0, 5.0, 6.0],
),
(
[[1, 2, 3], [numpy.nan, numpy.nan, numpy.nan], [4, 5, 6.0]],
partial(_nanmax, axis=1),
[3.0, numpy.nan, 6.0],
),
],
)
def test_nan_reductions(library, X, reduction, expected):
"""Check NaN reductions like _nanmin and _nanmax"""
xp = pytest.importorskip(library)
with config_context(array_api_dispatch=True):
result = reduction(xp.asarray(X))
result = _convert_to_numpy(result, xp)
assert_allclose(result, expected)
@pytest.mark.parametrize(
"namespace, _device, _dtype", yield_namespace_device_dtype_combinations()
)
def test_ravel(namespace, _device, _dtype):
xp = _array_api_for_tests(namespace, _device)
array = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
array_xp = xp.asarray(array, device=_device)
with config_context(array_api_dispatch=True):
result = _ravel(array_xp)
result = _convert_to_numpy(result, xp)
expected = numpy.ravel(array, order="C")
assert_allclose(expected, result)
if _is_numpy_namespace(xp):
assert numpy.asarray(result).flags["C_CONTIGUOUS"]
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize("library", ["cupy", "torch", "cupy.array_api"])
def test_convert_to_numpy_gpu(library): # pragma: nocover
"""Check convert_to_numpy for GPU backed libraries."""
xp = pytest.importorskip(library)
if library == "torch":
if not xp.backends.cuda.is_built():
pytest.skip("test requires cuda")
X_gpu = xp.asarray([1.0, 2.0, 3.0], device="cuda")
else:
X_gpu = xp.asarray([1.0, 2.0, 3.0])
X_cpu = _convert_to_numpy(X_gpu, xp=xp)
expected_output = numpy.asarray([1.0, 2.0, 3.0])
assert_allclose(X_cpu, expected_output)
def test_convert_to_numpy_cpu():
"""Check convert_to_numpy for PyTorch CPU arrays."""
torch = pytest.importorskip("torch")
X_torch = torch.asarray([1.0, 2.0, 3.0], device="cpu")
X_cpu = _convert_to_numpy(X_torch, xp=torch)
expected_output = numpy.asarray([1.0, 2.0, 3.0])
assert_allclose(X_cpu, expected_output)
class SimpleEstimator(BaseEstimator):
def fit(self, X, y=None):
self.X_ = X
self.n_features_ = X.shape[0]
return self
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize(
"array_namespace, converter",
[
("torch", lambda array: array.cpu().numpy()),
("array_api_strict", lambda array: numpy.asarray(array)),
("cupy.array_api", lambda array: array._array.get()),
],
)
def test_convert_estimator_to_ndarray(array_namespace, converter):
"""Convert estimator attributes to ndarray."""
xp = pytest.importorskip(array_namespace)
X = xp.asarray([[1.3, 4.5]])
est = SimpleEstimator().fit(X)
new_est = _estimator_with_converted_arrays(est, converter)
assert isinstance(new_est.X_, numpy.ndarray)
@skip_if_array_api_compat_not_configured
def test_convert_estimator_to_array_api():
"""Convert estimator attributes to ArrayAPI arrays."""
xp = pytest.importorskip("array_api_strict")
X_np = numpy.asarray([[1.3, 4.5]])
est = SimpleEstimator().fit(X_np)
new_est = _estimator_with_converted_arrays(est, lambda array: xp.asarray(array))
assert hasattr(new_est.X_, "__array_namespace__")
def test_reshape_behavior():
"""Check reshape behavior with copy and is strict with non-tuple shape."""
xp = _NumPyAPIWrapper()
X = xp.asarray([[1, 2, 3], [3, 4, 5]])
X_no_copy = xp.reshape(X, (-1,), copy=False)
assert X_no_copy.base is X
X_copy = xp.reshape(X, (6, 1), copy=True)
assert X_copy.base is not X.base
with pytest.raises(TypeError, match="shape must be a tuple"):
xp.reshape(X, -1)
@pytest.mark.parametrize("wrapper", [_ArrayAPIWrapper, _NumPyAPIWrapper])
def test_get_namespace_array_api_isdtype(wrapper):
"""Test isdtype implementation from _ArrayAPIWrapper and _NumPyAPIWrapper."""
if wrapper == _ArrayAPIWrapper:
xp_ = pytest.importorskip("array_api_strict")
xp = _ArrayAPIWrapper(xp_)
else:
xp = _NumPyAPIWrapper()
assert xp.isdtype(xp.float32, xp.float32)
assert xp.isdtype(xp.float32, "real floating")
assert xp.isdtype(xp.float64, "real floating")
assert not xp.isdtype(xp.int32, "real floating")
for dtype in supported_float_dtypes(xp):
assert xp.isdtype(dtype, "real floating")
assert xp.isdtype(xp.bool, "bool")
assert not xp.isdtype(xp.float32, "bool")
assert xp.isdtype(xp.int16, "signed integer")
assert not xp.isdtype(xp.uint32, "signed integer")
assert xp.isdtype(xp.uint16, "unsigned integer")
assert not xp.isdtype(xp.int64, "unsigned integer")
assert xp.isdtype(xp.int64, "numeric")
assert xp.isdtype(xp.float32, "numeric")
assert xp.isdtype(xp.uint32, "numeric")
assert not xp.isdtype(xp.float32, "complex floating")
if wrapper == _NumPyAPIWrapper:
assert not xp.isdtype(xp.int8, "complex floating")
assert xp.isdtype(xp.complex64, "complex floating")
assert xp.isdtype(xp.complex128, "complex floating")
with pytest.raises(ValueError, match="Unrecognized data type"):
assert xp.isdtype(xp.int16, "unknown")
@pytest.mark.parametrize(
"namespace, _device, _dtype", yield_namespace_device_dtype_combinations()
)
def test_indexing_dtype(namespace, _device, _dtype):
xp = _array_api_for_tests(namespace, _device)
if _IS_32BIT:
assert indexing_dtype(xp) == xp.int32
else:
assert indexing_dtype(xp) == xp.int64
def test_get_namespace_and_device():
# Use torch as a library with custom Device objects:
torch = pytest.importorskip("torch")
xp_torch = pytest.importorskip("array_api_compat.torch")
some_torch_tensor = torch.arange(3, device="cpu")
some_numpy_array = numpy.arange(3)
# When dispatch is disabled, get_namespace_and_device should return the
# default NumPy wrapper namespace and no device. Our code will handle such
# inputs via the usual __array__ interface without attempting to dispatch
# via the array API.
namespace, is_array_api, device = get_namespace_and_device(some_torch_tensor)
assert namespace is get_namespace(some_numpy_array)[0]
assert not is_array_api
assert device is None
# Otherwise, expose the torch namespace and device via array API compat
# wrapper.
with config_context(array_api_dispatch=True):
namespace, is_array_api, device = get_namespace_and_device(some_torch_tensor)
assert namespace is xp_torch
assert is_array_api
assert device == some_torch_tensor.device
@pytest.mark.parametrize(
"array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations()
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("axis", [0, 1, None, -1, -2])
@pytest.mark.parametrize("sample_weight_type", [None, "int", "float"])
def test_count_nonzero(
array_namespace, device_, dtype_name, csr_container, axis, sample_weight_type
):
from sklearn.utils.sparsefuncs import count_nonzero as sparse_count_nonzero
xp = _array_api_for_tests(array_namespace, device_)
array = numpy.array([[0, 3, 0], [2, -1, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]])
if sample_weight_type == "int":
sample_weight = numpy.asarray([1, 2, 2, 3, 1])
elif sample_weight_type == "float":
sample_weight = numpy.asarray([0.5, 1.5, 0.8, 3.2, 2.4], dtype=dtype_name)
else:
sample_weight = None
expected = sparse_count_nonzero(
csr_container(array), axis=axis, sample_weight=sample_weight
)
array_xp = xp.asarray(array, device=device_)
with config_context(array_api_dispatch=True):
result = _count_nonzero(
array_xp, xp=xp, device=device_, axis=axis, sample_weight=sample_weight
)
assert_allclose(_convert_to_numpy(result, xp=xp), expected)
if np_version < parse_version("2.0.0") or np_version >= parse_version("2.1.0"):
# NumPy 2.0 has a problem with the device attribute of scalar arrays:
# https://github.com/numpy/numpy/issues/26850
assert device(array_xp) == device(result)
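
A minimal usage sketch of the dispatch toggle exercised throughout this file, assuming the optional array_api_compat dependency is available (NumPy >= 2.0 is array API compliant on its own):

import numpy
from sklearn._config import config_context
from sklearn.utils._array_api import get_namespace

X = numpy.asarray([1.0, 2.0, 3.0])
xp, is_compliant = get_namespace(X)  # _NumPyAPIWrapper, is_compliant is False

with config_context(array_api_dispatch=True):
    # With dispatch enabled, the array's own namespace is returned
    # (numpy itself on NumPy 2.0+, else array_api_compat.numpy).
    xp, is_compliant = get_namespace(X)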


@@ -0,0 +1,40 @@
import numpy as np
import pytest
from sklearn.utils._testing import assert_allclose
from sklearn.utils.arrayfuncs import _all_with_any_reduction_axis_1, min_pos
def test_min_pos():
# Check that min_pos returns a positive value and that it's consistent
# between float and double
X = np.random.RandomState(0).randn(100)
min_double = min_pos(X)
min_float = min_pos(X.astype(np.float32))
assert_allclose(min_double, min_float)
assert min_double >= 0
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_min_pos_no_positive(dtype):
# Check that the return value of min_pos is the maximum representable
# value of the input dtype when all input elements are <= 0 (#19328)
X = np.full(100, -1.0).astype(dtype, copy=False)
assert min_pos(X) == np.finfo(dtype).max
@pytest.mark.parametrize(
"dtype", [np.int16, np.int32, np.int64, np.float32, np.float64]
)
@pytest.mark.parametrize("value", [0, 1.5, -1])
def test_all_with_any_reduction_axis_1(dtype, value):
    # Check that the return value is False when there is no row equal to `value`
X = np.arange(12, dtype=dtype).reshape(3, 4)
assert not _all_with_any_reduction_axis_1(X, value=value)
# Make a row equal to `value`
X[1, :] = value
assert _all_with_any_reduction_axis_1(X, value=value)
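
To make the contract concrete, a short usage sketch consistent with the tests above (the output values are what the tests imply, not independently documented here):

import numpy as np
from sklearn.utils.arrayfuncs import min_pos

X = np.array([-1.0, 0.0, 0.5, 2.0])
min_pos(X)                      # 0.5: the smallest strictly positive entry
min_pos(np.array([-1.0, 0.0]))  # np.finfo(np.float64).max: nothing positive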


@@ -0,0 +1,32 @@
import warnings
import numpy as np
import pytest
from sklearn.utils import Bunch
def test_bunch_attribute_deprecation():
"""Check that bunch raises deprecation message with `__getattr__`."""
bunch = Bunch()
values = np.asarray([1, 2, 3])
msg = (
"Key: 'values', is deprecated in 1.3 and will be "
"removed in 1.5. Please use 'grid_values' instead"
)
bunch._set_deprecated(
values, new_key="grid_values", deprecated_key="values", warning_message=msg
)
with warnings.catch_warnings():
# Does not warn for "grid_values"
warnings.simplefilter("error")
v = bunch["grid_values"]
assert v is values
with pytest.warns(FutureWarning, match=msg):
# Warns for "values"
v = bunch["values"]
assert v is values
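
For reference, Bunch is a dict subclass that also exposes its keys as attributes; a minimal sketch:

from sklearn.utils import Bunch

b = Bunch(a=1)
assert b.a == b["a"] == 1  # key and attribute access are interchangeable
b.b = 2                    # attribute assignment also sets the key
assert b["b"] == 2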


@@ -0,0 +1,73 @@
import warnings
from itertools import chain
import pytest
from sklearn import config_context
from sklearn.utils._chunking import gen_even_slices, get_chunk_n_rows
from sklearn.utils._testing import assert_array_equal
def test_gen_even_slices():
    # check that gen_even_slices covers all samples
    some_range = range(10)
    joined_range = list(chain(*[some_range[sl] for sl in gen_even_slices(10, 3)]))
assert_array_equal(some_range, joined_range)
@pytest.mark.parametrize(
("row_bytes", "max_n_rows", "working_memory", "expected"),
[
(1024, None, 1, 1024),
(1024, None, 0.99999999, 1023),
(1023, None, 1, 1025),
(1025, None, 1, 1023),
(1024, None, 2, 2048),
(1024, 7, 1, 7),
(1024 * 1024, None, 1, 1),
],
)
def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory, expected):
with warnings.catch_warnings():
warnings.simplefilter("error", UserWarning)
actual = get_chunk_n_rows(
row_bytes=row_bytes,
max_n_rows=max_n_rows,
working_memory=working_memory,
)
assert actual == expected
assert type(actual) is type(expected)
with config_context(working_memory=working_memory):
with warnings.catch_warnings():
warnings.simplefilter("error", UserWarning)
actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows)
assert actual == expected
assert type(actual) is type(expected)
def test_get_chunk_n_rows_warns():
"""Check that warning is raised when working_memory is too low."""
row_bytes = 1024 * 1024 + 1
max_n_rows = None
working_memory = 1
expected = 1
warn_msg = (
"Could not adhere to working_memory config. Currently 1MiB, 2MiB required."
)
with pytest.warns(UserWarning, match=warn_msg):
actual = get_chunk_n_rows(
row_bytes=row_bytes,
max_n_rows=max_n_rows,
working_memory=working_memory,
)
assert actual == expected
assert type(actual) is type(expected)
with config_context(working_memory=working_memory):
with pytest.warns(UserWarning, match=warn_msg):
actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows)
assert actual == expected
assert type(actual) is type(expected)
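
A concrete sketch of the slicing helper, consistent with the test above: gen_even_slices(n, n_packs) yields n_packs contiguous slices that partition range(n) as evenly as possible.

from sklearn.utils._chunking import gen_even_slices

print(list(gen_even_slices(10, 3)))
# [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]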


@@ -0,0 +1,316 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._testing import assert_almost_equal, assert_array_almost_equal
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight
from sklearn.utils.fixes import CSC_CONTAINERS
def test_compute_class_weight():
# Test (and demo) compute_class_weight.
y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)
cw = compute_class_weight("balanced", classes=classes, y=y)
# total effect of samples is preserved
class_counts = np.bincount(y)[2:]
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert cw[0] < cw[1] < cw[2]
@pytest.mark.parametrize(
"y_type, class_weight, classes, err_msg",
[
(
"numeric",
"balanced",
np.arange(4),
"classes should have valid labels that are in y",
),
# Non-regression for https://github.com/scikit-learn/scikit-learn/issues/8312
(
"numeric",
{"label_not_present": 1.0},
np.arange(4),
r"The classes, \[0, 1, 2, 3\], are not in class_weight",
),
(
"numeric",
"balanced",
np.arange(2),
"classes should include all valid labels",
),
(
"numeric",
{0: 1.0, 1: 2.0},
np.arange(2),
"classes should include all valid labels",
),
(
"string",
{"dogs": 3, "cat": 2},
np.array(["dog", "cat"]),
r"The classes, \['dog'\], are not in class_weight",
),
],
)
def test_compute_class_weight_not_present(y_type, class_weight, classes, err_msg):
# Raise error when y does not contain all class labels
y = (
np.asarray([0, 0, 0, 1, 1, 2])
if y_type == "numeric"
else np.asarray(["dog", "cat", "dog"])
)
with pytest.raises(ValueError, match=err_msg):
compute_class_weight(class_weight, classes=classes, y=y)
def test_compute_class_weight_dict():
classes = np.arange(3)
class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
y = np.asarray([0, 0, 1, 2])
cw = compute_class_weight(class_weights, classes=classes, y=y)
    # When the user specifies class weights, compute_class_weight should just
    # return them.
assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)
# When a class weight is specified that isn't in classes, the weight is ignored
class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
cw = compute_class_weight(class_weights, classes=classes, y=y)
assert_allclose([1.0, 2.0, 3.0], cw)
class_weights = {-1: 5.0, 0: 4.0, 1: 2.0, 2: 3.0}
cw = compute_class_weight(class_weights, classes=classes, y=y)
assert_allclose([4.0, 2.0, 3.0], cw)
def test_compute_class_weight_invariance():
    # Test that results with class_weight="balanced" are invariant with
    # respect to class imbalance if the number of samples is identical.
# The test uses a balanced two class dataset with 100 datapoints.
# It creates three versions, one where class 1 is duplicated
# resulting in 150 points of class 1 and 50 of class 0,
# one where there are 50 points in class 1 and 150 in class 0,
# and one where there are 100 points of each class (this one is balanced
# again).
    # With balanced class weights, all three should give the same model.
X, y = make_blobs(centers=2, random_state=0)
# create dataset where class 1 is duplicated twice
X_1 = np.vstack([X] + [X[y == 1]] * 2)
y_1 = np.hstack([y] + [y[y == 1]] * 2)
# create dataset where class 0 is duplicated twice
X_0 = np.vstack([X] + [X[y == 0]] * 2)
y_0 = np.hstack([y] + [y[y == 0]] * 2)
# duplicate everything
X_ = np.vstack([X] * 2)
y_ = np.hstack([y] * 2)
# results should be identical
logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
assert_array_almost_equal(logreg.coef_, logreg0.coef_)
def test_compute_class_weight_balanced_negative():
# Test compute_class_weight when labels are negative
# Test with balanced class labels.
classes = np.array([-2, -1, 0])
y = np.asarray([-1, -1, 0, 0, -2, -2])
cw = compute_class_weight("balanced", classes=classes, y=y)
assert len(cw) == len(classes)
assert_array_almost_equal(cw, np.array([1.0, 1.0, 1.0]))
# Test with unbalanced class labels.
y = np.asarray([-1, 0, 0, -2, -2, -2])
cw = compute_class_weight("balanced", classes=classes, y=y)
assert len(cw) == len(classes)
class_counts = np.bincount(y + 2)
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert_array_almost_equal(cw, [2.0 / 3, 2.0, 1.0])
def test_compute_class_weight_balanced_unordered():
# Test compute_class_weight when classes are unordered
classes = np.array([1, 0, 3])
y = np.asarray([1, 0, 0, 3, 3, 3])
cw = compute_class_weight("balanced", classes=classes, y=y)
class_counts = np.bincount(y)[classes]
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert_array_almost_equal(cw, [2.0, 1.0, 2.0 / 3])
def test_compute_class_weight_default():
# Test for the case where no weight is given for a present class.
# Current behaviour is to assign the unweighted classes a weight of 1.
y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)
classes_len = len(classes)
    # Test for unspecified weights
cw = compute_class_weight(None, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, np.ones(3))
# Tests for partly specified weights
cw = compute_class_weight({2: 1.5}, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, [1.5, 1.0, 1.0])
cw = compute_class_weight({2: 1.5, 4: 0.5}, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, [1.5, 1.0, 0.5])
def test_compute_sample_weight():
# Test (and demo) compute_sample_weight.
# Test with balanced classes
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with user-defined weights
sample_weight = compute_sample_weight({1: 2, 2: 1}, y)
assert_array_almost_equal(sample_weight, [2.0, 2.0, 2.0, 1.0, 1.0, 1.0])
# Test with column vector of balanced classes
y = np.asarray([[1], [1], [1], [2], [2], [2]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with unbalanced classes
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
sample_weight = compute_sample_weight("balanced", y)
expected_balanced = np.array(
[0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 2.3333]
)
assert_array_almost_equal(sample_weight, expected_balanced, decimal=4)
# Test with `None` weights
sample_weight = compute_sample_weight(None, y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with multi-output of balanced classes
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with multi-output with user-defined weights
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y)
assert_array_almost_equal(sample_weight, [2.0, 2.0, 2.0, 2.0, 2.0, 2.0])
# Test with multi-output of unbalanced classes
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, expected_balanced**2, decimal=3)
def test_compute_sample_weight_with_subsample():
# Test compute_sample_weight with subsamples specified.
# Test with balanced classes and all samples present
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with column vector of balanced classes and all samples present
y = np.asarray([[1], [1], [1], [2], [2], [2]])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
# Test with a subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=range(4))
assert_array_almost_equal(sample_weight, [2.0 / 3, 2.0 / 3, 2.0 / 3, 2.0, 2.0, 2.0])
# Test with a bootstrap subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=[0, 1, 1, 2, 2, 3])
expected_balanced = np.asarray([0.6, 0.6, 0.6, 3.0, 3.0, 3.0])
assert_array_almost_equal(sample_weight, expected_balanced)
# Test with a bootstrap subsample for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight("balanced", y, indices=[0, 1, 1, 2, 2, 3])
assert_array_almost_equal(sample_weight, expected_balanced**2)
# Test with a missing class
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0])
# Test with a missing class for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0])
@pytest.mark.parametrize(
"y_type, class_weight, indices, err_msg",
[
(
"single-output",
{1: 2, 2: 1},
range(4),
"The only valid class_weight for subsampling is 'balanced'.",
),
(
"multi-output",
{1: 2, 2: 1},
None,
"For multi-output, class_weight should be a list of dicts, or the string",
),
(
"multi-output",
[{1: 2, 2: 1}],
None,
r"Got 1 element\(s\) while having 2 outputs",
),
],
)
def test_compute_sample_weight_errors(y_type, class_weight, indices, err_msg):
    # Test that compute_sample_weight raises the expected errors.
y_single_output = np.asarray([1, 1, 1, 2, 2, 2])
y_multi_output = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
y = y_single_output if y_type == "single-output" else y_multi_output
with pytest.raises(ValueError, match=err_msg):
compute_sample_weight(class_weight, y, indices=indices)
def test_compute_sample_weight_more_than_32():
# Non-regression smoke test for #12146
y = np.arange(50) # more than 32 distinct classes
indices = np.arange(50) # use subsampling
weight = compute_sample_weight("balanced", y, indices=indices)
assert_array_almost_equal(weight, np.ones(y.shape[0]))
def test_class_weight_does_not_contains_more_classes():
"""Check that class_weight can contain more labels than in y.
Non-regression test for #22413
"""
tree = DecisionTreeClassifier(class_weight={0: 1, 1: 10, 2: 20})
# Does not raise
tree.fit([[0, 0, 1], [1, 0, 1], [1, 2, 0]], [0, 0, 1])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_compute_sample_weight_sparse(csc_container):
"""Check that we can compute weight for sparse `y`."""
y = csc_container(np.asarray([[0], [1], [1]]))
sample_weight = compute_sample_weight("balanced", y)
assert_allclose(sample_weight, [1.5, 0.75, 0.75])
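
The 0.7777/2.3333 values used above follow from the "balanced" heuristic, n_samples / (n_classes * bincount(y)); a worked sketch:

import numpy as np

y = np.array([1, 1, 1, 2, 2, 2, 3])
counts = np.bincount(y)[1:]                     # [3, 3, 1]
class_weight = len(y) / (len(counts) * counts)  # [0.777..., 0.777..., 2.333...]
# Each sample then receives the weight of its class, matching
# expected_balanced in test_compute_sample_weight.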


@@ -0,0 +1,234 @@
import numpy as np
import pytest
from sklearn.utils._cython_blas import (
ColMajor,
NoTrans,
RowMajor,
Trans,
_asum_memview,
_axpy_memview,
_copy_memview,
_dot_memview,
_gemm_memview,
_gemv_memview,
_ger_memview,
_nrm2_memview,
_rot_memview,
_rotg_memview,
_scal_memview,
)
from sklearn.utils._testing import assert_allclose
def _numpy_to_cython(dtype):
cython = pytest.importorskip("cython")
if dtype == np.float32:
return cython.float
elif dtype == np.float64:
return cython.double
RTOL = {np.float32: 1e-6, np.float64: 1e-12}
ORDER = {RowMajor: "C", ColMajor: "F"}
def _no_op(x):
return x
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_dot(dtype):
dot = _dot_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
expected = x.dot(y)
actual = dot(x, y)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_asum(dtype):
asum = _asum_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
expected = np.abs(x).sum()
actual = asum(x)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_axpy(dtype):
axpy = _axpy_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
alpha = 2.5
expected = alpha * x + y
axpy(alpha, x, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_nrm2(dtype):
nrm2 = _nrm2_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
expected = np.linalg.norm(x)
actual = nrm2(x)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_copy(dtype):
copy = _copy_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = np.empty_like(x)
expected = x.copy()
copy(x, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_scal(dtype):
scal = _scal_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
alpha = 2.5
expected = alpha * x
scal(alpha, x)
assert_allclose(x, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_rotg(dtype):
rotg = _rotg_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
a = dtype(rng.randn())
b = dtype(rng.randn())
c, s = 0.0, 0.0
def expected_rotg(a, b):
roe = a if abs(a) > abs(b) else b
if a == 0 and b == 0:
c, s, r, z = (1, 0, 0, 0)
else:
r = np.sqrt(a**2 + b**2) * (1 if roe >= 0 else -1)
c, s = a / r, b / r
z = s if roe == a else (1 if c == 0 else 1 / c)
return r, z, c, s
expected = expected_rotg(a, b)
actual = rotg(a, b, c, s)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_rot(dtype):
rot = _rot_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
c = dtype(rng.randn())
s = dtype(rng.randn())
expected_x = c * x + s * y
expected_y = c * y - s * x
rot(x, y, c, s)
assert_allclose(x, expected_x)
assert_allclose(y, expected_y)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
"opA, transA", [(_no_op, NoTrans), (np.transpose, Trans)], ids=["NoTrans", "Trans"]
)
@pytest.mark.parametrize("order", [RowMajor, ColMajor], ids=["RowMajor", "ColMajor"])
def test_gemv(dtype, opA, transA, order):
gemv = _gemv_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
A = np.asarray(
opA(rng.random_sample((20, 10)).astype(dtype, copy=False)), order=ORDER[order]
)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(20).astype(dtype, copy=False)
alpha, beta = 2.5, -0.5
expected = alpha * opA(A).dot(x) + beta * y
gemv(transA, alpha, A, x, beta, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("order", [RowMajor, ColMajor], ids=["RowMajor", "ColMajor"])
def test_ger(dtype, order):
ger = _ger_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(20).astype(dtype, copy=False)
A = np.asarray(
rng.random_sample((10, 20)).astype(dtype, copy=False), order=ORDER[order]
)
alpha = 2.5
expected = alpha * np.outer(x, y) + A
ger(alpha, x, y, A)
assert_allclose(A, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize(
"opB, transB", [(_no_op, NoTrans), (np.transpose, Trans)], ids=["NoTrans", "Trans"]
)
@pytest.mark.parametrize(
"opA, transA", [(_no_op, NoTrans), (np.transpose, Trans)], ids=["NoTrans", "Trans"]
)
@pytest.mark.parametrize("order", [RowMajor, ColMajor], ids=["RowMajor", "ColMajor"])
def test_gemm(dtype, opA, transA, opB, transB, order):
gemm = _gemm_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
A = np.asarray(
opA(rng.random_sample((30, 10)).astype(dtype, copy=False)), order=ORDER[order]
)
B = np.asarray(
opB(rng.random_sample((10, 20)).astype(dtype, copy=False)), order=ORDER[order]
)
C = np.asarray(
rng.random_sample((30, 20)).astype(dtype, copy=False), order=ORDER[order]
)
alpha, beta = 2.5, -0.5
expected = alpha * opA(A).dot(opB(B)) + beta * C
gemm(transA, transB, alpha, A, B, beta, C)
assert_allclose(C, expected, rtol=RTOL[dtype])
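
For readers unfamiliar with BLAS naming, the gemm contract checked above is C <- alpha * op(A) @ op(B) + beta * C; the same computation in plain NumPy (NoTrans/NoTrans case):

import numpy as np

rng = np.random.RandomState(0)
A, B, C = rng.rand(3, 2), rng.rand(2, 4), rng.rand(3, 4)
alpha, beta = 2.5, -0.5
expected = alpha * A @ B + beta * C  # what gemm(NoTrans, NoTrans, ...) computes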


@@ -0,0 +1,22 @@
import pathlib
import pytest
import sklearn
def test_files_generated_by_templates_are_git_ignored():
"""Check the consistence of the files generated from template files."""
gitignore_file = pathlib.Path(sklearn.__file__).parent.parent / ".gitignore"
if not gitignore_file.exists():
pytest.skip("Tests are not run from the source folder")
base_dir = pathlib.Path(sklearn.__file__).parent
ignored_files = gitignore_file.read_text().split("\n")
ignored_files = [pathlib.Path(line) for line in ignored_files]
for filename in base_dir.glob("**/*.tp"):
filename = filename.relative_to(base_dir.parent)
# From "path/to/template.p??.tp" to "path/to/template.p??"
filename_wo_tempita_suffix = filename.with_suffix("")
assert filename_wo_tempita_suffix in ignored_files


@@ -0,0 +1,88 @@
# Authors: Raghav RV <rvraghav93@gmail.com>
# License: BSD 3 clause
import pickle
import pytest
from sklearn.utils.deprecation import _is_deprecated, deprecated
@deprecated("qwerty")
class MockClass1:
pass
class MockClass2:
@deprecated("mockclass2_method")
def method(self):
pass
@deprecated("n_features_ is deprecated") # type: ignore
@property
def n_features_(self):
"""Number of input features."""
return 10
class MockClass3:
@deprecated()
def __init__(self):
pass
class MockClass4:
pass
class MockClass5(MockClass1):
"""Inherit from deprecated class but does not call super().__init__."""
def __init__(self, a):
self.a = a
@deprecated("a message")
class MockClass6:
"""A deprecated class that overrides __new__."""
def __new__(cls, *args, **kwargs):
assert len(args) > 0
return super().__new__(cls)
@deprecated()
def mock_function():
return 10
def test_deprecated():
with pytest.warns(FutureWarning, match="qwerty"):
MockClass1()
with pytest.warns(FutureWarning, match="mockclass2_method"):
MockClass2().method()
with pytest.warns(FutureWarning, match="deprecated"):
MockClass3()
with pytest.warns(FutureWarning, match="qwerty"):
MockClass5(42)
with pytest.warns(FutureWarning, match="a message"):
MockClass6(42)
with pytest.warns(FutureWarning, match="deprecated"):
val = mock_function()
assert val == 10
def test_is_deprecated():
    # Test that the _is_deprecated helper identifies wrapping via deprecated.
    # NOTE: it works only for class methods and functions.
assert _is_deprecated(MockClass1.__new__)
assert _is_deprecated(MockClass2().method)
assert _is_deprecated(MockClass3.__init__)
assert not _is_deprecated(MockClass4.__init__)
assert _is_deprecated(MockClass5.__new__)
assert _is_deprecated(mock_function)
def test_pickle():
pickle.loads(pickle.dumps(mock_function))
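
A minimal usage sketch of the decorator under test (the function name and message here are illustrative): deprecated wraps callables and classes so that invoking them emits a FutureWarning carrying the extra message.

import warnings
from sklearn.utils.deprecation import deprecated

@deprecated("use shiny_function instead")  # hypothetical message
def dusty_function():
    return 42

with warnings.catch_warnings(record=True) as rec:
    warnings.simplefilter("always")
    assert dusty_function() == 42  # the return value is preserved
assert issubclass(rec[0].category, FutureWarning)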


@@ -0,0 +1,274 @@
import pickle
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn.utils._encode import _check_unknown, _encode, _get_counts, _unique
@pytest.mark.parametrize(
"values, expected",
[
(np.array([2, 1, 3, 1, 3], dtype="int64"), np.array([1, 2, 3], dtype="int64")),
(
np.array([2, 1, np.nan, 1, np.nan], dtype="float32"),
np.array([1, 2, np.nan], dtype="float32"),
),
(
np.array(["b", "a", "c", "a", "c"], dtype=object),
np.array(["a", "b", "c"], dtype=object),
),
(
np.array(["b", "a", None, "a", None], dtype=object),
np.array(["a", "b", None], dtype=object),
),
(np.array(["b", "a", "c", "a", "c"]), np.array(["a", "b", "c"])),
],
ids=["int64", "float32-nan", "object", "object-None", "str"],
)
def test_encode_util(values, expected):
uniques = _unique(values)
assert_array_equal(uniques, expected)
result, encoded = _unique(values, return_inverse=True)
assert_array_equal(result, expected)
assert_array_equal(encoded, np.array([1, 0, 2, 0, 2]))
encoded = _encode(values, uniques=uniques)
assert_array_equal(encoded, np.array([1, 0, 2, 0, 2]))
result, counts = _unique(values, return_counts=True)
assert_array_equal(result, expected)
assert_array_equal(counts, np.array([2, 1, 2]))
result, encoded, counts = _unique(values, return_inverse=True, return_counts=True)
assert_array_equal(result, expected)
assert_array_equal(encoded, np.array([1, 0, 2, 0, 2]))
assert_array_equal(counts, np.array([2, 1, 2]))
def test_encode_with_check_unknown():
# test for the check_unknown parameter of _encode()
uniques = np.array([1, 2, 3])
values = np.array([1, 2, 3, 4])
# Default is True, raise error
with pytest.raises(ValueError, match="y contains previously unseen labels"):
_encode(values, uniques=uniques, check_unknown=True)
    # don't raise an error if False
_encode(values, uniques=uniques, check_unknown=False)
# parameter is ignored for object dtype
uniques = np.array(["a", "b", "c"], dtype=object)
values = np.array(["a", "b", "c", "d"], dtype=object)
with pytest.raises(ValueError, match="y contains previously unseen labels"):
_encode(values, uniques=uniques, check_unknown=False)
def _assert_check_unknown(values, uniques, expected_diff, expected_mask):
diff = _check_unknown(values, uniques)
assert_array_equal(diff, expected_diff)
diff, valid_mask = _check_unknown(values, uniques, return_mask=True)
assert_array_equal(diff, expected_diff)
assert_array_equal(valid_mask, expected_mask)
@pytest.mark.parametrize(
"values, uniques, expected_diff, expected_mask",
[
(np.array([1, 2, 3, 4]), np.array([1, 2, 3]), [4], [True, True, True, False]),
(np.array([2, 1, 4, 5]), np.array([2, 5, 1]), [4], [True, True, False, True]),
(np.array([2, 1, np.nan]), np.array([2, 5, 1]), [np.nan], [True, True, False]),
(
np.array([2, 1, 4, np.nan]),
np.array([2, 5, 1, np.nan]),
[4],
[True, True, False, True],
),
(
np.array([2, 1, 4, np.nan]),
np.array([2, 5, 1]),
[4, np.nan],
[True, True, False, False],
),
(
np.array([2, 1, 4, 5]),
np.array([2, 5, 1, np.nan]),
[4],
[True, True, False, True],
),
(
np.array(["a", "b", "c", "d"], dtype=object),
np.array(["a", "b", "c"], dtype=object),
np.array(["d"], dtype=object),
[True, True, True, False],
),
(
np.array(["d", "c", "a", "b"], dtype=object),
np.array(["a", "c", "b"], dtype=object),
np.array(["d"], dtype=object),
[False, True, True, True],
),
(
np.array(["a", "b", "c", "d"]),
np.array(["a", "b", "c"]),
np.array(["d"]),
[True, True, True, False],
),
(
np.array(["d", "c", "a", "b"]),
np.array(["a", "c", "b"]),
np.array(["d"]),
[False, True, True, True],
),
],
)
def test_check_unknown(values, uniques, expected_diff, expected_mask):
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
@pytest.mark.parametrize("missing_value", [None, np.nan, float("nan")])
@pytest.mark.parametrize("pickle_uniques", [True, False])
def test_check_unknown_missing_values(missing_value, pickle_uniques):
    # check _check_unknown with missing values and object dtype
values = np.array(["d", "c", "a", "b", missing_value], dtype=object)
uniques = np.array(["c", "a", "b", missing_value], dtype=object)
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
expected_diff = ["d"]
expected_mask = [False, True, True, True, True]
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
values = np.array(["d", "c", "a", "b", missing_value], dtype=object)
uniques = np.array(["c", "a", "b"], dtype=object)
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
expected_diff = ["d", missing_value]
expected_mask = [False, True, True, True, False]
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
values = np.array(["a", missing_value], dtype=object)
uniques = np.array(["a", "b", "z"], dtype=object)
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
expected_diff = [missing_value]
expected_mask = [True, False]
_assert_check_unknown(values, uniques, expected_diff, expected_mask)
@pytest.mark.parametrize("missing_value", [np.nan, None, float("nan")])
@pytest.mark.parametrize("pickle_uniques", [True, False])
def test_unique_util_missing_values_objects(missing_value, pickle_uniques):
    # check _unique and _encode with missing values and object dtype
values = np.array(["a", "c", "c", missing_value, "b"], dtype=object)
expected_uniques = np.array(["a", "b", "c", missing_value], dtype=object)
uniques = _unique(values)
if missing_value is None:
assert_array_equal(uniques, expected_uniques)
else: # missing_value == np.nan
assert_array_equal(uniques[:-1], expected_uniques[:-1])
assert np.isnan(uniques[-1])
if pickle_uniques:
uniques = pickle.loads(pickle.dumps(uniques))
encoded = _encode(values, uniques=uniques)
assert_array_equal(encoded, np.array([0, 2, 2, 3, 1]))
def test_unique_util_missing_values_numeric():
    # Check missing values in numeric arrays
values = np.array([3, 1, np.nan, 5, 3, np.nan], dtype=float)
expected_uniques = np.array([1, 3, 5, np.nan], dtype=float)
expected_inverse = np.array([1, 0, 3, 2, 1, 3])
uniques = _unique(values)
assert_array_equal(uniques, expected_uniques)
uniques, inverse = _unique(values, return_inverse=True)
assert_array_equal(uniques, expected_uniques)
assert_array_equal(inverse, expected_inverse)
encoded = _encode(values, uniques=uniques)
assert_array_equal(encoded, expected_inverse)
def test_unique_util_with_all_missing_values():
# test for all types of missing values for object dtype
values = np.array([np.nan, "a", "c", "c", None, float("nan"), None], dtype=object)
uniques = _unique(values)
assert_array_equal(uniques[:-1], ["a", "c", None])
# last value is nan
assert np.isnan(uniques[-1])
expected_inverse = [3, 0, 1, 1, 2, 3, 2]
_, inverse = _unique(values, return_inverse=True)
assert_array_equal(inverse, expected_inverse)
def test_check_unknown_with_both_missing_values():
# test for both types of missing values for object dtype
values = np.array([np.nan, "a", "c", "c", None, np.nan, None], dtype=object)
diff = _check_unknown(values, known_values=np.array(["a", "c"], dtype=object))
assert diff[0] is None
assert np.isnan(diff[1])
diff, valid_mask = _check_unknown(
values, known_values=np.array(["a", "c"], dtype=object), return_mask=True
)
assert diff[0] is None
assert np.isnan(diff[1])
assert_array_equal(valid_mask, [False, True, True, True, False, False, False])
@pytest.mark.parametrize(
"values, uniques, expected_counts",
[
(np.array([1] * 10 + [2] * 4 + [3] * 15), np.array([1, 2, 3]), [10, 4, 15]),
(
np.array([1] * 10 + [2] * 4 + [3] * 15),
np.array([1, 2, 3, 5]),
[10, 4, 15, 0],
),
(
np.array([np.nan] * 10 + [2] * 4 + [3] * 15),
np.array([2, 3, np.nan]),
[4, 15, 10],
),
(
np.array(["b"] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["a", "b", "c"],
[16, 4, 20],
),
(
np.array(["b"] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["c", "b", "a"],
[20, 4, 16],
),
(
np.array([np.nan] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["c", np.nan, "a"],
[20, 4, 16],
),
(
np.array(["b"] * 4 + ["a"] * 16 + ["c"] * 20, dtype=object),
["a", "b", "c", "e"],
[16, 4, 20, 0],
),
],
)
def test_get_counts(values, uniques, expected_counts):
counts = _get_counts(values, uniques)
assert_array_equal(counts, expected_counts)
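
To summarize the encoding utilities exercised above, a small round-trip sketch: _unique returns the sorted unique values and _encode maps each value to its index in uniques.

import numpy as np
from sklearn.utils._encode import _encode, _unique

values = np.array(["b", "a", "c", "a"], dtype=object)
uniques = _unique(values)                # array(['a', 'b', 'c'], dtype=object)
codes = _encode(values, uniques=uniques) # array([1, 0, 2, 0])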

File diff suppressed because it is too large.


@@ -0,0 +1,566 @@
import html
import locale
import re
import types
from contextlib import closing
from io import StringIO
from unittest.mock import patch
import pytest
from sklearn import config_context
from sklearn.base import BaseEstimator
from sklearn.cluster import AgglomerativeClustering, Birch
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.ensemble import StackingClassifier, StackingRegressor, VotingClassifier
from sklearn.feature_selection import SelectPercentile
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.impute import SimpleImputer
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.multiclass import OneVsOneClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import LinearSVC, LinearSVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._estimator_html_repr import (
_get_css_style,
_get_visual_block,
_HTMLDocumentationLinkMixin,
_write_label_html,
estimator_html_repr,
)
from sklearn.utils.fixes import parse_version
@pytest.mark.parametrize("checked", [True, False])
def test_write_label_html(checked):
# Test checking logic and labeling
name = "LogisticRegression"
tool_tip = "hello-world"
with closing(StringIO()) as out:
_write_label_html(out, name, tool_tip, checked=checked)
html_label = out.getvalue()
p = (
r'<label for="sk-estimator-id-[0-9]*"'
r' class="sk-toggleable__label (fitted)? sk-toggleable__label-arrow ">'
r"LogisticRegression"
)
re_compiled = re.compile(p)
assert re_compiled.search(html_label)
assert html_label.startswith('<div class="sk-label-container">')
assert "<pre>hello-world</pre>" in html_label
if checked:
assert "checked>" in html_label
@pytest.mark.parametrize("est", ["passthrough", "drop", None])
def test_get_visual_block_single_str_none(est):
# Test estimators that are represented by strings
est_html_info = _get_visual_block(est)
assert est_html_info.kind == "single"
assert est_html_info.estimators == est
assert est_html_info.names == str(est)
assert est_html_info.name_details == str(est)
def test_get_visual_block_single_estimator():
est = LogisticRegression(C=10.0)
est_html_info = _get_visual_block(est)
assert est_html_info.kind == "single"
assert est_html_info.estimators == est
assert est_html_info.names == est.__class__.__name__
assert est_html_info.name_details == str(est)
def test_get_visual_block_pipeline():
pipe = Pipeline(
[
("imputer", SimpleImputer()),
("do_nothing", "passthrough"),
("do_nothing_more", None),
("classifier", LogisticRegression()),
]
)
est_html_info = _get_visual_block(pipe)
assert est_html_info.kind == "serial"
assert est_html_info.estimators == tuple(step[1] for step in pipe.steps)
assert est_html_info.names == [
"imputer: SimpleImputer",
"do_nothing: passthrough",
"do_nothing_more: passthrough",
"classifier: LogisticRegression",
]
assert est_html_info.name_details == [str(est) for _, est in pipe.steps]
def test_get_visual_block_feature_union():
f_union = FeatureUnion([("pca", PCA()), ("svd", TruncatedSVD())])
est_html_info = _get_visual_block(f_union)
assert est_html_info.kind == "parallel"
assert est_html_info.names == ("pca", "svd")
assert est_html_info.estimators == tuple(
trans[1] for trans in f_union.transformer_list
)
assert est_html_info.name_details == (None, None)
def test_get_visual_block_voting():
clf = VotingClassifier(
[("log_reg", LogisticRegression()), ("mlp", MLPClassifier())]
)
est_html_info = _get_visual_block(clf)
assert est_html_info.kind == "parallel"
assert est_html_info.estimators == tuple(trans[1] for trans in clf.estimators)
assert est_html_info.names == ("log_reg", "mlp")
assert est_html_info.name_details == (None, None)
def test_get_visual_block_column_transformer():
ct = ColumnTransformer(
[("pca", PCA(), ["num1", "num2"]), ("svd", TruncatedSVD, [0, 3])]
)
est_html_info = _get_visual_block(ct)
assert est_html_info.kind == "parallel"
assert est_html_info.estimators == tuple(trans[1] for trans in ct.transformers)
assert est_html_info.names == ("pca", "svd")
assert est_html_info.name_details == (["num1", "num2"], [0, 3])
def test_estimator_html_repr_pipeline():
num_trans = Pipeline(
steps=[("pass", "passthrough"), ("imputer", SimpleImputer(strategy="median"))]
)
cat_trans = Pipeline(
steps=[
("imputer", SimpleImputer(strategy="constant", missing_values="empty")),
("one-hot", OneHotEncoder(drop="first")),
]
)
preprocess = ColumnTransformer(
[
("num", num_trans, ["a", "b", "c", "d", "e"]),
("cat", cat_trans, [0, 1, 2, 3]),
]
)
feat_u = FeatureUnion(
[
("pca", PCA(n_components=1)),
(
"tsvd",
Pipeline(
[
("first", TruncatedSVD(n_components=3)),
("select", SelectPercentile()),
]
),
),
]
)
clf = VotingClassifier(
[
("lr", LogisticRegression(solver="lbfgs", random_state=1)),
("mlp", MLPClassifier(alpha=0.001)),
]
)
pipe = Pipeline(
[("preprocessor", preprocess), ("feat_u", feat_u), ("classifier", clf)]
)
html_output = estimator_html_repr(pipe)
    # top-level estimators show their repr with changes
assert html.escape(str(pipe)) in html_output
for _, est in pipe.steps:
assert (
'<div class="sk-toggleable__content "><pre>' + html.escape(str(est))
) in html_output
    # low-level estimators do not show changes
with config_context(print_changed_only=True):
assert html.escape(str(num_trans["pass"])) in html_output
assert "passthrough</label>" in html_output
assert html.escape(str(num_trans["imputer"])) in html_output
for _, _, cols in preprocess.transformers:
assert f"<pre>{html.escape(str(cols))}</pre>" in html_output
# feature union
for name, _ in feat_u.transformer_list:
assert f"<label>{html.escape(name)}</label>" in html_output
pca = feat_u.transformer_list[0][1]
assert f"<pre>{html.escape(str(pca))}</pre>" in html_output
tsvd = feat_u.transformer_list[1][1]
first = tsvd["first"]
select = tsvd["select"]
assert f"<pre>{html.escape(str(first))}</pre>" in html_output
assert f"<pre>{html.escape(str(select))}</pre>" in html_output
# voting classifier
for name, est in clf.estimators:
assert f"<label>{html.escape(name)}</label>" in html_output
assert f"<pre>{html.escape(str(est))}</pre>" in html_output
# verify that prefers-color-scheme is implemented
assert "prefers-color-scheme" in html_output
@pytest.mark.parametrize("final_estimator", [None, LinearSVC()])
def test_stacking_classifier(final_estimator):
estimators = [
("mlp", MLPClassifier(alpha=0.001)),
("tree", DecisionTreeClassifier()),
]
clf = StackingClassifier(estimators=estimators, final_estimator=final_estimator)
html_output = estimator_html_repr(clf)
assert html.escape(str(clf)) in html_output
# If final_estimator's default changes from LogisticRegression
# this should be updated
if final_estimator is None:
assert "LogisticRegression(" in html_output
else:
assert final_estimator.__class__.__name__ in html_output
@pytest.mark.parametrize("final_estimator", [None, LinearSVR()])
def test_stacking_regressor(final_estimator):
reg = StackingRegressor(
estimators=[("svr", LinearSVR())], final_estimator=final_estimator
)
html_output = estimator_html_repr(reg)
assert html.escape(str(reg.estimators[0][0])) in html_output
p = (
r'<label for="sk-estimator-id-[0-9]*"'
r' class="sk-toggleable__label (fitted)? sk-toggleable__label-arrow ">'
r"&nbsp;LinearSVR"
)
re_compiled = re.compile(p)
assert re_compiled.search(html_output)
if final_estimator is None:
p = (
r'<label for="sk-estimator-id-[0-9]*"'
r' class="sk-toggleable__label (fitted)? sk-toggleable__label-arrow ">'
r"&nbsp;RidgeCV"
)
re_compiled = re.compile(p)
assert re_compiled.search(html_output)
else:
assert html.escape(final_estimator.__class__.__name__) in html_output
def test_birch_duck_typing_meta():
# Test duck typing meta estimators with Birch
birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
html_output = estimator_html_repr(birch)
# inner estimators do not show changes
with config_context(print_changed_only=True):
assert f"<pre>{html.escape(str(birch.n_clusters))}" in html_output
assert "AgglomerativeClustering</label>" in html_output
# outer estimator contains all changes
assert f"<pre>{html.escape(str(birch))}" in html_output
def test_ovo_classifier_duck_typing_meta():
# Test duck typing metaestimators with OVO
ovo = OneVsOneClassifier(LinearSVC(penalty="l1"))
html_output = estimator_html_repr(ovo)
# inner estimators do not show changes
with config_context(print_changed_only=True):
assert f"<pre>{html.escape(str(ovo.estimator))}" in html_output
# regex to match the start of the tag
p = (
r'<label for="sk-estimator-id-[0-9]*" '
r'class="sk-toggleable__label sk-toggleable__label-arrow ">&nbsp;LinearSVC'
)
re_compiled = re.compile(p)
assert re_compiled.search(html_output)
# outer estimator
assert f"<pre>{html.escape(str(ovo))}" in html_output
def test_duck_typing_nested_estimator():
# Test duck typing metaestimators with random search
kernel_ridge = KernelRidge(kernel=ExpSineSquared())
param_distributions = {"alpha": [1, 2]}
kernel_ridge_tuned = RandomizedSearchCV(
kernel_ridge,
param_distributions=param_distributions,
)
html_output = estimator_html_repr(kernel_ridge_tuned)
assert "estimator: KernelRidge</label>" in html_output
@pytest.mark.parametrize("print_changed_only", [True, False])
def test_one_estimator_print_change_only(print_changed_only):
pca = PCA(n_components=10)
with config_context(print_changed_only=print_changed_only):
pca_repr = html.escape(str(pca))
html_output = estimator_html_repr(pca)
assert pca_repr in html_output
def test_fallback_exists():
"""Check that repr fallback is in the HTML."""
pca = PCA(n_components=10)
html_output = estimator_html_repr(pca)
assert (
f'<div class="sk-text-repr-fallback"><pre>{html.escape(str(pca))}'
in html_output
)
def test_show_arrow_pipeline():
"""Show arrow in pipeline for top level in pipeline"""
pipe = Pipeline([("scale", StandardScaler()), ("log_Reg", LogisticRegression())])
html_output = estimator_html_repr(pipe)
assert (
'class="sk-toggleable__label sk-toggleable__label-arrow ">&nbsp;&nbsp;Pipeline'
in html_output
)
def test_invalid_parameters_in_stacking():
"""Invalidate stacking configuration uses default repr.
Non-regression test for #24009.
"""
stacker = StackingClassifier(estimators=[])
html_output = estimator_html_repr(stacker)
assert html.escape(str(stacker)) in html_output
def test_estimator_get_params_return_cls():
"""Check HTML repr works where a value in get_params is a class."""
class MyEstimator:
def get_params(self, deep=False):
return {"inner_cls": LogisticRegression}
est = MyEstimator()
assert "MyEstimator" in estimator_html_repr(est)
def test_estimator_html_repr_unfitted_vs_fitted():
"""Check that we have the information that the estimator is fitted or not in the
HTML representation.
"""
class MyEstimator(BaseEstimator):
def fit(self, X, y):
self.fitted_ = True
return self
X, y = load_iris(return_X_y=True)
estimator = MyEstimator()
assert "<span>Not fitted</span>" in estimator_html_repr(estimator)
estimator.fit(X, y)
assert "<span>Fitted</span>" in estimator_html_repr(estimator)
@pytest.mark.parametrize(
"estimator",
[
LogisticRegression(),
make_pipeline(StandardScaler(), LogisticRegression()),
make_pipeline(
make_column_transformer((StandardScaler(), slice(0, 3))),
LogisticRegression(),
),
],
)
def test_estimator_html_repr_fitted_icon(estimator):
"""Check that we are showing the fitted status icon only once."""
pattern = '<span class="sk-estimator-doc-link ">i<span>Not fitted</span></span>'
assert estimator_html_repr(estimator).count(pattern) == 1
X, y = load_iris(return_X_y=True)
estimator.fit(X, y)
pattern = '<span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span>'
assert estimator_html_repr(estimator).count(pattern) == 1
@pytest.mark.parametrize("mock_version", ["1.3.0.dev0", "1.3.0"])
def test_html_documentation_link_mixin_sklearn(mock_version):
"""Check the behaviour of the `_HTMLDocumentationLinkMixin` class for scikit-learn
default.
"""
# mock the `__version__` where the mixin is located
with patch("sklearn.utils._estimator_html_repr.__version__", mock_version):
mixin = _HTMLDocumentationLinkMixin()
assert mixin._doc_link_module == "sklearn"
sklearn_version = parse_version(mock_version)
        # we need to parse the version manually to make sure that this test passes on
        # branches other than `main` (which is "dev").
if sklearn_version.dev is None:
version = f"{sklearn_version.major}.{sklearn_version.minor}"
else:
version = "dev"
assert (
mixin._doc_link_template
== f"https://scikit-learn.org/{version}/modules/generated/"
"{estimator_module}.{estimator_name}.html"
)
assert (
mixin._get_doc_link()
== f"https://scikit-learn.org/{version}/modules/generated/"
"sklearn.utils._HTMLDocumentationLinkMixin.html"
)
@pytest.mark.parametrize(
"module_path,expected_module",
[
("prefix.mymodule", "prefix.mymodule"),
("prefix._mymodule", "prefix"),
("prefix.mypackage._mymodule", "prefix.mypackage"),
("prefix.mypackage._mymodule.submodule", "prefix.mypackage"),
("prefix.mypackage.mymodule.submodule", "prefix.mypackage.mymodule.submodule"),
],
)
def test_html_documentation_link_mixin_get_doc_link_instance(
module_path, expected_module
):
"""Check the behaviour of the `_get_doc_link` with various parameter."""
class FooBar(_HTMLDocumentationLinkMixin):
pass
FooBar.__module__ = module_path
est = FooBar()
    # if we set `_doc_link_module`, then we expect a module and name to be inferred for the estimator
est._doc_link_module = "prefix"
est._doc_link_template = (
"https://website.com/{estimator_module}.{estimator_name}.html"
)
assert est._get_doc_link() == f"https://website.com/{expected_module}.FooBar.html"
@pytest.mark.parametrize(
"module_path,expected_module",
[
("prefix.mymodule", "prefix.mymodule"),
("prefix._mymodule", "prefix"),
("prefix.mypackage._mymodule", "prefix.mypackage"),
("prefix.mypackage._mymodule.submodule", "prefix.mypackage"),
("prefix.mypackage.mymodule.submodule", "prefix.mypackage.mymodule.submodule"),
],
)
def test_html_documentation_link_mixin_get_doc_link_class(module_path, expected_module):
"""Check the behaviour of the `_get_doc_link` when `_doc_link_module` and
`_doc_link_template` are defined at the class level and not at the instance
level."""
class FooBar(_HTMLDocumentationLinkMixin):
_doc_link_module = "prefix"
_doc_link_template = (
"https://website.com/{estimator_module}.{estimator_name}.html"
)
FooBar.__module__ = module_path
est = FooBar()
assert est._get_doc_link() == f"https://website.com/{expected_module}.FooBar.html"
def test_html_documentation_link_mixin_get_doc_link_out_of_library():
"""Check the behaviour of the `_get_doc_link` with various parameter."""
mixin = _HTMLDocumentationLinkMixin()
# if the `_doc_link_module` does not refer to the root module of the estimator
# (here the mixin), then we should return an empty string.
mixin._doc_link_module = "xxx"
assert mixin._get_doc_link() == ""
def test_html_documentation_link_mixin_doc_link_url_param_generator_instance():
mixin = _HTMLDocumentationLinkMixin()
# we can bypass the generation by providing our own callable
mixin._doc_link_template = (
"https://website.com/{my_own_variable}.{another_variable}.html"
)
def url_param_generator(estimator):
return {
"my_own_variable": "value_1",
"another_variable": "value_2",
}
mixin._doc_link_url_param_generator = types.MethodType(url_param_generator, mixin)
assert mixin._get_doc_link() == "https://website.com/value_1.value_2.html"
def test_html_documentation_link_mixin_doc_link_url_param_generator_class():
# we can bypass the generation by providing our own callable
def url_param_generator(estimator):
return {
"my_own_variable": "value_1",
"another_variable": "value_2",
}
class FooBar(_HTMLDocumentationLinkMixin):
_doc_link_template = (
"https://website.com/{my_own_variable}.{another_variable}.html"
)
_doc_link_url_param_generator = url_param_generator
estimator = FooBar()
assert estimator._get_doc_link() == "https://website.com/value_1.value_2.html"
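# A minimal sketch (not part of the original tests) of the templating
# convention exercised above: `_get_doc_link` fills `_doc_link_template` with
# the estimator's public module path and class name, so a third-party library
# only needs to define two attributes. All names below are hypothetical.
def example_third_party_doc_link():
    class MyEstimator(_HTMLDocumentationLinkMixin):
        _doc_link_module = "mylib"
        _doc_link_template = (
            "https://mylib.example.com/api/{estimator_module}.{estimator_name}.html"
        )

    MyEstimator.__module__ = "mylib.models"
    # -> "https://mylib.example.com/api/mylib.models.MyEstimator.html"
    return MyEstimator()._get_doc_link()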
@pytest.fixture
def set_non_utf8_locale():
"""Pytest fixture to set non utf-8 locale during the test.
The locale is set to the original one after the test has run.
"""
try:
locale.setlocale(locale.LC_CTYPE, "C")
except locale.Error:
pytest.skip("'C' locale is not available on this OS")
yield
    # Resets the locale to the original one. Python calls setlocale(LC_CTYPE, "")
# at startup according to
# https://docs.python.org/3/library/locale.html#background-details-hints-tips-and-caveats.
# This assumes that no other locale changes have been made. For some reason,
# on some platforms, trying to restore locale with something like
# locale.setlocale(locale.LC_CTYPE, locale.getlocale()) raises a
# locale.Error: unsupported locale setting
locale.setlocale(locale.LC_CTYPE, "")
def test_non_utf8_locale(set_non_utf8_locale):
"""Checks that utf8 encoding is used when reading the CSS file.
Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/27725
"""
_get_css_style()
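# A minimal sketch (not part of the original tests) of the pattern verified
# above: read bundled text assets with an explicit encoding so that a "C"
# locale cannot change the default codec and garble non-ASCII characters.
# `read_text_asset` is a hypothetical helper, not sklearn API.
def read_text_asset(path):
    from pathlib import Path

    return Path(path).read_text(encoding="utf-8")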

File diff suppressed because it is too large


@@ -0,0 +1,47 @@
"""Test fast_dict."""
import numpy as np
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.utils._fast_dict import IntFloatDict, argmin
def test_int_float_dict():
rng = np.random.RandomState(0)
keys = np.unique(rng.randint(100, size=10).astype(np.intp))
values = rng.rand(len(keys))
d = IntFloatDict(keys, values)
for key, value in zip(keys, values):
assert d[key] == value
assert len(d) == len(keys)
d.append(120, 3.0)
assert d[120] == 3.0
assert len(d) == len(keys) + 1
for i in range(2000):
d.append(i + 1000, 4.0)
assert d[1100] == 4.0
def test_int_float_dict_argmin():
# Test the argmin implementation on the IntFloatDict
keys = np.arange(100, dtype=np.intp)
values = np.arange(100, dtype=np.float64)
d = IntFloatDict(keys, values)
assert argmin(d) == (0, 0)
def test_to_arrays():
# Test that an IntFloatDict is converted into arrays
# of keys and values correctly
keys_in = np.array([1, 2, 3], dtype=np.intp)
values_in = np.array([4, 5, 6], dtype=np.float64)
d = IntFloatDict(keys_in, values_in)
keys_out, values_out = d.to_arrays()
assert keys_out.dtype == keys_in.dtype
assert values_in.dtype == values_out.dtype
assert_array_equal(keys_out, keys_in)
assert_allclose(values_out, values_in)
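# A short usage sketch (not part of the original tests): IntFloatDict is a
# Cython int -> float mapping used internally by scikit-learn; keys must be
# np.intp, and `argmin` returns the (key, value) pair with the smallest value.
def example_int_float_dict():
    keys = np.array([3, 7], dtype=np.intp)
    values = np.array([0.5, 0.1], dtype=np.float64)
    d = IntFloatDict(keys, values)
    d.append(9, 2.0)
    assert argmin(d) == (7, 0.1)
    return d.to_arrays()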


@@ -0,0 +1,162 @@
# Authors: Gael Varoquaux <gael.varoquaux@normalesup.org>
# Justin Vincent
# Lars Buitinck
# License: BSD 3 clause
import numpy as np
import pytest
from sklearn.utils._testing import assert_array_equal
from sklearn.utils.fixes import _object_dtype_isnan, _smallest_admissible_index_dtype
@pytest.mark.parametrize("dtype, val", ([object, 1], [object, "a"], [float, 1]))
def test_object_dtype_isnan(dtype, val):
X = np.array([[val, np.nan], [np.nan, val]], dtype=dtype)
expected_mask = np.array([[False, True], [True, False]])
mask = _object_dtype_isnan(X)
assert_array_equal(mask, expected_mask)
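# A one-line sketch (not part of the original tests) of the trick under test:
# NaN is the only value for which `x != x` holds, and the comparison works
# elementwise on object arrays, so no float cast is needed.
def example_object_isnan():
    X = np.array([["a", np.nan], [np.nan, 1]], dtype=object)
    return X != X  # [[False, True], [True, False]]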
@pytest.mark.parametrize(
"params, expected_dtype",
[
({}, np.int32), # default behaviour
({"maxval": np.iinfo(np.int32).max}, np.int32),
({"maxval": np.iinfo(np.int32).max + 1}, np.int64),
],
)
def test_smallest_admissible_index_dtype_max_val(params, expected_dtype):
"""Check the behaviour of `smallest_admissible_index_dtype` depending only on the
`max_val` parameter.
"""
assert _smallest_admissible_index_dtype(**params) == expected_dtype
@pytest.mark.parametrize(
"params, expected_dtype",
[
        # Arrays dtype is int64 and thus should not be downcast to int32 without
        # checking the contents or providing maxval.
({"arrays": np.array([1, 2], dtype=np.int64)}, np.int64),
        # One of the arrays is int64 and should not be downcast to int32
        # for the same reason.
(
{
"arrays": (
np.array([1, 2], dtype=np.int32),
np.array([1, 2], dtype=np.int64),
)
},
np.int64,
),
# Both arrays are already int32: we can just keep this dtype.
(
{
"arrays": (
np.array([1, 2], dtype=np.int32),
np.array([1, 2], dtype=np.int32),
)
},
np.int32,
),
# Arrays should be upcasted to at least int32 precision.
({"arrays": np.array([1, 2], dtype=np.int8)}, np.int32),
# Check that `maxval` takes precedence over the arrays and thus upcast to
# int64.
(
{
"arrays": np.array([1, 2], dtype=np.int32),
"maxval": np.iinfo(np.int32).max + 1,
},
np.int64,
),
],
)
def test_smallest_admissible_index_dtype_without_checking_contents(
params, expected_dtype
):
"""Check the behaviour of `smallest_admissible_index_dtype` using the passed
arrays but without checking the contents of the arrays.
"""
assert _smallest_admissible_index_dtype(**params) == expected_dtype
@pytest.mark.parametrize(
"params, expected_dtype",
[
# empty arrays should always be converted to int32 indices
(
{
"arrays": (np.array([], dtype=np.int64), np.array([], dtype=np.int64)),
"check_contents": True,
},
np.int32,
),
# arrays respecting np.iinfo(np.int32).min < x < np.iinfo(np.int32).max should
# be converted to int32,
(
{"arrays": np.array([1], dtype=np.int64), "check_contents": True},
np.int32,
),
        # otherwise, it should be converted to int64. We need to create a uint32
        # array to accommodate a value > np.iinfo(np.int32).max
(
{
"arrays": np.array([np.iinfo(np.int32).max + 1], dtype=np.uint32),
"check_contents": True,
},
np.int64,
),
# maxval should take precedence over the arrays contents and thus upcast to
# int64.
(
{
"arrays": np.array([1], dtype=np.int32),
"check_contents": True,
"maxval": np.iinfo(np.int32).max + 1,
},
np.int64,
),
# when maxval is small, but check_contents is True and the contents
# require np.int64, we still require np.int64 indexing in the end.
(
{
"arrays": np.array([np.iinfo(np.int32).max + 1], dtype=np.uint32),
"check_contents": True,
"maxval": 1,
},
np.int64,
),
],
)
def test_smallest_admissible_index_dtype_by_checking_contents(params, expected_dtype):
"""Check the behaviour of `smallest_admissible_index_dtype` using the dtype of the
arrays but as well the contents.
"""
assert _smallest_admissible_index_dtype(**params) == expected_dtype
@pytest.mark.parametrize(
"params, err_type, err_msg",
[
(
{"maxval": np.iinfo(np.int64).max + 1},
ValueError,
"is to large to be represented as np.int64",
),
(
{"arrays": np.array([1, 2], dtype=np.float64)},
ValueError,
"Array dtype float64 is not supported",
),
({"arrays": [1, 2]}, TypeError, "Arrays should be of type np.ndarray"),
],
)
def test_smallest_admissible_index_dtype_error(params, err_type, err_msg):
"""Check that we raise the proper error message."""
with pytest.raises(err_type, match=err_msg):
_smallest_admissible_index_dtype(**params)
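# An illustrative recap (not sklearn code) of the decision rule tested above:
# indices stay int32 when every array dtype, every checked value and `maxval`
# fit into the int32 range, and are upcast to int64 as soon as any does not.
def fits_in_int32(value):
    info = np.iinfo(np.int32)
    return info.min <= value <= info.max
assert fits_in_int32(np.iinfo(np.int32).max)          # -> int32 indices
assert not fits_in_int32(np.iinfo(np.int32).max + 1)  # -> int64 indices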


@@ -0,0 +1,80 @@
import numpy as np
import pytest
from scipy.sparse.csgraph import connected_components
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.neighbors import kneighbors_graph
from sklearn.utils.graph import _fix_connected_components
def test_fix_connected_components():
    # Test that _fix_connected_components reduces the number of components to 1.
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
n_connected_components, labels = connected_components(graph)
assert n_connected_components > 1
graph = _fix_connected_components(X, graph, n_connected_components, labels)
n_connected_components, labels = connected_components(graph)
assert n_connected_components == 1
def test_fix_connected_components_precomputed():
# Test that _fix_connected_components accepts precomputed distance matrix.
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
n_connected_components, labels = connected_components(graph)
assert n_connected_components > 1
distances = pairwise_distances(X)
graph = _fix_connected_components(
distances, graph, n_connected_components, labels, metric="precomputed"
)
n_connected_components, labels = connected_components(graph)
assert n_connected_components == 1
# but it does not work with precomputed neighbors graph
with pytest.raises(RuntimeError, match="does not work with a sparse"):
_fix_connected_components(
graph, graph, n_connected_components, labels, metric="precomputed"
)
def test_fix_connected_components_wrong_mode():
    # Test that an error is raised if the mode string is incorrect.
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
n_connected_components, labels = connected_components(graph)
with pytest.raises(ValueError, match="Unknown mode"):
graph = _fix_connected_components(
X, graph, n_connected_components, labels, mode="foo"
)
def test_fix_connected_components_connectivity_mode():
    # Test that the connectivity mode fills new connections with ones.
X = np.array([0, 1, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=1, mode="connectivity")
n_connected_components, labels = connected_components(graph)
graph = _fix_connected_components(
X, graph, n_connected_components, labels, mode="connectivity"
)
assert np.all(graph.data == 1)
def test_fix_connected_components_distance_mode():
# Test that the distance mode does not fill new connections with ones.
X = np.array([0, 1, 6, 7])[:, None]
graph = kneighbors_graph(X, n_neighbors=1, mode="distance")
assert np.all(graph.data == 1)
n_connected_components, labels = connected_components(graph)
graph = _fix_connected_components(
X, graph, n_connected_components, labels, mode="distance"
)
assert not np.all(graph.data == 1)
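# A usage sketch (not part of the original tests): stitching a disconnected
# k-NN graph into a single component by adding the shortest edges between
# components, mirroring the calls exercised above.
def example_fix_connected_components():
    X = np.array([0, 1, 2, 10, 11, 12])[:, None]
    graph = kneighbors_graph(X, n_neighbors=2, mode="distance")
    n_components, labels = connected_components(graph)
    graph = _fix_connected_components(X, graph, n_components, labels)
    return connected_components(graph)[0]  # 1: a single connected component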


@@ -0,0 +1,594 @@
import warnings
from copy import copy
from unittest import SkipTest
import numpy as np
import pytest
import sklearn
from sklearn.externals._packaging.version import parse as parse_version
from sklearn.utils import _safe_indexing, resample, shuffle
from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
from sklearn.utils._indexing import (
_determine_key_type,
_get_column_indices,
_safe_assign,
)
from sklearn.utils._mocking import MockDataFrame
from sklearn.utils._testing import (
_array_api_for_tests,
_convert_container,
assert_allclose_dense_sparse,
assert_array_equal,
skip_if_array_api_compat_not_configured,
)
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
# toy array
X_toy = np.arange(9).reshape((3, 3))
def test_polars_indexing():
"""Check _safe_indexing for polars as expected."""
pl = pytest.importorskip("polars", minversion="0.18.2")
df = pl.DataFrame(
{"a": [1, 2, 3, 4], "b": [4, 5, 6, 8], "c": [1, 4, 1, 10]}, orient="row"
)
from polars.testing import assert_frame_equal
str_keys = [["b"], ["a", "b"], ["b", "a", "c"], ["c"], ["a"]]
for key in str_keys:
out = _safe_indexing(df, key, axis=1)
assert_frame_equal(df[key], out)
bool_keys = [([True, False, True], ["a", "c"]), ([False, False, True], ["c"])]
for bool_key, str_key in bool_keys:
out = _safe_indexing(df, bool_key, axis=1)
assert_frame_equal(df[:, str_key], out)
int_keys = [([0, 1], ["a", "b"]), ([2], ["c"])]
for int_key, str_key in int_keys:
out = _safe_indexing(df, int_key, axis=1)
assert_frame_equal(df[:, str_key], out)
axis_0_keys = [[0, 1], [1, 3], [3, 2]]
for key in axis_0_keys:
out = _safe_indexing(df, key, axis=0)
assert_frame_equal(df[key], out)
@pytest.mark.parametrize(
"key, dtype",
[
(0, "int"),
("0", "str"),
(True, "bool"),
(np.bool_(True), "bool"),
([0, 1, 2], "int"),
(["0", "1", "2"], "str"),
((0, 1, 2), "int"),
(("0", "1", "2"), "str"),
(slice(None, None), None),
(slice(0, 2), "int"),
(np.array([0, 1, 2], dtype=np.int32), "int"),
(np.array([0, 1, 2], dtype=np.int64), "int"),
(np.array([0, 1, 2], dtype=np.uint8), "int"),
([True, False], "bool"),
((True, False), "bool"),
(np.array([True, False]), "bool"),
("col_0", "str"),
(["col_0", "col_1", "col_2"], "str"),
(("col_0", "col_1", "col_2"), "str"),
(slice("begin", "end"), "str"),
(np.array(["col_0", "col_1", "col_2"]), "str"),
(np.array(["col_0", "col_1", "col_2"], dtype=object), "str"),
],
)
def test_determine_key_type(key, dtype):
assert _determine_key_type(key) == dtype
def test_determine_key_type_error():
with pytest.raises(ValueError, match="No valid specification of the"):
_determine_key_type(1.0)
def test_determine_key_type_slice_error():
with pytest.raises(TypeError, match="Only array-like or scalar are"):
_determine_key_type(slice(0, 2, 1), accept_slice=False)
@skip_if_array_api_compat_not_configured
@pytest.mark.parametrize(
"array_namespace, device, dtype_name", yield_namespace_device_dtype_combinations()
)
def test_determine_key_type_array_api(array_namespace, device, dtype_name):
xp = _array_api_for_tests(array_namespace, device)
with sklearn.config_context(array_api_dispatch=True):
int_array_key = xp.asarray([1, 2, 3])
assert _determine_key_type(int_array_key) == "int"
bool_array_key = xp.asarray([True, False, True])
assert _determine_key_type(bool_array_key) == "bool"
try:
complex_array_key = xp.asarray([1 + 1j, 2 + 2j, 3 + 3j])
except TypeError:
# Complex numbers are not supported by all Array API libraries.
complex_array_key = None
if complex_array_key is not None:
with pytest.raises(ValueError, match="No valid specification of the"):
_determine_key_type(complex_array_key)
@pytest.mark.parametrize(
"array_type", ["list", "array", "sparse", "dataframe", "polars"]
)
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series", "slice"])
def test_safe_indexing_2d_container_axis_0(array_type, indices_type):
indices = [1, 2]
if indices_type == "slice" and isinstance(indices[1], int):
indices[1] += 1
array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(
subset, _convert_container([[4, 5, 6], [7, 8, 9]], array_type)
)
@pytest.mark.parametrize("array_type", ["list", "array", "series", "polars_series"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series", "slice"])
def test_safe_indexing_1d_container(array_type, indices_type):
indices = [1, 2]
if indices_type == "slice" and isinstance(indices[1], int):
indices[1] += 1
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(subset, _convert_container([2, 3], array_type))
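# A quick usage sketch (not part of the original tests): _safe_indexing
# applies the same row or column selection across lists, NumPy arrays, sparse
# matrices and dataframes, returning a container of the same family as the
# input.
def example_safe_indexing():
    X = np.arange(9).reshape(3, 3)
    rows = _safe_indexing(X, [1, 2], axis=0)     # rows 1 and 2
    cols = _safe_indexing(X, [0, 2], axis=1)     # columns 0 and 2
    head = _safe_indexing([10, 20, 30], [0, 1])  # plain lists work too
    return rows, cols, head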
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe", "polars"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series", "slice"])
@pytest.mark.parametrize("indices", [[1, 2], ["col_1", "col_2"]])
def test_safe_indexing_2d_container_axis_1(array_type, indices_type, indices):
# validation of the indices
# we make a copy because indices is mutable and shared between tests
indices_converted = copy(indices)
if indices_type == "slice" and isinstance(indices[1], int):
indices_converted[1] += 1
columns_name = ["col_0", "col_1", "col_2"]
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
indices_converted = _convert_container(indices_converted, indices_type)
if isinstance(indices[0], str) and array_type not in ("dataframe", "polars"):
err_msg = (
"Specifying the columns using strings is only supported for dataframes"
)
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices_converted, axis=1)
else:
subset = _safe_indexing(array, indices_converted, axis=1)
assert_allclose_dense_sparse(
subset, _convert_container([[2, 3], [5, 6], [8, 9]], array_type)
)
@pytest.mark.parametrize("array_read_only", [True, False])
@pytest.mark.parametrize("indices_read_only", [True, False])
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe", "polars"])
@pytest.mark.parametrize("indices_type", ["array", "series"])
@pytest.mark.parametrize(
"axis, expected_array", [(0, [[4, 5, 6], [7, 8, 9]]), (1, [[2, 3], [5, 6], [8, 9]])]
)
def test_safe_indexing_2d_read_only_axis_1(
array_read_only, indices_read_only, array_type, indices_type, axis, expected_array
):
array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
if array_read_only:
array.setflags(write=False)
array = _convert_container(array, array_type)
indices = np.array([1, 2])
if indices_read_only:
indices.setflags(write=False)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=axis)
assert_allclose_dense_sparse(subset, _convert_container(expected_array, array_type))
@pytest.mark.parametrize("array_type", ["list", "array", "series", "polars_series"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"])
def test_safe_indexing_1d_container_mask(array_type, indices_type):
indices = [False] + [True] * 2 + [False] * 6
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(subset, _convert_container([2, 3], array_type))
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe", "polars"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"])
@pytest.mark.parametrize(
"axis, expected_subset",
[(0, [[4, 5, 6], [7, 8, 9]]), (1, [[2, 3], [5, 6], [8, 9]])],
)
def test_safe_indexing_2d_mask(array_type, indices_type, axis, expected_subset):
columns_name = ["col_0", "col_1", "col_2"]
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
indices = [False, True, True]
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=axis)
assert_allclose_dense_sparse(
subset, _convert_container(expected_subset, array_type)
)
@pytest.mark.parametrize(
"array_type, expected_output_type",
[
("list", "list"),
("array", "array"),
("sparse", "sparse"),
("dataframe", "series"),
("polars", "polars_series"),
],
)
def test_safe_indexing_2d_scalar_axis_0(array_type, expected_output_type):
array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
indices = 2
subset = _safe_indexing(array, indices, axis=0)
expected_array = _convert_container([7, 8, 9], expected_output_type)
assert_allclose_dense_sparse(subset, expected_array)
@pytest.mark.parametrize("array_type", ["list", "array", "series", "polars_series"])
def test_safe_indexing_1d_scalar(array_type):
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = 2
subset = _safe_indexing(array, indices, axis=0)
assert subset == 3
@pytest.mark.parametrize(
"array_type, expected_output_type",
[
("array", "array"),
("sparse", "sparse"),
("dataframe", "series"),
("polars", "polars_series"),
],
)
@pytest.mark.parametrize("indices", [2, "col_2"])
def test_safe_indexing_2d_scalar_axis_1(array_type, expected_output_type, indices):
columns_name = ["col_0", "col_1", "col_2"]
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
if isinstance(indices, str) and array_type not in ("dataframe", "polars"):
err_msg = (
"Specifying the columns using strings is only supported for dataframes"
)
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices, axis=1)
else:
subset = _safe_indexing(array, indices, axis=1)
expected_output = [3, 6, 9]
if expected_output_type == "sparse":
            # sparse matrices keep the 2D shape
expected_output = [[3], [6], [9]]
expected_array = _convert_container(expected_output, expected_output_type)
assert_allclose_dense_sparse(subset, expected_array)
@pytest.mark.parametrize("array_type", ["list", "array", "sparse"])
def test_safe_indexing_None_axis_0(array_type):
X = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
X_subset = _safe_indexing(X, None, axis=0)
assert_allclose_dense_sparse(X_subset, X)
def test_safe_indexing_pandas_no_matching_cols_error():
pd = pytest.importorskip("pandas")
err_msg = "No valid specification of the columns."
X = pd.DataFrame(X_toy)
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(X, [1.0], axis=1)
@pytest.mark.parametrize("axis", [None, 3])
def test_safe_indexing_error_axis(axis):
with pytest.raises(ValueError, match="'axis' should be either 0"):
_safe_indexing(X_toy, [0, 1], axis=axis)
@pytest.mark.parametrize("X_constructor", ["array", "series", "polars_series"])
def test_safe_indexing_1d_array_error(X_constructor):
# check that we are raising an error if the array-like passed is 1D and
# we try to index on the 2nd dimension
X = list(range(5))
if X_constructor == "array":
X_constructor = np.asarray(X)
elif X_constructor == "series":
pd = pytest.importorskip("pandas")
X_constructor = pd.Series(X)
elif X_constructor == "polars_series":
pl = pytest.importorskip("polars")
X_constructor = pl.Series(values=X)
err_msg = "'X' should be a 2D NumPy array, 2D sparse matrix or dataframe"
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(X_constructor, [0, 1], axis=1)
def test_safe_indexing_container_axis_0_unsupported_type():
indices = ["col_1", "col_2"]
array = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
err_msg = "String indexing is not supported with 'axis=0'"
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices, axis=0)
def test_safe_indexing_pandas_no_settingwithcopy_warning():
# Using safe_indexing with an array-like indexer gives a copy of the
# DataFrame -> ensure it doesn't raise a warning if modified
pd = pytest.importorskip("pandas")
pd_version = parse_version(pd.__version__)
pd_base_version = parse_version(pd_version.base_version)
if pd_base_version >= parse_version("3"):
raise SkipTest("SettingWithCopyWarning has been removed in pandas 3.0.0.dev")
X = pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]})
subset = _safe_indexing(X, [0, 1], axis=0)
if hasattr(pd.errors, "SettingWithCopyWarning"):
SettingWithCopyWarning = pd.errors.SettingWithCopyWarning
else:
# backward compatibility for pandas < 1.5
SettingWithCopyWarning = pd.core.common.SettingWithCopyWarning
with warnings.catch_warnings():
warnings.simplefilter("error", SettingWithCopyWarning)
subset.iloc[0, 0] = 10
# The original dataframe is unaffected by the assignment on the subset:
assert X.iloc[0, 0] == 1
@pytest.mark.parametrize("indices", [0, [0, 1], slice(0, 2), np.array([0, 1])])
def test_safe_indexing_list_axis_1_unsupported(indices):
"""Check that we raise a ValueError when axis=1 with input as list."""
X = [[1, 2], [4, 5], [7, 8]]
err_msg = "axis=1 is not supported for lists"
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(X, indices, axis=1)
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"])
def test_safe_assign(array_type):
"""Check that `_safe_assign` works as expected."""
rng = np.random.RandomState(0)
X_array = rng.randn(10, 5)
row_indexer = [1, 2]
values = rng.randn(len(row_indexer), X_array.shape[1])
X = _convert_container(X_array, array_type)
_safe_assign(X, values, row_indexer=row_indexer)
assigned_portion = _safe_indexing(X, row_indexer, axis=0)
assert_allclose_dense_sparse(
assigned_portion, _convert_container(values, array_type)
)
column_indexer = [1, 2]
values = rng.randn(X_array.shape[0], len(column_indexer))
X = _convert_container(X_array, array_type)
_safe_assign(X, values, column_indexer=column_indexer)
assigned_portion = _safe_indexing(X, column_indexer, axis=1)
assert_allclose_dense_sparse(
assigned_portion, _convert_container(values, array_type)
)
row_indexer, column_indexer = None, None
values = rng.randn(*X.shape)
X = _convert_container(X_array, array_type)
_safe_assign(X, values, column_indexer=column_indexer)
assert_allclose_dense_sparse(X, _convert_container(values, array_type))
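# A usage sketch (not part of the original tests): _safe_assign is the
# write-side counterpart of _safe_indexing, assigning a block of values into
# the selected rows and/or columns of an array-like in place.
def example_safe_assign():
    X = np.zeros((3, 3))
    _safe_assign(X, np.ones((2, 3)), row_indexer=[0, 2])
    return X  # rows 0 and 2 are now all ones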
@pytest.mark.parametrize(
"key, err_msg",
[
(10, r"all features must be in \[0, 2\]"),
("whatever", "A given column is not a column of the dataframe"),
(object(), "No valid specification of the columns"),
],
)
def test_get_column_indices_error(key, err_msg):
pd = pytest.importorskip("pandas")
X_df = pd.DataFrame(X_toy, columns=["col_0", "col_1", "col_2"])
with pytest.raises(ValueError, match=err_msg):
_get_column_indices(X_df, key)
@pytest.mark.parametrize(
"key", [["col1"], ["col2"], ["col1", "col2"], ["col1", "col3"], ["col2", "col3"]]
)
def test_get_column_indices_pandas_nonunique_columns_error(key):
pd = pytest.importorskip("pandas")
toy = np.zeros((1, 5), dtype=int)
columns = ["col1", "col1", "col2", "col3", "col2"]
X = pd.DataFrame(toy, columns=columns)
err_msg = "Selected columns, {}, are not unique in dataframe".format(key)
with pytest.raises(ValueError) as exc_info:
_get_column_indices(X, key)
assert str(exc_info.value) == err_msg
def test_get_column_indices_interchange():
"""Check _get_column_indices for edge cases with the interchange"""
pd = pytest.importorskip("pandas", minversion="1.5")
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
# Hide the fact that this is a pandas dataframe to trigger the dataframe protocol
# code path.
class MockDataFrame:
def __init__(self, df):
self._df = df
def __getattr__(self, name):
return getattr(self._df, name)
df_mocked = MockDataFrame(df)
key_results = [
(slice(1, None), [1, 2]),
(slice(None, 2), [0, 1]),
(slice(1, 2), [1]),
(["b", "c"], [1, 2]),
(slice("a", "b"), [0, 1]),
(slice("a", None), [0, 1, 2]),
(slice(None, "a"), [0]),
(["c", "a"], [2, 0]),
([], []),
]
for key, result in key_results:
assert _get_column_indices(df_mocked, key) == result
msg = "A given column is not a column of the dataframe"
with pytest.raises(ValueError, match=msg):
_get_column_indices(df_mocked, ["not_a_column"])
msg = "key.step must be 1 or None"
with pytest.raises(NotImplementedError, match=msg):
_get_column_indices(df_mocked, slice("a", None, 2))
def test_resample():
# Border case not worth mentioning in doctests
assert resample() is None
# Check that invalid arguments yield ValueError
with pytest.raises(ValueError):
resample([0], [0, 1])
with pytest.raises(ValueError):
resample([0, 1], [0, 1], replace=False, n_samples=3)
# Issue:6581, n_samples can be more when replace is True (default).
assert len(resample([1, 2], n_samples=5)) == 5
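# A usage sketch (not part of the original tests): resample draws a bootstrap
# sample (with replacement by default) using the same row selection for every
# array passed in.
def example_resample():
    X = np.arange(10).reshape(5, 2)
    y = np.array([0, 0, 1, 1, 1])
    X_bs, y_bs = resample(X, y, n_samples=4, random_state=0)
    return X_bs.shape, y_bs.shape  # (4, 2), (4,)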
def test_resample_stratified():
# Make sure resample can stratify
rng = np.random.RandomState(0)
n_samples = 100
p = 0.9
X = rng.normal(size=(n_samples, 1))
y = rng.binomial(1, p, size=n_samples)
_, y_not_stratified = resample(X, y, n_samples=10, random_state=0, stratify=None)
assert np.all(y_not_stratified == 1)
_, y_stratified = resample(X, y, n_samples=10, random_state=0, stratify=y)
assert not np.all(y_stratified == 1)
    assert np.sum(y_stratified) == 9  # nine 1s and one 0
def test_resample_stratified_replace():
# Make sure stratified resampling supports the replace parameter
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 1))
y = rng.randint(0, 2, size=n_samples)
X_replace, _ = resample(
X, y, replace=True, n_samples=50, random_state=rng, stratify=y
)
X_no_replace, _ = resample(
X, y, replace=False, n_samples=50, random_state=rng, stratify=y
)
assert np.unique(X_replace).shape[0] < 50
assert np.unique(X_no_replace).shape[0] == 50
# make sure n_samples can be greater than X.shape[0] if we sample with
# replacement
X_replace, _ = resample(
X, y, replace=True, n_samples=1000, random_state=rng, stratify=y
)
assert X_replace.shape[0] == 1000
assert np.unique(X_replace).shape[0] == 100
def test_resample_stratify_2dy():
# Make sure y can be 2d when stratifying
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 1))
y = rng.randint(0, 2, size=(n_samples, 2))
X, y = resample(X, y, n_samples=50, random_state=rng, stratify=y)
assert y.ndim == 2
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_resample_stratify_sparse_error(csr_container):
    # the stratify argument must be an ndarray, not a sparse matrix
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 2))
y = rng.randint(0, 2, size=n_samples)
stratify = csr_container(y.reshape(-1, 1))
with pytest.raises(TypeError, match="Sparse data was passed"):
X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify)
def test_shuffle_on_ndim_equals_three():
def to_tuple(A): # to make the inner arrays hashable
return tuple(tuple(tuple(C) for C in B) for B in A)
A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # A.shape = (2,2,2)
S = set(to_tuple(A))
shuffle(A) # shouldn't raise a ValueError for dim = 3
assert set(to_tuple(A)) == S
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_shuffle_dont_convert_to_array(csc_container):
    # Check that shuffle does not try to convert inputs to numpy arrays with
    # float dtypes and lets any indexable data structure pass through.
a = ["a", "b", "c"]
b = np.array(["a", "b", "c"], dtype=object)
c = [1, 2, 3]
d = MockDataFrame(np.array([["a", 0], ["b", 1], ["c", 2]], dtype=object))
e = csc_container(np.arange(6).reshape(3, 2))
a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0)
assert a_s == ["c", "b", "a"]
assert type(a_s) == list # noqa: E721
assert_array_equal(b_s, ["c", "b", "a"])
assert b_s.dtype == object
assert c_s == [3, 2, 1]
assert type(c_s) == list # noqa: E721
assert_array_equal(d_s, np.array([["c", 2], ["b", 1], ["a", 0]], dtype=object))
assert type(d_s) == MockDataFrame # noqa: E721
assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]]))
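# A short sketch (not part of the original tests): shuffle applies one
# consistent permutation to all inputs and, as asserted above, preserves each
# container's type instead of coercing everything to an ndarray.
def example_shuffle():
    letters, numbers = shuffle(["a", "b", "c"], [1, 2, 3], random_state=0)
    return letters, numbers  # same permutation applied to both lists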


@@ -0,0 +1,19 @@
import pytest
from sklearn.utils._mask import safe_mask
from sklearn.utils.fixes import CSR_CONTAINERS
from sklearn.utils.validation import check_random_state
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_safe_mask(csr_container):
random_state = check_random_state(0)
X = random_state.rand(5, 4)
X_csr = csr_container(X)
mask = [False, False, True, True, True]
mask = safe_mask(X, mask)
assert X[mask].shape[0] == 3
mask = safe_mask(X_csr, mask)
assert X_csr[mask].shape[0] == 3
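# A usage sketch (not part of the original tests): safe_mask returns the
# boolean mask unchanged for dense arrays but converts it to integer indices
# for sparse inputs, which cannot always be indexed by a boolean mask.
def example_safe_mask(csr_container):
    import numpy as np  # local import keeps the sketch self-contained

    mask = np.array([False, True, False, True, True])
    X = check_random_state(0).rand(5, 4)
    return safe_mask(X, mask), safe_mask(csr_container(X), mask)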


@@ -0,0 +1,63 @@
import pickle
import pytest
from sklearn.utils.metaestimators import available_if
class AvailableParameterEstimator:
"""This estimator's `available` parameter toggles the presence of a method"""
def __init__(self, available=True, return_value=1):
self.available = available
self.return_value = return_value
@available_if(lambda est: est.available)
def available_func(self):
"""This is a mock available_if function"""
return self.return_value
def test_available_if_docstring():
assert "This is a mock available_if function" in str(
AvailableParameterEstimator.__dict__["available_func"].__doc__
)
assert "This is a mock available_if function" in str(
AvailableParameterEstimator.available_func.__doc__
)
assert "This is a mock available_if function" in str(
AvailableParameterEstimator().available_func.__doc__
)
def test_available_if():
assert hasattr(AvailableParameterEstimator(), "available_func")
assert not hasattr(AvailableParameterEstimator(available=False), "available_func")
def test_available_if_unbound_method():
# This is a non regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/20614
# to make sure that decorated functions can be used as an unbound method,
# for instance when monkeypatching.
est = AvailableParameterEstimator()
AvailableParameterEstimator.available_func(est)
est = AvailableParameterEstimator(available=False)
with pytest.raises(
AttributeError,
match="This 'AvailableParameterEstimator' has no attribute 'available_func'",
):
AvailableParameterEstimator.available_func(est)
def test_available_if_methods_can_be_pickled():
"""Check that available_if methods can be pickled.
Non-regression test for #21344.
"""
return_value = 10
est = AvailableParameterEstimator(available=True, return_value=return_value)
pickled_bytes = pickle.dumps(est.available_func)
unpickled_func = pickle.loads(pickled_bytes)
assert unpickled_func() == return_value
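# A usage sketch (not part of the original tests): available_if is how
# meta-estimators expose a method only when the wrapped estimator supports it,
# so plain hasattr() checks can be used for capability detection.
class DelegatingWrapper:
    def __init__(self, inner):
        self.inner = inner

    @available_if(lambda self: hasattr(self.inner, "predict_proba"))
    def predict_proba(self, X):
        return self.inner.predict_proba(X)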


@@ -0,0 +1,27 @@
import numpy as np
import pytest
from sklearn.utils._missing import is_scalar_nan
@pytest.mark.parametrize(
"value, result",
[
(float("nan"), True),
(np.nan, True),
(float(np.nan), True),
(np.float32(np.nan), True),
(np.float64(np.nan), True),
(0, False),
(0.0, False),
(None, False),
("", False),
("nan", False),
([np.nan], False),
(9867966753463435747313673, False), # Python int that overflows with C type
],
)
def test_is_scalar_nan(value, result):
assert is_scalar_nan(value) is result
# make sure that we are returning a Python bool
assert isinstance(is_scalar_nan(value), bool)
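# A recap sketch (not part of the original tests): the helper behaves roughly
# like the predicate below, a non-integral real number whose value is NaN,
# which is why None, strings, large Python ints and [np.nan] all map to False.
def example_is_scalar_nan(value):
    import math
    import numbers

    return (
        not isinstance(value, numbers.Integral)
        and isinstance(value, numbers.Real)
        and math.isnan(value)
    )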


@@ -0,0 +1,205 @@
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from scipy import sparse
from sklearn.datasets import load_iris
from sklearn.utils import _safe_indexing, check_array
from sklearn.utils._mocking import (
CheckingClassifier,
_MockEstimatorOnOffPrediction,
)
from sklearn.utils._testing import _convert_container
from sklearn.utils.fixes import CSR_CONTAINERS
@pytest.fixture
def iris():
return load_iris(return_X_y=True)
def _success(x):
return True
def _fail(x):
return False
@pytest.mark.parametrize(
"kwargs",
[
{},
{"check_X": _success},
{"check_y": _success},
{"check_X": _success, "check_y": _success},
],
)
def test_check_on_fit_success(iris, kwargs):
X, y = iris
CheckingClassifier(**kwargs).fit(X, y)
@pytest.mark.parametrize(
"kwargs",
[
{"check_X": _fail},
{"check_y": _fail},
{"check_X": _success, "check_y": _fail},
{"check_X": _fail, "check_y": _success},
{"check_X": _fail, "check_y": _fail},
],
)
def test_check_on_fit_fail(iris, kwargs):
X, y = iris
clf = CheckingClassifier(**kwargs)
with pytest.raises(AssertionError):
clf.fit(X, y)
@pytest.mark.parametrize(
"pred_func", ["predict", "predict_proba", "decision_function", "score"]
)
def test_check_X_on_predict_success(iris, pred_func):
X, y = iris
clf = CheckingClassifier(check_X=_success).fit(X, y)
getattr(clf, pred_func)(X)
@pytest.mark.parametrize(
"pred_func", ["predict", "predict_proba", "decision_function", "score"]
)
def test_check_X_on_predict_fail(iris, pred_func):
X, y = iris
clf = CheckingClassifier(check_X=_success).fit(X, y)
clf.set_params(check_X=_fail)
with pytest.raises(AssertionError):
getattr(clf, pred_func)(X)
@pytest.mark.parametrize("input_type", ["list", "array", "sparse", "dataframe"])
def test_checking_classifier(iris, input_type):
# Check that the CheckingClassifier outputs what we expect
X, y = iris
X = _convert_container(X, input_type)
clf = CheckingClassifier()
clf.fit(X, y)
assert_array_equal(clf.classes_, np.unique(y))
assert len(clf.classes_) == 3
assert clf.n_features_in_ == 4
y_pred = clf.predict(X)
assert all(pred in clf.classes_ for pred in y_pred)
assert clf.score(X) == pytest.approx(0)
clf.set_params(foo_param=10)
assert clf.fit(X, y).score(X) == pytest.approx(1)
y_proba = clf.predict_proba(X)
assert y_proba.shape == (150, 3)
assert np.logical_and(y_proba >= 0, y_proba <= 1).all()
y_decision = clf.decision_function(X)
assert y_decision.shape == (150, 3)
# check the shape in case of binary classification
first_2_classes = np.logical_or(y == 0, y == 1)
X = _safe_indexing(X, first_2_classes)
y = _safe_indexing(y, first_2_classes)
clf.fit(X, y)
y_proba = clf.predict_proba(X)
assert y_proba.shape == (100, 2)
assert np.logical_and(y_proba >= 0, y_proba <= 1).all()
y_decision = clf.decision_function(X)
assert y_decision.shape == (100,)
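# A usage sketch (not part of the original tests): CheckingClassifier runs
# user-supplied callbacks on whatever data reaches fit/predict, which is how
# these tests observe what meta-estimators pass down to their sub-estimators.
def example_checking_classifier():
    clf = CheckingClassifier(check_X=lambda X: len(X) > 0)
    clf.fit([[0], [1]], [0, 1])  # check_X returned True, so fit succeeds
    return clf.predict([[0]])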
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_checking_classifier_with_params(iris, csr_container):
X, y = iris
X_sparse = csr_container(X)
clf = CheckingClassifier(check_X=sparse.issparse)
with pytest.raises(AssertionError):
clf.fit(X, y)
clf.fit(X_sparse, y)
clf = CheckingClassifier(
check_X=check_array, check_X_params={"accept_sparse": False}
)
clf.fit(X, y)
with pytest.raises(TypeError, match="Sparse data was passed"):
clf.fit(X_sparse, y)
def test_checking_classifier_fit_params(iris):
# check the error raised when the number of samples is not the one expected
X, y = iris
clf = CheckingClassifier(expected_sample_weight=True)
sample_weight = np.ones(len(X) // 2)
msg = f"sample_weight.shape == ({len(X) // 2},), expected ({len(X)},)!"
with pytest.raises(ValueError) as exc:
clf.fit(X, y, sample_weight=sample_weight)
assert exc.value.args[0] == msg
def test_checking_classifier_missing_fit_params(iris):
X, y = iris
clf = CheckingClassifier(expected_sample_weight=True)
err_msg = "Expected sample_weight to be passed"
with pytest.raises(AssertionError, match=err_msg):
clf.fit(X, y)
@pytest.mark.parametrize(
"methods_to_check",
[["predict"], ["predict", "predict_proba"]],
)
@pytest.mark.parametrize(
"predict_method", ["predict", "predict_proba", "decision_function", "score"]
)
def test_checking_classifier_methods_to_check(iris, methods_to_check, predict_method):
    # check that methods_to_check allows bypassing the checks
X, y = iris
clf = CheckingClassifier(
check_X=sparse.issparse,
methods_to_check=methods_to_check,
)
clf.fit(X, y)
if predict_method in methods_to_check:
with pytest.raises(AssertionError):
getattr(clf, predict_method)(X)
else:
getattr(clf, predict_method)(X)
@pytest.mark.parametrize(
"response_methods",
[
["predict"],
["predict", "predict_proba"],
["predict", "decision_function"],
["predict", "predict_proba", "decision_function"],
],
)
def test_mock_estimator_on_off_prediction(iris, response_methods):
X, y = iris
estimator = _MockEstimatorOnOffPrediction(response_methods=response_methods)
estimator.fit(X, y)
assert hasattr(estimator, "classes_")
assert_array_equal(estimator.classes_, np.unique(y))
possible_responses = ["predict", "predict_proba", "decision_function"]
for response in possible_responses:
if response in response_methods:
assert hasattr(estimator, response)
assert getattr(estimator, response)(X) == response
else:
assert not hasattr(estimator, response)


@@ -0,0 +1,613 @@
from itertools import product
import numpy as np
import pytest
from scipy.sparse import issparse
from sklearn import config_context, datasets
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC
from sklearn.utils._array_api import yield_namespace_device_dtype_combinations
from sklearn.utils._testing import (
_array_api_for_tests,
_convert_container,
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.estimator_checks import _NotAnArray
from sklearn.utils.fixes import (
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
from sklearn.utils.metaestimators import _safe_split
from sklearn.utils.multiclass import (
_ovr_decision_function,
check_classification_targets,
class_distribution,
is_multilabel,
type_of_target,
unique_labels,
)
multilabel_explicit_zero = np.array([[0, 1], [1, 0]])
multilabel_explicit_zero[:, 0] = 0
def _generate_sparse(
data,
sparse_containers=tuple(
COO_CONTAINERS
+ CSC_CONTAINERS
+ CSR_CONTAINERS
+ DOK_CONTAINERS
+ LIL_CONTAINERS
),
dtypes=(bool, int, np.int8, np.uint8, float, np.float32),
):
return [
sparse_container(data, dtype=dtype)
for sparse_container in sparse_containers
for dtype in dtypes
]
EXAMPLES = {
"multilabel-indicator": [
# valid when the data is formatted as sparse or dense, identified
# by CSR format when the testing takes place
*_generate_sparse(
np.random.RandomState(42).randint(2, size=(10, 10)),
sparse_containers=CSR_CONTAINERS,
dtypes=(int,),
),
[[0, 1], [1, 0]],
[[0, 1]],
*_generate_sparse(
multilabel_explicit_zero, sparse_containers=CSC_CONTAINERS, dtypes=(int,)
),
*_generate_sparse([[0, 1], [1, 0]]),
*_generate_sparse([[0, 0], [0, 0]]),
*_generate_sparse([[0, 1]]),
# Only valid when data is dense
[[-1, 1], [1, -1]],
np.array([[-1, 1], [1, -1]]),
np.array([[-3, 3], [3, -3]]),
_NotAnArray(np.array([[-3, 3], [3, -3]])),
],
"multiclass": [
[1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
np.array([1, 0, 2]),
np.array([1, 0, 2], dtype=np.int8),
np.array([1, 0, 2], dtype=np.uint8),
np.array([1, 0, 2], dtype=float),
np.array([1, 0, 2], dtype=np.float32),
np.array([[1], [0], [2]]),
_NotAnArray(np.array([1, 0, 2])),
[0, 1, 2],
["a", "b", "c"],
np.array(["a", "b", "c"]),
np.array(["a", "b", "c"], dtype=object),
np.array(["a", "b", "c"], dtype=object),
],
"multiclass-multioutput": [
[[1, 0, 2, 2], [1, 4, 2, 4]],
[["a", "b"], ["c", "d"]],
np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=float),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
*_generate_sparse(
[[1, 0, 2, 2], [1, 4, 2, 4]],
sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,
dtypes=(int, np.int8, np.uint8, float, np.float32),
),
np.array([["a", "b"], ["c", "d"]]),
np.array([["a", "b"], ["c", "d"]]),
np.array([["a", "b"], ["c", "d"]], dtype=object),
np.array([[1, 0, 2]]),
_NotAnArray(np.array([[1, 0, 2]])),
],
"binary": [
[0, 1],
[1, 1],
[],
[0],
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=bool),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=float),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),
np.array([[0], [1]]),
_NotAnArray(np.array([[0], [1]])),
[1, -1],
[3, 5],
["a"],
["a", "b"],
["abc", "def"],
np.array(["abc", "def"]),
["a", "b"],
np.array(["abc", "def"], dtype=object),
],
"continuous": [
[1e-5],
[0, 0.5],
np.array([[0], [0.5]]),
np.array([[0], [0.5]], dtype=np.float32),
],
"continuous-multioutput": [
np.array([[0, 0.5], [0.5, 0]]),
np.array([[0, 0.5], [0.5, 0]], dtype=np.float32),
np.array([[0, 0.5]]),
*_generate_sparse(
[[0, 0.5], [0.5, 0]],
sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,
dtypes=(float, np.float32),
),
*_generate_sparse(
[[0, 0.5]],
sparse_containers=CSC_CONTAINERS + CSR_CONTAINERS,
dtypes=(float, np.float32),
),
],
"unknown": [
[[]],
np.array([[]], dtype=object),
[()],
# sequence of sequences that weren't supported even before deprecation
np.array([np.array([]), np.array([1, 2, 3])], dtype=object),
[np.array([]), np.array([1, 2, 3])],
[{1, 2, 3}, {1, 2}],
[frozenset([1, 2, 3]), frozenset([1, 2])],
# and also confusable as sequences of sequences
[{0: "a", 1: "b"}, {0: "a"}],
# ndim 0
np.array(0),
# empty second dimension
np.array([[], []]),
# 3d
np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),
],
}
ARRAY_API_EXAMPLES = {
"multilabel-indicator": [
np.random.RandomState(42).randint(2, size=(10, 10)),
[[0, 1], [1, 0]],
[[0, 1]],
multilabel_explicit_zero,
[[0, 0], [0, 0]],
[[-1, 1], [1, -1]],
np.array([[-1, 1], [1, -1]]),
np.array([[-3, 3], [3, -3]]),
_NotAnArray(np.array([[-3, 3], [3, -3]])),
],
"multiclass": [
[1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
np.array([1, 0, 2]),
np.array([1, 0, 2], dtype=np.int8),
np.array([1, 0, 2], dtype=np.uint8),
np.array([1, 0, 2], dtype=float),
np.array([1, 0, 2], dtype=np.float32),
np.array([[1], [0], [2]]),
_NotAnArray(np.array([1, 0, 2])),
[0, 1, 2],
],
"multiclass-multioutput": [
[[1, 0, 2, 2], [1, 4, 2, 4]],
np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=float),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
np.array([[1, 0, 2]]),
_NotAnArray(np.array([[1, 0, 2]])),
],
"binary": [
[0, 1],
[1, 1],
[],
[0],
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=bool),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=float),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),
np.array([[0], [1]]),
_NotAnArray(np.array([[0], [1]])),
[1, -1],
[3, 5],
],
"continuous": [
[1e-5],
[0, 0.5],
np.array([[0], [0.5]]),
np.array([[0], [0.5]], dtype=np.float32),
],
"continuous-multioutput": [
np.array([[0, 0.5], [0.5, 0]]),
np.array([[0, 0.5], [0.5, 0]], dtype=np.float32),
np.array([[0, 0.5]]),
],
"unknown": [
[[]],
[()],
np.array(0),
np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),
],
}
NON_ARRAY_LIKE_EXAMPLES = [
{1, 2, 3},
{0: "a", 1: "b"},
{0: [5], 1: [5]},
"abc",
frozenset([1, 2, 3]),
None,
]
MULTILABEL_SEQUENCES = [
[[1], [2], [0, 1]],
    [(), (2,), (0, 1)],
np.array([[], [1, 2]], dtype="object"),
_NotAnArray(np.array([[], [1, 2]], dtype="object")),
]
def test_unique_labels():
# Empty iterable
with pytest.raises(ValueError):
unique_labels()
# Multiclass problem
assert_array_equal(unique_labels(range(10)), np.arange(10))
assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))
# Multilabel indicator
assert_array_equal(
unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])), np.arange(3)
)
assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])), np.arange(3))
# Several arrays passed
assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5))
assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)), np.arange(3))
# Border line case with binary indicator matrix
with pytest.raises(ValueError):
unique_labels([4, 0, 2], np.ones((5, 5)))
with pytest.raises(ValueError):
unique_labels(np.ones((5, 4)), np.ones((5, 5)))
assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))), np.arange(5))
def test_unique_labels_non_specific():
# Test unique_labels with a variety of collected examples
    # Smoke test for all supported formats
for format in ["binary", "multiclass", "multilabel-indicator"]:
for y in EXAMPLES[format]:
unique_labels(y)
    # We don't support those formats at the moment
for example in NON_ARRAY_LIKE_EXAMPLES:
with pytest.raises(ValueError):
unique_labels(example)
for y_type in [
"unknown",
"continuous",
"continuous-multioutput",
"multiclass-multioutput",
]:
for example in EXAMPLES[y_type]:
with pytest.raises(ValueError):
unique_labels(example)
def test_unique_labels_mixed_types():
# Mix with binary or multiclass and multilabel
mix_clf_format = product(
EXAMPLES["multilabel-indicator"], EXAMPLES["multiclass"] + EXAMPLES["binary"]
)
for y_multilabel, y_multiclass in mix_clf_format:
with pytest.raises(ValueError):
unique_labels(y_multiclass, y_multilabel)
with pytest.raises(ValueError):
unique_labels(y_multilabel, y_multiclass)
with pytest.raises(ValueError):
unique_labels([[1, 2]], [["a", "d"]])
with pytest.raises(ValueError):
unique_labels(["1", 2])
with pytest.raises(ValueError):
unique_labels([["1", 2], [1, 3]])
with pytest.raises(ValueError):
unique_labels([["1", "2"], [2, 3]])
def test_is_multilabel():
for group, group_examples in EXAMPLES.items():
dense_exp = group == "multilabel-indicator"
for example in group_examples:
# Only mark explicitly defined sparse examples as valid sparse
# multilabel-indicators
sparse_exp = dense_exp and issparse(example)
if issparse(example) or (
hasattr(example, "__array__")
and np.asarray(example).ndim == 2
and np.asarray(example).dtype.kind in "biuf"
and np.asarray(example).shape[1] > 0
):
examples_sparse = [
sparse_container(example)
for sparse_container in (
COO_CONTAINERS
+ CSC_CONTAINERS
+ CSR_CONTAINERS
+ DOK_CONTAINERS
+ LIL_CONTAINERS
)
]
for exmpl_sparse in examples_sparse:
assert sparse_exp == is_multilabel(
exmpl_sparse
), f"is_multilabel({exmpl_sparse!r}) should be {sparse_exp}"
# Densify sparse examples before testing
if issparse(example):
example = example.toarray()
assert dense_exp == is_multilabel(
example
), f"is_multilabel({example!r}) should be {dense_exp}"
@pytest.mark.parametrize(
"array_namespace, device, dtype_name",
yield_namespace_device_dtype_combinations(),
)
def test_is_multilabel_array_api_compliance(array_namespace, device, dtype_name):
xp = _array_api_for_tests(array_namespace, device)
for group, group_examples in ARRAY_API_EXAMPLES.items():
dense_exp = group == "multilabel-indicator"
for example in group_examples:
if np.asarray(example).dtype.kind == "f":
example = np.asarray(example, dtype=dtype_name)
else:
example = np.asarray(example)
example = xp.asarray(example, device=device)
with config_context(array_api_dispatch=True):
assert dense_exp == is_multilabel(
example
), f"is_multilabel({example!r}) should be {dense_exp}"
def test_check_classification_targets():
for y_type in EXAMPLES.keys():
if y_type in ["unknown", "continuous", "continuous-multioutput"]:
for example in EXAMPLES[y_type]:
msg = "Unknown label type: "
with pytest.raises(ValueError, match=msg):
check_classification_targets(example)
else:
for example in EXAMPLES[y_type]:
check_classification_targets(example)
def test_type_of_target():
for group, group_examples in EXAMPLES.items():
for example in group_examples:
assert (
type_of_target(example) == group
), "type_of_target(%r) should be %r, got %r" % (
example,
group,
type_of_target(example),
)
for example in NON_ARRAY_LIKE_EXAMPLES:
msg_regex = r"Expected array-like \(array or non-string sequence\).*"
with pytest.raises(ValueError, match=msg_regex):
type_of_target(example)
for example in MULTILABEL_SEQUENCES:
msg = (
"You appear to be using a legacy multi-label data "
"representation. Sequence of sequences are no longer supported;"
" use a binary array or sparse matrix instead."
)
with pytest.raises(ValueError, match=msg):
type_of_target(example)
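def test_type_of_target_usage_sketch():
    # Editor's illustrative sketch (assumes this file's module-level imports):
    # the most common target kinds and the strings type_of_target maps them to.
    assert type_of_target([0, 1, 1, 0]) == "binary"
    assert type_of_target([0, 1, 2]) == "multiclass"
    assert type_of_target([0.5, 1.5]) == "continuous"
    assert type_of_target(np.array([[1, 0], [0, 1]])) == "multilabel-indicator"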
def test_type_of_target_pandas_sparse():
pd = pytest.importorskip("pandas")
y = pd.arrays.SparseArray([1, np.nan, np.nan, 1, np.nan])
msg = "y cannot be class 'SparseSeries' or 'SparseArray'"
with pytest.raises(ValueError, match=msg):
type_of_target(y)
def test_type_of_target_pandas_nullable():
"""Check that type_of_target works with pandas nullable dtypes."""
pd = pytest.importorskip("pandas")
for dtype in ["Int32", "Float32"]:
y_true = pd.Series([1, 0, 2, 3, 4], dtype=dtype)
assert type_of_target(y_true) == "multiclass"
y_true = pd.Series([1, 0, 1, 0], dtype=dtype)
assert type_of_target(y_true) == "binary"
y_true = pd.DataFrame([[1.4, 3.1], [3.1, 1.4]], dtype="Float32")
assert type_of_target(y_true) == "continuous-multioutput"
y_true = pd.DataFrame([[0, 1], [1, 1]], dtype="Int32")
assert type_of_target(y_true) == "multilabel-indicator"
y_true = pd.DataFrame([[1, 2], [3, 1]], dtype="Int32")
assert type_of_target(y_true) == "multiclass-multioutput"
@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
def test_unique_labels_pandas_nullable(dtype):
"""Checks that unique_labels work with pandas nullable dtypes.
Non-regression test for gh-25634.
"""
pd = pytest.importorskip("pandas")
y_true = pd.Series([1, 0, 0, 1, 0, 1, 1, 0, 1], dtype=dtype)
y_predicted = pd.Series([0, 0, 1, 1, 0, 1, 1, 1, 1], dtype="int64")
labels = unique_labels(y_true, y_predicted)
assert_array_equal(labels, [0, 1])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_class_distribution(csc_container):
y = np.array(
[
[1, 0, 0, 1],
[2, 2, 0, 1],
[1, 3, 0, 1],
[4, 2, 0, 1],
[2, 0, 0, 1],
[1, 3, 0, 1],
]
)
# Define the sparse matrix with a mix of implicit and explicit zeros
data = np.array([1, 2, 1, 4, 2, 1, 0, 2, 3, 2, 3, 1, 1, 1, 1, 1, 1])
indices = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 4, 5])
indptr = np.array([0, 6, 11, 11, 17])
y_sp = csc_container((data, indices, indptr), shape=(6, 4))
classes, n_classes, class_prior = class_distribution(y)
classes_sp, n_classes_sp, class_prior_sp = class_distribution(y_sp)
classes_expected = [[1, 2, 4], [0, 2, 3], [0], [1]]
n_classes_expected = [3, 3, 1, 1]
class_prior_expected = [[3 / 6, 2 / 6, 1 / 6], [1 / 3, 1 / 3, 1 / 3], [1.0], [1.0]]
for k in range(y.shape[1]):
assert_array_almost_equal(classes[k], classes_expected[k])
assert_array_almost_equal(n_classes[k], n_classes_expected[k])
assert_array_almost_equal(class_prior[k], class_prior_expected[k])
assert_array_almost_equal(classes_sp[k], classes_expected[k])
assert_array_almost_equal(n_classes_sp[k], n_classes_expected[k])
assert_array_almost_equal(class_prior_sp[k], class_prior_expected[k])
# Test again with explicit sample weights
(classes, n_classes, class_prior) = class_distribution(
y, [1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
)
(classes_sp, n_classes_sp, class_prior_sp) = class_distribution(
        y_sp, [1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
)
class_prior_expected = [[4 / 9, 3 / 9, 2 / 9], [2 / 9, 4 / 9, 3 / 9], [1.0], [1.0]]
for k in range(y.shape[1]):
assert_array_almost_equal(classes[k], classes_expected[k])
assert_array_almost_equal(n_classes[k], n_classes_expected[k])
assert_array_almost_equal(class_prior[k], class_prior_expected[k])
assert_array_almost_equal(classes_sp[k], classes_expected[k])
assert_array_almost_equal(n_classes_sp[k], n_classes_expected[k])
assert_array_almost_equal(class_prior_sp[k], class_prior_expected[k])
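def test_class_distribution_worked_example_sketch():
    # Editor's worked example for the weighted case above: in column 0,
    # y[:, 0] == [1, 2, 1, 4, 2, 1] with sample weights [1, 2, 1, 2, 1, 2],
    # class 1 accumulates weight 1 + 1 + 2 = 4, class 2 gets 2 + 1 = 3 and
    # class 4 gets 2, hence the expected prior [4/9, 3/9, 2/9].
    y_col = np.array([1, 2, 1, 4, 2, 1])
    weights = np.array([1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
    prior = np.array([weights[y_col == c].sum() for c in (1, 2, 4)])
    prior /= weights.sum()
    assert_array_almost_equal(prior, [4 / 9, 3 / 9, 2 / 9])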
def test_safe_split_with_precomputed_kernel():
clf = SVC()
clfp = SVC(kernel="precomputed")
iris = datasets.load_iris()
X, y = iris.data, iris.target
K = np.dot(X, X.T)
cv = ShuffleSplit(test_size=0.25, random_state=0)
train, test = list(cv.split(X))[0]
X_train, y_train = _safe_split(clf, X, y, train)
K_train, y_train2 = _safe_split(clfp, K, y, train)
assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
assert_array_almost_equal(y_train, y_train2)
X_test, y_test = _safe_split(clf, X, y, test, train)
K_test, y_test2 = _safe_split(clfp, K, y, test, train)
assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
assert_array_almost_equal(y_test, y_test2)
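def test_safe_split_precomputed_kernel_sketch():
    # Editor's illustrative sketch: with kernel="precomputed", _safe_split
    # slices the Gram matrix on both axes (rows = evaluation indices,
    # columns = training indices), so the result should equal
    # K[np.ix_(indices, train_indices)].
    X = np.arange(12, dtype=float).reshape(6, 2)
    K = X @ X.T
    train, test = np.array([0, 1, 2, 3]), np.array([4, 5])
    K_test, _ = _safe_split(SVC(kernel="precomputed"), K, None, test, train)
    assert_array_almost_equal(K_test, K[np.ix_(test, train)])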
def test_ovr_decision_function():
# test properties for ovr decision function
predictions = np.array([[0, 1, 1], [0, 1, 0], [0, 1, 1], [0, 1, 1]])
confidences = np.array(
[[-1e16, 0, -1e16], [1.0, 2.0, -3.0], [-5.0, 2.0, 5.0], [-0.5, 0.2, 0.5]]
)
n_classes = 3
dec_values = _ovr_decision_function(predictions, confidences, n_classes)
    # check that the decision values are within 0.5 of the votes
votes = np.array([[1, 0, 2], [1, 1, 1], [1, 0, 2], [1, 0, 2]])
assert_allclose(votes, dec_values, atol=0.5)
    # check that the predictions are what we expect:
    # the highest vote, or the highest confidence in case of a tie.
    # For the second sample there is a tie (it should be won by class 1).
expected_prediction = np.array([2, 1, 2, 2])
assert_array_equal(np.argmax(dec_values, axis=1), expected_prediction)
    # the third and fourth samples have the same votes, but the third sample
    # has higher confidence; this should be reflected in the decision values
assert dec_values[2, 2] > dec_values[3, 2]
# assert subset invariance.
dec_values_one = [
_ovr_decision_function(
np.array([predictions[i]]), np.array([confidences[i]]), n_classes
)[0]
for i in range(4)
]
assert_allclose(dec_values, dec_values_one, atol=1e-6)
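def test_ovr_decision_function_votes_sketch():
    # Editor's worked sketch of the vote bookkeeping behind the test above:
    # with 3 classes there are 3 pairwise contests in the order (0 vs 1),
    # (0 vs 2), (1 vs 2); each column of `predictions` records the winner
    # (0 -> first class of the pair, 1 -> second), and summing wins per class
    # recovers the `votes` array the decision values are compared against.
    predictions = np.array([[0, 1, 1], [0, 1, 0], [0, 1, 1], [0, 1, 1]])
    n_classes = 3
    votes = np.zeros((predictions.shape[0], n_classes))
    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            votes[predictions[:, k] == 0, i] += 1
            votes[predictions[:, k] == 1, j] += 1
            k += 1
    assert_array_equal(votes, [[1, 0, 2], [1, 1, 1], [1, 0, 2], [1, 0, 2]])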
# TODO(1.7): Change to ValueError when byte labels is deprecated.
@pytest.mark.parametrize("input_type", ["list", "array"])
def test_labels_in_bytes_format(input_type):
# check that we raise an error with bytes encoded labels
# non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/16980
target = _convert_container([b"a", b"b"], input_type)
err_msg = (
"Support for labels represented as bytes is deprecated in v1.5 and will"
" error in v1.7. Convert the labels to a string or integer format."
)
with pytest.warns(FutureWarning, match=err_msg):
type_of_target(target)

View File

@@ -0,0 +1,74 @@
# Author: Olivier Grisel <olivier.grisel@ensta.org>
#
# License: BSD 3 clause
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from sklearn.utils.murmurhash import murmurhash3_32
def test_mmhash3_int():
assert murmurhash3_32(3) == 847579505
assert murmurhash3_32(3, seed=0) == 847579505
assert murmurhash3_32(3, seed=42) == -1823081949
assert murmurhash3_32(3, positive=False) == 847579505
assert murmurhash3_32(3, seed=0, positive=False) == 847579505
assert murmurhash3_32(3, seed=42, positive=False) == -1823081949
assert murmurhash3_32(3, positive=True) == 847579505
assert murmurhash3_32(3, seed=0, positive=True) == 847579505
assert murmurhash3_32(3, seed=42, positive=True) == 2471885347
def test_mmhash3_int_array():
rng = np.random.RandomState(42)
keys = rng.randint(-5342534, 345345, size=3 * 2 * 1).astype(np.int32)
keys = keys.reshape((3, 2, 1))
for seed in [0, 42]:
expected = np.array([murmurhash3_32(int(k), seed) for k in keys.flat])
expected = expected.reshape(keys.shape)
assert_array_equal(murmurhash3_32(keys, seed), expected)
for seed in [0, 42]:
expected = np.array([murmurhash3_32(k, seed, positive=True) for k in keys.flat])
expected = expected.reshape(keys.shape)
assert_array_equal(murmurhash3_32(keys, seed, positive=True), expected)
def test_mmhash3_bytes():
assert murmurhash3_32(b"foo", 0) == -156908512
assert murmurhash3_32(b"foo", 42) == -1322301282
assert murmurhash3_32(b"foo", 0, positive=True) == 4138058784
assert murmurhash3_32(b"foo", 42, positive=True) == 2972666014
def test_mmhash3_unicode():
assert murmurhash3_32("foo", 0) == -156908512
assert murmurhash3_32("foo", 42) == -1322301282
assert murmurhash3_32("foo", 0, positive=True) == 4138058784
assert murmurhash3_32("foo", 42, positive=True) == 2972666014
def test_no_collision_on_byte_range():
    previous_hashes = set()
    for i in range(100):
        h = murmurhash3_32(" " * i, 0)
        assert h not in previous_hashes, "Found collision on growing whitespace strings"
        previous_hashes.add(h)
def test_uniform_distribution():
n_bins, n_samples = 10, 100000
bins = np.zeros(n_bins, dtype=np.float64)
for i in range(n_samples):
bins[murmurhash3_32(i, positive=True) % n_bins] += 1
means = bins / n_samples
expected = np.full(n_bins, 1.0 / n_bins)
assert_array_almost_equal(means / expected, np.ones(n_bins), 2)
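def test_murmurhash_bucketing_sketch():
    # Editor's illustrative sketch: the positive=True variant maps any key
    # into [0, 2**32), which makes hashing-trick style bucket assignment a
    # simple modulo (the bucket count 8 here is arbitrary).
    n_buckets = 8
    key = "feature=value"
    bucket = murmurhash3_32(key, seed=0, positive=True) % n_buckets
    assert 0 <= bucket < n_buckets
    # same key, same seed -> same bucket (the hash is deterministic)
    assert bucket == murmurhash3_32(key, seed=0, positive=True) % n_buckets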

View File

@@ -0,0 +1,158 @@
import numpy as np
import pytest
from scipy.optimize import fmin_ncg
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils.optimize import _newton_cg
def test_newton_cg():
# Test that newton_cg gives same result as scipy's fmin_ncg
rng = np.random.RandomState(0)
A = rng.normal(size=(10, 10))
x0 = np.ones(10)
def func(x):
Ax = A.dot(x)
return 0.5 * (Ax).dot(Ax)
def grad(x):
return A.T.dot(A.dot(x))
    def hess(x, p):
        # The Hessian of func is the constant matrix A.T @ A, so the
        # Hessian-vector product expected by fmin_ncg is A.T @ (A @ p).
        return A.T.dot(A.dot(p))
def grad_hess(x):
return grad(x), lambda x: A.T.dot(A.dot(x))
assert_array_almost_equal(
_newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess),
)
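def test_newton_cg_gradient_sanity_sketch():
    # Editor's sanity sketch for the toy objective above: f(x) = 0.5*||Ax||^2
    # has gradient A.T @ A @ x and constant Hessian A.T @ A, so a central
    # finite difference along a random direction p should match grad(x) . p.
    rng = np.random.RandomState(0)
    A = rng.normal(size=(10, 10))
    x = rng.normal(size=10)
    p = rng.normal(size=10)
    eps = 1e-6

    def func(v):
        Av = A.dot(v)
        return 0.5 * Av.dot(Av)

    fd = (func(x + eps * p) - func(x - eps * p)) / (2 * eps)
    assert_array_almost_equal(fd, A.T.dot(A.dot(x)).dot(p), decimal=4)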
@pytest.mark.parametrize("verbose", [0, 1, 2])
def test_newton_cg_verbosity(capsys, verbose):
"""Test the std output of verbose newton_cg solver."""
A = np.eye(2)
b = np.array([1, 2], dtype=float)
_newton_cg(
grad_hess=lambda x: (A @ x - b, lambda z: A @ z),
func=lambda x: 0.5 * x @ A @ x - b @ x,
grad=lambda x: A @ x - b,
x0=np.zeros(A.shape[0]),
verbose=verbose,
) # returns array([1., 2])
captured = capsys.readouterr()
if verbose == 0:
assert captured.out == ""
else:
msg = [
"Newton-CG iter = 1",
"Check Convergence",
"max |gradient|",
"Solver did converge at loss = ",
]
for m in msg:
assert m in captured.out
if verbose >= 2:
msg = [
"Inner CG solver iteration 1 stopped with",
"sum(|residuals|) <= tol",
"Line Search",
"try line search wolfe1",
"wolfe1 line search was successful",
]
for m in msg:
assert m in captured.out
if verbose >= 2:
        # Set up a badly scaled singular Hessian with a completely wrong
        # starting position. This should trigger the second line-search check.
A = np.array([[1.0, 2], [2, 4]]) * 1e30 # collinear columns
b = np.array([1.0, 2.0])
# Note that scipy.optimize._linesearch LineSearchWarning inherits from
# RuntimeWarning, but we do not want to import from non public APIs.
with pytest.warns(RuntimeWarning):
_newton_cg(
grad_hess=lambda x: (A @ x - b, lambda z: A @ z),
func=lambda x: 0.5 * x @ A @ x - b @ x,
grad=lambda x: A @ x - b,
x0=np.array([-2.0, 1]), # null space of hessian
verbose=verbose,
)
captured = capsys.readouterr()
msg = [
"wolfe1 line search was not successful",
"check loss |improvement| <= eps * |loss_old|:",
"check sum(|gradient|) < sum(|gradient_old|):",
"last resort: try line search wolfe2",
]
for m in msg:
assert m in captured.out
        # Set up a badly conditioned Hessian that leads to tiny curvature.
        # A.T @ A has singular values array([1.00000400e+01, 1.00008192e-11])
A = np.array([[1.0, 2], [1, 2 + 1e-15]])
b = np.array([-2.0, 1])
with pytest.warns(ConvergenceWarning):
_newton_cg(
grad_hess=lambda x: (A @ x - b, lambda z: A @ z),
func=lambda x: 0.5 * x @ A @ x - b @ x,
grad=lambda x: A @ x - b,
x0=b,
verbose=verbose,
maxiter=2,
)
captured = capsys.readouterr()
msg = [
"tiny_|p| = eps * ||p||^2",
]
for m in msg:
assert m in captured.out
# Test for a case with negative Hessian.
# We do not trigger "Inner CG solver iteration {i} stopped with negative
# curvature", but that is very hard to trigger.
A = np.eye(2)
b = np.array([-2.0, 1])
with pytest.warns(RuntimeWarning):
_newton_cg(
# Note the wrong sign in the hessian product.
grad_hess=lambda x: (A @ x - b, lambda z: -A @ z),
func=lambda x: 0.5 * x @ A @ x - b @ x,
grad=lambda x: A @ x - b,
x0=np.array([1.0, 1.0]),
verbose=verbose,
maxiter=3,
)
captured = capsys.readouterr()
msg = [
"Inner CG solver iteration 0 fell back to steepest descent",
]
for m in msg:
assert m in captured.out
A = np.diag([1e-3, 1, 1e3])
b = np.array([-2.0, 1, 2.0])
with pytest.warns(ConvergenceWarning):
_newton_cg(
grad_hess=lambda x: (A @ x - b, lambda z: A @ z),
func=lambda x: 0.5 * x @ A @ x - b @ x,
grad=lambda x: A @ x - b,
x0=np.ones_like(b),
verbose=verbose,
maxiter=2,
maxinner=1,
)
captured = capsys.readouterr()
msg = [
"Inner CG solver stopped reaching maxiter=1",
]
for m in msg:
assert m in captured.out

View File

@@ -0,0 +1,100 @@
import time
import joblib
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn import config_context, get_config
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils.parallel import Parallel, delayed
def get_working_memory():
return get_config()["working_memory"]
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("backend", ["loky", "threading", "multiprocessing"])
def test_configuration_passes_through_to_joblib(n_jobs, backend):
    # Tests that the global configuration is passed to joblib jobs
with config_context(working_memory=123):
results = Parallel(n_jobs=n_jobs, backend=backend)(
delayed(get_working_memory)() for _ in range(2)
)
assert_array_equal(results, [123] * 2)
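def test_config_propagation_sketch():
    # Editor's illustrative sketch: sklearn's Parallel/delayed pair carries
    # the configuration active when the tasks are created, so every worker
    # sees the value set in the surrounding config_context.
    with config_context(working_memory=456):
        results = Parallel(n_jobs=2, backend="threading")(
            delayed(get_working_memory)() for _ in range(4)
        )
    assert results == [456] * 4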
def test_parallel_delayed_warnings():
"""Informative warnings should be raised when mixing sklearn and joblib API"""
    # We should issue a warning when one wants to use sklearn.utils.parallel.Parallel
    # with joblib.delayed. The config will not be propagated to the workers.
warn_msg = "`sklearn.utils.parallel.Parallel` needs to be used in conjunction"
with pytest.warns(UserWarning, match=warn_msg) as records:
Parallel()(joblib.delayed(time.sleep)(0) for _ in range(10))
assert len(records) == 10
    # We should issue a warning if one wants to use sklearn.utils.parallel.delayed
    # with joblib.Parallel
warn_msg = (
"`sklearn.utils.parallel.delayed` should be used with "
"`sklearn.utils.parallel.Parallel` to make it possible to propagate"
)
with pytest.warns(UserWarning, match=warn_msg) as records:
joblib.Parallel()(delayed(time.sleep)(0) for _ in range(10))
assert len(records) == 10
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_dispatch_config_parallel(n_jobs):
"""Check that we properly dispatch the configuration in parallel processing.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/25239
"""
pd = pytest.importorskip("pandas")
iris = load_iris(as_frame=True)
class TransformerRequiredDataFrame(StandardScaler):
def fit(self, X, y=None):
assert isinstance(X, pd.DataFrame), "X should be a DataFrame"
return super().fit(X, y)
def transform(self, X, y=None):
assert isinstance(X, pd.DataFrame), "X should be a DataFrame"
return super().transform(X, y)
dropper = make_column_transformer(
("drop", [0]),
remainder="passthrough",
n_jobs=n_jobs,
)
param_grid = {"randomforestclassifier__max_depth": [1, 2, 3]}
search_cv = GridSearchCV(
make_pipeline(
dropper,
TransformerRequiredDataFrame(),
RandomForestClassifier(n_estimators=5, n_jobs=n_jobs),
),
param_grid,
cv=5,
n_jobs=n_jobs,
error_score="raise", # this search should not fail
)
    # make sure that `fit` fails when we don't request DataFrame output
with pytest.raises(AssertionError, match="X should be a DataFrame"):
search_cv.fit(iris.data, iris.target)
with config_context(transform_output="pandas"):
        # we expect each intermediate step to output a DataFrame
search_cv.fit(iris.data, iris.target)
assert not np.isnan(search_cv.cv_results_["mean_test_score"]).any()

View File

@@ -0,0 +1,785 @@
from numbers import Integral, Real
import numpy as np
import pytest
from scipy.sparse import csr_matrix
from sklearn._config import config_context, get_config
from sklearn.base import BaseEstimator, _fit_context
from sklearn.model_selection import LeaveOneOut
from sklearn.utils import deprecated
from sklearn.utils._param_validation import (
HasMethods,
Hidden,
Interval,
InvalidParameterError,
MissingValues,
Options,
RealNotInt,
StrOptions,
_ArrayLikes,
_Booleans,
_Callables,
_CVObjects,
_InstancesOf,
_IterablesNotString,
_NanConstraint,
_NoneConstraint,
_PandasNAConstraint,
_RandomStates,
_SparseMatrices,
_VerboseHelper,
generate_invalid_param_val,
generate_valid_param,
make_constraint,
validate_params,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# Some helpers for the tests
@validate_params(
{"a": [Real], "b": [Real], "c": [Real], "d": [Real]},
prefer_skip_nested_validation=True,
)
def _func(a, b=0, *args, c, d=0, **kwargs):
"""A function to test the validation of functions."""
class _Class:
"""A class to test the _InstancesOf constraint and the validation of methods."""
@validate_params({"a": [Real]}, prefer_skip_nested_validation=True)
def _method(self, a):
"""A validated method"""
@deprecated()
@validate_params({"a": [Real]}, prefer_skip_nested_validation=True)
def _deprecated_method(self, a):
"""A deprecated validated method"""
class _Estimator(BaseEstimator):
"""An estimator to test the validation of estimator parameters."""
_parameter_constraints: dict = {"a": [Real]}
def __init__(self, a):
self.a = a
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X=None, y=None):
pass
@pytest.mark.parametrize("interval_type", [Integral, Real])
def test_interval_range(interval_type):
"""Check the range of values depending on closed."""
interval = Interval(interval_type, -2, 2, closed="left")
assert -2 in interval
assert 2 not in interval
interval = Interval(interval_type, -2, 2, closed="right")
assert -2 not in interval
assert 2 in interval
interval = Interval(interval_type, -2, 2, closed="both")
assert -2 in interval
assert 2 in interval
interval = Interval(interval_type, -2, 2, closed="neither")
assert -2 not in interval
assert 2 not in interval
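def test_interval_usage_sketch():
    # Editor's illustrative sketch: `closed` names the endpoints that belong
    # to the interval, so closed="left" is the half-open interval [left, right).
    interval = Interval(Real, 0, 1, closed="left")
    assert 0 in interval
    assert 0.5 in interval
    assert 1 not in interval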
@pytest.mark.parametrize("interval_type", [Integral, Real])
def test_interval_large_integers(interval_type):
"""Check that Interval constraint work with large integers.
non-regression test for #26648.
"""
interval = Interval(interval_type, 0, 2, closed="neither")
assert 2**65 not in interval
assert 2**128 not in interval
assert float(2**65) not in interval
assert float(2**128) not in interval
interval = Interval(interval_type, 0, 2**128, closed="neither")
assert 2**65 in interval
assert 2**128 not in interval
assert float(2**65) in interval
assert float(2**128) not in interval
assert 2**1024 not in interval
def test_interval_inf_in_bounds():
"""Check that inf is included iff a bound is closed and set to None.
Only valid for real intervals.
"""
interval = Interval(Real, 0, None, closed="right")
assert np.inf in interval
interval = Interval(Real, None, 0, closed="left")
assert -np.inf in interval
interval = Interval(Real, None, None, closed="neither")
assert np.inf not in interval
assert -np.inf not in interval
@pytest.mark.parametrize(
"interval",
[Interval(Real, 0, 1, closed="left"), Interval(Real, None, None, closed="both")],
)
def test_nan_not_in_interval(interval):
"""Check that np.nan is not in any interval."""
assert np.nan not in interval
@pytest.mark.parametrize(
"params, error, match",
[
(
{"type": Integral, "left": 1.0, "right": 2, "closed": "both"},
TypeError,
r"Expecting left to be an int for an interval over the integers",
),
(
{"type": Integral, "left": 1, "right": 2.0, "closed": "neither"},
TypeError,
"Expecting right to be an int for an interval over the integers",
),
(
{"type": Integral, "left": None, "right": 0, "closed": "left"},
ValueError,
r"left can't be None when closed == left",
),
(
{"type": Integral, "left": 0, "right": None, "closed": "right"},
ValueError,
r"right can't be None when closed == right",
),
(
{"type": Integral, "left": 1, "right": -1, "closed": "both"},
ValueError,
r"right can't be less than left",
),
],
)
def test_interval_errors(params, error, match):
"""Check that informative errors are raised for invalid combination of parameters"""
with pytest.raises(error, match=match):
Interval(**params)
def test_stroptions():
"""Sanity check for the StrOptions constraint"""
options = StrOptions({"a", "b", "c"}, deprecated={"c"})
assert options.is_satisfied_by("a")
assert options.is_satisfied_by("c")
assert not options.is_satisfied_by("d")
assert "'c' (deprecated)" in str(options)
def test_options():
"""Sanity check for the Options constraint"""
options = Options(Real, {-0.5, 0.5, np.inf}, deprecated={-0.5})
assert options.is_satisfied_by(-0.5)
assert options.is_satisfied_by(np.inf)
assert not options.is_satisfied_by(1.23)
assert "-0.5 (deprecated)" in str(options)
@pytest.mark.parametrize(
"type, expected_type_name",
[
(int, "int"),
(Integral, "int"),
(Real, "float"),
(np.ndarray, "numpy.ndarray"),
],
)
def test_instances_of_type_human_readable(type, expected_type_name):
"""Check the string representation of the _InstancesOf constraint."""
constraint = _InstancesOf(type)
assert str(constraint) == f"an instance of '{expected_type_name}'"
def test_hasmethods():
"""Check the HasMethods constraint."""
constraint = HasMethods(["a", "b"])
class _Good:
def a(self):
pass # pragma: no cover
def b(self):
pass # pragma: no cover
class _Bad:
def a(self):
pass # pragma: no cover
assert constraint.is_satisfied_by(_Good())
assert not constraint.is_satisfied_by(_Bad())
assert str(constraint) == "an object implementing 'a' and 'b'"
@pytest.mark.parametrize(
"constraint",
[
Interval(Real, None, 0, closed="left"),
Interval(Real, 0, None, closed="left"),
Interval(Real, None, None, closed="neither"),
StrOptions({"a", "b", "c"}),
MissingValues(),
MissingValues(numeric_only=True),
_VerboseHelper(),
HasMethods("fit"),
_IterablesNotString(),
_CVObjects(),
],
)
def test_generate_invalid_param_val(constraint):
"""Check that the value generated does not satisfy the constraint"""
bad_value = generate_invalid_param_val(constraint)
assert not constraint.is_satisfied_by(bad_value)
@pytest.mark.parametrize(
"integer_interval, real_interval",
[
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, -5, 5, closed="both"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, -5, 5, closed="neither"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, 4, 5, closed="both"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, 5, None, closed="left"),
),
(
Interval(Integral, None, 3, closed="right"),
Interval(RealNotInt, 4, None, closed="neither"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, -5, 5, closed="both"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, -5, 5, closed="neither"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, 1, 2, closed="both"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, None, -5, closed="left"),
),
(
Interval(Integral, 3, None, closed="left"),
Interval(RealNotInt, None, -4, closed="neither"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, None, 1, closed="right"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, 1, None, closed="left"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, -10, -4, closed="neither"),
),
(
Interval(Integral, -5, 5, closed="both"),
Interval(RealNotInt, -10, -4, closed="right"),
),
(
Interval(Integral, -5, 5, closed="neither"),
Interval(RealNotInt, 6, 10, closed="neither"),
),
(
Interval(Integral, -5, 5, closed="neither"),
Interval(RealNotInt, 6, 10, closed="left"),
),
(
Interval(Integral, 2, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="both"),
),
(
Interval(Integral, 1, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="both"),
),
],
)
def test_generate_invalid_param_val_2_intervals(integer_interval, real_interval):
"""Check that the value generated for an interval constraint does not satisfy any of
the interval constraints.
"""
bad_value = generate_invalid_param_val(constraint=real_interval)
assert not real_interval.is_satisfied_by(bad_value)
assert not integer_interval.is_satisfied_by(bad_value)
bad_value = generate_invalid_param_val(constraint=integer_interval)
assert not real_interval.is_satisfied_by(bad_value)
assert not integer_interval.is_satisfied_by(bad_value)
@pytest.mark.parametrize(
"constraint",
[
_ArrayLikes(),
_InstancesOf(list),
_Callables(),
_NoneConstraint(),
_RandomStates(),
_SparseMatrices(),
_Booleans(),
Interval(Integral, None, None, closed="neither"),
],
)
def test_generate_invalid_param_val_all_valid(constraint):
"""Check that the function raises NotImplementedError when there's no invalid value
for the constraint.
"""
with pytest.raises(NotImplementedError):
generate_invalid_param_val(constraint)
@pytest.mark.parametrize(
"constraint",
[
_ArrayLikes(),
_Callables(),
_InstancesOf(list),
_NoneConstraint(),
_RandomStates(),
_SparseMatrices(),
_Booleans(),
_VerboseHelper(),
MissingValues(),
MissingValues(numeric_only=True),
StrOptions({"a", "b", "c"}),
Options(Integral, {1, 2, 3}),
Interval(Integral, None, None, closed="neither"),
Interval(Integral, 0, 10, closed="neither"),
Interval(Integral, 0, None, closed="neither"),
Interval(Integral, None, 0, closed="neither"),
Interval(Real, 0, 1, closed="neither"),
Interval(Real, 0, None, closed="both"),
Interval(Real, None, 0, closed="right"),
HasMethods("fit"),
_IterablesNotString(),
_CVObjects(),
],
)
def test_generate_valid_param(constraint):
"""Check that the value generated does satisfy the constraint."""
value = generate_valid_param(constraint)
assert constraint.is_satisfied_by(value)
@pytest.mark.parametrize(
"constraint_declaration, value",
[
(Interval(Real, 0, 1, closed="both"), 0.42),
(Interval(Integral, 0, None, closed="neither"), 42),
(StrOptions({"a", "b", "c"}), "b"),
(Options(type, {np.float32, np.float64}), np.float64),
(callable, lambda x: x + 1),
(None, None),
("array-like", [[1, 2], [3, 4]]),
("array-like", np.array([[1, 2], [3, 4]])),
("sparse matrix", csr_matrix([[1, 2], [3, 4]])),
*[
("sparse matrix", container([[1, 2], [3, 4]]))
for container in CSR_CONTAINERS
],
("random_state", 0),
("random_state", np.random.RandomState(0)),
("random_state", None),
(_Class, _Class()),
(int, 1),
(Real, 0.5),
("boolean", False),
("verbose", 1),
("nan", np.nan),
(MissingValues(), -1),
(MissingValues(), -1.0),
(MissingValues(), 2**1028),
(MissingValues(), None),
(MissingValues(), float("nan")),
(MissingValues(), np.nan),
(MissingValues(), "missing"),
(HasMethods("fit"), _Estimator(a=0)),
("cv_object", 5),
],
)
def test_is_satisfied_by(constraint_declaration, value):
"""Sanity check for the is_satisfied_by method"""
constraint = make_constraint(constraint_declaration)
assert constraint.is_satisfied_by(value)
@pytest.mark.parametrize(
"constraint_declaration, expected_constraint_class",
[
(Interval(Real, 0, 1, closed="both"), Interval),
(StrOptions({"option1", "option2"}), StrOptions),
(Options(Real, {0.42, 1.23}), Options),
("array-like", _ArrayLikes),
("sparse matrix", _SparseMatrices),
("random_state", _RandomStates),
(None, _NoneConstraint),
(callable, _Callables),
(int, _InstancesOf),
("boolean", _Booleans),
("verbose", _VerboseHelper),
(MissingValues(numeric_only=True), MissingValues),
(HasMethods("fit"), HasMethods),
("cv_object", _CVObjects),
("nan", _NanConstraint),
],
)
def test_make_constraint(constraint_declaration, expected_constraint_class):
"""Check that make_constraint dispatches to the appropriate constraint class"""
constraint = make_constraint(constraint_declaration)
assert constraint.__class__ is expected_constraint_class
def test_make_constraint_unknown():
"""Check that an informative error is raised when an unknown constraint is passed"""
with pytest.raises(ValueError, match="Unknown constraint"):
make_constraint("not a valid constraint")
def test_validate_params():
"""Check that validate_params works no matter how the arguments are passed"""
with pytest.raises(
InvalidParameterError, match="The 'a' parameter of _func must be"
):
_func("wrong", c=1)
with pytest.raises(
InvalidParameterError, match="The 'b' parameter of _func must be"
):
_func(*[1, "wrong"], c=1)
with pytest.raises(
InvalidParameterError, match="The 'c' parameter of _func must be"
):
_func(1, **{"c": "wrong"})
with pytest.raises(
InvalidParameterError, match="The 'd' parameter of _func must be"
):
_func(1, c=1, d="wrong")
# check in the presence of extra positional and keyword args
with pytest.raises(
InvalidParameterError, match="The 'b' parameter of _func must be"
):
_func(0, *["wrong", 2, 3], c=4, **{"e": 5})
with pytest.raises(
InvalidParameterError, match="The 'c' parameter of _func must be"
):
_func(0, *[1, 2, 3], c="four", **{"e": 5})
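def test_validate_params_usage_sketch():
    # Editor's illustrative sketch: a constraint list means "any of these";
    # here `a` may be an integer or one of two strings, and anything else
    # raises InvalidParameterError naming the offending parameter.
    @validate_params(
        {"a": [Integral, StrOptions({"auto", "sqrt"})]},
        prefer_skip_nested_validation=True,
    )
    def f(a):
        pass

    f(3)
    f("auto")
    with pytest.raises(InvalidParameterError, match="The 'a' parameter"):
        f(a=0.5)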
def test_validate_params_missing_params():
"""Check that no error is raised when there are parameters without
constraints
"""
@validate_params({"a": [int]}, prefer_skip_nested_validation=True)
def func(a, b):
pass
func(1, 2)
def test_decorate_validated_function():
"""Check that validate_params functions can be decorated"""
decorated_function = deprecated()(_func)
with pytest.warns(FutureWarning, match="Function _func is deprecated"):
decorated_function(1, 2, c=3)
# outer decorator does not interfere with validation
with pytest.warns(FutureWarning, match="Function _func is deprecated"):
with pytest.raises(
InvalidParameterError, match=r"The 'c' parameter of _func must be"
):
decorated_function(1, 2, c="wrong")
def test_validate_params_method():
"""Check that validate_params works with methods"""
with pytest.raises(
InvalidParameterError, match="The 'a' parameter of _Class._method must be"
):
_Class()._method("wrong")
# validated method can be decorated
with pytest.warns(FutureWarning, match="Function _deprecated_method is deprecated"):
with pytest.raises(
InvalidParameterError,
match="The 'a' parameter of _Class._deprecated_method must be",
):
_Class()._deprecated_method("wrong")
def test_validate_params_estimator():
"""Check that validate_params works with Estimator instances"""
# no validation in init
est = _Estimator("wrong")
with pytest.raises(
InvalidParameterError, match="The 'a' parameter of _Estimator must be"
):
est.fit()
def test_stroptions_deprecated_subset():
"""Check that the deprecated parameter must be a subset of options."""
with pytest.raises(ValueError, match="deprecated options must be a subset"):
StrOptions({"a", "b", "c"}, deprecated={"a", "d"})
def test_hidden_constraint():
"""Check that internal constraints are not exposed in the error message."""
@validate_params(
{"param": [Hidden(list), dict]}, prefer_skip_nested_validation=True
)
def f(param):
pass
# list and dict are valid params
f({"a": 1, "b": 2, "c": 3})
f([1, 2, 3])
with pytest.raises(
InvalidParameterError, match="The 'param' parameter"
) as exc_info:
f(param="bad")
# the list option is not exposed in the error message
err_msg = str(exc_info.value)
assert "an instance of 'dict'" in err_msg
assert "an instance of 'list'" not in err_msg
def test_hidden_stroptions():
"""Check that we can have 2 StrOptions constraints, one being hidden."""
@validate_params(
{"param": [StrOptions({"auto"}), Hidden(StrOptions({"warn"}))]},
prefer_skip_nested_validation=True,
)
def f(param):
pass
# "auto" and "warn" are valid params
f("auto")
f("warn")
with pytest.raises(
InvalidParameterError, match="The 'param' parameter"
) as exc_info:
f(param="bad")
# the "warn" option is not exposed in the error message
err_msg = str(exc_info.value)
assert "auto" in err_msg
assert "warn" not in err_msg
def test_validate_params_set_param_constraints_attribute():
"""Check that the validate_params decorator properly sets the parameter constraints
as attribute of the decorated function/method.
"""
assert hasattr(_func, "_skl_parameter_constraints")
assert hasattr(_Class()._method, "_skl_parameter_constraints")
def test_boolean_constraint_deprecated_int():
"""Check that validate_params raise a deprecation message but still passes
validation when using an int for a parameter accepting a boolean.
"""
@validate_params({"param": ["boolean"]}, prefer_skip_nested_validation=True)
def f(param):
pass
# True/False and np.bool_(True/False) are valid params
f(True)
f(np.bool_(False))
def test_no_validation():
"""Check that validation can be skipped for a parameter."""
@validate_params(
{"param1": [int, None], "param2": "no_validation"},
prefer_skip_nested_validation=True,
)
def f(param1=None, param2=None):
pass
# param1 is validated
with pytest.raises(InvalidParameterError, match="The 'param1' parameter"):
f(param1="wrong")
# param2 is not validated: any type is valid.
class SomeType:
pass
f(param2=SomeType)
f(param2=SomeType())
def test_pandas_na_constraint_with_pd_na():
"""Add a specific test for checking support for `pandas.NA`."""
pd = pytest.importorskip("pandas")
na_constraint = _PandasNAConstraint()
assert na_constraint.is_satisfied_by(pd.NA)
assert not na_constraint.is_satisfied_by(np.array([1, 2, 3]))
def test_iterable_not_string():
"""Check that a string does not satisfy the _IterableNotString constraint."""
constraint = _IterablesNotString()
assert constraint.is_satisfied_by([1, 2, 3])
assert constraint.is_satisfied_by(range(10))
assert not constraint.is_satisfied_by("some string")
def test_cv_objects():
"""Check that the _CVObjects constraint accepts all current ways
to pass cv objects."""
constraint = _CVObjects()
assert constraint.is_satisfied_by(5)
assert constraint.is_satisfied_by(LeaveOneOut())
assert constraint.is_satisfied_by([([1, 2], [3, 4]), ([3, 4], [1, 2])])
assert constraint.is_satisfied_by(None)
assert not constraint.is_satisfied_by("not a CV object")
def test_third_party_estimator():
"""Check that the validation from a scikit-learn estimator inherited by a third
party estimator does not impose a match between the dict of constraints and the
parameters of the estimator.
"""
class ThirdPartyEstimator(_Estimator):
def __init__(self, b):
self.b = b
super().__init__(a=0)
def fit(self, X=None, y=None):
super().fit(X, y)
# does not raise, even though "b" is not in the constraints dict and "a" is not
# a parameter of the estimator.
ThirdPartyEstimator(b=0).fit()
def test_interval_real_not_int():
"""Check for the type RealNotInt in the Interval constraint."""
constraint = Interval(RealNotInt, 0, 1, closed="both")
assert constraint.is_satisfied_by(1.0)
assert not constraint.is_satisfied_by(1)
def test_real_not_int():
"""Check for the RealNotInt type."""
assert isinstance(1.0, RealNotInt)
assert not isinstance(1, RealNotInt)
assert isinstance(np.float64(1), RealNotInt)
assert not isinstance(np.int64(1), RealNotInt)
def test_skip_param_validation():
"""Check that param validation can be skipped using config_context."""
@validate_params({"a": [int]}, prefer_skip_nested_validation=True)
def f(a):
pass
with pytest.raises(InvalidParameterError, match="The 'a' parameter"):
f(a="1")
# does not raise
with config_context(skip_parameter_validation=True):
f(a="1")
@pytest.mark.parametrize("prefer_skip_nested_validation", [True, False])
def test_skip_nested_validation(prefer_skip_nested_validation):
"""Check that nested validation can be skipped."""
@validate_params({"a": [int]}, prefer_skip_nested_validation=True)
def f(a):
pass
@validate_params(
{"b": [int]},
prefer_skip_nested_validation=prefer_skip_nested_validation,
)
def g(b):
# calls f with a bad parameter type
return f(a="invalid_param_value")
# Validation for g is never skipped.
with pytest.raises(InvalidParameterError, match="The 'b' parameter"):
g(b="invalid_param_value")
if prefer_skip_nested_validation:
g(b=1) # does not raise because inner f is not validated
else:
with pytest.raises(InvalidParameterError, match="The 'a' parameter"):
g(b=1)
@pytest.mark.parametrize(
"skip_parameter_validation, prefer_skip_nested_validation, expected_skipped",
[
(True, True, True),
(True, False, True),
(False, True, True),
(False, False, False),
],
)
def test_skip_nested_validation_and_config_context(
skip_parameter_validation, prefer_skip_nested_validation, expected_skipped
):
"""Check interaction between global skip and local skip."""
@validate_params(
{"a": [int]}, prefer_skip_nested_validation=prefer_skip_nested_validation
)
def g(a):
return get_config()["skip_parameter_validation"]
with config_context(skip_parameter_validation=skip_parameter_validation):
actual_skipped = g(1)
assert actual_skipped == expected_skipped

View File

@@ -0,0 +1,63 @@
import numpy as np
import pytest
from sklearn.utils._plotting import _interval_max_min_ratio, _validate_score_name
def metric():
pass # pragma: no cover
def neg_metric():
pass # pragma: no cover
@pytest.mark.parametrize(
"score_name, scoring, negate_score, expected_score_name",
[
("accuracy", None, False, "accuracy"), # do not transform the name
(None, "accuracy", False, "Accuracy"), # capitalize the name
(None, "accuracy", True, "Negative accuracy"), # add "Negative"
(None, "neg_mean_absolute_error", False, "Negative mean absolute error"),
(None, "neg_mean_absolute_error", True, "Mean absolute error"), # remove "neg_"
("MAE", "neg_mean_absolute_error", True, "MAE"), # keep score_name
(None, None, False, "Score"), # default name
(None, None, True, "Negative score"), # default name but negated
("Some metric", metric, False, "Some metric"), # do not transform the name
("Some metric", metric, True, "Some metric"), # do not transform the name
(None, metric, False, "Metric"), # default name
(None, metric, True, "Negative metric"), # default name but negated
("Some metric", neg_metric, False, "Some metric"), # do not transform the name
("Some metric", neg_metric, True, "Some metric"), # do not transform the name
(None, neg_metric, False, "Negative metric"), # default name
(None, neg_metric, True, "Metric"), # default name but negated
],
)
def test_validate_score_name(score_name, scoring, negate_score, expected_score_name):
"""Check that we return the right score name."""
assert (
_validate_score_name(score_name, scoring, negate_score) == expected_score_name
)
# In the following test, we check the value of the max to min ratio
# for parameter value intervals to check that using a decision threshold
# of 5 is a good heuristic to decide between linear and log scales on
# common ranges of parameter values.
@pytest.mark.parametrize(
"data, lower_bound, upper_bound",
[
# Such a range could be clearly displayed with either log scale or linear
# scale.
(np.geomspace(0.1, 1, 5), 5, 6),
# Checking that the ratio is still positive on a negative log scale.
(-np.geomspace(0.1, 1, 10), 7, 8),
# Evenly spaced parameter values lead to a ratio of 1.
(np.linspace(0, 1, 5), 0.9, 1.1),
# This is not exactly spaced on a log scale but we will benefit from treating
# it as such for visualization.
([1, 2, 5, 10, 20, 50], 20, 40),
],
)
def test_interval_max_min_ratio(data, lower_bound, upper_bound):
assert lower_bound < _interval_max_min_ratio(data) < upper_bound
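def test_interval_max_min_ratio_worked_example_sketch():
    # Editor's worked example (the private helper's exact implementation may
    # differ; this mirrors the documented intent): compare the largest to the
    # smallest gap between consecutive sorted values. For geomspace(0.1, 1, 5)
    # the gap ratio is 10**(3/4) ~= 5.6, just above the threshold of 5.
    gaps = np.diff(np.sort(np.geomspace(0.1, 1, 5)))
    assert 5 < gaps.max() / gaps.min() < 6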

View File

@@ -0,0 +1,680 @@
import re
from pprint import PrettyPrinter
import numpy as np
from sklearn.utils._pprint import _EstimatorPrettyPrinter
from sklearn.linear_model import LogisticRegressionCV
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest, chi2
from sklearn import config_context
# Ignore flake8 (lots of line too long issues)
# ruff: noqa
# Constructors excerpted to test pprinting
class LogisticRegression(BaseEstimator):
def __init__(
self,
penalty="l2",
dual=False,
tol=1e-4,
C=1.0,
fit_intercept=True,
intercept_scaling=1,
class_weight=None,
random_state=None,
solver="warn",
max_iter=100,
multi_class="warn",
verbose=0,
warm_start=False,
n_jobs=None,
l1_ratio=None,
):
self.penalty = penalty
self.dual = dual
self.tol = tol
self.C = C
self.fit_intercept = fit_intercept
self.intercept_scaling = intercept_scaling
self.class_weight = class_weight
self.random_state = random_state
self.solver = solver
self.max_iter = max_iter
self.multi_class = multi_class
self.verbose = verbose
self.warm_start = warm_start
self.n_jobs = n_jobs
self.l1_ratio = l1_ratio
def fit(self, X, y):
return self
class StandardScaler(TransformerMixin, BaseEstimator):
def __init__(self, copy=True, with_mean=True, with_std=True):
self.with_mean = with_mean
self.with_std = with_std
self.copy = copy
def transform(self, X, copy=None):
return self
class RFE(BaseEstimator):
def __init__(self, estimator, n_features_to_select=None, step=1, verbose=0):
self.estimator = estimator
self.n_features_to_select = n_features_to_select
self.step = step
self.verbose = verbose
class GridSearchCV(BaseEstimator):
def __init__(
self,
estimator,
param_grid,
scoring=None,
n_jobs=None,
iid="warn",
refit=True,
cv="warn",
verbose=0,
pre_dispatch="2*n_jobs",
error_score="raise-deprecating",
return_train_score=False,
):
self.estimator = estimator
self.param_grid = param_grid
self.scoring = scoring
self.n_jobs = n_jobs
self.iid = iid
self.refit = refit
self.cv = cv
self.verbose = verbose
self.pre_dispatch = pre_dispatch
self.error_score = error_score
self.return_train_score = return_train_score
class CountVectorizer(BaseEstimator):
def __init__(
self,
input="content",
encoding="utf-8",
decode_error="strict",
strip_accents=None,
lowercase=True,
preprocessor=None,
tokenizer=None,
stop_words=None,
token_pattern=r"(?u)\b\w\w+\b",
ngram_range=(1, 1),
analyzer="word",
max_df=1.0,
min_df=1,
max_features=None,
vocabulary=None,
binary=False,
dtype=np.int64,
):
self.input = input
self.encoding = encoding
self.decode_error = decode_error
self.strip_accents = strip_accents
self.preprocessor = preprocessor
self.tokenizer = tokenizer
self.analyzer = analyzer
self.lowercase = lowercase
self.token_pattern = token_pattern
self.stop_words = stop_words
self.max_df = max_df
self.min_df = min_df
self.max_features = max_features
self.ngram_range = ngram_range
self.vocabulary = vocabulary
self.binary = binary
self.dtype = dtype
class Pipeline(BaseEstimator):
def __init__(self, steps, memory=None):
self.steps = steps
self.memory = memory
class SVC(BaseEstimator):
def __init__(
self,
C=1.0,
kernel="rbf",
degree=3,
gamma="auto_deprecated",
coef0=0.0,
shrinking=True,
probability=False,
tol=1e-3,
cache_size=200,
class_weight=None,
verbose=False,
max_iter=-1,
decision_function_shape="ovr",
random_state=None,
):
self.kernel = kernel
self.degree = degree
self.gamma = gamma
self.coef0 = coef0
self.tol = tol
self.C = C
self.shrinking = shrinking
self.probability = probability
self.cache_size = cache_size
self.class_weight = class_weight
self.verbose = verbose
self.max_iter = max_iter
self.decision_function_shape = decision_function_shape
self.random_state = random_state
class PCA(BaseEstimator):
def __init__(
self,
n_components=None,
copy=True,
whiten=False,
svd_solver="auto",
tol=0.0,
iterated_power="auto",
random_state=None,
):
self.n_components = n_components
self.copy = copy
self.whiten = whiten
self.svd_solver = svd_solver
self.tol = tol
self.iterated_power = iterated_power
self.random_state = random_state
class NMF(BaseEstimator):
def __init__(
self,
n_components=None,
init=None,
solver="cd",
beta_loss="frobenius",
tol=1e-4,
max_iter=200,
random_state=None,
alpha=0.0,
l1_ratio=0.0,
verbose=0,
shuffle=False,
):
self.n_components = n_components
self.init = init
self.solver = solver
self.beta_loss = beta_loss
self.tol = tol
self.max_iter = max_iter
self.random_state = random_state
self.alpha = alpha
self.l1_ratio = l1_ratio
self.verbose = verbose
self.shuffle = shuffle
class SimpleImputer(BaseEstimator):
def __init__(
self,
missing_values=np.nan,
strategy="mean",
fill_value=None,
verbose=0,
copy=True,
):
self.missing_values = missing_values
self.strategy = strategy
self.fill_value = fill_value
self.verbose = verbose
self.copy = copy
def test_basic(print_changed_only_false):
# Basic pprint test
lr = LogisticRegression()
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert lr.__repr__() == expected
def test_changed_only():
# Make sure the changed_only param is correctly used when True (default)
lr = LogisticRegression(C=99)
expected = """LogisticRegression(C=99)"""
assert lr.__repr__() == expected
# Check with a repr that doesn't fit on a single line
lr = LogisticRegression(
C=99, class_weight=0.4, fit_intercept=False, tol=1234, verbose=True
)
expected = """
LogisticRegression(C=99, class_weight=0.4, fit_intercept=False, tol=1234,
verbose=True)"""
expected = expected[1:] # remove first \n
assert lr.__repr__() == expected
imputer = SimpleImputer(missing_values=0)
expected = """SimpleImputer(missing_values=0)"""
assert imputer.__repr__() == expected
# Defaults to np.nan, trying with float('NaN')
imputer = SimpleImputer(missing_values=float("NaN"))
expected = """SimpleImputer()"""
assert imputer.__repr__() == expected
# make sure array parameters don't throw error (see #13583)
repr(LogisticRegressionCV(Cs=np.array([0.1, 1])))
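def test_print_changed_only_sketch():
    # Editor's illustrative sketch: print_changed_only is a global display
    # option (True by default since scikit-learn 0.23), so the same estimator
    # reprs differently depending on the active config_context.
    lr = LogisticRegression(C=99)
    with config_context(print_changed_only=True):
        assert repr(lr) == "LogisticRegression(C=99)"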
def test_pipeline(print_changed_only_false):
# Render a pipeline object
pipeline = make_pipeline(StandardScaler(), LogisticRegression(C=999))
expected = """
Pipeline(memory=None,
steps=[('standardscaler',
StandardScaler(copy=True, with_mean=True, with_std=True)),
('logisticregression',
LogisticRegression(C=999, class_weight=None, dual=False,
fit_intercept=True, intercept_scaling=1,
l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None,
penalty='l2', random_state=None,
solver='warn', tol=0.0001, verbose=0,
warm_start=False))],
verbose=False)"""
expected = expected[1:] # remove first \n
assert pipeline.__repr__() == expected
def test_deeply_nested(print_changed_only_false):
# Render a deeply nested estimator
rfe = RFE(RFE(RFE(RFE(RFE(RFE(RFE(LogisticRegression())))))))
expected = """
RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=LogisticRegression(C=1.0,
class_weight=None,
dual=False,
fit_intercept=True,
intercept_scaling=1,
l1_ratio=None,
max_iter=100,
multi_class='warn',
n_jobs=None,
penalty='l2',
random_state=None,
solver='warn',
tol=0.0001,
verbose=0,
warm_start=False),
n_features_to_select=None,
step=1,
verbose=0),
n_features_to_select=None,
step=1,
verbose=0),
n_features_to_select=None,
step=1, verbose=0),
n_features_to_select=None, step=1,
verbose=0),
n_features_to_select=None, step=1, verbose=0),
n_features_to_select=None, step=1, verbose=0),
n_features_to_select=None, step=1, verbose=0)"""
expected = expected[1:] # remove first \n
assert rfe.__repr__() == expected
def test_gridsearch(print_changed_only_false):
# render a gridsearch
param_grid = [
{"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100, 1000]},
{"kernel": ["linear"], "C": [1, 10, 100, 1000]},
]
gs = GridSearchCV(SVC(), param_grid, cv=5)
expected = """
GridSearchCV(cv=5, error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid=[{'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
'kernel': ['rbf']},
{'C': [1, 10, 100, 1000], 'kernel': ['linear']}],
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
assert gs.__repr__() == expected
def test_gridsearch_pipeline(print_changed_only_false):
# render a pipeline inside a gridsearch
pp = _EstimatorPrettyPrinter(compact=True, indent=1, indent_at_name=True)
pipeline = Pipeline([("reduce_dim", PCA()), ("classify", SVC())])
N_FEATURES_OPTIONS = [2, 4, 8]
C_OPTIONS = [1, 10, 100, 1000]
param_grid = [
{
"reduce_dim": [PCA(iterated_power=7), NMF()],
"reduce_dim__n_components": N_FEATURES_OPTIONS,
"classify__C": C_OPTIONS,
},
{
"reduce_dim": [SelectKBest(chi2)],
"reduce_dim__k": N_FEATURES_OPTIONS,
"classify__C": C_OPTIONS,
},
]
    gs_pipeline = GridSearchCV(pipeline, cv=3, n_jobs=1, param_grid=param_grid)
expected = """
GridSearchCV(cv=3, error_score='raise-deprecating',
estimator=Pipeline(memory=None,
steps=[('reduce_dim',
PCA(copy=True, iterated_power='auto',
n_components=None,
random_state=None,
svd_solver='auto', tol=0.0,
whiten=False)),
('classify',
SVC(C=1.0, cache_size=200,
class_weight=None, coef0=0.0,
decision_function_shape='ovr',
degree=3, gamma='auto_deprecated',
kernel='rbf', max_iter=-1,
probability=False,
random_state=None, shrinking=True,
tol=0.001, verbose=False))]),
iid='warn', n_jobs=1,
param_grid=[{'classify__C': [1, 10, 100, 1000],
'reduce_dim': [PCA(copy=True, iterated_power=7,
n_components=None,
random_state=None,
svd_solver='auto', tol=0.0,
whiten=False),
NMF(alpha=0.0, beta_loss='frobenius',
init=None, l1_ratio=0.0,
max_iter=200, n_components=None,
random_state=None, shuffle=False,
solver='cd', tol=0.0001,
verbose=0)],
'reduce_dim__n_components': [2, 4, 8]},
{'classify__C': [1, 10, 100, 1000],
'reduce_dim': [SelectKBest(k=10,
score_func=<function chi2 at some_address>)],
'reduce_dim__k': [2, 4, 8]}],
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
    repr_ = pp.pformat(gs_pipeline)
# Remove address of '<function chi2 at 0x.....>' for reproducibility
repr_ = re.sub("function chi2 at 0x.*>", "function chi2 at some_address>", repr_)
assert repr_ == expected
def test_n_max_elements_to_show(print_changed_only_false):
n_max_elements_to_show = 30
pp = _EstimatorPrettyPrinter(
compact=True,
indent=1,
indent_at_name=True,
n_max_elements_to_show=n_max_elements_to_show,
)
# No ellipsis
vocabulary = {i: i for i in range(n_max_elements_to_show)}
vectorizer = CountVectorizer(vocabulary=vocabulary)
expected = r"""
CountVectorizer(analyzer='word', binary=False, decode_error='strict',
dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
lowercase=True, max_df=1.0, max_features=None, min_df=1,
ngram_range=(1, 1), preprocessor=None, stop_words=None,
strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
tokenizer=None,
vocabulary={0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7,
8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14,
15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20,
21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26,
27: 27, 28: 28, 29: 29})"""
expected = expected[1:] # remove first \n
assert pp.pformat(vectorizer) == expected
# Now with ellipsis
vocabulary = {i: i for i in range(n_max_elements_to_show + 1)}
vectorizer = CountVectorizer(vocabulary=vocabulary)
expected = r"""
CountVectorizer(analyzer='word', binary=False, decode_error='strict',
dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
lowercase=True, max_df=1.0, max_features=None, min_df=1,
ngram_range=(1, 1), preprocessor=None, stop_words=None,
strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
tokenizer=None,
vocabulary={0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7,
8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14,
15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20,
21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26,
27: 27, 28: 28, 29: 29, ...})"""
expected = expected[1:] # remove first \n
assert pp.pformat(vectorizer) == expected
# Also test with lists
param_grid = {"C": list(range(n_max_elements_to_show))}
gs = GridSearchCV(SVC(), param_grid)
expected = """
GridSearchCV(cv='warn', error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid={'C': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
assert pp.pformat(gs) == expected
# Now with ellipsis
param_grid = {"C": list(range(n_max_elements_to_show + 1))}
gs = GridSearchCV(SVC(), param_grid)
expected = """
GridSearchCV(cv='warn', error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid={'C': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, ...]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
assert pp.pformat(gs) == expected
def test_bruteforce_ellipsis(print_changed_only_false):
# Check that the bruteforce ellipsis (used when the number of non-blank
# characters exceeds N_CHAR_MAX) renders correctly.
lr = LogisticRegression()
# test when the left and right side of the ellipsis aren't on the same
# line.
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
in...
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=150)
# test with very small N_CHAR_MAX
# Note that N_CHAR_MAX is not strictly enforced, but it's normal: to avoid
# weird reprs we still keep the whole line of the right part (after the
# ellipsis).
expected = """
Lo...
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=4)
# test with N_CHAR_MAX == number of non-blank characters: In this case we
# don't want ellipsis
full_repr = lr.__repr__(N_CHAR_MAX=float("inf"))
n_nonblank = len("".join(full_repr.split()))
assert lr.__repr__(N_CHAR_MAX=n_nonblank) == full_repr
assert "..." not in full_repr
    # test with N_CHAR_MAX == number of non-blank characters - 10: the left and
    # right side of the ellipsis are on different lines. In this case we
    # want to expand the whole line of the right side
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_i...
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=n_nonblank - 10)
    # test with N_CHAR_MAX == number of non-blank characters - 4: the left and
    # right side of the ellipsis are on the same line. In this case we don't
    # want to expand the whole line of the right side, just add the ellipsis
    # between the 2 sides.
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter...,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=n_nonblank - 4)
    # test with N_CHAR_MAX == number of non-blank characters - 2: the left and
    # right side of the ellipsis are on the same line, but adding the ellipsis
    # would actually make the repr longer. So we don't add the ellipsis.
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=n_nonblank - 2)
def test_builtin_prettyprinter():
    # Non-regression test that ensures we can still use the builtin
    # PrettyPrinter class for estimators (as done e.g. by joblib).
    # Used to be a bug.
PrettyPrinter().pprint(LogisticRegression())
def test_kwargs_in_init():
# Make sure the changed_only=True mode is OK when an argument is passed as
# kwargs.
# Non-regression test for
# https://github.com/scikit-learn/scikit-learn/issues/17206
class WithKWargs(BaseEstimator):
        # Estimator with a kwargs argument. Such estimators need to hack
        # around set_params and get_params. Here we mimic what LightGBM does.
def __init__(self, a="willchange", b="unchanged", **kwargs):
self.a = a
self.b = b
self._other_params = {}
self.set_params(**kwargs)
def get_params(self, deep=True):
params = super().get_params(deep=deep)
params.update(self._other_params)
return params
def set_params(self, **params):
for key, value in params.items():
setattr(self, key, value)
self._other_params[key] = value
return self
est = WithKWargs(a="something", c="abcd", d=None)
expected = "WithKWargs(a='something', c='abcd', d=None)"
assert expected == est.__repr__()
with config_context(print_changed_only=False):
expected = "WithKWargs(a='something', b='unchanged', c='abcd', d=None)"
assert expected == est.__repr__()
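# A minimal sketch (not part of the original test file) of the behaviour
# exercised above, assuming only the public sklearn.config_context API: with
# the default print_changed_only=True, only parameters that differ from their
# defaults appear in the repr.
from sklearn import config_context
from sklearn.linear_model import LogisticRegression

lr_demo = LogisticRegression(C=0.5)  # lr_demo is an illustrative name
print(repr(lr_demo))  # LogisticRegression(C=0.5)
with config_context(print_changed_only=False):
    print(repr(lr_demo))  # full parameter listing, defaults included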
def test_complexity_print_changed_only():
# Make sure `__repr__` is called the same amount of times
# whether `print_changed_only` is True or False
# Non-regression test for
# https://github.com/scikit-learn/scikit-learn/issues/18490
class DummyEstimator(TransformerMixin, BaseEstimator):
nb_times_repr_called = 0
def __init__(self, estimator=None):
self.estimator = estimator
def __repr__(self):
DummyEstimator.nb_times_repr_called += 1
return super().__repr__()
def transform(self, X, copy=None): # pragma: no cover
return X
estimator = DummyEstimator(
make_pipeline(DummyEstimator(DummyEstimator()), DummyEstimator(), "passthrough")
)
with config_context(print_changed_only=False):
repr(estimator)
nb_repr_print_changed_only_false = DummyEstimator.nb_times_repr_called
DummyEstimator.nb_times_repr_called = 0
with config_context(print_changed_only=True):
repr(estimator)
nb_repr_print_changed_only_true = DummyEstimator.nb_times_repr_called
assert nb_repr_print_changed_only_false == nb_repr_print_changed_only_true

View File

@@ -0,0 +1,192 @@
import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import assert_array_almost_equal
from scipy.special import comb
from sklearn.utils._random import _our_rand_r_py
from sklearn.utils.random import _random_choice_csc, sample_without_replacement
###############################################################################
# test custom sampling without replacement algorithm
###############################################################################
def test_invalid_sample_without_replacement_algorithm():
with pytest.raises(ValueError):
sample_without_replacement(5, 4, "unknown")
def test_sample_without_replacement_algorithms():
methods = ("auto", "tracking_selection", "reservoir_sampling", "pool")
for m in methods:
def sample_without_replacement_method(
n_population, n_samples, random_state=None
):
return sample_without_replacement(
n_population, n_samples, method=m, random_state=random_state
)
check_edge_case_of_sample_int(sample_without_replacement_method)
check_sample_int(sample_without_replacement_method)
check_sample_int_distribution(sample_without_replacement_method)
def check_edge_case_of_sample_int(sample_without_replacement):
# n_population < n_sample
with pytest.raises(ValueError):
sample_without_replacement(0, 1)
with pytest.raises(ValueError):
sample_without_replacement(1, 2)
# n_population == n_samples
assert sample_without_replacement(0, 0).shape == (0,)
assert sample_without_replacement(1, 1).shape == (1,)
# n_population >= n_samples
assert sample_without_replacement(5, 0).shape == (0,)
assert sample_without_replacement(5, 1).shape == (1,)
# n_population < 0 or n_samples < 0
with pytest.raises(ValueError):
sample_without_replacement(-1, 5)
with pytest.raises(ValueError):
sample_without_replacement(5, -1)
def check_sample_int(sample_without_replacement):
# This test is heavily inspired from test_random.py of python-core.
#
# For the entire allowable range of 0 <= k <= N, validate that
# the sample is of the correct length and contains only unique items
n_population = 100
for n_samples in range(n_population + 1):
s = sample_without_replacement(n_population, n_samples)
assert len(s) == n_samples
unique = np.unique(s)
assert np.size(unique) == n_samples
assert np.all(unique < n_population)
# test edge case n_population == n_samples == 0
assert np.size(sample_without_replacement(0, 0)) == 0
def check_sample_int_distribution(sample_without_replacement):
# This test is heavily inspired from test_random.py of python-core.
#
# For the entire allowable range of 0 <= k <= N, validate that
# sample generates all possible permutations
n_population = 10
# a large number of trials prevents false negatives without slowing normal
# case
n_trials = 10000
for n_samples in range(n_population):
        # Counting the number of combinations is not as good as counting the
        # number of permutations. However, it works with sampling algorithms
        # that do not provide a random permutation of the subset of integers.
n_expected = comb(n_population, n_samples, exact=True)
output = {}
for i in range(n_trials):
output[frozenset(sample_without_replacement(n_population, n_samples))] = (
None
)
if len(output) == n_expected:
break
else:
raise AssertionError(
"number of combinations != number of expected (%s != %s)"
% (len(output), n_expected)
)
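# A minimal sketch (not part of the original test file) of the public API
# exercised by these checks: sample_without_replacement draws n_samples
# distinct integers from range(n_population); `method` selects the algorithm.
from sklearn.utils.random import sample_without_replacement

idx_demo = sample_without_replacement(
    n_population=100, n_samples=10, method="auto", random_state=0
)
assert len(set(idx_demo)) == 10 and idx_demo.max() < 100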
def test_random_choice_csc(n_samples=10000, random_state=24):
# Explicit class probabilities
classes = [np.array([0, 1]), np.array([0, 1, 2])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
got = _random_choice_csc(n_samples, classes, class_probabilities, random_state)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel()) / float(n_samples)
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
# Implicit class probabilities
classes = [[0, 1], [1, 2]] # test for array-like support
class_probabilities = [np.array([0.5, 0.5]), np.array([0, 1 / 2, 1 / 2])]
got = _random_choice_csc(
n_samples=n_samples, classes=classes, random_state=random_state
)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel()) / float(n_samples)
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
# Edge case probabilities 1.0 and 0.0
classes = [np.array([0, 1]), np.array([0, 1, 2])]
class_probabilities = [np.array([0.0, 1.0]), np.array([0.0, 1.0, 0.0])]
got = _random_choice_csc(n_samples, classes, class_probabilities, random_state)
assert sp.issparse(got)
for k in range(len(classes)):
p = (
np.bincount(
got.getcol(k).toarray().ravel(), minlength=len(class_probabilities[k])
)
/ n_samples
)
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
# One class target data
classes = [[1], [0]] # test for array-like support
class_probabilities = [np.array([0.0, 1.0]), np.array([1.0])]
got = _random_choice_csc(
n_samples=n_samples, classes=classes, random_state=random_state
)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel()) / n_samples
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
def test_random_choice_csc_errors():
# the length of an array in classes and class_probabilities is mismatched
classes = [np.array([0, 1]), np.array([0, 1, 2, 3])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
# the class dtype is not supported
classes = [np.array(["a", "1"]), np.array(["z", "1", "2"])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
# the class dtype is not supported
classes = [np.array([4.2, 0.1]), np.array([0.1, 0.2, 9.4])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
# Given probabilities don't sum to 1
classes = [np.array([0, 1]), np.array([0, 1, 2])]
class_probabilities = [np.array([0.5, 0.6]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
def test_our_rand_r():
assert 131541053 == _our_rand_r_py(1273642419)
assert 270369 == _our_rand_r_py(0)

View File

@@ -0,0 +1,371 @@
import numpy as np
import pytest
from sklearn.datasets import (
load_iris,
make_classification,
make_multilabel_classification,
make_regression,
)
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import (
LinearRegression,
LogisticRegression,
)
from sklearn.multioutput import ClassifierChain
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._mocking import _MockEstimatorOnOffPrediction
from sklearn.utils._response import _get_response_values, _get_response_values_binary
from sklearn.utils._testing import assert_allclose, assert_array_equal
X, y = load_iris(return_X_y=True)
# scale the data to avoid ConvergenceWarning with LogisticRegression
X = scale(X, copy=False)
X_binary, y_binary = X[:100], y[:100]
@pytest.mark.parametrize(
"response_method", ["decision_function", "predict_proba", "predict_log_proba"]
)
def test_get_response_values_regressor_error(response_method):
"""Check the error message with regressor an not supported response
method."""
my_estimator = _MockEstimatorOnOffPrediction(response_methods=[response_method])
X = "mocking_data", "mocking_target"
err_msg = f"{my_estimator.__class__.__name__} should either be a classifier"
with pytest.raises(ValueError, match=err_msg):
_get_response_values(my_estimator, X, response_method=response_method)
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_regressor(return_response_method_used):
"""Check the behaviour of `_get_response_values` with regressor."""
X, y = make_regression(n_samples=10, random_state=0)
regressor = LinearRegression().fit(X, y)
results = _get_response_values(
regressor,
X,
response_method="predict",
return_response_method_used=return_response_method_used,
)
assert_array_equal(results[0], regressor.predict(X))
assert results[1] is None
if return_response_method_used:
assert results[2] == "predict"
@pytest.mark.parametrize(
"response_method",
["predict", "decision_function", ["decision_function", "predict"]],
)
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_outlier_detection(
response_method, return_response_method_used
):
"""Check the behaviour of `_get_response_values` with outlier detector."""
X, y = make_classification(n_samples=50, random_state=0)
outlier_detector = IsolationForest(random_state=0).fit(X, y)
results = _get_response_values(
outlier_detector,
X,
response_method=response_method,
return_response_method_used=return_response_method_used,
)
chosen_response_method = (
response_method[0] if isinstance(response_method, list) else response_method
)
prediction_method = getattr(outlier_detector, chosen_response_method)
assert_array_equal(results[0], prediction_method(X))
assert results[1] is None
if return_response_method_used:
assert results[2] == chosen_response_method
@pytest.mark.parametrize(
"response_method",
["predict_proba", "decision_function", "predict", "predict_log_proba"],
)
def test_get_response_values_classifier_unknown_pos_label(response_method):
"""Check that `_get_response_values` raises the proper error message with
classifier."""
X, y = make_classification(n_samples=10, n_classes=2, random_state=0)
classifier = LogisticRegression().fit(X, y)
# provide a `pos_label` which is not in `y`
err_msg = r"pos_label=whatever is not a valid label: It should be one of \[0 1\]"
with pytest.raises(ValueError, match=err_msg):
_get_response_values(
classifier,
X,
response_method=response_method,
pos_label="whatever",
)
@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])
def test_get_response_values_classifier_inconsistent_y_pred_for_binary_proba(
response_method,
):
"""Check that `_get_response_values` will raise an error when `y_pred` has a
single class with `predict_proba`."""
X, y_two_class = make_classification(n_samples=10, n_classes=2, random_state=0)
y_single_class = np.zeros_like(y_two_class)
classifier = DecisionTreeClassifier().fit(X, y_single_class)
err_msg = (
r"Got predict_proba of shape \(10, 1\), but need classifier with "
r"two classes"
)
with pytest.raises(ValueError, match=err_msg):
_get_response_values(classifier, X, response_method=response_method)
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_binary_classifier_decision_function(
return_response_method_used,
):
"""Check the behaviour of `_get_response_values` with `decision_function`
and binary classifier."""
X, y = make_classification(
n_samples=10,
n_classes=2,
weights=[0.3, 0.7],
random_state=0,
)
classifier = LogisticRegression().fit(X, y)
response_method = "decision_function"
# default `pos_label`
results = _get_response_values(
classifier,
X,
response_method=response_method,
pos_label=None,
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], classifier.decision_function(X))
assert results[1] == 1
if return_response_method_used:
assert results[2] == "decision_function"
# when forcing `pos_label=classifier.classes_[0]`
results = _get_response_values(
classifier,
X,
response_method=response_method,
pos_label=classifier.classes_[0],
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], classifier.decision_function(X) * -1)
assert results[1] == 0
if return_response_method_used:
assert results[2] == "decision_function"
@pytest.mark.parametrize("return_response_method_used", [True, False])
@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])
def test_get_response_values_binary_classifier_predict_proba(
return_response_method_used, response_method
):
"""Check that `_get_response_values` with `predict_proba` and binary
classifier."""
X, y = make_classification(
n_samples=10,
n_classes=2,
weights=[0.3, 0.7],
random_state=0,
)
classifier = LogisticRegression().fit(X, y)
# default `pos_label`
results = _get_response_values(
classifier,
X,
response_method=response_method,
pos_label=None,
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], getattr(classifier, response_method)(X)[:, 1])
assert results[1] == 1
if return_response_method_used:
assert len(results) == 3
assert results[2] == response_method
else:
assert len(results) == 2
# when forcing `pos_label=classifier.classes_[0]`
y_pred, pos_label, *_ = _get_response_values(
classifier,
X,
response_method=response_method,
pos_label=classifier.classes_[0],
return_response_method_used=return_response_method_used,
)
assert_allclose(y_pred, getattr(classifier, response_method)(X)[:, 0])
assert pos_label == 0
@pytest.mark.parametrize(
"estimator, X, y, err_msg, params",
[
(
DecisionTreeRegressor(),
X_binary,
y_binary,
"Expected 'estimator' to be a binary classifier",
{"response_method": "auto"},
),
(
DecisionTreeClassifier(),
X_binary,
y_binary,
r"pos_label=unknown is not a valid label: It should be one of \[0 1\]",
{"response_method": "auto", "pos_label": "unknown"},
),
(
DecisionTreeClassifier(),
X,
y,
"be a binary classifier. Got 3 classes instead.",
{"response_method": "predict_proba"},
),
],
)
def test_get_response_error(estimator, X, y, err_msg, params):
"""Check that we raise the proper error messages in _get_response_values_binary."""
estimator.fit(X, y)
with pytest.raises(ValueError, match=err_msg):
_get_response_values_binary(estimator, X, **params)
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_predict_proba(return_response_method_used):
"""Check the behaviour of `_get_response_values_binary` using `predict_proba`."""
classifier = DecisionTreeClassifier().fit(X_binary, y_binary)
results = _get_response_values_binary(
classifier,
X_binary,
response_method="predict_proba",
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], classifier.predict_proba(X_binary)[:, 1])
assert results[1] == 1
if return_response_method_used:
assert results[2] == "predict_proba"
results = _get_response_values_binary(
classifier,
X_binary,
response_method="predict_proba",
pos_label=0,
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], classifier.predict_proba(X_binary)[:, 0])
assert results[1] == 0
if return_response_method_used:
assert results[2] == "predict_proba"
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_decision_function(return_response_method_used):
"""Check the behaviour of `_get_response_values_binary` using decision_function."""
classifier = LogisticRegression().fit(X_binary, y_binary)
results = _get_response_values_binary(
classifier,
X_binary,
response_method="decision_function",
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], classifier.decision_function(X_binary))
assert results[1] == 1
if return_response_method_used:
assert results[2] == "decision_function"
results = _get_response_values_binary(
classifier,
X_binary,
response_method="decision_function",
pos_label=0,
return_response_method_used=return_response_method_used,
)
assert_allclose(results[0], classifier.decision_function(X_binary) * -1)
assert results[1] == 0
if return_response_method_used:
assert results[2] == "decision_function"
@pytest.mark.parametrize(
"estimator, response_method",
[
(DecisionTreeClassifier(max_depth=2, random_state=0), "predict_proba"),
(DecisionTreeClassifier(max_depth=2, random_state=0), "predict_log_proba"),
(LogisticRegression(), "decision_function"),
],
)
def test_get_response_values_multiclass(estimator, response_method):
"""Check that we can call `_get_response_values` with a multiclass estimator.
It should return the predictions untouched.
"""
estimator.fit(X, y)
predictions, pos_label = _get_response_values(
estimator, X, response_method=response_method
)
assert pos_label is None
assert predictions.shape == (X.shape[0], len(estimator.classes_))
if response_method == "predict_proba":
assert np.logical_and(predictions >= 0, predictions <= 1).all()
elif response_method == "predict_log_proba":
assert (predictions <= 0.0).all()
def test_get_response_values_with_response_list():
"""Check the behaviour of passing a list of responses to `_get_response_values`."""
classifier = LogisticRegression().fit(X_binary, y_binary)
# it should use `predict_proba`
y_pred, pos_label, response_method = _get_response_values(
classifier,
X_binary,
response_method=["predict_proba", "decision_function"],
return_response_method_used=True,
)
assert_allclose(y_pred, classifier.predict_proba(X_binary)[:, 1])
assert pos_label == 1
assert response_method == "predict_proba"
# it should use `decision_function`
y_pred, pos_label, response_method = _get_response_values(
classifier,
X_binary,
response_method=["decision_function", "predict_proba"],
return_response_method_used=True,
)
assert_allclose(y_pred, classifier.decision_function(X_binary))
assert pos_label == 1
assert response_method == "decision_function"
@pytest.mark.parametrize(
"response_method", ["predict_proba", "decision_function", "predict"]
)
def test_get_response_values_multilabel_indicator(response_method):
X, Y = make_multilabel_classification(random_state=0)
estimator = ClassifierChain(LogisticRegression()).fit(X, Y)
y_pred, pos_label = _get_response_values(
estimator, X, response_method=response_method
)
assert pos_label is None
assert y_pred.shape == Y.shape
if response_method == "predict_proba":
assert np.logical_and(y_pred >= 0, y_pred <= 1).all()
elif response_method == "decision_function":
# values returned by `decision_function` are not bounded in [0, 1]
assert (y_pred < 0).sum() > 0
assert (y_pred > 1).sum() > 0
else: # response_method == "predict"
assert np.logical_or(y_pred == 0, y_pred == 1).all()

View File

@@ -0,0 +1,185 @@
# Author: Tom Dupre la Tour
# Joan Massich <mailsik@gmail.com>
#
# License: BSD 3 clause
from itertools import product
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn.datasets import load_iris
from sklearn.utils._seq_dataset import (
ArrayDataset32,
ArrayDataset64,
CSRDataset32,
CSRDataset64,
)
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import CSR_CONTAINERS
iris = load_iris()
X64 = iris.data.astype(np.float64)
y64 = iris.target.astype(np.float64)
sample_weight64 = np.arange(y64.size, dtype=np.float64)
X32 = iris.data.astype(np.float32)
y32 = iris.target.astype(np.float32)
sample_weight32 = np.arange(y32.size, dtype=np.float32)
floating = [np.float32, np.float64]
def assert_csr_equal_values(current, expected):
current.eliminate_zeros()
expected.eliminate_zeros()
expected = expected.astype(current.dtype)
assert current.shape[0] == expected.shape[0]
assert current.shape[1] == expected.shape[1]
assert_array_equal(current.data, expected.data)
assert_array_equal(current.indices, expected.indices)
assert_array_equal(current.indptr, expected.indptr)
def _make_dense_dataset(float_dtype):
if float_dtype == np.float32:
return ArrayDataset32(X32, y32, sample_weight32, seed=42)
return ArrayDataset64(X64, y64, sample_weight64, seed=42)
def _make_sparse_dataset(csr_container, float_dtype):
if float_dtype == np.float32:
X, y, sample_weight, csr_dataset = X32, y32, sample_weight32, CSRDataset32
else:
X, y, sample_weight, csr_dataset = X64, y64, sample_weight64, CSRDataset64
X = csr_container(X)
return csr_dataset(X.data, X.indptr, X.indices, y, sample_weight, seed=42)
def _make_dense_datasets():
return [_make_dense_dataset(float_dtype) for float_dtype in floating]
def _make_sparse_datasets():
return [
_make_sparse_dataset(csr_container, float_dtype)
for csr_container, float_dtype in product(CSR_CONTAINERS, floating)
]
def _make_fused_types_datasets():
all_datasets = _make_dense_datasets() + _make_sparse_datasets()
# group dataset by array types to get a tuple (float32, float64)
return (all_datasets[idx : idx + 2] for idx in range(0, len(all_datasets), 2))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("dataset", _make_dense_datasets() + _make_sparse_datasets())
def test_seq_dataset_basic_iteration(dataset, csr_container):
NUMBER_OF_RUNS = 5
X_csr64 = csr_container(X64)
for _ in range(NUMBER_OF_RUNS):
# next sample
xi_, yi, swi, idx = dataset._next_py()
xi = csr_container(xi_, shape=(1, X64.shape[1]))
assert_csr_equal_values(xi, X_csr64[[idx]])
assert yi == y64[idx]
assert swi == sample_weight64[idx]
# random sample
xi_, yi, swi, idx = dataset._random_py()
xi = csr_container(xi_, shape=(1, X64.shape[1]))
assert_csr_equal_values(xi, X_csr64[[idx]])
assert yi == y64[idx]
assert swi == sample_weight64[idx]
@pytest.mark.parametrize(
"dense_dataset,sparse_dataset",
[
(
_make_dense_dataset(float_dtype),
_make_sparse_dataset(csr_container, float_dtype),
)
for float_dtype, csr_container in product(floating, CSR_CONTAINERS)
],
)
def test_seq_dataset_shuffle(dense_dataset, sparse_dataset):
# not shuffled
for i in range(5):
_, _, _, idx1 = dense_dataset._next_py()
_, _, _, idx2 = sparse_dataset._next_py()
assert idx1 == i
assert idx2 == i
for i in [132, 50, 9, 18, 58]:
_, _, _, idx1 = dense_dataset._random_py()
_, _, _, idx2 = sparse_dataset._random_py()
assert idx1 == i
assert idx2 == i
seed = 77
dense_dataset._shuffle_py(seed)
sparse_dataset._shuffle_py(seed)
idx_next = [63, 91, 148, 87, 29]
idx_shuffle = [137, 125, 56, 121, 127]
for i, j in zip(idx_next, idx_shuffle):
_, _, _, idx1 = dense_dataset._next_py()
_, _, _, idx2 = sparse_dataset._next_py()
assert idx1 == i
assert idx2 == i
_, _, _, idx1 = dense_dataset._random_py()
_, _, _, idx2 = sparse_dataset._random_py()
assert idx1 == j
assert idx2 == j
@pytest.mark.parametrize("dataset_32,dataset_64", _make_fused_types_datasets())
def test_fused_types_consistency(dataset_32, dataset_64):
NUMBER_OF_RUNS = 5
for _ in range(NUMBER_OF_RUNS):
# next sample
(xi_data32, _, _), yi32, _, _ = dataset_32._next_py()
(xi_data64, _, _), yi64, _, _ = dataset_64._next_py()
assert xi_data32.dtype == np.float32
assert xi_data64.dtype == np.float64
assert_allclose(xi_data64, xi_data32, rtol=1e-5)
assert_allclose(yi64, yi32, rtol=1e-5)
def test_buffer_dtype_mismatch_error():
with pytest.raises(ValueError, match="Buffer dtype mismatch"):
        ArrayDataset64(X32, y32, sample_weight32, seed=42)
with pytest.raises(ValueError, match="Buffer dtype mismatch"):
        ArrayDataset32(X64, y64, sample_weight64, seed=42)
for csr_container in CSR_CONTAINERS:
X_csr32 = csr_container(X32)
X_csr64 = csr_container(X64)
with pytest.raises(ValueError, match="Buffer dtype mismatch"):
CSRDataset64(
X_csr32.data,
X_csr32.indptr,
X_csr32.indices,
y32,
sample_weight32,
seed=42,
            )
with pytest.raises(ValueError, match="Buffer dtype mismatch"):
CSRDataset32(
X_csr64.data,
X_csr64.indptr,
X_csr64.indices,
y64,
sample_weight64,
seed=42,
            )

View File

@@ -0,0 +1,464 @@
import importlib
from collections import namedtuple
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn._config import config_context, get_config
from sklearn.preprocessing import StandardScaler
from sklearn.utils._set_output import (
ADAPTERS_MANAGER,
ContainerAdapterProtocol,
_get_adapter_from_container,
_get_output_config,
_safe_set_output,
_SetOutputMixin,
_wrap_data_with_container,
check_library_installed,
)
from sklearn.utils.fixes import CSR_CONTAINERS
def test_pandas_adapter():
"""Check pandas adapter has expected behavior."""
pd = pytest.importorskip("pandas")
X_np = np.asarray([[1, 0, 3], [0, 0, 1]])
columns = np.asarray(["f0", "f1", "f2"], dtype=object)
index = np.asarray([0, 1])
X_df_orig = pd.DataFrame([[1, 2], [1, 3]], index=index)
adapter = ADAPTERS_MANAGER.adapters["pandas"]
X_container = adapter.create_container(X_np, X_df_orig, columns=lambda: columns)
assert isinstance(X_container, pd.DataFrame)
assert_array_equal(X_container.columns, columns)
assert_array_equal(X_container.index, index)
# Input dataframe's index does not change
new_columns = np.asarray(["f0", "f1"], dtype=object)
X_df = pd.DataFrame([[1, 2], [1, 3]], index=[10, 12])
new_df = adapter.create_container(X_df, X_df_orig, columns=new_columns)
assert_array_equal(new_df.columns, new_columns)
assert_array_equal(new_df.index, X_df.index)
assert adapter.is_supported_container(X_df)
assert not adapter.is_supported_container(X_np)
    # adapter.rename_columns renames the columns
new_columns = np.array(["a", "c"], dtype=object)
new_df = adapter.rename_columns(X_df, new_columns)
assert_array_equal(new_df.columns, new_columns)
# adapter.hstack stacks the dataframes horizontally.
X_df_1 = pd.DataFrame([[1, 2, 5], [3, 4, 6]], columns=["a", "b", "e"])
X_df_2 = pd.DataFrame([[4], [5]], columns=["c"])
X_stacked = adapter.hstack([X_df_1, X_df_2])
expected_df = pd.DataFrame(
[[1, 2, 5, 4], [3, 4, 6, 5]], columns=["a", "b", "e", "c"]
)
pd.testing.assert_frame_equal(X_stacked, expected_df)
    # check that we properly update the columns even with duplicate column
    # names; this use case can happen when using ColumnTransformer
    # non-regression test for gh-28260
X_df = pd.DataFrame([[1, 2], [1, 3]], columns=["a", "a"])
new_columns = np.array(["x__a", "y__a"], dtype=object)
new_df = adapter.rename_columns(X_df, new_columns)
assert_array_equal(new_df.columns, new_columns)
    # check the behavior of the inplace parameter in `create_container`:
    # with inplace=False we should trigger a copy
X_df = pd.DataFrame([[1, 2], [1, 3]], index=index)
X_output = adapter.create_container(X_df, X_df, columns=["a", "b"], inplace=False)
assert X_output is not X_df
assert list(X_df.columns) == [0, 1]
assert list(X_output.columns) == ["a", "b"]
    # with inplace=True the operation modifies the input dataframe in place
X_df = pd.DataFrame([[1, 2], [1, 3]], index=index)
X_output = adapter.create_container(X_df, X_df, columns=["a", "b"], inplace=True)
assert X_output is X_df
assert list(X_df.columns) == ["a", "b"]
assert list(X_output.columns) == ["a", "b"]
def test_polars_adapter():
"""Check Polars adapter has expected behavior."""
pl = pytest.importorskip("polars")
X_np = np.array([[1, 0, 3], [0, 0, 1]])
columns = ["f1", "f2", "f3"]
X_df_orig = pl.DataFrame(X_np, schema=columns, orient="row")
adapter = ADAPTERS_MANAGER.adapters["polars"]
X_container = adapter.create_container(X_np, X_df_orig, columns=lambda: columns)
assert isinstance(X_container, pl.DataFrame)
assert_array_equal(X_container.columns, columns)
# Update columns with create_container
new_columns = np.asarray(["a", "b", "c"], dtype=object)
new_df = adapter.create_container(X_df_orig, X_df_orig, columns=new_columns)
assert_array_equal(new_df.columns, new_columns)
assert adapter.is_supported_container(X_df_orig)
assert not adapter.is_supported_container(X_np)
    # adapter.rename_columns renames the columns
new_columns = np.array(["a", "c", "g"], dtype=object)
new_df = adapter.rename_columns(X_df_orig, new_columns)
assert_array_equal(new_df.columns, new_columns)
# adapter.hstack stacks the dataframes horizontally.
X_df_1 = pl.DataFrame([[1, 2, 5], [3, 4, 6]], schema=["a", "b", "e"], orient="row")
X_df_2 = pl.DataFrame([[4], [5]], schema=["c"], orient="row")
X_stacked = adapter.hstack([X_df_1, X_df_2])
expected_df = pl.DataFrame(
[[1, 2, 5, 4], [3, 4, 6, 5]], schema=["a", "b", "e", "c"], orient="row"
)
from polars.testing import assert_frame_equal
assert_frame_equal(X_stacked, expected_df)
    # check the behavior of the inplace parameter in `create_container`:
    # with inplace=False we should trigger a copy
X_df = pl.DataFrame([[1, 2], [1, 3]], schema=["a", "b"], orient="row")
X_output = adapter.create_container(X_df, X_df, columns=["c", "d"], inplace=False)
assert X_output is not X_df
assert list(X_df.columns) == ["a", "b"]
assert list(X_output.columns) == ["c", "d"]
    # with inplace=True the operation modifies the input dataframe in place
X_df = pl.DataFrame([[1, 2], [1, 3]], schema=["a", "b"], orient="row")
X_output = adapter.create_container(X_df, X_df, columns=["c", "d"], inplace=True)
assert X_output is X_df
assert list(X_df.columns) == ["c", "d"]
assert list(X_output.columns) == ["c", "d"]
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test__container_error_validation(csr_container):
"""Check errors in _wrap_data_with_container."""
X = np.asarray([[1, 0, 3], [0, 0, 1]])
X_csr = csr_container(X)
match = "The transformer outputs a scipy sparse matrix."
with config_context(transform_output="pandas"):
with pytest.raises(ValueError, match=match):
_wrap_data_with_container("transform", X_csr, X, StandardScaler())
class EstimatorWithoutSetOutputAndWithoutTransform:
pass
class EstimatorNoSetOutputWithTransform:
def transform(self, X, y=None):
return X # pragma: no cover
class EstimatorWithSetOutput(_SetOutputMixin):
def fit(self, X, y=None):
self.n_features_in_ = X.shape[1]
return self
def transform(self, X, y=None):
return X
def get_feature_names_out(self, input_features=None):
return np.asarray([f"X{i}" for i in range(self.n_features_in_)], dtype=object)
def test__safe_set_output():
"""Check _safe_set_output works as expected."""
    # An estimator without transform does not raise when calling set_output.
est = EstimatorWithoutSetOutputAndWithoutTransform()
_safe_set_output(est, transform="pandas")
# Estimator with transform but without set_output will raise
est = EstimatorNoSetOutputWithTransform()
with pytest.raises(ValueError, match="Unable to configure output"):
_safe_set_output(est, transform="pandas")
est = EstimatorWithSetOutput().fit(np.asarray([[1, 2, 3]]))
_safe_set_output(est, transform="pandas")
config = _get_output_config("transform", est)
assert config["dense"] == "pandas"
_safe_set_output(est, transform="default")
config = _get_output_config("transform", est)
assert config["dense"] == "default"
    # transform=None is a no-op, so the config remains "default"
_safe_set_output(est, transform=None)
config = _get_output_config("transform", est)
assert config["dense"] == "default"
class EstimatorNoSetOutputWithTransformNoFeatureNamesOut(_SetOutputMixin):
def transform(self, X, y=None):
return X # pragma: no cover
def test_set_output_mixin():
"""Estimator without get_feature_names_out does not define `set_output`."""
est = EstimatorNoSetOutputWithTransformNoFeatureNamesOut()
assert not hasattr(est, "set_output")
def test__safe_set_output_error():
"""Check transform with invalid config."""
X = np.asarray([[1, 0, 3], [0, 0, 1]])
est = EstimatorWithSetOutput()
_safe_set_output(est, transform="bad")
msg = "output config must be in"
with pytest.raises(ValueError, match=msg):
est.transform(X)
@pytest.mark.parametrize("dataframe_lib", ["pandas", "polars"])
def test_set_output_method(dataframe_lib):
"""Check that the output is a dataframe."""
lib = pytest.importorskip(dataframe_lib)
X = np.asarray([[1, 0, 3], [0, 0, 1]])
est = EstimatorWithSetOutput().fit(X)
# transform=None is a no-op
est2 = est.set_output(transform=None)
assert est2 is est
X_trans_np = est2.transform(X)
assert isinstance(X_trans_np, np.ndarray)
est.set_output(transform=dataframe_lib)
X_trans_pd = est.transform(X)
assert isinstance(X_trans_pd, lib.DataFrame)
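# A minimal sketch (not part of the original test file) of the public
# set_output API exercised above, using a real transformer; requires pandas
# to be installed at transform time.
import numpy as np
from sklearn.preprocessing import StandardScaler

scaler_demo = StandardScaler().set_output(transform="pandas")
X_out_demo = scaler_demo.fit_transform(np.array([[1.0, 2.0], [3.0, 4.0]]))
print(type(X_out_demo))  # <class 'pandas.core.frame.DataFrame'>
print(list(X_out_demo.columns))  # ['x0', 'x1']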
def test_set_output_method_error():
"""Check transform fails with invalid transform."""
X = np.asarray([[1, 0, 3], [0, 0, 1]])
est = EstimatorWithSetOutput().fit(X)
est.set_output(transform="bad")
msg = "output config must be in"
with pytest.raises(ValueError, match=msg):
est.transform(X)
@pytest.mark.parametrize("transform_output", ["pandas", "polars"])
def test__get_output_config(transform_output):
"""Check _get_output_config works as expected."""
# Without a configuration set, the global config is used
global_config = get_config()["transform_output"]
config = _get_output_config("transform")
assert config["dense"] == global_config
with config_context(transform_output=transform_output):
# with estimator=None, the global config is used
config = _get_output_config("transform")
assert config["dense"] == transform_output
est = EstimatorNoSetOutputWithTransform()
config = _get_output_config("transform", est)
assert config["dense"] == transform_output
est = EstimatorWithSetOutput()
        # If the estimator has no config, use the global config
config = _get_output_config("transform", est)
assert config["dense"] == transform_output
# If estimator has a config, use local config
est.set_output(transform="default")
config = _get_output_config("transform", est)
assert config["dense"] == "default"
est.set_output(transform=transform_output)
config = _get_output_config("transform", est)
assert config["dense"] == transform_output
class EstimatorWithSetOutputNoAutoWrap(_SetOutputMixin, auto_wrap_output_keys=None):
def transform(self, X, y=None):
return X
def test_get_output_auto_wrap_false():
"""Check that auto_wrap_output_keys=None does not wrap."""
est = EstimatorWithSetOutputNoAutoWrap()
assert not hasattr(est, "set_output")
X = np.asarray([[1, 0, 3], [0, 0, 1]])
assert X is est.transform(X)
def test_auto_wrap_output_keys_errors_with_incorrect_input():
msg = "auto_wrap_output_keys must be None or a tuple of keys."
with pytest.raises(ValueError, match=msg):
class BadEstimator(_SetOutputMixin, auto_wrap_output_keys="bad_parameter"):
pass
class AnotherMixin:
def __init_subclass__(cls, custom_parameter, **kwargs):
super().__init_subclass__(**kwargs)
cls.custom_parameter = custom_parameter
def test_set_output_mixin_custom_mixin():
"""Check that multiple init_subclasses passes parameters up."""
class BothMixinEstimator(_SetOutputMixin, AnotherMixin, custom_parameter=123):
def transform(self, X, y=None):
return X
def get_feature_names_out(self, input_features=None):
return input_features
est = BothMixinEstimator()
assert est.custom_parameter == 123
assert hasattr(est, "set_output")
def test_set_output_mro():
"""Check that multi-inheritance resolves to the correct class method.
Non-regression test gh-25293.
"""
class Base(_SetOutputMixin):
def transform(self, X):
return "Base" # noqa
class A(Base):
pass
class B(Base):
def transform(self, X):
return "B"
class C(A, B):
pass
assert C().transform(None) == "B"
class EstimatorWithSetOutputIndex(_SetOutputMixin):
def fit(self, X, y=None):
self.n_features_in_ = X.shape[1]
return self
def transform(self, X, y=None):
import pandas as pd
# transform by giving output a new index.
return pd.DataFrame(X.to_numpy(), index=[f"s{i}" for i in range(X.shape[0])])
def get_feature_names_out(self, input_features=None):
return np.asarray([f"X{i}" for i in range(self.n_features_in_)], dtype=object)
def test_set_output_pandas_keep_index():
"""Check that set_output does not override index.
Non-regression test for gh-25730.
"""
pd = pytest.importorskip("pandas")
X = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=[0, 1])
est = EstimatorWithSetOutputIndex().set_output(transform="pandas")
est.fit(X)
X_trans = est.transform(X)
assert_array_equal(X_trans.index, ["s0", "s1"])
class EstimatorReturnTuple(_SetOutputMixin):
def __init__(self, OutputTuple):
self.OutputTuple = OutputTuple
def transform(self, X, y=None):
return self.OutputTuple(X, 2 * X)
def test_set_output_named_tuple_out():
"""Check that namedtuples are kept by default."""
Output = namedtuple("Output", "X, Y")
X = np.asarray([[1, 2, 3]])
est = EstimatorReturnTuple(OutputTuple=Output)
X_trans = est.transform(X)
assert isinstance(X_trans, Output)
assert_array_equal(X_trans.X, X)
assert_array_equal(X_trans.Y, 2 * X)
class EstimatorWithListInput(_SetOutputMixin):
def fit(self, X, y=None):
assert isinstance(X, list)
self.n_features_in_ = len(X[0])
return self
def transform(self, X, y=None):
return X
def get_feature_names_out(self, input_features=None):
return np.asarray([f"X{i}" for i in range(self.n_features_in_)], dtype=object)
@pytest.mark.parametrize("dataframe_lib", ["pandas", "polars"])
def test_set_output_list_input(dataframe_lib):
"""Check set_output for list input.
Non-regression test for #27037.
"""
lib = pytest.importorskip(dataframe_lib)
X = [[0, 1, 2, 3], [4, 5, 6, 7]]
est = EstimatorWithListInput()
est.set_output(transform=dataframe_lib)
X_out = est.fit(X).transform(X)
assert isinstance(X_out, lib.DataFrame)
assert_array_equal(X_out.columns, ["X0", "X1", "X2", "X3"])
@pytest.mark.parametrize("name", sorted(ADAPTERS_MANAGER.adapters))
def test_adapter_class_has_interface(name):
"""Check adapters have the correct interface."""
assert isinstance(ADAPTERS_MANAGER.adapters[name], ContainerAdapterProtocol)
def test_check_library_installed(monkeypatch):
"""Check import error changed."""
orig_import_module = importlib.import_module
def patched_import_module(name):
if name == "pandas":
raise ImportError()
        return orig_import_module(name, package=None)
monkeypatch.setattr(importlib, "import_module", patched_import_module)
msg = "Setting output container to 'pandas' requires"
with pytest.raises(ImportError, match=msg):
check_library_installed("pandas")
def test_get_adapter_from_container():
"""Check the behavior fo `_get_adapter_from_container`."""
pd = pytest.importorskip("pandas")
X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
adapter = _get_adapter_from_container(X)
assert adapter.container_lib == "pandas"
err_msg = "The container does not have a registered adapter in scikit-learn."
with pytest.raises(ValueError, match=err_msg):
_get_adapter_from_container(X.to_numpy())

View File

@@ -0,0 +1,65 @@
from collections import defaultdict
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.utils.graph import single_source_shortest_path_length
def floyd_warshall_slow(graph, directed=False):
N = graph.shape[0]
    # set zero entries (missing edges) to infinity
graph[np.where(graph == 0)] = np.inf
# set diagonal to zero
graph.flat[:: N + 1] = 0
if not directed:
graph = np.minimum(graph, graph.T)
for k in range(N):
for i in range(N):
for j in range(N):
graph[i, j] = min(graph[i, j], graph[i, k] + graph[k, j])
graph[np.where(np.isinf(graph))] = 0
return graph
def generate_graph(N=20):
# sparse grid of distances
rng = np.random.RandomState(0)
dist_matrix = rng.random_sample((N, N))
# make symmetric: distances are not direction-dependent
dist_matrix = dist_matrix + dist_matrix.T
# make graph sparse
i = (rng.randint(N, size=N * N // 2), rng.randint(N, size=N * N // 2))
dist_matrix[i] = 0
# set diagonal to zero
dist_matrix.flat[:: N + 1] = 0
return dist_matrix
def test_shortest_path():
dist_matrix = generate_graph(20)
# We compare path length and not costs (-> set distances to 0 or 1)
dist_matrix[dist_matrix != 0] = 1
for directed in (True, False):
if not directed:
dist_matrix = np.minimum(dist_matrix, dist_matrix.T)
graph_py = floyd_warshall_slow(dist_matrix.copy(), directed)
for i in range(dist_matrix.shape[0]):
# Non-reachable nodes have distance 0 in graph_py
dist_dict = defaultdict(int)
dist_dict.update(single_source_shortest_path_length(dist_matrix, i))
for j in range(graph_py[i].shape[0]):
assert_array_almost_equal(dist_dict[j], graph_py[i, j])
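# A minimal sketch (not part of the original test file) of the helper under
# test: on a 3-node path graph given as a dense adjacency matrix, hop counts
# from node 0 are 0, 1 and 2.
import numpy as np
from sklearn.utils.graph import single_source_shortest_path_length

adjacency_demo = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
lengths_demo = dict(single_source_shortest_path_length(adjacency_demo, 0))
assert lengths_demo == {0: 0, 1: 1, 2: 2}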

View File

@@ -0,0 +1,40 @@
from threadpoolctl import threadpool_info
from sklearn.utils._show_versions import _get_deps_info, _get_sys_info, show_versions
from sklearn.utils._testing import ignore_warnings
def test_get_sys_info():
sys_info = _get_sys_info()
assert "python" in sys_info
assert "executable" in sys_info
assert "machine" in sys_info
def test_get_deps_info():
with ignore_warnings():
deps_info = _get_deps_info()
assert "pip" in deps_info
assert "setuptools" in deps_info
assert "sklearn" in deps_info
assert "numpy" in deps_info
assert "scipy" in deps_info
assert "Cython" in deps_info
assert "pandas" in deps_info
assert "matplotlib" in deps_info
assert "joblib" in deps_info
def test_show_versions(capsys):
with ignore_warnings():
show_versions()
out, err = capsys.readouterr()
assert "python" in out
assert "numpy" in out
info = threadpool_info()
if info:
assert "threadpoolctl info:" in out

View File

@@ -0,0 +1,998 @@
import numpy as np
import pytest
import scipy.sparse as sp
from numpy.random import RandomState
from numpy.testing import assert_array_almost_equal, assert_array_equal
from scipy import linalg
from sklearn.datasets import make_classification
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS, LIL_CONTAINERS
from sklearn.utils.sparsefuncs import (
_implicit_column_offset,
count_nonzero,
csc_median_axis_0,
incr_mean_variance_axis,
inplace_column_scale,
inplace_row_scale,
inplace_swap_column,
inplace_swap_row,
mean_variance_axis,
min_max_axis,
)
from sklearn.utils.sparsefuncs_fast import (
assign_rows_csr,
csr_row_norms,
inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2,
)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_mean_variance_axis0(csc_container, csr_container, lil_container):
X, _ = make_classification(5, 4, random_state=0)
# Sparsify the array a little bit
X[0, 0] = 0
X[2, 1] = 0
X[4, 3] = 0
X_lil = lil_container(X)
X_lil[1, 0] = 0
X[1, 0] = 0
with pytest.raises(TypeError):
mean_variance_axis(X_lil, axis=0)
X_csr = csr_container(X_lil)
X_csc = csc_container(X_lil)
expected_dtypes = [
(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64),
]
for input_dtype, output_dtype in expected_dtypes:
X_test = X.astype(input_dtype)
for X_sparse in (X_csr, X_csc):
X_sparse = X_sparse.astype(input_dtype)
X_means, X_vars = mean_variance_axis(X_sparse, axis=0)
assert X_means.dtype == output_dtype
assert X_vars.dtype == output_dtype
assert_array_almost_equal(X_means, np.mean(X_test, axis=0))
assert_array_almost_equal(X_vars, np.var(X_test, axis=0))
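# A minimal sketch (not part of the original test file) of the helper
# exercised above: mean_variance_axis computes per-axis statistics without
# densifying, matching numpy on the dense equivalent.
import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import mean_variance_axis

X_demo = sp.csr_matrix(np.array([[0.0, 1.0], [2.0, 0.0], [0.0, 3.0]]))
means_demo, vars_demo = mean_variance_axis(X_demo, axis=0)
assert np.allclose(means_demo, X_demo.toarray().mean(axis=0))
assert np.allclose(vars_demo, X_demo.toarray().var(axis=0))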
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("sparse_constructor", CSC_CONTAINERS + CSR_CONTAINERS)
def test_mean_variance_axis0_precision(dtype, sparse_constructor):
# Check that there's no big loss of precision when the real variance is
# exactly 0. (#19766)
rng = np.random.RandomState(0)
X = np.full(fill_value=100.0, shape=(1000, 1), dtype=dtype)
# Add some missing records which should be ignored:
missing_indices = rng.choice(np.arange(X.shape[0]), 10, replace=False)
X[missing_indices, 0] = np.nan
X = sparse_constructor(X)
# Random positive weights:
sample_weight = rng.rand(X.shape[0]).astype(dtype)
_, var = mean_variance_axis(X, weights=sample_weight, axis=0)
assert var < np.finfo(dtype).eps
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_mean_variance_axis1(csc_container, csr_container, lil_container):
X, _ = make_classification(5, 4, random_state=0)
# Sparsify the array a little bit
X[0, 0] = 0
X[2, 1] = 0
X[4, 3] = 0
X_lil = lil_container(X)
X_lil[1, 0] = 0
X[1, 0] = 0
with pytest.raises(TypeError):
mean_variance_axis(X_lil, axis=1)
X_csr = csr_container(X_lil)
X_csc = csc_container(X_lil)
expected_dtypes = [
(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64),
]
for input_dtype, output_dtype in expected_dtypes:
X_test = X.astype(input_dtype)
for X_sparse in (X_csr, X_csc):
X_sparse = X_sparse.astype(input_dtype)
            X_means, X_vars = mean_variance_axis(X_sparse, axis=1)
            assert X_means.dtype == output_dtype
            assert X_vars.dtype == output_dtype
            assert_array_almost_equal(X_means, np.mean(X_test, axis=1))
            assert_array_almost_equal(X_vars, np.var(X_test, axis=1))
@pytest.mark.parametrize(
["Xw", "X", "weights"],
[
([[0, 0, 1], [0, 2, 3]], [[0, 0, 1], [0, 2, 3]], [1, 1, 1]),
([[0, 0, 1], [0, 1, 1]], [[0, 0, 0, 1], [0, 1, 1, 1]], [1, 2, 1]),
([[0, 0, 1], [0, 1, 1]], [[0, 0, 1], [0, 1, 1]], None),
(
[[0, np.nan, 2], [0, np.nan, np.nan]],
[[0, np.nan, 2], [0, np.nan, np.nan]],
[1.0, 1.0, 1.0],
),
(
[[0, 0], [1, np.nan], [2, 0], [0, 3], [np.nan, np.nan], [np.nan, 2]],
[
[0, 0, 0],
[1, 1, np.nan],
[2, 2, 0],
[0, 0, 3],
[np.nan, np.nan, np.nan],
[np.nan, np.nan, 2],
],
[2.0, 1.0],
),
(
[[1, 0, 1], [0, 3, 1]],
[[1, 0, 0, 0, 1], [0, 3, 3, 3, 1]],
np.array([1, 3, 1]),
),
],
)
@pytest.mark.parametrize("sparse_constructor", CSC_CONTAINERS + CSR_CONTAINERS)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_incr_mean_variance_axis_weighted_axis1(
Xw, X, weights, sparse_constructor, dtype
):
axis = 1
Xw_sparse = sparse_constructor(Xw).astype(dtype)
X_sparse = sparse_constructor(X).astype(dtype)
last_mean = np.zeros(np.shape(Xw)[0], dtype=dtype)
last_var = np.zeros_like(last_mean, dtype=dtype)
last_n = np.zeros_like(last_mean, dtype=np.int64)
means0, vars0, n_incr0 = incr_mean_variance_axis(
X=X_sparse,
axis=axis,
last_mean=last_mean,
last_var=last_var,
last_n=last_n,
weights=None,
)
means_w0, vars_w0, n_incr_w0 = incr_mean_variance_axis(
X=Xw_sparse,
axis=axis,
last_mean=last_mean,
last_var=last_var,
last_n=last_n,
weights=weights,
)
assert means_w0.dtype == dtype
assert vars_w0.dtype == dtype
assert n_incr_w0.dtype == dtype
means_simple, vars_simple = mean_variance_axis(X=X_sparse, axis=axis)
assert_array_almost_equal(means0, means_w0)
assert_array_almost_equal(means0, means_simple)
assert_array_almost_equal(vars0, vars_w0)
assert_array_almost_equal(vars0, vars_simple)
assert_array_almost_equal(n_incr0, n_incr_w0)
# check second round for incremental
means1, vars1, n_incr1 = incr_mean_variance_axis(
X=X_sparse,
axis=axis,
last_mean=means0,
last_var=vars0,
last_n=n_incr0,
weights=None,
)
means_w1, vars_w1, n_incr_w1 = incr_mean_variance_axis(
X=Xw_sparse,
axis=axis,
last_mean=means_w0,
last_var=vars_w0,
last_n=n_incr_w0,
weights=weights,
)
assert_array_almost_equal(means1, means_w1)
assert_array_almost_equal(vars1, vars_w1)
assert_array_almost_equal(n_incr1, n_incr_w1)
assert means_w1.dtype == dtype
assert vars_w1.dtype == dtype
assert n_incr_w1.dtype == dtype
@pytest.mark.parametrize(
["Xw", "X", "weights"],
[
([[0, 0, 1], [0, 2, 3]], [[0, 0, 1], [0, 2, 3]], [1, 1]),
([[0, 0, 1], [0, 1, 1]], [[0, 0, 1], [0, 1, 1], [0, 1, 1]], [1, 2]),
([[0, 0, 1], [0, 1, 1]], [[0, 0, 1], [0, 1, 1]], None),
(
[[0, np.nan, 2], [0, np.nan, np.nan]],
[[0, np.nan, 2], [0, np.nan, np.nan]],
[1.0, 1.0],
),
(
[[0, 0, 1, np.nan, 2, 0], [0, 3, np.nan, np.nan, np.nan, 2]],
[
[0, 0, 1, np.nan, 2, 0],
[0, 0, 1, np.nan, 2, 0],
[0, 3, np.nan, np.nan, np.nan, 2],
],
[2.0, 1.0],
),
(
[[1, 0, 1], [0, 0, 1]],
[[1, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]],
np.array([1, 3]),
),
],
)
@pytest.mark.parametrize("sparse_constructor", CSC_CONTAINERS + CSR_CONTAINERS)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_incr_mean_variance_axis_weighted_axis0(
Xw, X, weights, sparse_constructor, dtype
):
axis = 0
Xw_sparse = sparse_constructor(Xw).astype(dtype)
X_sparse = sparse_constructor(X).astype(dtype)
last_mean = np.zeros(np.size(Xw, 1), dtype=dtype)
last_var = np.zeros_like(last_mean)
last_n = np.zeros_like(last_mean, dtype=np.int64)
means0, vars0, n_incr0 = incr_mean_variance_axis(
X=X_sparse,
axis=axis,
last_mean=last_mean,
last_var=last_var,
last_n=last_n,
weights=None,
)
means_w0, vars_w0, n_incr_w0 = incr_mean_variance_axis(
X=Xw_sparse,
axis=axis,
last_mean=last_mean,
last_var=last_var,
last_n=last_n,
weights=weights,
)
assert means_w0.dtype == dtype
assert vars_w0.dtype == dtype
assert n_incr_w0.dtype == dtype
means_simple, vars_simple = mean_variance_axis(X=X_sparse, axis=axis)
assert_array_almost_equal(means0, means_w0)
assert_array_almost_equal(means0, means_simple)
assert_array_almost_equal(vars0, vars_w0)
assert_array_almost_equal(vars0, vars_simple)
assert_array_almost_equal(n_incr0, n_incr_w0)
# check second round for incremental
means1, vars1, n_incr1 = incr_mean_variance_axis(
X=X_sparse,
axis=axis,
last_mean=means0,
last_var=vars0,
last_n=n_incr0,
weights=None,
)
means_w1, vars_w1, n_incr_w1 = incr_mean_variance_axis(
X=Xw_sparse,
axis=axis,
last_mean=means_w0,
last_var=vars_w0,
last_n=n_incr_w0,
weights=weights,
)
assert_array_almost_equal(means1, means_w1)
assert_array_almost_equal(vars1, vars_w1)
assert_array_almost_equal(n_incr1, n_incr_w1)
assert means_w1.dtype == dtype
assert vars_w1.dtype == dtype
assert n_incr_w1.dtype == dtype
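# A minimal sketch (not part of the original test file) of the weighting
# semantics checked above: along axis=0, integer sample weights are
# equivalent to repeating the corresponding rows of the dense matrix.
import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import incr_mean_variance_axis

Xw_demo = np.array([[0.0, 1.0], [2.0, 3.0]])
mean_demo, var_demo, _ = incr_mean_variance_axis(
    sp.csr_matrix(Xw_demo),
    axis=0,
    last_mean=np.zeros(2),
    last_var=np.zeros(2),
    last_n=np.zeros(2, dtype=np.int64),
    weights=np.array([2.0, 1.0]),  # first row counted twice
)
X_repeated = np.repeat(Xw_demo, [2, 1], axis=0)
assert np.allclose(mean_demo, X_repeated.mean(axis=0))
assert np.allclose(var_demo, X_repeated.var(axis=0))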
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_incr_mean_variance_axis(csc_container, csr_container, lil_container):
for axis in [0, 1]:
rng = np.random.RandomState(0)
n_features = 50
n_samples = 10
if axis == 0:
data_chunks = [rng.randint(0, 2, size=n_features) for i in range(n_samples)]
else:
data_chunks = [rng.randint(0, 2, size=n_samples) for i in range(n_features)]
# default params for incr_mean_variance
last_mean = np.zeros(n_features) if axis == 0 else np.zeros(n_samples)
last_var = np.zeros_like(last_mean)
last_n = np.zeros_like(last_mean, dtype=np.int64)
# Test errors
X = np.array(data_chunks[0])
X = np.atleast_2d(X)
X = X.T if axis == 1 else X
X_lil = lil_container(X)
X_csr = csr_container(X_lil)
with pytest.raises(TypeError):
incr_mean_variance_axis(
X=axis, axis=last_mean, last_mean=last_var, last_var=last_n
)
with pytest.raises(TypeError):
incr_mean_variance_axis(
X_lil, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
# Test _incr_mean_and_var with a 1 row input
X_means, X_vars = mean_variance_axis(X_csr, axis)
X_means_incr, X_vars_incr, n_incr = incr_mean_variance_axis(
X_csr, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
assert_array_almost_equal(X_means, X_means_incr)
assert_array_almost_equal(X_vars, X_vars_incr)
        # X.shape[axis] gives the number of samples
assert_array_equal(X.shape[axis], n_incr)
X_csc = csc_container(X_lil)
X_means, X_vars = mean_variance_axis(X_csc, axis)
assert_array_almost_equal(X_means, X_means_incr)
assert_array_almost_equal(X_vars, X_vars_incr)
assert_array_equal(X.shape[axis], n_incr)
# Test _incremental_mean_and_var with whole data
X = np.vstack(data_chunks)
X = X.T if axis == 1 else X
X_lil = lil_container(X)
X_csr = csr_container(X_lil)
X_csc = csc_container(X_lil)
expected_dtypes = [
(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64),
]
for input_dtype, output_dtype in expected_dtypes:
for X_sparse in (X_csr, X_csc):
X_sparse = X_sparse.astype(input_dtype)
last_mean = last_mean.astype(output_dtype)
last_var = last_var.astype(output_dtype)
X_means, X_vars = mean_variance_axis(X_sparse, axis)
X_means_incr, X_vars_incr, n_incr = incr_mean_variance_axis(
X_sparse,
axis=axis,
last_mean=last_mean,
last_var=last_var,
last_n=last_n,
)
assert X_means_incr.dtype == output_dtype
assert X_vars_incr.dtype == output_dtype
assert_array_almost_equal(X_means, X_means_incr)
assert_array_almost_equal(X_vars, X_vars_incr)
assert_array_equal(X.shape[axis], n_incr)
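# A minimal sketch (not part of the original test file) of the streaming
# pattern checked above: folding chunks into incr_mean_variance_axis one at
# a time reproduces the statistics of the full stacked data.
import numpy as np
import scipy.sparse as sp
from sklearn.utils.sparsefuncs import incr_mean_variance_axis

rng_demo = np.random.RandomState(0)
chunks = [sp.csr_matrix(rng_demo.rand(20, 3)) for _ in range(5)]
mean, var, n = np.zeros(3), np.zeros(3), np.zeros(3, dtype=np.int64)
for chunk in chunks:
    mean, var, n = incr_mean_variance_axis(
        chunk, axis=0, last_mean=mean, last_var=var, last_n=n
    )
X_full = sp.vstack(chunks).toarray()
assert np.allclose(mean, X_full.mean(axis=0))
assert np.allclose(var, X_full.var(axis=0))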
@pytest.mark.parametrize("sparse_constructor", CSC_CONTAINERS + CSR_CONTAINERS)
def test_incr_mean_variance_axis_dim_mismatch(sparse_constructor):
"""Check that we raise proper error when axis=1 and the dimension mismatch.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/pull/18655
"""
n_samples, n_features = 60, 4
rng = np.random.RandomState(42)
X = sparse_constructor(rng.rand(n_samples, n_features))
last_mean = np.zeros(n_features)
last_var = np.zeros_like(last_mean)
last_n = np.zeros(last_mean.shape, dtype=np.int64)
kwargs = dict(last_mean=last_mean, last_var=last_var, last_n=last_n)
mean0, var0, _ = incr_mean_variance_axis(X, axis=0, **kwargs)
assert_allclose(np.mean(X.toarray(), axis=0), mean0)
assert_allclose(np.var(X.toarray(), axis=0), var0)
# test ValueError if axis=1 and last_mean.size == n_features
with pytest.raises(ValueError):
incr_mean_variance_axis(X, axis=1, **kwargs)
# test inconsistent shapes of last_mean, last_var, last_n
kwargs = dict(last_mean=last_mean[:-1], last_var=last_var, last_n=last_n)
with pytest.raises(ValueError):
incr_mean_variance_axis(X, axis=0, **kwargs)
@pytest.mark.parametrize(
"X1, X2",
[
(
sp.random(5, 2, density=0.8, format="csr", random_state=0),
sp.random(13, 2, density=0.8, format="csr", random_state=0),
),
(
sp.random(5, 2, density=0.8, format="csr", random_state=0),
sp.hstack(
[
np.full((13, 1), fill_value=np.nan),
sp.random(13, 1, density=0.8, random_state=42),
],
format="csr",
),
),
],
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_incr_mean_variance_axis_equivalence_mean_variance(X1, X2, csr_container):
# non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/16448
# check that computing the incremental mean and variance is equivalent to
# computing the mean and variance on the stacked dataset.
X1 = csr_container(X1)
X2 = csr_container(X2)
axis = 0
last_mean, last_var = np.zeros(X1.shape[1]), np.zeros(X1.shape[1])
last_n = np.zeros(X1.shape[1], dtype=np.int64)
updated_mean, updated_var, updated_n = incr_mean_variance_axis(
X1, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
updated_mean, updated_var, updated_n = incr_mean_variance_axis(
X2, axis=axis, last_mean=updated_mean, last_var=updated_var, last_n=updated_n
)
X = sp.vstack([X1, X2])
assert_allclose(updated_mean, np.nanmean(X.toarray(), axis=axis))
assert_allclose(updated_var, np.nanvar(X.toarray(), axis=axis))
assert_allclose(updated_n, np.count_nonzero(~np.isnan(X.toarray()), axis=0))
def test_incr_mean_variance_no_new_n():
# check the behaviour when we update the variance with an empty matrix
axis = 0
X1 = sp.random(5, 1, density=0.8, random_state=0).tocsr()
X2 = sp.random(0, 1, density=0.8, random_state=0).tocsr()
last_mean, last_var = np.zeros(X1.shape[1]), np.zeros(X1.shape[1])
last_n = np.zeros(X1.shape[1], dtype=np.int64)
last_mean, last_var, last_n = incr_mean_variance_axis(
X1, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
    # update the statistics with an empty matrix, which should be ignored
updated_mean, updated_var, updated_n = incr_mean_variance_axis(
X2, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
assert_allclose(updated_mean, last_mean)
assert_allclose(updated_var, last_var)
assert_allclose(updated_n, last_n)
def test_incr_mean_variance_n_float():
# check the behaviour when last_n is just a number
axis = 0
X = sp.random(5, 2, density=0.8, random_state=0).tocsr()
last_mean, last_var = np.zeros(X.shape[1]), np.zeros(X.shape[1])
last_n = 0
_, _, new_n = incr_mean_variance_axis(
X, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
assert_allclose(new_n, np.full(X.shape[1], X.shape[0]))
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("sparse_constructor", CSC_CONTAINERS + CSR_CONTAINERS)
def test_incr_mean_variance_axis_ignore_nan(axis, sparse_constructor):
old_means = np.array([535.0, 535.0, 535.0, 535.0])
old_variances = np.array([4225.0, 4225.0, 4225.0, 4225.0])
old_sample_count = np.array([2, 2, 2, 2], dtype=np.int64)
X = sparse_constructor(
np.array([[170, 170, 170, 170], [430, 430, 430, 430], [300, 300, 300, 300]])
)
X_nan = sparse_constructor(
np.array(
[
[170, np.nan, 170, 170],
[np.nan, 170, 430, 430],
[430, 430, np.nan, 300],
[300, 300, 300, np.nan],
]
)
)
    # we avoid creating specific data for axis 0 and 1: transposing the data is
# enough.
if axis:
X = X.T
X_nan = X_nan.T
# take a copy of the old statistics since they are modified in place.
X_means, X_vars, X_sample_count = incr_mean_variance_axis(
X,
axis=axis,
last_mean=old_means.copy(),
last_var=old_variances.copy(),
last_n=old_sample_count.copy(),
)
X_nan_means, X_nan_vars, X_nan_sample_count = incr_mean_variance_axis(
X_nan,
axis=axis,
last_mean=old_means.copy(),
last_var=old_variances.copy(),
last_n=old_sample_count.copy(),
)
assert_allclose(X_nan_means, X_means)
assert_allclose(X_nan_vars, X_vars)
assert_allclose(X_nan_sample_count, X_sample_count)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_mean_variance_illegal_axis(csr_container):
X, _ = make_classification(5, 4, random_state=0)
# Sparsify the array a little bit
X[0, 0] = 0
X[2, 1] = 0
X[4, 3] = 0
X_csr = csr_container(X)
with pytest.raises(ValueError):
mean_variance_axis(X_csr, axis=-3)
with pytest.raises(ValueError):
mean_variance_axis(X_csr, axis=2)
with pytest.raises(ValueError):
mean_variance_axis(X_csr, axis=-1)
with pytest.raises(ValueError):
incr_mean_variance_axis(
X_csr, axis=-3, last_mean=None, last_var=None, last_n=None
)
with pytest.raises(ValueError):
incr_mean_variance_axis(
X_csr, axis=2, last_mean=None, last_var=None, last_n=None
)
with pytest.raises(ValueError):
incr_mean_variance_axis(
X_csr, axis=-1, last_mean=None, last_var=None, last_n=None
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_densify_rows(csr_container):
for dtype in (np.float32, np.float64):
X = csr_container(
[[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=dtype
)
X_rows = np.array([0, 2, 3], dtype=np.intp)
out = np.ones((6, X.shape[1]), dtype=dtype)
out_rows = np.array([1, 3, 4], dtype=np.intp)
expect = np.ones_like(out)
expect[out_rows] = X[X_rows, :].toarray()
assign_rows_csr(X, X_rows, out_rows, out)
assert_array_equal(out, expect)
def test_inplace_column_scale():
rng = np.random.RandomState(0)
X = sp.rand(100, 200, 0.05)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
scale = rng.rand(200)
XA *= scale
inplace_column_scale(Xc, scale)
inplace_column_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
with pytest.raises(TypeError):
inplace_column_scale(X.tolil(), scale)
X = X.astype(np.float32)
scale = scale.astype(np.float32)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
XA *= scale
inplace_column_scale(Xc, scale)
inplace_column_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
with pytest.raises(TypeError):
inplace_column_scale(X.tolil(), scale)
def test_inplace_row_scale():
rng = np.random.RandomState(0)
X = sp.rand(100, 200, 0.05)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
scale = rng.rand(100)
XA *= scale.reshape(-1, 1)
inplace_row_scale(Xc, scale)
inplace_row_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
with pytest.raises(TypeError):
        inplace_row_scale(X.tolil(), scale)
X = X.astype(np.float32)
scale = scale.astype(np.float32)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
XA *= scale.reshape(-1, 1)
inplace_row_scale(Xc, scale)
inplace_row_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
with pytest.raises(TypeError):
        inplace_row_scale(X.tolil(), scale)
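# For CSR matrices the stored values are laid out row by row, so an
# in-place row scaling reduces to one vectorized multiply. A hypothetical
# sketch of the operation the two tests above exercise (not the actual
# Cython helper):
def _inplace_row_scale_csr_sketch(X_csr, scale):
    # np.diff(indptr) gives the number of stored values in each row
    X_csr.data *= np.repeat(scale, np.diff(X_csr.indptr))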
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_inplace_swap_row(csc_container, csr_container):
X = np.array(
[[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64
)
X_csr = csr_container(X)
X_csc = csc_container(X)
swap = linalg.get_blas_funcs(("swap",), (X,))
swap = swap[0]
X[0], X[-1] = swap(X[0], X[-1])
inplace_swap_row(X_csr, 0, -1)
inplace_swap_row(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[2], X[3] = swap(X[2], X[3])
inplace_swap_row(X_csr, 2, 3)
inplace_swap_row(X_csc, 2, 3)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
inplace_swap_row(X_csr.tolil())
X = np.array(
[[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float32
)
X_csr = csr_container(X)
X_csc = csc_container(X)
swap = linalg.get_blas_funcs(("swap",), (X,))
swap = swap[0]
X[0], X[-1] = swap(X[0], X[-1])
inplace_swap_row(X_csr, 0, -1)
inplace_swap_row(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[2], X[3] = swap(X[2], X[3])
inplace_swap_row(X_csr, 2, 3)
inplace_swap_row(X_csc, 2, 3)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
inplace_swap_row(X_csr.tolil())
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_inplace_swap_column(csc_container, csr_container):
X = np.array(
[[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64
)
X_csr = csr_container(X)
X_csc = csc_container(X)
swap = linalg.get_blas_funcs(("swap",), (X,))
swap = swap[0]
X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
inplace_swap_column(X_csr, 0, -1)
inplace_swap_column(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
inplace_swap_column(X_csr, 0, 1)
inplace_swap_column(X_csc, 0, 1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
inplace_swap_column(X_csr.tolil())
X = np.array(
[[0, 3, 0], [2, 4, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float32
)
X_csr = csr_container(X)
X_csc = csc_container(X)
swap = linalg.get_blas_funcs(("swap",), (X,))
swap = swap[0]
X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
inplace_swap_column(X_csr, 0, -1)
inplace_swap_column(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
inplace_swap_column(X_csr, 0, 1)
inplace_swap_column(X_csc, 0, 1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
inplace_swap_column(X_csr.tolil())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("axis", [0, 1, None])
@pytest.mark.parametrize("sparse_format", CSC_CONTAINERS + CSR_CONTAINERS)
@pytest.mark.parametrize(
"missing_values, min_func, max_func, ignore_nan",
[(0, np.min, np.max, False), (np.nan, np.nanmin, np.nanmax, True)],
)
@pytest.mark.parametrize("large_indices", [True, False])
def test_min_max(
dtype,
axis,
sparse_format,
missing_values,
min_func,
max_func,
ignore_nan,
large_indices,
):
X = np.array(
[
[0, 3, 0],
[2, -1, missing_values],
[0, 0, 0],
[9, missing_values, 7],
[4, 0, 5],
],
dtype=dtype,
)
X_sparse = sparse_format(X)
if large_indices:
X_sparse.indices = X_sparse.indices.astype("int64")
X_sparse.indptr = X_sparse.indptr.astype("int64")
mins_sparse, maxs_sparse = min_max_axis(X_sparse, axis=axis, ignore_nan=ignore_nan)
assert_array_equal(mins_sparse, min_func(X, axis=axis))
assert_array_equal(maxs_sparse, max_func(X, axis=axis))
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_min_max_axis_errors(csc_container, csr_container):
X = np.array(
[[0, 3, 0], [2, -1, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64
)
X_csr = csr_container(X)
X_csc = csc_container(X)
with pytest.raises(TypeError):
min_max_axis(X_csr.tolil(), axis=0)
with pytest.raises(ValueError):
min_max_axis(X_csr, axis=2)
with pytest.raises(ValueError):
min_max_axis(X_csc, axis=-3)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_count_nonzero(csc_container, csr_container):
X = np.array(
[[0, 3, 0], [2, -1, 0], [0, 0, 0], [9, 8, 7], [4, 0, 5]], dtype=np.float64
)
X_csr = csr_container(X)
X_csc = csc_container(X)
X_nonzero = X != 0
sample_weight = [0.5, 0.2, 0.3, 0.1, 0.1]
X_nonzero_weighted = X_nonzero * np.array(sample_weight)[:, None]
for axis in [0, 1, -1, -2, None]:
assert_array_almost_equal(
count_nonzero(X_csr, axis=axis), X_nonzero.sum(axis=axis)
)
assert_array_almost_equal(
count_nonzero(X_csr, axis=axis, sample_weight=sample_weight),
X_nonzero_weighted.sum(axis=axis),
)
with pytest.raises(TypeError):
count_nonzero(X_csc)
with pytest.raises(ValueError):
count_nonzero(X_csr, axis=2)
assert count_nonzero(X_csr, axis=0).dtype == count_nonzero(X_csr, axis=1).dtype
assert (
count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype
== count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype
)
# Check dtypes with large sparse matrices too
# XXX: test fails on 32bit (Windows/Linux)
try:
X_csr.indices = X_csr.indices.astype(np.int64)
X_csr.indptr = X_csr.indptr.astype(np.int64)
assert count_nonzero(X_csr, axis=0).dtype == count_nonzero(X_csr, axis=1).dtype
assert (
count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype
== count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype
)
except TypeError as e:
assert "according to the rule 'safe'" in e.args[0] and np.intp().nbytes < 8, e
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_csc_row_median(csc_container, csr_container):
    # Test that csc_median_axis_0 actually calculates the median.
# Test that it gives the same output when X is dense.
rng = np.random.RandomState(0)
X = rng.rand(100, 50)
dense_median = np.median(X, axis=0)
csc = csc_container(X)
sparse_median = csc_median_axis_0(csc)
assert_array_equal(sparse_median, dense_median)
# Test that it gives the same output when X is sparse
X = rng.rand(51, 100)
X[X < 0.7] = 0.0
ind = rng.randint(0, 50, 10)
X[ind] = -X[ind]
csc = csc_container(X)
dense_median = np.median(X, axis=0)
sparse_median = csc_median_axis_0(csc)
assert_array_equal(sparse_median, dense_median)
# Test for toy data.
X = [[0, -2], [-1, -1], [1, 0], [2, 1]]
csc = csc_container(X)
assert_array_equal(csc_median_axis_0(csc), np.array([0.5, -0.5]))
X = [[0, -2], [-1, -5], [1, -3]]
csc = csc_container(X)
assert_array_equal(csc_median_axis_0(csc), np.array([0.0, -3]))
# Test that it raises an Error for non-csc matrices.
with pytest.raises(TypeError):
csc_median_axis_0(csr_container(X))
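# Conceptually, the median of a CSC column must account for the implicit
# zeros that are never stored. A sketch under that assumption (the real
# csc_median_axis_0 avoids materializing the dense column):
def _sparse_column_median_sketch(stored_values, n_rows):
    full = np.zeros(n_rows, dtype=np.float64)
    full[: len(stored_values)] = stored_values
    return np.median(full)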
@pytest.mark.parametrize(
"inplace_csr_row_normalize",
(inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2),
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_inplace_normalize(csr_container, inplace_csr_row_normalize):
if csr_container is sp.csr_matrix:
ones = np.ones((10, 1))
else:
ones = np.ones(10)
rs = RandomState(10)
for dtype in (np.float64, np.float32):
X = rs.randn(10, 5).astype(dtype)
X_csr = csr_container(X)
for index_dtype in [np.int32, np.int64]:
# csr_matrix will use int32 indices by default,
# up-casting those to int64 when necessary
if index_dtype is np.int64:
X_csr.indptr = X_csr.indptr.astype(index_dtype)
X_csr.indices = X_csr.indices.astype(index_dtype)
assert X_csr.indices.dtype == index_dtype
assert X_csr.indptr.dtype == index_dtype
inplace_csr_row_normalize(X_csr)
assert X_csr.dtype == dtype
if inplace_csr_row_normalize is inplace_csr_row_normalize_l2:
X_csr.data **= 2
assert_array_almost_equal(np.abs(X_csr).sum(axis=1), ones)
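# A vectorized sketch of the in-place L2 row normalization exercised above,
# assuming a scipy.sparse CSR input; all-zero rows are left untouched.
# Illustrative, not the Cython routine itself.
def _inplace_row_normalize_l2_sketch(X_csr):
    norms = np.sqrt(np.asarray(X_csr.multiply(X_csr).sum(axis=1)).ravel())
    norms[norms == 0] = 1.0  # avoid dividing empty rows by zero
    X_csr.data /= np.repeat(norms, np.diff(X_csr.indptr))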
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_csr_row_norms(dtype):
# checks that csr_row_norms returns the same output as
    # scipy.sparse.linalg.norm, and that the dtype is the same as X.dtype.
X = sp.random(100, 10, format="csr", dtype=dtype, random_state=42)
scipy_norms = sp.linalg.norm(X, axis=1) ** 2
norms = csr_row_norms(X)
assert norms.dtype == dtype
rtol = 1e-6 if dtype == np.float32 else 1e-7
assert_allclose(norms, scipy_norms, rtol=rtol)
@pytest.fixture(scope="module", params=CSR_CONTAINERS + CSC_CONTAINERS)
def centered_matrices(request):
"""Returns equivalent tuple[sp.linalg.LinearOperator, np.ndarray]."""
sparse_container = request.param
random_state = np.random.default_rng(42)
X_sparse = sparse_container(
sp.random(500, 100, density=0.1, format="csr", random_state=random_state)
)
X_dense = X_sparse.toarray()
mu = np.asarray(X_sparse.mean(axis=0)).ravel()
X_sparse_centered = _implicit_column_offset(X_sparse, mu)
X_dense_centered = X_dense - mu
return X_sparse_centered, X_dense_centered
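# The fixture above builds an implicitly centered operator. A hedged sketch
# of the idea: represent X - 1 @ mu.T as a LinearOperator so the sparse X is
# never densified (the names here are illustrative, not the private helper):
def _implicit_column_offset_sketch(X, mu):
    from scipy.sparse.linalg import LinearOperator
    return LinearOperator(
        shape=X.shape,
        matvec=lambda v: X @ v - mu @ v,  # (X - 1 mu^T) v = X v - (mu . v) 1
        matmat=lambda V: X @ V - mu @ V,
        rmatvec=lambda y: X.T @ y - mu * y.sum(),  # X^T y - mu * sum(y)
        rmatmat=lambda Y: X.T @ Y - np.outer(mu, Y.sum(axis=0)),
        dtype=X.dtype,
    )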
def test_implicit_center_matmat(global_random_seed, centered_matrices):
X_sparse_centered, X_dense_centered = centered_matrices
rng = np.random.default_rng(global_random_seed)
Y = rng.standard_normal((X_dense_centered.shape[1], 50))
assert_allclose(X_dense_centered @ Y, X_sparse_centered.matmat(Y))
assert_allclose(X_dense_centered @ Y, X_sparse_centered @ Y)
def test_implicit_center_matvec(global_random_seed, centered_matrices):
X_sparse_centered, X_dense_centered = centered_matrices
rng = np.random.default_rng(global_random_seed)
y = rng.standard_normal(X_dense_centered.shape[1])
assert_allclose(X_dense_centered @ y, X_sparse_centered.matvec(y))
assert_allclose(X_dense_centered @ y, X_sparse_centered @ y)
def test_implicit_center_rmatmat(global_random_seed, centered_matrices):
X_sparse_centered, X_dense_centered = centered_matrices
rng = np.random.default_rng(global_random_seed)
Y = rng.standard_normal((X_dense_centered.shape[0], 50))
assert_allclose(X_dense_centered.T @ Y, X_sparse_centered.rmatmat(Y))
assert_allclose(X_dense_centered.T @ Y, X_sparse_centered.T @ Y)
def test_implicit_center_rmatvec(global_random_seed, centered_matrices):
X_sparse_centered, X_dense_centered = centered_matrices
rng = np.random.default_rng(global_random_seed)
y = rng.standard_normal(X_dense_centered.shape[0])
assert_allclose(X_dense_centered.T @ y, X_sparse_centered.rmatvec(y))
assert_allclose(X_dense_centered.T @ y, X_sparse_centered.T @ y)

View File

@@ -0,0 +1,98 @@
import numpy as np
from numpy.testing import assert_allclose
from pytest import approx
from sklearn.utils.stats import _weighted_percentile
def test_weighted_percentile():
y = np.empty(102, dtype=np.float64)
y[:50] = 0
y[-51:] = 2
y[-1] = 100000
y[50] = 1
sw = np.ones(102, dtype=np.float64)
sw[-1] = 0.0
score = _weighted_percentile(y, sw, 50)
assert approx(score) == 1
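# The tests in this file pin down a "lower" weighted percentile: sort the
# values, accumulate the sorted weights, and take the first value whose
# cumulative weight reaches the requested fraction of the total. A NumPy
# sketch of that definition (illustrative, not the private helper):
def _weighted_percentile_sketch(values, weights, percentile=50):
    order = np.argsort(values)
    cdf = np.cumsum(np.asarray(weights)[order])
    target = percentile / 100.0 * cdf[-1]
    if target == 0:
        target = np.nextafter(target, 1)  # skip any zero-weight prefix
    idx = min(np.searchsorted(cdf, target), len(values) - 1)
    return np.asarray(values)[order][idx]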
def test_weighted_percentile_equal():
y = np.empty(102, dtype=np.float64)
y.fill(0.0)
sw = np.ones(102, dtype=np.float64)
sw[-1] = 0.0
score = _weighted_percentile(y, sw, 50)
assert score == 0
def test_weighted_percentile_zero_weight():
y = np.empty(102, dtype=np.float64)
y.fill(1.0)
sw = np.ones(102, dtype=np.float64)
sw.fill(0.0)
score = _weighted_percentile(y, sw, 50)
assert approx(score) == 1.0
def test_weighted_percentile_zero_weight_zero_percentile():
y = np.array([0, 1, 2, 3, 4, 5])
sw = np.array([0, 0, 1, 1, 1, 0])
score = _weighted_percentile(y, sw, 0)
assert approx(score) == 2
score = _weighted_percentile(y, sw, 50)
assert approx(score) == 3
score = _weighted_percentile(y, sw, 100)
assert approx(score) == 4
def test_weighted_median_equal_weights():
    # Check that the weighted percentile at 50 equals the median when weights are equal
rng = np.random.RandomState(0)
# Odd size as _weighted_percentile takes lower weighted percentile
x = rng.randint(10, size=11)
weights = np.ones(x.shape)
median = np.median(x)
w_median = _weighted_percentile(x, weights)
assert median == approx(w_median)
def test_weighted_median_integer_weights():
    # Check that the weighted percentile at 50 matches the median of the
    # manually repeated (integer-weighted) data
rng = np.random.RandomState(0)
x = rng.randint(20, size=10)
weights = rng.choice(5, size=10)
x_manual = np.repeat(x, weights)
median = np.median(x_manual)
w_median = _weighted_percentile(x, weights)
assert median == approx(w_median)
def test_weighted_percentile_2d():
    # Check the case where the array is 2D and sample_weight is 1D
rng = np.random.RandomState(0)
x1 = rng.randint(10, size=10)
w1 = rng.choice(5, size=10)
x2 = rng.randint(20, size=10)
x_2d = np.vstack((x1, x2)).T
w_median = _weighted_percentile(x_2d, w1)
p_axis_0 = [_weighted_percentile(x_2d[:, i], w1) for i in range(x_2d.shape[1])]
assert_allclose(w_median, p_axis_0)
    # Check when array and sample_weight are both 2D
w2 = rng.choice(5, size=10)
w_2d = np.vstack((w1, w2)).T
w_median = _weighted_percentile(x_2d, w_2d)
p_axis_0 = [
_weighted_percentile(x_2d[:, i], w_2d[:, i]) for i in range(x_2d.shape[1])
]
assert_allclose(w_median, p_axis_0)

View File

@@ -0,0 +1,47 @@
import pytest
from sklearn.base import BaseEstimator
from sklearn.utils._tags import (
_DEFAULT_TAGS,
_safe_tags,
)
class NoTagsEstimator:
pass
class MoreTagsEstimator:
def _more_tags(self):
return {"allow_nan": True}
@pytest.mark.parametrize(
"estimator, err_msg",
[
(BaseEstimator(), "The key xxx is not defined in _get_tags"),
(NoTagsEstimator(), "The key xxx is not defined in _DEFAULT_TAGS"),
],
)
def test_safe_tags_error(estimator, err_msg):
# Check that safe_tags raises error in ambiguous case.
with pytest.raises(ValueError, match=err_msg):
_safe_tags(estimator, key="xxx")
@pytest.mark.parametrize(
"estimator, key, expected_results",
[
(NoTagsEstimator(), None, _DEFAULT_TAGS),
(NoTagsEstimator(), "allow_nan", _DEFAULT_TAGS["allow_nan"]),
(MoreTagsEstimator(), None, {**_DEFAULT_TAGS, **{"allow_nan": True}}),
(MoreTagsEstimator(), "allow_nan", True),
(BaseEstimator(), None, _DEFAULT_TAGS),
(BaseEstimator(), "allow_nan", _DEFAULT_TAGS["allow_nan"]),
(BaseEstimator(), "allow_nan", _DEFAULT_TAGS["allow_nan"]),
],
)
def test_safe_tags_no_get_tags(estimator, key, expected_results):
# check the behaviour of _safe_tags when an estimator does not implement
# _get_tags
assert _safe_tags(estimator, key=key) == expected_results
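# A hypothetical re-implementation of the fallback logic these tests pin
# down: prefer the estimator's _get_tags(), otherwise merge _more_tags()
# into the defaults, and raise for unknown keys (sketch only):
def _safe_tags_sketch(estimator, key=None):
    if hasattr(estimator, "_get_tags"):
        tags, origin = estimator._get_tags(), "_get_tags"
    else:
        tags, origin = dict(_DEFAULT_TAGS), "_DEFAULT_TAGS"
        if hasattr(estimator, "_more_tags"):
            tags.update(estimator._more_tags())
    if key is None:
        return tags
    if key not in tags:
        raise ValueError(f"The key {key} is not defined in {origin}.")
    return tags[key]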

View File

@@ -0,0 +1,923 @@
import atexit
import os
import unittest
import warnings
import numpy as np
import pytest
from scipy import sparse
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._testing import (
TempMemmap,
_convert_container,
_delete_folder,
_get_warnings_filters_info_list,
assert_allclose,
assert_allclose_dense_sparse,
assert_no_warnings,
assert_raise_message,
assert_raises,
assert_raises_regex,
assert_run_python_script_without_output,
check_docstring_parameters,
create_memmap_backed_data,
ignore_warnings,
raises,
set_random_state,
turn_warnings_into_errors,
)
from sklearn.utils.deprecation import deprecated
from sklearn.utils.fixes import (
_IS_WASM,
CSC_CONTAINERS,
CSR_CONTAINERS,
parse_version,
sp_version,
)
from sklearn.utils.metaestimators import available_if
def test_set_random_state():
lda = LinearDiscriminantAnalysis()
tree = DecisionTreeClassifier()
# Linear Discriminant Analysis doesn't have random state: smoke test
set_random_state(lda, 3)
set_random_state(tree, 3)
assert tree.random_state == 3
@pytest.mark.parametrize("csr_container", CSC_CONTAINERS)
def test_assert_allclose_dense_sparse(csr_container):
x = np.arange(9).reshape(3, 3)
msg = "Not equal to tolerance "
y = csr_container(x)
for X in [x, y]:
# basic compare
with pytest.raises(AssertionError, match=msg):
assert_allclose_dense_sparse(X, X * 2)
assert_allclose_dense_sparse(X, X)
with pytest.raises(ValueError, match="Can only compare two sparse"):
assert_allclose_dense_sparse(x, y)
A = sparse.diags(np.ones(5), offsets=0).tocsr()
B = csr_container(np.ones((1, 5)))
with pytest.raises(AssertionError, match="Arrays are not equal"):
assert_allclose_dense_sparse(B, A)
def test_assert_raises_msg():
with assert_raises_regex(AssertionError, "Hello world"):
with assert_raises(ValueError, msg="Hello world"):
pass
def test_assert_raise_message():
def _raise_ValueError(message):
raise ValueError(message)
def _no_raise():
pass
assert_raise_message(ValueError, "test", _raise_ValueError, "test")
assert_raises(
AssertionError,
assert_raise_message,
ValueError,
"something else",
_raise_ValueError,
"test",
)
assert_raises(
ValueError,
assert_raise_message,
TypeError,
"something else",
_raise_ValueError,
"test",
)
assert_raises(AssertionError, assert_raise_message, ValueError, "test", _no_raise)
# multiple exceptions in a tuple
assert_raises(
AssertionError,
assert_raise_message,
(ValueError, AttributeError),
"test",
_no_raise,
)
def test_ignore_warning():
    # This checks that the ignore_warnings decorator and context manager work
    # as expected
def _warning_function():
warnings.warn("deprecation warning", DeprecationWarning)
def _multiple_warning_function():
warnings.warn("deprecation warning", DeprecationWarning)
warnings.warn("deprecation warning")
# Check the function directly
assert_no_warnings(ignore_warnings(_warning_function))
assert_no_warnings(ignore_warnings(_warning_function, category=DeprecationWarning))
with pytest.warns(DeprecationWarning):
ignore_warnings(_warning_function, category=UserWarning)()
with pytest.warns() as record:
ignore_warnings(_multiple_warning_function, category=FutureWarning)()
assert len(record) == 2
assert isinstance(record[0].message, DeprecationWarning)
assert isinstance(record[1].message, UserWarning)
with pytest.warns() as record:
ignore_warnings(_multiple_warning_function, category=UserWarning)()
assert len(record) == 1
assert isinstance(record[0].message, DeprecationWarning)
assert_no_warnings(
ignore_warnings(_warning_function, category=(DeprecationWarning, UserWarning))
)
# Check the decorator
@ignore_warnings
def decorator_no_warning():
_warning_function()
_multiple_warning_function()
@ignore_warnings(category=(DeprecationWarning, UserWarning))
def decorator_no_warning_multiple():
_multiple_warning_function()
@ignore_warnings(category=DeprecationWarning)
def decorator_no_deprecation_warning():
_warning_function()
@ignore_warnings(category=UserWarning)
def decorator_no_user_warning():
_warning_function()
@ignore_warnings(category=DeprecationWarning)
def decorator_no_deprecation_multiple_warning():
_multiple_warning_function()
@ignore_warnings(category=UserWarning)
def decorator_no_user_multiple_warning():
_multiple_warning_function()
assert_no_warnings(decorator_no_warning)
assert_no_warnings(decorator_no_warning_multiple)
assert_no_warnings(decorator_no_deprecation_warning)
with pytest.warns(DeprecationWarning):
decorator_no_user_warning()
with pytest.warns(UserWarning):
decorator_no_deprecation_multiple_warning()
with pytest.warns(DeprecationWarning):
decorator_no_user_multiple_warning()
# Check the context manager
def context_manager_no_warning():
with ignore_warnings():
_warning_function()
def context_manager_no_warning_multiple():
with ignore_warnings(category=(DeprecationWarning, UserWarning)):
_multiple_warning_function()
def context_manager_no_deprecation_warning():
with ignore_warnings(category=DeprecationWarning):
_warning_function()
def context_manager_no_user_warning():
with ignore_warnings(category=UserWarning):
_warning_function()
def context_manager_no_deprecation_multiple_warning():
with ignore_warnings(category=DeprecationWarning):
_multiple_warning_function()
def context_manager_no_user_multiple_warning():
with ignore_warnings(category=UserWarning):
_multiple_warning_function()
assert_no_warnings(context_manager_no_warning)
assert_no_warnings(context_manager_no_warning_multiple)
assert_no_warnings(context_manager_no_deprecation_warning)
with pytest.warns(DeprecationWarning):
context_manager_no_user_warning()
with pytest.warns(UserWarning):
context_manager_no_deprecation_multiple_warning()
with pytest.warns(DeprecationWarning):
context_manager_no_user_multiple_warning()
    # Check that passing a warning class as the first positional argument raises a ValueError
warning_class = UserWarning
match = "'obj' should be a callable.+you should use 'category=UserWarning'"
with pytest.raises(ValueError, match=match):
silence_warnings_func = ignore_warnings(warning_class)(_warning_function)
silence_warnings_func()
with pytest.raises(ValueError, match=match):
@ignore_warnings(warning_class)
def test():
pass
class TestWarns(unittest.TestCase):
def test_warn(self):
def f():
warnings.warn("yo")
return 3
with pytest.raises(AssertionError):
assert_no_warnings(f)
assert assert_no_warnings(lambda x: x, 1) == 1
# Tests for docstrings:
def f_ok(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
b : float
Parameter b
Returns
-------
c : list
Parameter c
"""
c = a + b
return c
def f_bad_sections(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
b : float
Parameter b
Results
-------
c : list
Parameter c
"""
c = a + b
return c
def f_bad_order(b, a):
"""Function f
Parameters
----------
a : int
Parameter a
b : float
Parameter b
Returns
-------
c : list
Parameter c
"""
c = a + b
return c
def f_too_many_param_docstring(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
b : int
Parameter b
c : int
Parameter c
Returns
-------
d : list
Parameter c
"""
d = a + b
return d
def f_missing(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
Returns
-------
c : list
Parameter c
"""
c = a + b
return c
def f_check_param_definition(a, b, c, d, e):
"""Function f
Parameters
----------
a: int
Parameter a
b:
Parameter b
c :
This is parsed correctly in numpydoc 1.2
d:int
Parameter d
e
No typespec is allowed without colon
"""
return a + b + c + d
class Klass:
def f_missing(self, X, y):
pass
def f_bad_sections(self, X, y):
"""Function f
Parameter
---------
a : int
Parameter a
b : float
Parameter b
Results
-------
c : list
Parameter c
"""
pass
class MockEst:
def __init__(self):
"""MockEstimator"""
def fit(self, X, y):
return X
def predict(self, X):
return X
def predict_proba(self, X):
return X
def score(self, X):
return 1.0
class MockMetaEstimator:
def __init__(self, delegate):
"""MetaEstimator to check if doctest on delegated methods work.
Parameters
---------
delegate : estimator
Delegated estimator.
"""
self.delegate = delegate
@available_if(lambda self: hasattr(self.delegate, "predict"))
def predict(self, X):
"""This is available only if delegate has predict.
Parameters
----------
y : ndarray
Parameter y
"""
return self.delegate.predict(X)
@available_if(lambda self: hasattr(self.delegate, "score"))
@deprecated("Testing a deprecated delegated method")
def score(self, X):
"""This is available only if delegate has score.
Parameters
---------
y : ndarray
Parameter y
"""
@available_if(lambda self: hasattr(self.delegate, "predict_proba"))
def predict_proba(self, X):
"""This is available only if delegate has predict_proba.
Parameters
---------
X : ndarray
Parameter X
"""
return X
@deprecated("Testing deprecated function with wrong params")
def fit(self, X, y):
"""Incorrect docstring but should not be tested"""
def test_check_docstring_parameters():
pytest.importorskip(
"numpydoc",
reason="numpydoc is required to test the docstrings",
minversion="1.2.0",
)
incorrect = check_docstring_parameters(f_ok)
assert incorrect == []
incorrect = check_docstring_parameters(f_ok, ignore=["b"])
assert incorrect == []
incorrect = check_docstring_parameters(f_missing, ignore=["b"])
assert incorrect == []
with pytest.raises(RuntimeError, match="Unknown section Results"):
check_docstring_parameters(f_bad_sections)
with pytest.raises(RuntimeError, match="Unknown section Parameter"):
check_docstring_parameters(Klass.f_bad_sections)
incorrect = check_docstring_parameters(f_check_param_definition)
mock_meta = MockMetaEstimator(delegate=MockEst())
mock_meta_name = mock_meta.__class__.__name__
assert incorrect == [
(
"sklearn.utils.tests.test_testing.f_check_param_definition There "
"was no space between the param name and colon ('a: int')"
),
(
"sklearn.utils.tests.test_testing.f_check_param_definition There "
"was no space between the param name and colon ('b:')"
),
(
"sklearn.utils.tests.test_testing.f_check_param_definition There "
"was no space between the param name and colon ('d:int')"
),
]
messages = [
[
"In function: sklearn.utils.tests.test_testing.f_bad_order",
(
"There's a parameter name mismatch in function docstring w.r.t."
" function signature, at index 0 diff: 'b' != 'a'"
),
"Full diff:",
"- ['b', 'a']",
"+ ['a', 'b']",
],
[
"In function: "
+ "sklearn.utils.tests.test_testing.f_too_many_param_docstring",
(
"Parameters in function docstring have more items w.r.t. function"
" signature, first extra item: c"
),
"Full diff:",
"- ['a', 'b']",
"+ ['a', 'b', 'c']",
"? +++++",
],
[
"In function: sklearn.utils.tests.test_testing.f_missing",
(
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: b"
),
"Full diff:",
"- ['a', 'b']",
"+ ['a']",
],
[
"In function: sklearn.utils.tests.test_testing.Klass.f_missing",
(
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: X"
),
"Full diff:",
"- ['X', 'y']",
"+ []",
],
[
"In function: "
+ f"sklearn.utils.tests.test_testing.{mock_meta_name}.predict",
(
"There's a parameter name mismatch in function docstring w.r.t."
" function signature, at index 0 diff: 'X' != 'y'"
),
"Full diff:",
"- ['X']",
"? ^",
"+ ['y']",
"? ^",
],
[
"In function: "
+ f"sklearn.utils.tests.test_testing.{mock_meta_name}."
+ "predict_proba",
"potentially wrong underline length... ",
"Parameters ",
"--------- in ",
],
[
"In function: "
+ f"sklearn.utils.tests.test_testing.{mock_meta_name}.score",
"potentially wrong underline length... ",
"Parameters ",
"--------- in ",
],
[
"In function: " + f"sklearn.utils.tests.test_testing.{mock_meta_name}.fit",
(
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: X"
),
"Full diff:",
"- ['X', 'y']",
"+ []",
],
]
for msg, f in zip(
messages,
[
f_bad_order,
f_too_many_param_docstring,
f_missing,
Klass.f_missing,
mock_meta.predict,
mock_meta.predict_proba,
mock_meta.score,
mock_meta.fit,
],
):
incorrect = check_docstring_parameters(f)
assert msg == incorrect, '\n"%s"\n not in \n"%s"' % (msg, incorrect)
class RegistrationCounter:
def __init__(self):
self.nb_calls = 0
def __call__(self, to_register_func):
self.nb_calls += 1
assert to_register_func.func is _delete_folder
def check_memmap(input_array, mmap_data, mmap_mode="r"):
assert isinstance(mmap_data, np.memmap)
writeable = mmap_mode != "r"
assert mmap_data.flags.writeable is writeable
np.testing.assert_array_equal(input_array, mmap_data)
def test_tempmemmap(monkeypatch):
registration_counter = RegistrationCounter()
monkeypatch.setattr(atexit, "register", registration_counter)
input_array = np.ones(3)
with TempMemmap(input_array) as data:
check_memmap(input_array, data)
temp_folder = os.path.dirname(data.filename)
if os.name != "nt":
assert not os.path.exists(temp_folder)
assert registration_counter.nb_calls == 1
mmap_mode = "r+"
with TempMemmap(input_array, mmap_mode=mmap_mode) as data:
check_memmap(input_array, data, mmap_mode=mmap_mode)
temp_folder = os.path.dirname(data.filename)
if os.name != "nt":
assert not os.path.exists(temp_folder)
assert registration_counter.nb_calls == 2
@pytest.mark.xfail(_IS_WASM, reason="memmap not fully supported")
def test_create_memmap_backed_data(monkeypatch):
registration_counter = RegistrationCounter()
monkeypatch.setattr(atexit, "register", registration_counter)
input_array = np.ones(3)
data = create_memmap_backed_data(input_array)
check_memmap(input_array, data)
assert registration_counter.nb_calls == 1
data, folder = create_memmap_backed_data(input_array, return_folder=True)
check_memmap(input_array, data)
assert folder == os.path.dirname(data.filename)
assert registration_counter.nb_calls == 2
mmap_mode = "r+"
data = create_memmap_backed_data(input_array, mmap_mode=mmap_mode)
check_memmap(input_array, data, mmap_mode)
assert registration_counter.nb_calls == 3
input_list = [input_array, input_array + 1, input_array + 2]
mmap_data_list = create_memmap_backed_data(input_list)
for input_array, data in zip(input_list, mmap_data_list):
check_memmap(input_array, data)
assert registration_counter.nb_calls == 4
output_data, other = create_memmap_backed_data([input_array, "not-an-array"])
check_memmap(input_array, output_data)
assert other == "not-an-array"
@pytest.mark.parametrize(
"constructor_name, container_type",
[
("list", list),
("tuple", tuple),
("array", np.ndarray),
("sparse", sparse.csr_matrix),
# using `zip` will only keep the available sparse containers
        # depending on the installed SciPy version
*zip(["sparse_csr", "sparse_csr_array"], CSR_CONTAINERS),
*zip(["sparse_csc", "sparse_csc_array"], CSC_CONTAINERS),
("dataframe", lambda: pytest.importorskip("pandas").DataFrame),
("series", lambda: pytest.importorskip("pandas").Series),
("index", lambda: pytest.importorskip("pandas").Index),
("slice", slice),
],
)
@pytest.mark.parametrize(
"dtype, superdtype",
[
(np.int32, np.integer),
(np.int64, np.integer),
(np.float32, np.floating),
(np.float64, np.floating),
],
)
def test_convert_container(
constructor_name,
container_type,
dtype,
superdtype,
):
"""Check that we convert the container to the right type of array with the
right data type."""
if constructor_name in ("dataframe", "polars", "series", "polars_series", "index"):
# delay the import of pandas/polars within the function to only skip this test
# instead of the whole file
container_type = container_type()
container = [0, 1]
container_converted = _convert_container(
container,
constructor_name,
dtype=dtype,
)
assert isinstance(container_converted, container_type)
if constructor_name in ("list", "tuple", "index"):
# list and tuple will use Python class dtype: int, float
# pandas index will always use high precision: np.int64 and np.float64
assert np.issubdtype(type(container_converted[0]), superdtype)
elif hasattr(container_converted, "dtype"):
assert container_converted.dtype == dtype
elif hasattr(container_converted, "dtypes"):
assert container_converted.dtypes[0] == dtype
def test_convert_container_categories_pandas():
pytest.importorskip("pandas")
df = _convert_container(
[["x"]], "dataframe", ["A"], categorical_feature_names=["A"]
)
assert df.dtypes.iloc[0] == "category"
def test_convert_container_categories_polars():
pl = pytest.importorskip("polars")
df = _convert_container([["x"]], "polars", ["A"], categorical_feature_names=["A"])
assert df.schema["A"] == pl.Categorical()
def test_convert_container_categories_pyarrow():
pa = pytest.importorskip("pyarrow")
df = _convert_container([["x"]], "pyarrow", ["A"], categorical_feature_names=["A"])
assert type(df.schema[0].type) is pa.DictionaryType
@pytest.mark.skipif(
sp_version >= parse_version("1.8"),
reason="sparse arrays are available as of scipy 1.8.0",
)
@pytest.mark.parametrize("constructor_name", ["sparse_csr_array", "sparse_csc_array"])
@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64])
def test_convert_container_raise_when_sparray_not_available(constructor_name, dtype):
"""Check that if we convert to sparse array but sparse array are not supported
(scipy<1.8.0), we should raise an explicit error."""
container = [0, 1]
with pytest.raises(
ValueError,
match=f"only available with scipy>=1.8.0, got {sp_version}",
):
_convert_container(container, constructor_name, dtype=dtype)
def test_raises():
# Tests for the raises context manager
# Proper type, no match
with raises(TypeError):
raise TypeError()
# Proper type, proper match
with raises(TypeError, match="how are you") as cm:
raise TypeError("hello how are you")
assert cm.raised_and_matched
# Proper type, proper match with multiple patterns
with raises(TypeError, match=["not this one", "how are you"]) as cm:
raise TypeError("hello how are you")
assert cm.raised_and_matched
# bad type, no match
with pytest.raises(ValueError, match="this will be raised"):
with raises(TypeError) as cm:
raise ValueError("this will be raised")
assert not cm.raised_and_matched
# Bad type, no match, with a err_msg
with pytest.raises(AssertionError, match="the failure message"):
with raises(TypeError, err_msg="the failure message") as cm:
raise ValueError()
assert not cm.raised_and_matched
# bad type, with match (is ignored anyway)
with pytest.raises(ValueError, match="this will be raised"):
with raises(TypeError, match="this is ignored") as cm:
raise ValueError("this will be raised")
assert not cm.raised_and_matched
# proper type but bad match
with pytest.raises(
AssertionError, match="should contain one of the following patterns"
):
with raises(TypeError, match="hello") as cm:
raise TypeError("Bad message")
assert not cm.raised_and_matched
# proper type but bad match, with err_msg
with pytest.raises(AssertionError, match="the failure message"):
with raises(TypeError, match="hello", err_msg="the failure message") as cm:
raise TypeError("Bad message")
assert not cm.raised_and_matched
# no raise with default may_pass=False
with pytest.raises(AssertionError, match="Did not raise"):
with raises(TypeError) as cm:
pass
assert not cm.raised_and_matched
# no raise with may_pass=True
with raises(TypeError, match="hello", may_pass=True) as cm:
pass # still OK
assert not cm.raised_and_matched
# Multiple exception types:
with raises((TypeError, ValueError)):
raise TypeError()
with raises((TypeError, ValueError)):
raise ValueError()
with pytest.raises(AssertionError):
with raises((TypeError, ValueError)):
pass
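# A minimal sketch of the contract test_raises checks, covering only the
# single-pattern case; the real helper also records raised_and_matched and
# supports pattern lists, err_msg and may_pass (names below are illustrative):
import contextlib
import re
@contextlib.contextmanager
def _raises_sketch(expected_exc, match=None):
    try:
        yield
    except expected_exc as exc:
        if match is not None and re.search(match, str(exc)) is None:
            raise AssertionError(
                "should contain one of the following patterns: %r" % match
            )
    else:
        raise AssertionError("Did not raise %r" % (expected_exc,))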
def test_float32_aware_assert_allclose():
# The relative tolerance for float32 inputs is 1e-4
assert_allclose(np.array([1.0 + 2e-5], dtype=np.float32), 1.0)
with pytest.raises(AssertionError):
assert_allclose(np.array([1.0 + 2e-4], dtype=np.float32), 1.0)
# The relative tolerance for other inputs is left to 1e-7 as in
# the original numpy version.
assert_allclose(np.array([1.0 + 2e-8], dtype=np.float64), 1.0)
with pytest.raises(AssertionError):
assert_allclose(np.array([1.0 + 2e-7], dtype=np.float64), 1.0)
# atol is left to 0.0 by default, even for float32
with pytest.raises(AssertionError):
assert_allclose(np.array([1e-5], dtype=np.float32), 0.0)
assert_allclose(np.array([1e-5], dtype=np.float32), 0.0, atol=2e-5)
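# Sketch of the dtype-dependent default the test above relies on: a looser
# rtol is picked whenever any input is float32 (illustrative only):
def _default_rtol_sketch(*arrays):
    dtypes = [np.asarray(a).dtype for a in arrays]
    return 1e-4 if any(dt == np.float32 for dt in dtypes) else 1e-7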
@pytest.mark.xfail(_IS_WASM, reason="cannot start subprocess")
def test_assert_run_python_script_without_output():
code = "x = 1"
assert_run_python_script_without_output(code)
code = "print('something to stdout')"
with pytest.raises(AssertionError, match="Expected no output"):
assert_run_python_script_without_output(code)
code = "print('something to stdout')"
with pytest.raises(
AssertionError,
match="output was not supposed to match.+got.+something to stdout",
):
assert_run_python_script_without_output(code, pattern="to.+stdout")
code = "\n".join(["import sys", "print('something to stderr', file=sys.stderr)"])
with pytest.raises(
AssertionError,
match="output was not supposed to match.+got.+something to stderr",
):
assert_run_python_script_without_output(code, pattern="to.+stderr")
@pytest.mark.parametrize(
"constructor_name",
[
"sparse_csr",
"sparse_csc",
pytest.param(
"sparse_csr_array",
marks=pytest.mark.skipif(
sp_version < parse_version("1.8"),
reason="sparse arrays are available as of scipy 1.8.0",
),
),
pytest.param(
"sparse_csc_array",
marks=pytest.mark.skipif(
sp_version < parse_version("1.8"),
reason="sparse arrays are available as of scipy 1.8.0",
),
),
],
)
def test_convert_container_sparse_to_sparse(constructor_name):
"""Non-regression test to check that we can still convert a sparse container
from a given format to another format.
"""
X_sparse = sparse.random(10, 10, density=0.1, format="csr")
_convert_container(X_sparse, constructor_name)
def check_warnings_as_errors(warning_info, warnings_as_errors):
if warning_info.action == "error" and warnings_as_errors:
with pytest.raises(warning_info.category, match=warning_info.message):
warnings.warn(
message=warning_info.message,
category=warning_info.category,
)
if warning_info.action == "ignore":
with warnings.catch_warnings(record=True) as record:
message = warning_info.message
# Special treatment when regex is used
if "Pyarrow" in message:
message = "\nPyarrow will become a required dependency"
warnings.warn(
message=message,
category=warning_info.category,
)
assert len(record) == 0 if warnings_as_errors else 1
if record:
assert str(record[0].message) == message
assert record[0].category == warning_info.category
@pytest.mark.parametrize("warning_info", _get_warnings_filters_info_list())
def test_sklearn_warnings_as_errors(warning_info):
warnings_as_errors = os.environ.get("SKLEARN_WARNINGS_AS_ERRORS", "0") != "0"
check_warnings_as_errors(warning_info, warnings_as_errors=warnings_as_errors)
@pytest.mark.parametrize("warning_info", _get_warnings_filters_info_list())
def test_turn_warnings_into_errors(warning_info):
with warnings.catch_warnings():
turn_warnings_into_errors()
check_warnings_as_errors(warning_info, warnings_as_errors=True)

View File

@@ -0,0 +1,25 @@
import numpy as np
import pytest
from sklearn.utils._typedefs import testing_make_array_from_typed_val
@pytest.mark.parametrize(
"type_t, value, expected_dtype",
[
("float64_t", 1.0, np.float64),
("float32_t", 1.0, np.float32),
("intp_t", 1, np.intp),
("int8_t", 1, np.int8),
("int32_t", 1, np.int32),
("int64_t", 1, np.int64),
("uint8_t", 1, np.uint8),
("uint32_t", 1, np.uint32),
("uint64_t", 1, np.uint64),
],
)
def test_types(type_t, value, expected_dtype):
"""Check that the types defined in _typedefs correspond to the expected
numpy dtypes.
"""
assert testing_make_array_from_typed_val[type_t](value).dtype == expected_dtype

View File

@@ -0,0 +1,65 @@
import string
import timeit
import pytest
from sklearn.utils._user_interface import _message_with_time, _print_elapsed_time
@pytest.mark.parametrize(
["source", "message", "is_long"],
[
("ABC", string.ascii_lowercase, False),
("ABCDEF", string.ascii_lowercase, False),
("ABC", string.ascii_lowercase * 3, True),
("ABC" * 10, string.ascii_lowercase, True),
("ABC", string.ascii_lowercase + "\u1048", False),
],
)
@pytest.mark.parametrize(
["time", "time_str"],
[
(0.2, " 0.2s"),
(20, " 20.0s"),
(2000, "33.3min"),
(20000, "333.3min"),
],
)
def test_message_with_time(source, message, is_long, time, time_str):
out = _message_with_time(source, message, time)
if is_long:
assert len(out) > 70
else:
assert len(out) == 70
assert out.startswith("[" + source + "] ")
out = out[len(source) + 3 :]
assert out.endswith(time_str)
out = out[: -len(time_str)]
assert out.endswith(", total=")
out = out[: -len(", total=")]
assert out.endswith(message)
out = out[: -len(message)]
assert out.endswith(" ")
out = out[:-1]
if is_long:
assert not out
else:
assert list(set(out)) == ["."]
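# The string layout the assertions above encode can be reconstructed as
# "[source] <dots> <message>, total=<time>", padded with '.' to 70
# characters. A hedged sketch of that formatting (not the real helper):
def _message_with_time_sketch(source, message, seconds):
    time_str = "%4.1fmin" % (seconds / 60) if seconds > 60 else " %5.1fs" % seconds
    start = "[%s] " % source
    end = " %s, total=%s" % (message, time_str)
    return start + "." * max(0, 70 - len(start) - len(end)) + end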
@pytest.mark.parametrize(
["message", "expected"],
[
("hello", _message_with_time("ABC", "hello", 0.1) + "\n"),
("", _message_with_time("ABC", "", 0.1) + "\n"),
(None, ""),
],
)
def test_print_elapsed_time(message, expected, capsys, monkeypatch):
monkeypatch.setattr(timeit, "default_timer", lambda: 0)
with _print_elapsed_time("ABC", message):
monkeypatch.setattr(timeit, "default_timer", lambda: 0.1)
assert capsys.readouterr().out == expected

View File

@@ -0,0 +1,27 @@
import joblib
import pytest
from sklearn.utils import parallel_backend, register_parallel_backend, tosequence
# TODO(1.7): remove
def test_is_pypy_deprecated():
with pytest.warns(FutureWarning, match="IS_PYPY is deprecated"):
from sklearn.utils import IS_PYPY # noqa
# TODO(1.7): remove
def test_tosequence_deprecated():
with pytest.warns(FutureWarning, match="tosequence was deprecated in 1.5"):
tosequence([1, 2, 3])
# TODO(1.7): remove
def test_parallel_backend_deprecated():
with pytest.warns(FutureWarning, match="parallel_backend is deprecated"):
parallel_backend("loky", None)
with pytest.warns(FutureWarning, match="register_parallel_backend is deprecated"):
register_parallel_backend("a_backend", None)
del joblib.parallel.BACKENDS["a_backend"]

File diff suppressed because it is too large

View File

@@ -0,0 +1,25 @@
import numpy as np
import pytest
from sklearn.utils._weight_vector import (
WeightVector32,
WeightVector64,
)
@pytest.mark.parametrize(
"dtype, WeightVector",
[
(np.float32, WeightVector32),
(np.float64, WeightVector64),
],
)
def test_type_invariance(dtype, WeightVector):
"""Check the `dtype` consistency of `WeightVector`."""
weights = np.random.rand(100).astype(dtype)
average_weights = np.random.rand(100).astype(dtype)
weight_vector = WeightVector(weights, average_weights)
assert np.asarray(weight_vector.w).dtype is np.dtype(dtype)
assert np.asarray(weight_vector.aw).dtype is np.dtype(dtype)