library packages

This commit is contained in:
2024-09-28 22:56:00 -07:00
parent 64d9b78b3a
commit 1973934e95
4893 changed files with 1184173 additions and 31 deletions

View File

@@ -0,0 +1,96 @@
"""Tools for model selection, such as cross validation and hyper-parameter tuning."""
import typing
from ._classification_threshold import (
FixedThresholdClassifier,
TunedThresholdClassifierCV,
)
from ._plot import LearningCurveDisplay, ValidationCurveDisplay
from ._search import GridSearchCV, ParameterGrid, ParameterSampler, RandomizedSearchCV
from ._split import (
BaseCrossValidator,
BaseShuffleSplit,
GroupKFold,
GroupShuffleSplit,
KFold,
LeaveOneGroupOut,
LeaveOneOut,
LeavePGroupsOut,
LeavePOut,
PredefinedSplit,
RepeatedKFold,
RepeatedStratifiedKFold,
ShuffleSplit,
StratifiedGroupKFold,
StratifiedKFold,
StratifiedShuffleSplit,
TimeSeriesSplit,
check_cv,
train_test_split,
)
from ._validation import (
cross_val_predict,
cross_val_score,
cross_validate,
learning_curve,
permutation_test_score,
validation_curve,
)
if typing.TYPE_CHECKING:
    # Avoid errors in type checkers (e.g. mypy) for experimental estimators.
    # At runtime these names are served by the module-level `__getattr__`
    # below, which raises a helpful ImportError instead.
    # TODO: remove this check once the estimator is no longer experimental.
    from ._search_successive_halving import (  # noqa
        HalvingGridSearchCV,
        HalvingRandomSearchCV,
    )
# Public API of `sklearn.model_selection`, kept in sorted order so that
# additions are easy to spot in diffs and duplicates are easy to avoid.
__all__ = [
    "BaseCrossValidator",
    "BaseShuffleSplit",
    "FixedThresholdClassifier",
    "GridSearchCV",
    "GroupKFold",
    "GroupShuffleSplit",
    "KFold",
    "LearningCurveDisplay",
    "LeaveOneGroupOut",
    "LeaveOneOut",
    "LeavePGroupsOut",
    "LeavePOut",
    "ParameterGrid",
    "ParameterSampler",
    "PredefinedSplit",
    "RandomizedSearchCV",
    "RepeatedKFold",
    "RepeatedStratifiedKFold",
    "ShuffleSplit",
    "StratifiedGroupKFold",
    "StratifiedKFold",
    "StratifiedShuffleSplit",
    "TimeSeriesSplit",
    "TunedThresholdClassifierCV",
    "ValidationCurveDisplay",
    "check_cv",
    "cross_val_predict",
    "cross_val_score",
    "cross_validate",
    "learning_curve",
    "permutation_test_score",
    "train_test_split",
    "validation_curve",
]
# TODO: remove this check once the estimator is no longer experimental.
def __getattr__(name):
    """Module-level attribute hook (PEP 562) for experimental estimators.

    Accessing the successive-halving search estimators on this module
    raises a helpful ``ImportError`` pointing at the experimental feature
    flag; any other unknown attribute raises the usual ``AttributeError``.
    """
    if name not in ("HalvingGridSearchCV", "HalvingRandomSearchCV"):
        # Preserve the default module behaviour for unknown attributes.
        raise AttributeError(f"module {__name__} has no attribute {name}")
    raise ImportError(
        f"{name} is experimental and the API might change without any "
        "deprecation cycle. To use it, you need to explicitly import "
        "enable_halving_search_cv:\n"
        "from sklearn.experimental import enable_halving_search_cv"
    )

View File

@@ -0,0 +1,874 @@
import numpy as np
from ..utils._optional_dependencies import check_matplotlib_support
from ..utils._plotting import _interval_max_min_ratio, _validate_score_name
from ._validation import learning_curve, validation_curve
class _BaseCurveDisplay:
    """Private base class shared by learning/validation curve displays.

    Provides `_plot_curve`, which plots the mean of `self.train_scores` /
    `self.test_scores` against an x-axis supplied by the subclass and
    renders the cross-fold standard deviation either as error bars or as a
    shaded band.
    """

    def _plot_curve(
        self,
        x_data,
        *,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="test",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot mean scores (and their std) against `x_data` on `ax`.

        Populates the fitted-style attributes `ax_`, `figure_`, `lines_`,
        `errorbar_` and `fill_between_` on `self`. Exactly one of the
        `lines_`/`errorbar_` families is non-None depending on
        `std_display_style`. Raises ValueError for an unknown
        `std_display_style` or `score_type`.
        """
        check_matplotlib_support(f"{self.__class__.__name__}.plot")
        # Local import so matplotlib stays an optional dependency.
        import matplotlib.pyplot as plt
        if ax is None:
            _, ax = plt.subplots()
        if negate_score:
            # Useful for `neg_*` scorers so the plot shows the positive error.
            train_scores, test_scores = -self.train_scores, -self.test_scores
        else:
            train_scores, test_scores = self.train_scores, self.test_scores
        if std_display_style not in ("errorbar", "fill_between", None):
            raise ValueError(
                f"Unknown std_display_style: {std_display_style}. Should be one of"
                " 'errorbar', 'fill_between', or None."
            )
        if score_type not in ("test", "train", "both"):
            raise ValueError(
                f"Unknown score_type: {score_type}. Should be one of 'test', "
                "'train', or 'both'."
            )
        # Map legend label -> (n_ticks, n_cv_folds) score array to draw.
        if score_type == "train":
            scores = {"Train": train_scores}
        elif score_type == "test":
            scores = {"Test": test_scores}
        else:  # score_type == "both"
            scores = {"Train": train_scores, "Test": test_scores}
        if std_display_style in ("fill_between", None):
            # plot the mean score
            if line_kw is None:
                line_kw = {}
            self.lines_ = []
            for line_label, score in scores.items():
                self.lines_.append(
                    *ax.plot(
                        x_data,
                        score.mean(axis=1),
                        label=line_label,
                        **line_kw,
                    )
                )
            self.errorbar_ = None
            self.fill_between_ = None  # overwritten below by fill_between
        if std_display_style == "errorbar":
            if errorbar_kw is None:
                errorbar_kw = {}
            self.errorbar_ = []
            for line_label, score in scores.items():
                self.errorbar_.append(
                    ax.errorbar(
                        x_data,
                        score.mean(axis=1),
                        score.std(axis=1),
                        label=line_label,
                        **errorbar_kw,
                    )
                )
            self.lines_, self.fill_between_ = None, None
        elif std_display_style == "fill_between":
            if fill_between_kw is None:
                fill_between_kw = {}
            default_fill_between_kw = {"alpha": 0.5}
            # User-provided kwargs override the default alpha.
            fill_between_kw = {**default_fill_between_kw, **fill_between_kw}
            self.fill_between_ = []
            for line_label, score in scores.items():
                # The band spans mean +/- one std; it carries no label, the
                # legend entries come from the mean-score lines above.
                self.fill_between_.append(
                    ax.fill_between(
                        x_data,
                        score.mean(axis=1) - score.std(axis=1),
                        score.mean(axis=1) + score.std(axis=1),
                        **fill_between_kw,
                    )
                )
        score_name = self.score_name if score_name is None else score_name
        ax.legend()
        # We found that a ratio, smaller or bigger than 5, between the largest and
        # smallest gap of the x values is a good indicator to choose between linear
        # and log scale.
        if _interval_max_min_ratio(x_data) > 5:
            # symlog supports non-positive x values; log does not.
            xscale = "symlog" if x_data.min() <= 0 else "log"
        else:
            xscale = "linear"
        ax.set_xscale(xscale)
        ax.set_ylabel(f"{score_name}")
        self.ax_ = ax
        self.figure_ = ax.figure
class LearningCurveDisplay(_BaseCurveDisplay):
    """Learning Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and
    :ref:`detailed documentation <learning_curve>` regarding the learning
    curve visualization.

    .. versionadded:: 1.2

    Parameters
    ----------
    train_sizes : ndarray of shape (n_unique_ticks,)
        Numbers of training examples that has been used to generate the
        learning curve.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test set.

    score_name : str, default=None
        The name of the score used in `learning_curve`. It will override the name
        inferred from the `scoring` parameter. If `score` is `None`, we use `"Score"` if
        `negate_score` is `False` and `"Negative score"` otherwise. If `scoring` is a
        string or a callable, we infer the name. We replace `_` by spaces and capitalize
        the first letter. We remove `neg_` and replace it by `"Negative"` if
        `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the learning curve.

    figure_ : matplotlib Figure
        Figure containing the learning curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.lines.Line2D` objects corresponding to the mean train and
        test scores. If another style is used, `line_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.learning_curve : Compute the learning curve.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import LearningCurveDisplay, learning_curve
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> X, y = load_iris(return_X_y=True)
    >>> tree = DecisionTreeClassifier(random_state=0)
    >>> train_sizes, train_scores, test_scores = learning_curve(
    ...     tree, X, y)
    >>> display = LearningCurveDisplay(train_sizes=train_sizes,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score")
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None):
        # Plain attribute storage; all plotting happens in `plot`.
        self.train_sizes = train_sizes
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.
        """
        self._plot_curve(
            self.train_sizes,
            ax=ax,
            negate_score=negate_score,
            score_name=score_name,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
        # `_plot_curve` sets `self.ax_`; only the x-axis label is specific
        # to the learning curve.
        self.ax_.set_xlabel("Number of samples in the training set")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        groups=None,
        train_sizes=np.linspace(0.1, 1.0, 5),
        cv=None,
        scoring=None,
        exploit_incremental_learning=False,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        shuffle=False,
        random_state=None,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a learning curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <learning_curve>` regarding the learning curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        train_sizes : array-like of shape (n_ticks,), \
                default=np.linspace(0.1, 1.0, 5)
            Relative or absolute numbers of training examples that will be used
            to generate the learning curve. If the dtype is float, it is
            regarded as a fraction of the maximum size of the training set
            (that is determined by the selected validation method), i.e. it has
            to be within (0, 1]. Otherwise it is interpreted as absolute sizes
            of the training sets. Note that for classification the number of
            samples usually have to be big enough to contain at least one
            sample from each class.

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            A string (see :ref:`scoring_parameter`) or
            a scorer callable object / function with signature
            `scorer(estimator, X, y)` (see :ref:`scoring`).

        exploit_incremental_learning : bool, default=False
            If the estimator supports incremental learning, this will be
            used to speed up fitting for different training set sizes.

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        shuffle : bool, default=False
            Whether to shuffle training data before taking prefixes of it
            based on `train_sizes`.

        random_state : int, RandomState instance or None, default=None
            Used when `shuffle` is True. Pass an int for reproducible
            output across multiple function calls.
            See :term:`Glossary <random_state>`.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import LearningCurveDisplay
        >>> from sklearn.tree import DecisionTreeClassifier
        >>> X, y = load_iris(return_X_y=True)
        >>> tree = DecisionTreeClassifier(random_state=0)
        >>> LearningCurveDisplay.from_estimator(tree, X, y)
        <...>
        >>> plt.show()
        """
        # Fail early with a helpful message if matplotlib is missing.
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        score_name = _validate_score_name(score_name, scoring, negate_score)
        train_sizes, train_scores, test_scores = learning_curve(
            estimator,
            X,
            y,
            groups=groups,
            train_sizes=train_sizes,
            cv=cv,
            scoring=scoring,
            exploit_incremental_learning=exploit_incremental_learning,
            n_jobs=n_jobs,
            pre_dispatch=pre_dispatch,
            verbose=verbose,
            shuffle=shuffle,
            random_state=random_state,
            error_score=error_score,
            return_times=False,
            fit_params=fit_params,
        )
        viz = cls(
            train_sizes=train_sizes,
            train_scores=train_scores,
            test_scores=test_scores,
            score_name=score_name,
        )
        # `score_name` is not forwarded here: it is already stored on `viz`
        # and picked up by `_plot_curve`.
        return viz.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
class ValidationCurveDisplay(_BaseCurveDisplay):
    """Validation Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.ValidationCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.ValidationCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and :ref:`detailed documentation
    <validation_curve>` regarding the validation curve visualization.

    .. versionadded:: 1.3

    Parameters
    ----------
    param_name : str
        Name of the parameter that has been varied.

    param_range : array-like of shape (n_ticks,)
        The values of the parameter that have been evaluated.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test set.

    score_name : str, default=None
        The name of the score used in `validation_curve`. It will override the name
        inferred from the `scoring` parameter. If `score` is `None`, we use `"Score"` if
        `negate_score` is `False` and `"Negative score"` otherwise. If `scoring` is a
        string or a callable, we infer the name. We replace `_` by spaces and capitalize
        the first letter. We remove `neg_` and replace it by `"Negative"` if
        `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the validation curve.

    figure_ : matplotlib Figure
        Figure containing the validation curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.lines.Line2D` objects corresponding to the mean train and
        test scores. If another style is used, `line_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.validation_curve : Compute the validation curve.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import ValidationCurveDisplay, validation_curve
    >>> from sklearn.linear_model import LogisticRegression
    >>> X, y = make_classification(n_samples=1_000, random_state=0)
    >>> logistic_regression = LogisticRegression()
    >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
    >>> train_scores, test_scores = validation_curve(
    ...     logistic_regression, X, y, param_name=param_name, param_range=param_range
    ... )
    >>> display = ValidationCurveDisplay(
    ...     param_name=param_name, param_range=param_range,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score"
    ... )
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(
        self, *, param_name, param_range, train_scores, test_scores, score_name=None
    ):
        # Plain attribute storage; all plotting happens in `plot`.
        self.param_name = param_name
        self.param_range = param_range
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.
        """
        self._plot_curve(
            self.param_range,
            ax=ax,
            negate_score=negate_score,
            score_name=score_name,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
        # `_plot_curve` sets `self.ax_`; only the x-axis label is specific
        # to the validation curve.
        self.ax_.set_xlabel(f"{self.param_name}")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        param_name,
        param_range,
        groups=None,
        cv=None,
        scoring=None,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a validation curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <validation_curve>` regarding the validation curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        param_name : str
            Name of the parameter that will be varied.

        param_range : array-like of shape (n_values,)
            The values of the parameter that will be evaluated.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            A string (see :ref:`scoring_parameter`) or
            a scorer callable object / function with signature
            `scorer(estimator, X, y)` (see :ref:`scoring`).

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.model_selection import ValidationCurveDisplay
        >>> from sklearn.linear_model import LogisticRegression
        >>> X, y = make_classification(n_samples=1_000, random_state=0)
        >>> logistic_regression = LogisticRegression()
        >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
        >>> ValidationCurveDisplay.from_estimator(
        ...     logistic_regression, X, y, param_name=param_name,
        ...     param_range=param_range,
        ... )
        <...>
        >>> plt.show()
        """
        # Fail early with a helpful message if matplotlib is missing.
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        score_name = _validate_score_name(score_name, scoring, negate_score)
        train_scores, test_scores = validation_curve(
            estimator,
            X,
            y,
            param_name=param_name,
            param_range=param_range,
            groups=groups,
            cv=cv,
            scoring=scoring,
            n_jobs=n_jobs,
            pre_dispatch=pre_dispatch,
            verbose=verbose,
            error_score=error_score,
            fit_params=fit_params,
        )
        viz = cls(
            param_name=param_name,
            # `_plot_curve` calls `.min()` on the x data, so coerce the
            # user-provided range to an ndarray.
            param_range=np.asarray(param_range),
            train_scores=train_scores,
            test_scores=test_scores,
            score_name=score_name,
        )
        # `score_name` is not forwarded here: it is already stored on `viz`
        # and picked up by `_plot_curve`.
        return viz.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
"""
Common utilities for testing model selection.
"""
import numpy as np
from sklearn.model_selection import KFold
class OneTimeSplitter:
    """A CV splitter wrapping KFold that can only be iterated once.

    The (train, test) index pairs are produced by a single-use iterator
    built at construction time, so the first call to :meth:`split`
    exhausts it and subsequent calls yield nothing.
    """

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        # Single-use iterator over the KFold (train, test) index pairs.
        self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))

    def split(self, X=None, y=None, groups=None):
        """Yield the remaining splits; exhausted after one full pass."""
        yield from self.indices

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the configured number of splits."""
        return self.n_splits

View File

@@ -0,0 +1,684 @@
import numpy as np
import pytest
from sklearn.base import clone
from sklearn.datasets import (
load_breast_cancer,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
balanced_accuracy_score,
f1_score,
fbeta_score,
make_scorer,
recall_score,
)
from sklearn.model_selection import (
FixedThresholdClassifier,
StratifiedShuffleSplit,
TunedThresholdClassifierCV,
)
from sklearn.model_selection._classification_threshold import (
_CurveScorer,
_fit_and_score_over_thresholds,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
def test_curve_scorer():
    """Check the behaviour of the `_CurveScorer` class."""
    X, y = make_classification(random_state=0)
    model = LogisticRegression().fit(X, y)

    def make_curve_scorer(sign, score_kwargs):
        # All scorers below share the scoring function, response method and
        # number of thresholds; only `sign` and `kwargs` vary.
        return _CurveScorer(
            balanced_accuracy_score,
            sign=sign,
            response_method="predict_proba",
            thresholds=10,
            kwargs=score_kwargs,
        )

    scores, thresholds = make_curve_scorer(1, {})(model, X, y)
    assert thresholds.shape == scores.shape
    # The thresholds are probabilities with extreme values close to 0 and 1.
    # They are not exactly 0 and 1 because they are the extremum of the
    # `estimator.predict_proba(X)` values.
    assert 0 <= thresholds.min() <= 0.01
    assert 0.99 <= thresholds.max() <= 1
    # Balanced accuracy should be between 0.5 and 1 when it is not adjusted.
    assert 0.5 <= scores.min() <= 1

    # Passing kwargs to the scorer works.
    scores, thresholds = make_curve_scorer(1, {"adjusted": True})(model, X, y)
    # Adjusted balanced accuracy should be between 0 and 0.5.
    assert 0 <= scores.min() <= 0.5

    # The sign of the score can be inverted, as done for `neg_*` scorers.
    scores, thresholds = make_curve_scorer(-1, {"adjusted": True})(model, X, y)
    assert all(scores <= 0)
def test_curve_scorer_pos_label(global_random_seed):
    """Check that we propagate properly the `pos_label` parameter to the scorer."""
    n_samples = 30
    X, y = make_classification(
        n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed
    )
    estimator = LogisticRegression().fit(X, y)
    # score the curve once with the minority class (1) as positive...
    curve_scorer = _CurveScorer(
        recall_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={"pos_label": 1},
    )
    scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y)
    # ...and once with the majority class (0) as positive
    curve_scorer = _CurveScorer(
        recall_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={"pos_label": 0},
    )
    scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y)
    # Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal.
    assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all()
    # The min-max range for the thresholds is defined by the probabilities of the
    # `pos_label` class (the column of `predict_proba`).
    y_pred = estimator.predict_proba(X)
    assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0])
    assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0])
    assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1])
    assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1])
    # The recall cannot be negative and `pos_label=1` should have a higher recall
    # since there is less samples to be considered.
    assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min()
    assert scores_pos_label_0.max() == pytest.approx(1.0)
    assert scores_pos_label_1.max() == pytest.approx(1.0)
def test_fit_and_score_over_thresholds_curve_scorers():
    """Check that `_fit_and_score_over_thresholds` returns thresholds in ascending order
    for the different accepted curve scorers."""
    X, y = make_classification(n_samples=100, random_state=0)
    half = 50
    indices_train, indices_val = np.arange(half), np.arange(half, 2 * half)
    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    model = LogisticRegression()
    scores, thresholds = _fit_and_score_over_thresholds(
        model,
        X,
        y,
        fit_params={},
        train_idx=indices_train,
        val_idx=indices_val,
        curve_scorer=scorer,
        score_params={},
    )
    # thresholds must come back sorted in non-decreasing order
    assert np.all(np.diff(thresholds) >= 0)
    # scores are returned as an array of values bounded in [0, 1]
    assert isinstance(scores, np.ndarray)
    assert ((scores >= 0) & (scores <= 1)).all()
def test_fit_and_score_over_thresholds_prefit():
    """Check the behaviour with a prefit classifier."""
    X, y = make_classification(n_samples=100, random_state=0)
    # `train_idx is None` to indicate that the classifier is prefit
    train_idx, val_idx = None, np.arange(50, 100)
    classifier = DecisionTreeClassifier(random_state=0).fit(X, y)
    # make sure that the classifier memorized the full dataset such that
    # we get perfect predictions and thus match the expected score
    assert classifier.score(X[val_idx], y[val_idx]) == pytest.approx(1.0)
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=2,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    assert np.all(thresholds[:-1] <= thresholds[1:])
    # with only 2 thresholds on a memorizing tree, the low extreme gives
    # chance level (0.5) and the high extreme the perfect score (1.0)
    assert_allclose(scores, [0.5, 1.0])
@pytest.mark.usefixtures("enable_slep006")
def test_fit_and_score_over_thresholds_sample_weight():
    """Check that we dispatch the sample-weight to fit and score the classifier."""
    X, y = load_iris(return_X_y=True)
    X, y = X[:100], y[:100]  # only 2 classes
    # create a dataset and repeat twice the sample of class #0
    X_repeated, y_repeated = np.vstack([X, X[y == 0]]), np.hstack([y, y[y == 0]])
    # create a sample weight vector that is equivalent to the repeated dataset
    sample_weight = np.ones_like(y)
    sample_weight[:50] *= 2
    classifier = LogisticRegression()
    train_repeated_idx = np.arange(X_repeated.shape[0])
    val_repeated_idx = np.arange(X_repeated.shape[0])
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    # reference run: physically repeated samples, implicit unit weights
    scores_repeated, thresholds_repeated = _fit_and_score_over_thresholds(
        classifier,
        X_repeated,
        y_repeated,
        fit_params={},
        train_idx=train_repeated_idx,
        val_idx=val_repeated_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    train_idx, val_idx = np.arange(X.shape[0]), np.arange(X.shape[0])
    # routed run: original samples with weight 2 on class #0 — must be equivalent
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier.set_fit_request(sample_weight=True),
        X,
        y,
        fit_params={"sample_weight": sample_weight},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer.set_score_request(sample_weight=True),
        score_params={"sample_weight": sample_weight},
    )
    assert_allclose(thresholds_repeated, thresholds)
    assert_allclose(scores_repeated, scores)
@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
def test_fit_and_score_over_thresholds_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    # both parameters carry the same payload; only the container type varies
    fit_params = {key: _convert_container(y, fit_params_type) for key in ("a", "b")}
    clf = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    clf.set_fit_request(a=True, b=True)
    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    # `CheckingClassifier` raises internally if `a`/`b` are not forwarded to `fit`
    _fit_and_score_over_thresholds(
        clf,
        X,
        y,
        fit_params=fit_params,
        train_idx=np.arange(50),
        val_idx=np.arange(50, 100),
        curve_scorer=scorer,
        score_params={},
    )
@pytest.mark.parametrize(
    "data",
    [
        make_classification(n_classes=3, n_clusters_per_class=1, random_state=0),
        make_multilabel_classification(random_state=0),
    ],
)
def test_tuned_threshold_classifier_no_binary(data):
    """Check that we raise an informative error message for non-binary problem."""
    X, y = data
    model = TunedThresholdClassifierCV(LogisticRegression())
    # both multiclass and multilabel targets must be rejected at fit time
    with pytest.raises(ValueError, match="Only binary classification is supported."):
        model.fit(X, y)
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        (
            {"cv": "prefit", "refit": True},
            ValueError,
            "When cv='prefit', refit cannot be True.",
        ),
        (
            {"cv": 10, "refit": False},
            ValueError,
            "When cv has several folds, refit cannot be False.",
        ),
        (
            {"cv": "prefit", "refit": False},
            NotFittedError,
            "`estimator` must be fitted.",
        ),
    ],
)
def test_tuned_threshold_classifier_conflict_cv_refit(params, err_type, err_msg):
    """Check that we raise an informative error message when `cv` and `refit`
    cannot be used together.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    # each invalid (cv, refit) combination must fail at fit time with the
    # dedicated error type and message
    with pytest.raises(err_type, match=err_msg):
        TunedThresholdClassifierCV(LogisticRegression(), **params).fit(X, y)
@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(), SVC(), GradientBoostingClassifier(n_estimators=4)],
)
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "decision_function"]
)
@pytest.mark.parametrize(
    "ThresholdClassifier", [FixedThresholdClassifier, TunedThresholdClassifierCV]
)
def test_threshold_classifier_estimator_response_methods(
    ThresholdClassifier, estimator, response_method
):
    """Check that `TunedThresholdClassifierCV` exposes the same response methods as the
    underlying estimator.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    model = ThresholdClassifier(estimator=estimator)
    # availability of the response method must match both before and after fit
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    model.fit(X, y)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    if hasattr(model, response_method):
        # the wrapper must delegate to the inner estimator, not alter the values
        y_pred_cutoff = getattr(model, response_method)(X)
        y_pred_underlying_estimator = getattr(model.estimator_, response_method)(X)
        assert_allclose(y_pred_cutoff, y_pred_underlying_estimator)
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
def test_tuned_threshold_classifier_without_constraint_value(response_method):
    """Check that `TunedThresholdClassifierCV` is optimizing a given objective
    metric."""
    X, y = load_breast_cancer(return_X_y=True)
    # remove feature to degrade performances
    X = X[:, :5]
    # make the problem completely imbalanced such that the balanced accuracy is low
    indices_pos = np.flatnonzero(y == 1)
    indices_pos = indices_pos[: indices_pos.size // 50]
    indices_neg = np.flatnonzero(y == 0)
    X = np.vstack([X[indices_neg], X[indices_pos]])
    y = np.hstack([y[indices_neg], y[indices_pos]])
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    thresholds = 100
    model = TunedThresholdClassifierCV(
        estimator=lr,
        scoring="balanced_accuracy",
        response_method=response_method,
        thresholds=thresholds,
        store_cv_results=True,
    )
    # tuning the cut-off must improve the optimized metric over the vanilla model
    score_optimized = balanced_accuracy_score(y, model.fit(X, y).predict(X))
    score_baseline = balanced_accuracy_score(y, lr.predict(X))
    assert score_optimized > score_baseline
    # one entry per candidate threshold is stored when `store_cv_results=True`
    assert model.cv_results_["thresholds"].shape == (thresholds,)
    assert model.cv_results_["scores"].shape == (thresholds,)
def test_tuned_threshold_classifier_metric_with_parameter():
    """Check that we can pass a metric with a parameter in addition check that
    `f_beta` with `beta=1` is equivalent to `f1` and different from `f_beta` with
    `beta=2`.
    """
    X, y = load_breast_cancer(return_X_y=True)
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)

    def tune_with(scorer):
        # helper: tune the decision threshold of the prefit pipeline w.r.t. `scorer`
        return TunedThresholdClassifierCV(estimator=lr, scoring=scorer).fit(X, y)

    model_fbeta_1 = tune_with(make_scorer(fbeta_score, beta=1))
    model_fbeta_2 = tune_with(make_scorer(fbeta_score, beta=2))
    model_f1 = tune_with(make_scorer(f1_score))
    # F-beta with beta=1 is F1, so the tuned thresholds must coincide...
    assert model_fbeta_1.best_threshold_ == pytest.approx(model_f1.best_threshold_)
    # ...while beta=2 optimizes a different trade-off and lands elsewhere
    assert model_fbeta_1.best_threshold_ != pytest.approx(
        model_fbeta_2.best_threshold_
    )
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
@pytest.mark.parametrize(
    "metric",
    [
        make_scorer(balanced_accuracy_score),
        make_scorer(f1_score, pos_label="cancer"),
    ],
)
def test_tuned_threshold_classifier_with_string_targets(response_method, metric):
    """Check that targets represented by str are properly managed.
    Also, check with several metrics to be sure that `pos_label` is properly
    dispatched.
    """
    X, y = load_breast_cancer(return_X_y=True)
    # Encode numeric targets by meaningful strings. We purposely designed the class
    # names such that the `pos_label` is the first alphabetically sorted class and thus
    # encoded as 0.
    classes = np.array(["cancer", "healthy"], dtype=object)
    y = classes[y]
    model = TunedThresholdClassifierCV(
        estimator=make_pipeline(StandardScaler(), LogisticRegression()),
        scoring=metric,
        response_method=response_method,
        thresholds=100,
    ).fit(X, y)
    # fitted classes and predictions must be expressed with the string labels
    assert_array_equal(model.classes_, np.sort(classes))
    y_pred = model.predict(X)
    assert_array_equal(np.unique(y_pred), np.sort(classes))
@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("with_sample_weight", [True, False])
def test_tuned_threshold_classifier_refit(with_sample_weight, global_random_seed):
    """Check the behaviour of the `refit` parameter."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(n_samples=100, random_state=0)
    if with_sample_weight:
        # draw weights and take the absolute value in-place to keep them valid
        sample_weight = rng.randn(X.shape[0])
        sample_weight = np.abs(sample_weight, out=sample_weight)
    else:
        sample_weight = None
    # check that `estimator_` is fitted on the full dataset when `refit=True`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model = TunedThresholdClassifierCV(estimator, refit=True).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    estimator.fit(X, y, sample_weight=sample_weight)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
    assert_allclose(model.estimator_.intercept_, estimator.intercept_)
    # check that `estimator_` was not altered when `refit=False` and `cv="prefit"`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    estimator.fit(X, y, sample_weight=sample_weight)
    coef = estimator.coef_.copy()
    model = TunedThresholdClassifierCV(estimator, cv="prefit", refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is estimator
    assert_allclose(model.estimator_.coef_, coef)
    # check that we train `estimator_` on the training split of a given cross-validation
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    cv = [
        (np.arange(50), np.arange(50, 100)),
    ]  # single split
    model = TunedThresholdClassifierCV(estimator, cv=cv, refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    if with_sample_weight:
        # only the training-fold weights should reach the inner estimator
        sw_train = sample_weight[cv[0][0]]
    else:
        sw_train = None
    estimator.fit(X[cv[0][0]], y[cv[0][0]], sample_weight=sw_train)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
def test_tuned_threshold_classifier_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    # same payload under both keys; only the container type varies
    fit_params = {name: _convert_container(y, fit_params_type) for name in ("a", "b")}
    checking_clf = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    checking_clf.set_fit_request(a=True, b=True)
    # `CheckingClassifier` raises internally if `a`/`b` are not forwarded to `fit`
    TunedThresholdClassifierCV(checking_clf).fit(X, y, **fit_params)
@pytest.mark.usefixtures("enable_slep006")
def test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence():
    """Check that removing some samples from the dataset `X` is equivalent to
    passing a `sample_weight` with a factor 0."""
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue
    X = StandardScaler().fit_transform(X)
    # Only use 2 classes and select samples such that 2-fold cross-validation
    # split will lead to an equivalence with a `sample_weight` of 0
    X = np.vstack((X[:40], X[50:90]))
    y = np.hstack((y[:40], y[50:90]))
    # every odd-indexed sample gets weight 0, i.e. should be ignored entirely
    sample_weight = np.zeros_like(y)
    sample_weight[::2] = 1
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model_without_weights = TunedThresholdClassifierCV(estimator, cv=2)
    model_with_weights = clone(model_without_weights)
    model_with_weights.fit(X, y, sample_weight=sample_weight)
    # fitting on only the even-indexed samples must give the same model
    model_without_weights.fit(X[::2], y[::2])
    assert_allclose(
        model_with_weights.estimator_.coef_, model_without_weights.estimator_.coef_
    )
    y_pred_with_weights = model_with_weights.predict_proba(X)
    y_pred_without_weights = model_without_weights.predict_proba(X)
    assert_allclose(y_pred_with_weights, y_pred_without_weights)
def test_tuned_threshold_classifier_thresholds_array():
    """Check that we can pass an array to `thresholds` and it is used as candidate
    threshold internally."""
    X, y = make_classification(random_state=0)
    candidate_thresholds = np.linspace(0, 1, 11)
    model = TunedThresholdClassifierCV(
        LogisticRegression(),
        thresholds=candidate_thresholds,
        response_method="predict_proba",
        store_cv_results=True,
    )
    model.fit(X, y)
    # the user-provided grid must be stored verbatim in the CV results
    assert_allclose(model.cv_results_["thresholds"], candidate_thresholds)
@pytest.mark.parametrize("store_cv_results", [True, False])
def test_tuned_threshold_classifier_store_cv_results(store_cv_results):
    """Check that if `cv_results_` exists depending on `store_cv_results`."""
    X, y = make_classification(random_state=0)
    model = TunedThresholdClassifierCV(
        LogisticRegression(), store_cv_results=store_cv_results
    ).fit(X, y)
    # the attribute is created if and only if `store_cv_results=True`
    assert hasattr(model, "cv_results_") == store_cv_results
def test_tuned_threshold_classifier_cv_float():
    """Check the behaviour when `cv` is set to a float.

    A float `cv` is interpreted as the test size of a single stratified
    shuffle split.
    """
    X, y = make_classification(random_state=0)
    # case where `refit=False` and cv is a float: the underlying estimator will be fit
    # on the training set given by a ShuffleSplit. We check that we get the same model
    # coefficients.
    test_size = 0.3
    estimator = LogisticRegression()
    tuned_model = TunedThresholdClassifierCV(
        estimator, cv=test_size, refit=False, random_state=0
    ).fit(X, y)
    # NOTE: the redundant second `tuned_model.fit(X, y)` that used to follow the
    # chained fit above has been removed — it retrained the model for no reason.
    # reproduce the internal split to fit a reference model on the same train subset
    cv = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=0)
    train_idx, val_idx = next(cv.split(X, y))
    cloned_estimator = clone(estimator).fit(X[train_idx], y[train_idx])
    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
    # case where `refit=True`, then the underlying estimator is fitted on the full
    # dataset.
    tuned_model.set_params(refit=True).fit(X, y)
    cloned_estimator = clone(estimator).fit(X, y)
    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
def test_tuned_threshold_classifier_error_constant_predictor():
    """Check that we raise a ValueError if the underlying classifier returns constant
    probabilities such that we cannot find any threshold.
    """
    X, y = make_classification(random_state=0)
    # a constant dummy yields identical probabilities for every sample
    constant_clf = DummyClassifier(strategy="constant", constant=1)
    model = TunedThresholdClassifierCV(constant_clf, response_method="predict_proba")
    with pytest.raises(
        ValueError, match="The provided estimator makes constant predictions"
    ):
        model.fit(X, y)
@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_fixed_threshold_classifier_equivalence_default(response_method):
    """Check that `FixedThresholdClassifier` has the same behaviour as the vanilla
    classifier.
    """
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    classifier_default_threshold = FixedThresholdClassifier(
        estimator=clone(classifier), response_method=response_method
    )
    classifier_default_threshold.fit(X, y)
    # emulate the response method that should take into account the `pos_label`
    if response_method in ("auto", "predict_proba"):
        # probabilities are cut at 0.5 by default
        y_score = classifier_default_threshold.predict_proba(X)[:, 1]
        threshold = 0.5
    else:  # response_method == "decision_function"
        # decision values are cut at 0.0 by default
        y_score = classifier_default_threshold.decision_function(X)
        threshold = 0.0
    y_pred_lr = (y_score >= threshold).astype(int)
    assert_allclose(classifier_default_threshold.predict(X), y_pred_lr)
@pytest.mark.parametrize(
    "response_method, threshold", [("predict_proba", 0.7), ("decision_function", 2.0)]
)
@pytest.mark.parametrize("pos_label", [0, 1])
def test_fixed_threshold_classifier(response_method, threshold, pos_label):
    """Check that applying `predict` lead to the same prediction as applying the
    threshold to the output of the response method.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    logistic_regression = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(
        estimator=clone(logistic_regression),
        threshold=threshold,
        response_method=response_method,
        pos_label=pos_label,
    ).fit(X, y)
    # check that the underlying estimator is the same
    assert_allclose(model.estimator_.coef_, logistic_regression.coef_)
    # emulate the response method that should take into account the `pos_label`
    if response_method == "predict_proba":
        y_score = model.predict_proba(X)[:, pos_label]
    else:  # response_method == "decision_function"
        # decision values are oriented towards class 1; flip them for pos_label=0
        y_score = model.decision_function(X)
        y_score = y_score if pos_label == 1 else -y_score
    # create a mapping from boolean values to class labels
    map_to_label = np.array([0, 1]) if pos_label == 1 else np.array([1, 0])
    y_pred_lr = map_to_label[(y_score >= threshold).astype(int)]
    assert_allclose(model.predict(X), y_pred_lr)
    # the response methods themselves must stay untouched by the threshold
    for method in ("predict_proba", "predict_log_proba", "decision_function"):
        assert_allclose(
            getattr(model, method)(X), getattr(logistic_regression, method)(X)
        )
        assert_allclose(
            getattr(model.estimator_, method)(X),
            getattr(logistic_regression, method)(X),
        )
@pytest.mark.usefixtures("enable_slep006")
def test_fixed_threshold_classifier_metadata_routing():
    """Check that everything works with metadata routing."""
    X, y = make_classification(random_state=0)
    sample_weight = np.ones_like(y)
    sample_weight[::2] = 2
    # reference: fit the bare classifier with weights routed explicitly
    reference = LogisticRegression().set_fit_request(sample_weight=True)
    reference.fit(X, y, sample_weight=sample_weight)
    # the wrapper must forward `sample_weight` to its inner estimator
    wrapper = FixedThresholdClassifier(estimator=clone(reference))
    wrapper.fit(X, y, sample_weight=sample_weight)
    assert_allclose(wrapper.estimator_.coef_, reference.coef_)
# ---------------------------------------------------------------------------
# NOTE: the lines that stood here ("View File", "@@ -0,0 +1,572 @@") were
# artifacts from a scraped git diff page, not Python code. The content below
# belongs to a separate test module (model_selection curve-display tests).
# ---------------------------------------------------------------------------
import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.model_selection import (
LearningCurveDisplay,
ValidationCurveDisplay,
learning_curve,
validation_curve,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_allclose, assert_array_equal
@pytest.fixture
def data():
    # Iris features/targets shuffled deterministically so that fixed splits
    # used by the tests below are not ordered by class.
    return shuffle(*load_iris(return_X_y=True), random_state=0)
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        ({"std_display_style": "invalid"}, ValueError, "Unknown std_display_style:"),
        ({"score_type": "invalid"}, ValueError, "Unknown score_type:"),
    ],
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_parameters_validation(
    pyplot, data, params, err_type, err_msg, CurveDisplay, specific_params
):
    """Check that we raise a proper error when passing invalid parameters."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    # invalid display options must be rejected at `from_estimator` time
    with pytest.raises(err_type, match=err_msg):
        CurveDisplay.from_estimator(estimator, X, y, **specific_params, **params)
def test_learning_curve_display_default_usage(pyplot, data):
    """Check the default usage of the LearningCurveDisplay class."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    display = LearningCurveDisplay.from_estimator(
        estimator, X, y, train_sizes=train_sizes
    )
    import matplotlib as mpl
    # by default, curves are drawn as lines with a semi-transparent std band
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert isinstance(display.fill_between_, list)
    for fill in display.fill_between_:
        assert isinstance(fill, mpl.collections.PolyCollection)
        assert fill.get_alpha() == 0.5
    # default labelling and legend
    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == "Number of samples in the training set"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]
    # the stored data must match a direct call to `learning_curve`
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    assert_array_equal(display.train_sizes, train_sizes_abs)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
def test_validation_curve_display_default_usage(pyplot, data):
    """Check the default usage of the ValidationCurveDisplay class."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    display = ValidationCurveDisplay.from_estimator(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    import matplotlib as mpl
    # by default, curves are drawn as lines with a semi-transparent std band
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert isinstance(display.fill_between_, list)
    for fill in display.fill_between_:
        assert isinstance(fill, mpl.collections.PolyCollection)
        assert fill.get_alpha() == 0.5
    # the x-axis is labelled with the varied hyper-parameter name
    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == f"{param_name}"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]
    # the stored data must match a direct call to `validation_curve`
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    assert_array_equal(display.param_range, param_range)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_negate_score(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the `negate_score` parameter calling `from_estimator` and
    `plot`.
    """
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
    negate_score = False
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    positive_scores = display.lines_[0].get_data()[1]
    assert (positive_scores >= 0).all()
    assert display.ax_.get_ylabel() == "Score"
    negate_score = True
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    # negating flips the sign of the plotted values and adapts the y-label
    negative_scores = display.lines_[0].get_data()[1]
    assert (negative_scores <= 0).all()
    assert_allclose(negative_scores, -positive_scores)
    assert display.ax_.get_ylabel() == "Negative score"
    negate_score = False
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    assert display.ax_.get_ylabel() == "Score"
    display.plot(negate_score=not negate_score)
    # re-plotting with `negate_score=True` flips the values but keeps the
    # y-label that was chosen at `from_estimator` time
    assert display.ax_.get_ylabel() == "Score"
    assert (display.lines_[0].get_data()[1] < 0).all()
@pytest.mark.parametrize(
    "score_name, ylabel", [(None, "Score"), ("Accuracy", "Accuracy")]
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_score_name(
    pyplot, data, score_name, ylabel, CurveDisplay, specific_params
):
    """Check that we can overwrite the default score name shown on the y-axis."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )
    # the y-axis label follows the requested score name ("Score" when None)
    assert display.ax_.get_ylabel() == ylabel
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )
    # the resolved name is also stored on the display object itself
    assert display.score_name == ylabel
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_learning_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    # reference values computed directly with `learning_curve`
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    # score_type="train": only the train curve is shown
    score_type = "train"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, train_scores.mean(axis=1))
    # score_type="test": only the test curve is shown
    score_type = "test"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, test_scores.mean(axis=1))
    # score_type="both": train and test curves are shown together
    score_type = "both"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]
    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
    assert_array_equal(x_data_train, train_sizes_abs)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, train_sizes_abs)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_validation_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    # reference values computed directly with `validation_curve`
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    # score_type="train": only the train curve is shown
    score_type = "train"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, train_scores.mean(axis=1))
    # score_type="test": only the test curve is shown
    score_type = "test"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, test_scores.mean(axis=1))
    # score_type="both": train and test curves are shown together
    score_type = "both"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]
    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
    assert_array_equal(x_data_train, param_range)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, param_range)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize(
    "CurveDisplay, specific_params, expected_xscale",
    [
        (
            ValidationCurveDisplay,
            {"param_name": "max_depth", "param_range": np.arange(1, 5)},
            "linear",
        ),
        (LearningCurveDisplay, {"train_sizes": np.linspace(0.1, 0.9, num=5)}, "linear"),
        (
            ValidationCurveDisplay,
            {
                "param_name": "max_depth",
                "param_range": np.round(np.logspace(0, 2, num=5)).astype(np.int64),
            },
            "log",
        ),
        (LearningCurveDisplay, {"train_sizes": np.logspace(-1, 0, num=5)}, "log"),
    ],
)
def test_curve_display_xscale_auto(
    pyplot, data, CurveDisplay, specific_params, expected_xscale
):
    """Check the behaviour of the x-axis scaling depending on the data provided."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(estimator, X, y, **specific_params)
    # evenly spaced x values yield a linear axis; log-spaced values a log axis
    assert display.ax_.get_xscale() == expected_xscale
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_std_display_style(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the parameter `std_display_style`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    import matplotlib as mpl

    # std_display_style=None: plain lines only; no error bars, no bands.
    disp = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, std_display_style=None
    )
    assert len(disp.lines_) == 2
    assert all(isinstance(line, mpl.lines.Line2D) for line in disp.lines_)
    assert disp.errorbar_ is None
    assert disp.fill_between_ is None
    _, labels = disp.ax_.get_legend_handles_labels()
    assert len(labels) == 2

    # std_display_style="fill_between": lines plus two shaded std bands.
    disp = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, std_display_style="fill_between"
    )
    assert len(disp.lines_) == 2
    assert all(isinstance(line, mpl.lines.Line2D) for line in disp.lines_)
    assert disp.errorbar_ is None
    assert len(disp.fill_between_) == 2
    assert all(
        isinstance(band, mpl.collections.PolyCollection)
        for band in disp.fill_between_
    )
    _, labels = disp.ax_.get_legend_handles_labels()
    assert len(labels) == 2

    # std_display_style="errorbar": error-bar containers replace the lines.
    disp = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, std_display_style="errorbar"
    )
    assert disp.lines_ is None
    assert len(disp.errorbar_) == 2
    assert all(
        isinstance(eb, mpl.container.ErrorbarContainer) for eb in disp.errorbar_
    )
    assert disp.fill_between_ is None
    _, labels = disp.ax_.get_legend_handles_labels()
    assert len(labels) == 2
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the different plotting keyword arguments: `line_kw`,
    `fill_between_kw`, and `errorbar_kw`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    # With the fill_between style, both the line and the shaded band must
    # honour the requested colors.
    disp = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style="fill_between",
        line_kw={"color": "red"},
        fill_between_kw={"color": "red", "alpha": 1.0},
    )
    assert disp.lines_[0].get_color() == "red"
    assert_allclose(
        disp.fill_between_[0].get_facecolor(),
        [[1.0, 0.0, 0.0, 1.0]],  # RGBA of fully opaque red
    )

    # With the errorbar style, the error-bar line must honour `errorbar_kw`.
    disp = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style="errorbar",
        errorbar_kw={"color": "red"},
    )
    assert disp.errorbar_[0].lines[0].get_color() == "red"
@pytest.mark.parametrize(
    "param_range, xscale",
    [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")],
)
def test_validation_curve_xscale_from_param_range_provided_as_a_list(
    pyplot, data, param_range, xscale
):
    """Check the induced xscale from the provided param_range values."""
    X, y = data
    clf = DecisionTreeClassifier(random_state=0)

    disp = ValidationCurveDisplay.from_estimator(
        clf,
        X,
        y,
        param_name="max_depth",
        param_range=param_range,
    )

    # Positive log-spaced lists induce "log"; ranges spanning zero, "symlog".
    assert disp.ax_.get_xscale() == xscale
@pytest.mark.parametrize(
    "Display, params",
    [
        (LearningCurveDisplay, {}),
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
    ],
)
def test_subclassing_displays(pyplot, data, Display, params):
    """Check that named constructors return the correct type when subclassed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    X, y = data
    clf = DecisionTreeClassifier(random_state=0)

    class DisplaySubclass(Display):
        pass

    # The factory must instantiate the subclass, not the parent display type.
    obj = DisplaySubclass.from_estimator(clf, X, y, **params)
    assert isinstance(obj, DisplaySubclass)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,856 @@
from math import ceil
import numpy as np
import pytest
from scipy.stats import expon, norm, randint
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.model_selection import (
GroupKFold,
GroupShuffleSplit,
HalvingGridSearchCV,
HalvingRandomSearchCV,
KFold,
LeaveOneGroupOut,
LeavePGroupsOut,
ShuffleSplit,
StratifiedKFold,
StratifiedShuffleSplit,
)
from sklearn.model_selection._search_successive_halving import (
_SubsampleMetaSplitter,
_top_k,
)
from sklearn.model_selection.tests.test_search import (
check_cv_results_array_types,
check_cv_results_keys,
)
from sklearn.svm import SVC, LinearSVC
class FastClassifier(DummyClassifier):
    """Dummy classifier that accepts parameters a, b, ... z.

    These parameters don't affect the predictions; they exist only so that
    large parameter grids can be searched quickly in tests.
    """

    # Extend the parent constraints so every single-letter parameter from
    # "a" to "z" is accepted without validation.
    _parameter_constraints: dict = dict(
        DummyClassifier._parameter_constraints,
        **{
            chr(code): "no_validation"  # type: ignore
            for code in range(ord("a"), ord("z") + 1)
        },
    )

    def __init__(
        self, strategy="stratified", random_state=None, constant=None, **kwargs
    ):
        # The a-z kwargs are deliberately ignored.
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def get_params(self, deep=False):
        params = super().get_params(deep=deep)
        # Report a placeholder value for each of the a-z parameters.
        params.update(
            (chr(code), "whatever") for code in range(ord("a"), ord("z") + 1)
        )
        return params
class SometimesFailClassifier(DummyClassifier):
    """Dummy classifier that raises in `fit` and/or `predict` on demand.

    Used to check how the halving searches handle fit/score failures,
    which surface as NaN scores in `cv_results_`.
    """

    def __init__(
        self,
        strategy="stratified",
        random_state=None,
        constant=None,
        n_estimators=10,
        fail_fit=False,
        fail_predict=False,
        a=0,
    ):
        # `fail_fit` / `fail_predict` toggle the simulated failures;
        # `n_estimators` and `a` only serve as searchable parameters.
        self.fail_fit = fail_fit
        self.fail_predict = fail_predict
        self.n_estimators = n_estimators
        self.a = a
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def fit(self, X, y):
        # Simulate a failure during fitting when requested.
        if self.fail_fit:
            raise Exception("fitting failed")
        return super().fit(X, y)

    def predict(self, X):
        # Simulate a failure during prediction when requested.
        if self.fail_predict:
            raise Exception("predict failed")
        return super().predict(X)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.FitFailedWarning")
@pytest.mark.filterwarnings("ignore:Scoring failed:UserWarning")
@pytest.mark.filterwarnings("ignore:One or more of the:UserWarning")
@pytest.mark.parametrize("HalvingSearch", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize("fail_at", ("fit", "predict"))
def test_nan_handling(HalvingSearch, fail_at):
    """Check the selection of the best scores in presence of failure represented by
    NaN values."""
    X, y = make_classification(n_samples=1_000, random_state=0)

    search = HalvingSearch(
        SometimesFailClassifier(),
        {f"fail_{fail_at}": [False, True], "a": range(3)},
        resource="n_estimators",
        max_resources=6,
        min_resources=1,
        factor=2,
    )
    search.fit(X, y)

    # Estimators that failed during fit/predict must always rank lower than
    # the ones for which fit/predict succeeded.
    assert not search.best_params_[f"fail_{fail_at}"]

    scores = search.cv_results_["mean_test_score"]
    ranks = search.cv_results_["rank_test_score"]
    failed = np.isnan(scores)

    # The simulated failures must produce NaN scores.
    assert failed.any()
    nan_ranks = np.unique(ranks[failed])
    # All NaN scores share a single rank, and it is the worst (largest) one.
    assert nan_ranks.shape[0] == 1
    assert (nan_ranks[0] >= ranks).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "aggressive_elimination,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_required_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_remaining_candidates,"
        "expected_n_candidates,"
        "expected_n_resources,"
    ),
    [
        # notice how it loops at the beginning
        # also, the number of candidates evaluated at the last iteration is
        # <= factor
        (True, "limited", 4, 4, 3, 1, [60, 20, 7, 3], [20, 20, 60, 180]),
        # no aggressive elimination: we end up with less iterations, and
        # the number of candidates at the last iter is > factor, which isn't
        # ideal
        (False, "limited", 3, 4, 3, 3, [60, 20, 7], [20, 60, 180]),
        # # When the amount of resource isn't limited, aggressive_elimination
        # # has no effect. Here the default min_resources='exhaust' will take
        # # over.
        (True, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
        (False, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
    ],
)
def test_aggressive_elimination(
    Est,
    aggressive_elimination,
    max_resources,
    expected_n_iterations,
    expected_n_required_iterations,
    expected_n_possible_iterations,
    expected_n_remaining_candidates,
    expected_n_candidates,
    expected_n_resources,
):
    # Check the effect of the aggressive_elimination parameter.
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    max_resources = 180 if max_resources == "limited" else n_samples

    sh = Est(
        FastClassifier(),
        {"a": ("l1", "l2"), "b": list(range(30))},
        aggressive_elimination=aggressive_elimination,
        max_resources=max_resources,
        factor=3,
    )
    sh.set_params(verbose=True)  # just for test coverage
    if Est is HalvingRandomSearchCV:
        # sample as many candidates as the full grid contains
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    assert sh.n_candidates_ == expected_n_candidates
    assert sh.n_remaining_candidates_ == expected_n_remaining_candidates
    # at most `factor` candidates remain after the last elimination round
    assert sh.n_remaining_candidates_ == ceil(sh.n_candidates_[-1] / sh.factor)
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "min_resources,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_resources,"
    ),
    [
        # with enough resources
        ("smallest", "auto", 2, 4, [20, 60]),
        # with enough resources but min_resources set manually
        (50, "auto", 2, 3, [50, 150]),
        # without enough resources, only one iteration can be done
        ("smallest", 30, 1, 1, [20]),
        # with exhaust: use as much resources as possible at the last iter
        ("exhaust", "auto", 2, 2, [333, 999]),
        ("exhaust", 1000, 2, 2, [333, 999]),
        ("exhaust", 999, 2, 2, [333, 999]),
        ("exhaust", 600, 2, 2, [200, 600]),
        ("exhaust", 599, 2, 2, [199, 597]),
        ("exhaust", 300, 2, 2, [100, 300]),
        ("exhaust", 60, 2, 2, [20, 60]),
        ("exhaust", 50, 1, 1, [20]),
        ("exhaust", 20, 1, 1, [20]),
    ],
)
def test_min_max_resources(
    Est,
    min_resources,
    max_resources,
    expected_n_iterations,
    expected_n_possible_iterations,
    expected_n_resources,
):
    # Check how min_resources and max_resources drive the amount of resources
    # allocated at each iteration.
    X, y = make_classification(n_samples=1000, random_state=0)

    sh = Est(
        FastClassifier(),
        {"a": [1, 2], "b": [1, 2, 3]},
        factor=3,
        min_resources=min_resources,
        max_resources=max_resources,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=6)  # same number as with the grid

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    # 6 candidate combinations with factor=3 need 2 elimination rounds.
    assert sh.n_required_iterations_ == 2
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    if min_resources == "exhaust":
        assert sh.n_possible_iterations_ == sh.n_iterations_ == len(sh.n_resources_)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
@pytest.mark.parametrize(
    "max_resources, n_iterations, n_possible_iterations",
    [
        ("auto", 5, 9),  # all resources are used
        (1024, 5, 9),
        (700, 5, 8),
        (512, 5, 8),
        (511, 5, 7),
        (32, 4, 4),
        (31, 3, 3),
        (16, 3, 3),
        (4, 1, 1),  # max_resources == min_resources, only one iteration is
        # possible
    ],
)
def test_n_iterations(Est, max_resources, n_iterations, n_possible_iterations):
    # Check how many iterations are actually run depending on max_resources.
    X, y = make_classification(n_samples=1024, random_state=1)

    sh = Est(
        FastClassifier(),
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        factor=2,
        max_resources=max_resources,
        min_resources=4,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=20)  # same as for HalvingGridSearchCV

    sh.fit(X, y)

    # 20 candidates with factor=2 would require 5 elimination rounds.
    assert sh.n_required_iterations_ == 5
    assert sh.n_iterations_ == n_iterations
    assert sh.n_possible_iterations_ == n_possible_iterations
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_resource_parameter(Est):
    # Check that an estimator parameter can be used as the resource.
    X, y = make_classification(n_samples=1000, random_state=0)
    base_estimator = FastClassifier()

    sh = Est(
        base_estimator,
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        resource="c",
        max_resources=10,
        factor=3,
    )
    sh.fit(X, y)

    assert set(sh.n_resources_) == {1, 3, 9}
    # The resource value must agree between the params dict and the dedicated
    # cv_results_ columns.
    results = sh.cv_results_
    for n_res, params, param_c in zip(
        results["n_resources"], results["params"], results["param_c"]
    ):
        assert n_res == params["c"] == param_c

    # An unknown estimator parameter cannot serve as the resource.
    with pytest.raises(
        ValueError, match="Cannot use resource=1234 which is not supported "
    ):
        sh = HalvingGridSearchCV(
            base_estimator,
            {"a": [1, 2], "b": list(range(10))},
            cv=2,
            resource="1234",
            max_resources=10,
        )
        sh.fit(X, y)

    # The resource cannot simultaneously be part of the searched grid.
    with pytest.raises(
        ValueError,
        match=(
            "Cannot use parameter c as the resource since it is part "
            "of the searched parameters."
        ),
    ):
        sh = HalvingGridSearchCV(
            base_estimator,
            {"a": [1, 2], "b": [1, 2], "c": [1, 3]},
            cv=2,
            resource="c",
            max_resources=10,
        )
        sh.fit(X, y)
@pytest.mark.parametrize(
    "max_resources, n_candidates, expected_n_candidates",
    [
        (512, "exhaust", 128),  # generate exactly as much as needed
        (32, "exhaust", 8),
        (32, 8, 8),
        (32, 7, 7),  # ask for less than what we could
        (32, 9, 9),  # ask for more than 'reasonable'
    ],
)
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    # Check that the number of generated candidates matches expectations.
    X, y = make_classification(n_samples=1024, random_state=0)

    sh = HalvingRandomSearchCV(
        FastClassifier(),
        {"a": norm, "b": norm},
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    sh.fit(X, y)

    assert sh.n_candidates_[0] == expected_n_candidates
    if n_candidates == "exhaust":
        # 'exhaust' must make the last iteration consume the whole budget.
        assert sh.n_resources_[-1] == max_resources
@pytest.mark.parametrize(
    "param_distributions, expected_n_candidates",
    [
        ({"a": [1, 2]}, 2),  # all lists, sample less than n_candidates
        ({"a": randint(1, 3)}, 10),  # not all list, respect n_candidates
    ],
)
def test_random_search_discrete_distributions(
    param_distributions, expected_n_candidates
):
    # When asking for more candidates than possible, the number actually
    # sampled depends on whether the distributions are 'all lists' or not
    # (see ParameterSampler for details). This partly duplicates the
    # ParameterSampler checks, but interaction bugs were discovered during
    # the development of SH.
    X, y = make_classification(n_samples=1024, random_state=0)

    sh = HalvingRandomSearchCV(FastClassifier(), param_distributions, n_candidates=10)
    sh.fit(X, y)

    assert sh.n_candidates_[0] == expected_n_candidates
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"resource": "not_a_parameter"},
            "Cannot use resource=not_a_parameter which is not supported",
        ),
        (
            {"resource": "a", "max_resources": 100},
            "Cannot use parameter a as the resource since it is part of",
        ),
        (
            {"max_resources": "auto", "resource": "b"},
            "resource can only be 'n_samples' when max_resources='auto'",
        ),
        (
            {"min_resources": 15, "max_resources": 14},
            "min_resources_=15 is greater than max_resources_=14",
        ),
        ({"cv": KFold(shuffle=True)}, "must yield consistent folds"),
        ({"cv": ShuffleSplit()}, "must yield consistent folds"),
    ],
)
def test_input_errors(Est, params, expected_error_message):
    # Invalid configurations must raise at fit time with an explicit message.
    X, y = make_classification(100)
    search = Est(FastClassifier(), {"a": [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"n_candidates": "exhaust", "min_resources": "exhaust"},
            "cannot be both set to 'exhaust'",
        ),
    ],
)
def test_input_errors_randomized(params, expected_error_message):
    # Error cases specific to HalvingRandomSearchCV.
    X, y = make_classification(100)
    search = HalvingRandomSearchCV(FastClassifier(), {"a": [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "fraction, subsample_test, expected_train_size, expected_test_size",
    [
        (0.5, True, 40, 10),
        (0.5, False, 40, 20),
        (0.2, True, 16, 4),
        (0.2, False, 16, 20),
    ],
)
def test_subsample_splitter_shapes(
    fraction, subsample_test, expected_train_size, expected_test_size
):
    # Splits produced by _SubsampleMetaSplitter must have the expected sizes.
    n_samples = 100
    X, y = make_classification(n_samples)
    splitter = _SubsampleMetaSplitter(
        base_cv=KFold(5),
        fraction=fraction,
        subsample_test=subsample_test,
        random_state=None,
    )

    for train_idx, test_idx in splitter.split(X, y):
        assert train_idx.shape[0] == expected_train_size
        assert test_idx.shape[0] == expected_test_size
        if subsample_test:
            # Both sides subsampled: together they cover `fraction` of X.
            assert train_idx.shape[0] + test_idx.shape[0] == int(n_samples * fraction)
        else:
            # The test fold keeps the full size produced by the base CV.
            assert test_idx.shape[0] == n_samples // splitter.base_cv.get_n_splits()
@pytest.mark.parametrize("subsample_test", (True, False))
def test_subsample_splitter_determinism(subsample_test):
    # Consistency of _SubsampleMetaSplitter across calls to split():
    # - train sets are allowed to differ (they're always sampled with a
    #   different fraction anyway)
    # - when the test set is not subsampled, it must always be the same; this
    #   is the important check, and is guaranteed by the determinism of the
    #   base_cv.
    # Note: both train and test splits could be made fully reproducible by
    # drawing an int seed in _SubsampleMetaSplitter.__init__.
    X, y = make_classification(100)
    splitter = _SubsampleMetaSplitter(
        base_cv=KFold(5), fraction=0.5, subsample_test=subsample_test, random_state=None
    )

    first_pass = list(splitter.split(X, y, groups=None))
    second_pass = list(splitter.split(X, y, groups=None))

    for (train_a, test_a), (train_b, test_b) in zip(first_pass, second_pass):
        assert not np.all(train_a == train_b)
        if subsample_test:
            assert not np.all(test_a == test_b)
        else:
            assert np.all(test_a == test_b)
            assert np.all(X[test_a] == X[test_b])
@pytest.mark.parametrize(
    "k, itr, expected",
    [
        (1, 0, ["c"]),
        (2, 0, ["a", "c"]),
        (4, 0, ["d", "b", "a", "c"]),
        (10, 0, ["d", "b", "a", "c"]),
        (1, 1, ["e"]),
        (2, 1, ["f", "e"]),
        (10, 1, ["f", "e"]),
        (1, 2, ["i"]),
        (10, 2, ["g", "h", "i"]),
    ],
)
def test_top_k(k, itr, expected):
    # Synthetic (not 'real world') results dict spanning three iterations.
    results = {
        "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2],
        "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9],
        "params": ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
    }
    # _top_k must return the (at most) k best candidates of iteration `itr`.
    assert np.all(_top_k(results, k=k, itr=itr) == expected)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_cv_results(Est):
    # test that the cv_results_ matches correctly the logic of the
    # tournament: in particular that the candidates continued in each
    # successive iteration are those that were best in the previous iteration
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(0)
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()
    # generate random scores: we want to avoid ties, which would otherwise
    # mess with the ordering and make testing harder
    def scorer(est, X, y):
        return rng.rand()
    sh = Est(base_estimator, param_grid, factor=2, scoring=scorer)
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    # non-regression check for
    # https://github.com/scikit-learn/scikit-learn/issues/19203
    assert isinstance(sh.cv_results_["iter"], np.ndarray)
    assert isinstance(sh.cv_results_["n_resources"], np.ndarray)
    cv_results_df = pd.DataFrame(sh.cv_results_)
    # just make sure we don't have ties
    assert len(cv_results_df["mean_test_score"].unique()) == len(cv_results_df)
    # pivot to one row per candidate and one column per iteration
    cv_results_df["params_str"] = cv_results_df["params"].apply(str)
    table = cv_results_df.pivot(
        index="params_str", columns="iter", values="mean_test_score"
    )
    # table looks like something like this:
    # iter                    0      1       2        3   4   5
    # params_str
    # {'a': 'l2', 'b': 23} 0.75    NaN     NaN      NaN NaN NaN
    # {'a': 'l1', 'b': 30} 0.90  0.875     NaN      NaN NaN NaN
    # {'a': 'l1', 'b': 0}  0.75    NaN     NaN      NaN NaN NaN
    # {'a': 'l2', 'b': 3}  0.85  0.925  0.9125  0.90625 NaN NaN
    # {'a': 'l1', 'b': 5}  0.80    NaN     NaN      NaN NaN NaN
    # ...
    # where a NaN indicates that the candidate wasn't evaluated at a given
    # iteration, because it wasn't part of the top-K at some previous
    # iteration. We here make sure that candidates that aren't in the top-k at
    # any given iteration are indeed not evaluated at the subsequent
    # iterations.
    nan_mask = pd.isna(table)
    n_iter = sh.n_iterations_
    for it in range(n_iter - 1):
        already_discarded_mask = nan_mask[it]
        # make sure that if a candidate is already discarded, we don't evaluate
        # it later
        assert (
            already_discarded_mask & nan_mask[it + 1] == already_discarded_mask
        ).all()
        # make sure that the number of discarded candidate is correct
        discarded_now_mask = ~already_discarded_mask & nan_mask[it + 1]
        kept_mask = ~already_discarded_mask & ~discarded_now_mask
        assert kept_mask.sum() == sh.n_candidates_[it + 1]
        # make sure that all discarded candidates have a lower score than the
        # kept candidates
        discarded_max_score = table[it].where(discarded_now_mask).max()
        kept_min_score = table[it].where(kept_mask).min()
        assert discarded_max_score < kept_min_score
    # We now make sure that the best candidate is chosen only from the last
    # iteration.
    # We also make sure this is true even if there were higher scores in
    # earlier rounds (this isn't generally the case, but worth ensuring it's
    # possible).
    last_iter = cv_results_df["iter"].max()
    idx_best_last_iter = cv_results_df[cv_results_df["iter"] == last_iter][
        "mean_test_score"
    ].idxmax()
    idx_best_all_iters = cv_results_df["mean_test_score"].idxmax()
    assert sh.best_params_ == cv_results_df.iloc[idx_best_last_iter]["params"]
    assert (
        cv_results_df.iloc[idx_best_last_iter]["mean_test_score"]
        < cv_results_df.iloc[idx_best_all_iters]["mean_test_score"]
    )
    assert (
        cv_results_df.iloc[idx_best_last_iter]["params"]
        != cv_results_df.iloc[idx_best_all_iters]["params"]
    )
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_base_estimator_inputs(Est):
    # make sure that the base estimators are passed the correct parameters and
    # number of samples at each iteration.
    pd = pytest.importorskip("pandas")
    passed_n_samples_fit = []
    passed_n_samples_predict = []
    passed_params = []
    # Bookkeeping subclass: records, via the closure lists above, the sample
    # counts seen by fit/predict and the parameters set by the search.
    class FastClassifierBookKeeping(FastClassifier):
        def fit(self, X, y):
            passed_n_samples_fit.append(X.shape[0])
            return super().fit(X, y)
        def predict(self, X):
            passed_n_samples_predict.append(X.shape[0])
            return super().predict(X)
        def set_params(self, **params):
            passed_params.append(params)
            return super().set_params(**params)
    n_samples = 1024
    n_splits = 2
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifierBookKeeping()
    sh = Est(
        base_estimator,
        param_grid,
        factor=2,
        cv=n_splits,
        return_train_score=False,
        refit=False,
    )
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    assert len(passed_n_samples_fit) == len(passed_n_samples_predict)
    # train + test sizes add up to the full resource budget of each fit
    passed_n_samples = [
        x + y for (x, y) in zip(passed_n_samples_fit, passed_n_samples_predict)
    ]
    # Lists are of length n_splits * n_iter * n_candidates_at_i.
    # Each chunk of size n_splits corresponds to the n_splits folds for the
    # same candidate at the same iteration, so they contain equal values. We
    # subsample such that the lists are of length n_iter * n_candidates_at_it
    passed_n_samples = passed_n_samples[::n_splits]
    passed_params = passed_params[::n_splits]
    cv_results_df = pd.DataFrame(sh.cv_results_)
    assert len(passed_params) == len(passed_n_samples) == len(cv_results_df)
    # the recorded sample counts must match the per-iteration budgets, and the
    # recorded parameters must match cv_results_ row by row
    uniques, counts = np.unique(passed_n_samples, return_counts=True)
    assert (sh.n_resources_ == uniques).all()
    assert (sh.n_candidates_ == counts).all()
    assert (cv_results_df["params"] == passed_params).all()
    assert (cv_results_df["n_resources"] == passed_n_samples).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_groups_support(Est):
    # A group-aware CV must raise when `groups` is None (the error must
    # propagate through the halving searches) and must accept a provided
    # groups array; non-group CVs never need groups.
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=50, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 50)
    clf = LinearSVC(random_state=0)
    grid = {"C": [1]}

    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(n_splits=3),
        GroupShuffleSplit(random_state=0),
    ]
    for cv in group_cvs:
        search = Est(clf, grid, cv=cv, random_state=0)
        with pytest.raises(
            ValueError, match="The 'groups' parameter should not be None."
        ):
            search.fit(X, y)
        search.fit(X, y, groups=groups)

    for cv in [StratifiedKFold(), StratifiedShuffleSplit(random_state=0)]:
        # Should not raise an error
        Est(clf, grid, cv=cv).fit(X, y)
@pytest.mark.parametrize("SearchCV", [HalvingRandomSearchCV, HalvingGridSearchCV])
def test_min_resources_null(SearchCV):
    """Check that we raise an error if the minimum resources is set to 0."""
    search = SearchCV(FastClassifier(), {"a": [1]}, min_resources="smallest")

    # An empty dataset makes min_resources_ resolve to 0.
    X = np.empty(0).reshape(0, 3)
    with pytest.raises(
        ValueError, match="min_resources_=0: you might have passed an empty dataset X."
    ):
        search.fit(X, [])
@pytest.mark.parametrize("SearchCV", [HalvingGridSearchCV, HalvingRandomSearchCV])
def test_select_best_index(SearchCV):
    """Check the selection strategy of the halving search."""
    # Synthetic (not 'real world') results: the best score of the *last*
    # iteration belongs to candidate 'i', at flat index 8.
    results = {
        "iter": np.array([0, 0, 0, 0, 1, 1, 2, 2, 2]),
        "mean_test_score": np.array([4, 3, 5, 1, 11, 10, 5, 6, 9]),
        "params": np.array(["a", "b", "c", "d", "e", "f", "g", "h", "i"]),
    }
    assert SearchCV._select_best_index(None, None, results) == 8
def test_halving_random_search_list_of_dicts():
    """Check the behaviour of the `HalvingRandomSearchCV` with `param_distribution`
    being a list of dictionary.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)
    # two independent sub-spaces: rbf (continuous C/gamma) and poly (degree)
    params = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]
    # keys that must appear in cv_results_
    param_keys = (
        "param_C",
        "param_degree",
        "param_gamma",
        "param_kernel",
    )
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    # keys specific to successive halving
    extra_keys = ("n_resources", "iter")
    search = HalvingRandomSearchCV(
        SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0
    )
    search.fit(X, y)
    n_candidates = sum(search.n_candidates_)
    cv_results = search.cv_results_
    # Check results structure
    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys)
    expected_cv_results_kinds = {
        "param_C": "f",
        "param_degree": "i",
        "param_gamma": "f",
        "param_kernel": "O",
    }
    check_cv_results_array_types(
        search, param_keys, score_keys, expected_cv_results_kinds
    )
    # candidates drawn from the "poly" dict have C/gamma masked out and a
    # concrete degree value
    assert all(
        (
            cv_results["param_C"].mask[i]
            and cv_results["param_gamma"].mask[i]
            and not cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "poly"
    )
    # conversely, "rbf" candidates have concrete C/gamma and a masked degree
    assert all(
        (
            not cv_results["param_C"].mask[i]
            and not cv_results["param_gamma"].mask[i]
            and cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "rbf"
    )