library packages

This commit is contained in:
2024-09-28 22:56:00 -07:00
parent 64d9b78b3a
commit 1973934e95
4893 changed files with 1184173 additions and 31 deletions

View File

@@ -0,0 +1,96 @@
"""Tools for model selection, such as cross validation and hyper-parameter tuning."""
import typing
from ._classification_threshold import (
FixedThresholdClassifier,
TunedThresholdClassifierCV,
)
from ._plot import LearningCurveDisplay, ValidationCurveDisplay
from ._search import GridSearchCV, ParameterGrid, ParameterSampler, RandomizedSearchCV
from ._split import (
BaseCrossValidator,
BaseShuffleSplit,
GroupKFold,
GroupShuffleSplit,
KFold,
LeaveOneGroupOut,
LeaveOneOut,
LeavePGroupsOut,
LeavePOut,
PredefinedSplit,
RepeatedKFold,
RepeatedStratifiedKFold,
ShuffleSplit,
StratifiedGroupKFold,
StratifiedKFold,
StratifiedShuffleSplit,
TimeSeriesSplit,
check_cv,
train_test_split,
)
from ._validation import (
cross_val_predict,
cross_val_score,
cross_validate,
learning_curve,
permutation_test_score,
validation_curve,
)
if typing.TYPE_CHECKING:
    # Avoid errors in type checkers (e.g. mypy) for experimental estimators.
    # At runtime these names are served by the module-level `__getattr__`
    # below, which raises a helpful ImportError instead.
    # TODO: remove this check once the estimator is no longer experimental.
    from ._search_successive_halving import (  # noqa
        HalvingGridSearchCV,
        HalvingRandomSearchCV,
    )
# Public API of `sklearn.model_selection`, kept in sorted order so that
# additions are easy to spot in diffs and duplicates are easy to avoid.
__all__ = [
    "BaseCrossValidator",
    "BaseShuffleSplit",
    "FixedThresholdClassifier",
    "GridSearchCV",
    "GroupKFold",
    "GroupShuffleSplit",
    "KFold",
    "LearningCurveDisplay",
    "LeaveOneGroupOut",
    "LeaveOneOut",
    "LeavePGroupsOut",
    "LeavePOut",
    "ParameterGrid",
    "ParameterSampler",
    "PredefinedSplit",
    "RandomizedSearchCV",
    "RepeatedKFold",
    "RepeatedStratifiedKFold",
    "ShuffleSplit",
    "StratifiedGroupKFold",
    "StratifiedKFold",
    "StratifiedShuffleSplit",
    "TimeSeriesSplit",
    "TunedThresholdClassifierCV",
    "ValidationCurveDisplay",
    "check_cv",
    "cross_val_predict",
    "cross_val_score",
    "cross_validate",
    "learning_curve",
    "permutation_test_score",
    "train_test_split",
    "validation_curve",
]
# TODO: remove this check once the estimator is no longer experimental.
def __getattr__(name):
    """Module-level attribute hook (PEP 562) for experimental estimators.

    Accessing the successive-halving search estimators on this module
    raises a helpful ``ImportError`` pointing at the experimental feature
    flag; any other unknown attribute raises the usual ``AttributeError``.
    """
    if name not in ("HalvingGridSearchCV", "HalvingRandomSearchCV"):
        # Preserve the default module behaviour for unknown attributes.
        raise AttributeError(f"module {__name__} has no attribute {name}")
    raise ImportError(
        f"{name} is experimental and the API might change without any "
        "deprecation cycle. To use it, you need to explicitly import "
        "enable_halving_search_cv:\n"
        "from sklearn.experimental import enable_halving_search_cv"
    )

View File

@@ -0,0 +1,874 @@
import numpy as np
from ..utils._optional_dependencies import check_matplotlib_support
from ..utils._plotting import _interval_max_min_ratio, _validate_score_name
from ._validation import learning_curve, validation_curve
class _BaseCurveDisplay:
    """Private base class shared by learning/validation curve displays.

    Provides `_plot_curve`, which plots the mean of `self.train_scores` /
    `self.test_scores` against an x-axis supplied by the subclass and
    renders the cross-fold standard deviation either as error bars or as a
    shaded band.
    """

    def _plot_curve(
        self,
        x_data,
        *,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="test",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot mean scores (and their std) against `x_data` on `ax`.

        Populates the fitted-style attributes `ax_`, `figure_`, `lines_`,
        `errorbar_` and `fill_between_` on `self`. Exactly one of the
        `lines_`/`errorbar_` families is non-None depending on
        `std_display_style`. Raises ValueError for an unknown
        `std_display_style` or `score_type`.
        """
        check_matplotlib_support(f"{self.__class__.__name__}.plot")
        # Local import so matplotlib stays an optional dependency.
        import matplotlib.pyplot as plt
        if ax is None:
            _, ax = plt.subplots()
        if negate_score:
            # Useful for `neg_*` scorers so the plot shows the positive error.
            train_scores, test_scores = -self.train_scores, -self.test_scores
        else:
            train_scores, test_scores = self.train_scores, self.test_scores
        if std_display_style not in ("errorbar", "fill_between", None):
            raise ValueError(
                f"Unknown std_display_style: {std_display_style}. Should be one of"
                " 'errorbar', 'fill_between', or None."
            )
        if score_type not in ("test", "train", "both"):
            raise ValueError(
                f"Unknown score_type: {score_type}. Should be one of 'test', "
                "'train', or 'both'."
            )
        # Map legend label -> (n_ticks, n_cv_folds) score array to draw.
        if score_type == "train":
            scores = {"Train": train_scores}
        elif score_type == "test":
            scores = {"Test": test_scores}
        else:  # score_type == "both"
            scores = {"Train": train_scores, "Test": test_scores}
        if std_display_style in ("fill_between", None):
            # plot the mean score
            if line_kw is None:
                line_kw = {}
            self.lines_ = []
            for line_label, score in scores.items():
                self.lines_.append(
                    *ax.plot(
                        x_data,
                        score.mean(axis=1),
                        label=line_label,
                        **line_kw,
                    )
                )
            self.errorbar_ = None
            self.fill_between_ = None  # overwritten below by fill_between
        if std_display_style == "errorbar":
            if errorbar_kw is None:
                errorbar_kw = {}
            self.errorbar_ = []
            for line_label, score in scores.items():
                self.errorbar_.append(
                    ax.errorbar(
                        x_data,
                        score.mean(axis=1),
                        score.std(axis=1),
                        label=line_label,
                        **errorbar_kw,
                    )
                )
            self.lines_, self.fill_between_ = None, None
        elif std_display_style == "fill_between":
            if fill_between_kw is None:
                fill_between_kw = {}
            default_fill_between_kw = {"alpha": 0.5}
            # User-provided kwargs override the default alpha.
            fill_between_kw = {**default_fill_between_kw, **fill_between_kw}
            self.fill_between_ = []
            for line_label, score in scores.items():
                # The band spans mean +/- one std; it carries no label, the
                # legend entries come from the mean-score lines above.
                self.fill_between_.append(
                    ax.fill_between(
                        x_data,
                        score.mean(axis=1) - score.std(axis=1),
                        score.mean(axis=1) + score.std(axis=1),
                        **fill_between_kw,
                    )
                )
        score_name = self.score_name if score_name is None else score_name
        ax.legend()
        # We found that a ratio, smaller or bigger than 5, between the largest and
        # smallest gap of the x values is a good indicator to choose between linear
        # and log scale.
        if _interval_max_min_ratio(x_data) > 5:
            # symlog supports non-positive x values; log does not.
            xscale = "symlog" if x_data.min() <= 0 else "log"
        else:
            xscale = "linear"
        ax.set_xscale(xscale)
        ax.set_ylabel(f"{score_name}")
        self.ax_ = ax
        self.figure_ = ax.figure
class LearningCurveDisplay(_BaseCurveDisplay):
    """Learning Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and
    :ref:`detailed documentation <learning_curve>` regarding the learning
    curve visualization.

    .. versionadded:: 1.2

    Parameters
    ----------
    train_sizes : ndarray of shape (n_unique_ticks,)
        Numbers of training examples that has been used to generate the
        learning curve.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test set.

    score_name : str, default=None
        The name of the score used in `learning_curve`. It will override the name
        inferred from the `scoring` parameter. If `score` is `None`, we use `"Score"` if
        `negate_score` is `False` and `"Negative score"` otherwise. If `scoring` is a
        string or a callable, we infer the name. We replace `_` by spaces and capitalize
        the first letter. We remove `neg_` and replace it by `"Negative"` if
        `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the learning curve.

    figure_ : matplotlib Figure
        Figure containing the learning curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.lines.Line2D` objects corresponding to the mean train and
        test scores. If another style is used, `line_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.learning_curve : Compute the learning curve.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import LearningCurveDisplay, learning_curve
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> X, y = load_iris(return_X_y=True)
    >>> tree = DecisionTreeClassifier(random_state=0)
    >>> train_sizes, train_scores, test_scores = learning_curve(
    ...     tree, X, y)
    >>> display = LearningCurveDisplay(train_sizes=train_sizes,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score")
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None):
        # Plain attribute storage; all plotting happens in `plot`.
        self.train_sizes = train_sizes
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.
        """
        self._plot_curve(
            self.train_sizes,
            ax=ax,
            negate_score=negate_score,
            score_name=score_name,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
        # `_plot_curve` sets `self.ax_`; only the x-axis label is specific
        # to the learning curve.
        self.ax_.set_xlabel("Number of samples in the training set")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        groups=None,
        train_sizes=np.linspace(0.1, 1.0, 5),
        cv=None,
        scoring=None,
        exploit_incremental_learning=False,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        shuffle=False,
        random_state=None,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a learning curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <learning_curve>` regarding the learning curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        train_sizes : array-like of shape (n_ticks,), \
                default=np.linspace(0.1, 1.0, 5)
            Relative or absolute numbers of training examples that will be used
            to generate the learning curve. If the dtype is float, it is
            regarded as a fraction of the maximum size of the training set
            (that is determined by the selected validation method), i.e. it has
            to be within (0, 1]. Otherwise it is interpreted as absolute sizes
            of the training sets. Note that for classification the number of
            samples usually have to be big enough to contain at least one
            sample from each class.

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            A string (see :ref:`scoring_parameter`) or
            a scorer callable object / function with signature
            `scorer(estimator, X, y)` (see :ref:`scoring`).

        exploit_incremental_learning : bool, default=False
            If the estimator supports incremental learning, this will be
            used to speed up fitting for different training set sizes.

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        shuffle : bool, default=False
            Whether to shuffle training data before taking prefixes of it
            based on `train_sizes`.

        random_state : int, RandomState instance or None, default=None
            Used when `shuffle` is True. Pass an int for reproducible
            output across multiple function calls.
            See :term:`Glossary <random_state>`.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import LearningCurveDisplay
        >>> from sklearn.tree import DecisionTreeClassifier
        >>> X, y = load_iris(return_X_y=True)
        >>> tree = DecisionTreeClassifier(random_state=0)
        >>> LearningCurveDisplay.from_estimator(tree, X, y)
        <...>
        >>> plt.show()
        """
        # Fail early with a helpful message if matplotlib is missing.
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        score_name = _validate_score_name(score_name, scoring, negate_score)
        train_sizes, train_scores, test_scores = learning_curve(
            estimator,
            X,
            y,
            groups=groups,
            train_sizes=train_sizes,
            cv=cv,
            scoring=scoring,
            exploit_incremental_learning=exploit_incremental_learning,
            n_jobs=n_jobs,
            pre_dispatch=pre_dispatch,
            verbose=verbose,
            shuffle=shuffle,
            random_state=random_state,
            error_score=error_score,
            return_times=False,
            fit_params=fit_params,
        )
        viz = cls(
            train_sizes=train_sizes,
            train_scores=train_scores,
            test_scores=test_scores,
            score_name=score_name,
        )
        # `score_name` is not forwarded here: it is already stored on `viz`
        # and picked up by `_plot_curve`.
        return viz.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
class ValidationCurveDisplay(_BaseCurveDisplay):
    """Validation Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.ValidationCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.ValidationCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and :ref:`detailed documentation
    <validation_curve>` regarding the validation curve visualization.

    .. versionadded:: 1.3

    Parameters
    ----------
    param_name : str
        Name of the parameter that has been varied.

    param_range : array-like of shape (n_ticks,)
        The values of the parameter that have been evaluated.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test set.

    score_name : str, default=None
        The name of the score used in `validation_curve`. It will override the name
        inferred from the `scoring` parameter. If `score` is `None`, we use `"Score"` if
        `negate_score` is `False` and `"Negative score"` otherwise. If `scoring` is a
        string or a callable, we infer the name. We replace `_` by spaces and capitalize
        the first letter. We remove `neg_` and replace it by `"Negative"` if
        `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the validation curve.

    figure_ : matplotlib Figure
        Figure containing the validation curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.lines.Line2D` objects corresponding to the mean train and
        test scores. If another style is used, `line_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.validation_curve : Compute the validation curve.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import ValidationCurveDisplay, validation_curve
    >>> from sklearn.linear_model import LogisticRegression
    >>> X, y = make_classification(n_samples=1_000, random_state=0)
    >>> logistic_regression = LogisticRegression()
    >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
    >>> train_scores, test_scores = validation_curve(
    ...     logistic_regression, X, y, param_name=param_name, param_range=param_range
    ... )
    >>> display = ValidationCurveDisplay(
    ...     param_name=param_name, param_range=param_range,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score"
    ... )
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(
        self, *, param_name, param_range, train_scores, test_scores, score_name=None
    ):
        # Plain attribute storage; all plotting happens in `plot`.
        self.param_name = param_name
        self.param_range = param_range
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.
        """
        self._plot_curve(
            self.param_range,
            ax=ax,
            negate_score=negate_score,
            score_name=score_name,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
        # `_plot_curve` sets `self.ax_`; only the x-axis label is specific
        # to the validation curve.
        self.ax_.set_xlabel(f"{self.param_name}")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        param_name,
        param_range,
        groups=None,
        cv=None,
        scoring=None,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a validation curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <validation_curve>` regarding the validation curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        param_name : str
            Name of the parameter that will be varied.

        param_range : array-like of shape (n_values,)
            The values of the parameter that will be evaluated.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            A string (see :ref:`scoring_parameter`) or
            a scorer callable object / function with signature
            `scorer(estimator, X, y)` (see :ref:`scoring`).

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score` is
            `None`, we use `"Score"` if `negate_score` is `False` and `"Negative score"`
            otherwise. If `scoring` is a string or a callable, we infer the name. We
            replace `_` by spaces and capitalize the first letter. We remove `neg_` and
            replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.model_selection import ValidationCurveDisplay
        >>> from sklearn.linear_model import LogisticRegression
        >>> X, y = make_classification(n_samples=1_000, random_state=0)
        >>> logistic_regression = LogisticRegression()
        >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
        >>> ValidationCurveDisplay.from_estimator(
        ...     logistic_regression, X, y, param_name=param_name,
        ...     param_range=param_range,
        ... )
        <...>
        >>> plt.show()
        """
        # Fail early with a helpful message if matplotlib is missing.
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        score_name = _validate_score_name(score_name, scoring, negate_score)
        train_scores, test_scores = validation_curve(
            estimator,
            X,
            y,
            param_name=param_name,
            param_range=param_range,
            groups=groups,
            cv=cv,
            scoring=scoring,
            n_jobs=n_jobs,
            pre_dispatch=pre_dispatch,
            verbose=verbose,
            error_score=error_score,
            fit_params=fit_params,
        )
        viz = cls(
            param_name=param_name,
            # `_plot_curve` calls `.min()` on the x data, so coerce the
            # user-provided range to an ndarray.
            param_range=np.asarray(param_range),
            train_scores=train_scores,
            test_scores=test_scores,
            score_name=score_name,
        )
        # `score_name` is not forwarded here: it is already stored on `viz`
        # and picked up by `_plot_curve`.
        return viz.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
"""
Common utilities for testing model selection.
"""
import numpy as np
from sklearn.model_selection import KFold
class OneTimeSplitter:
    """A CV splitter wrapping KFold that can only be iterated once.

    The (train, test) index pairs are produced by a single-use iterator
    built at construction time, so the first call to :meth:`split`
    exhausts it and subsequent calls yield nothing.
    """

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        # Single-use iterator over the KFold (train, test) index pairs.
        self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))

    def split(self, X=None, y=None, groups=None):
        """Yield the remaining splits; exhausted after one full pass."""
        yield from self.indices

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the configured number of splits."""
        return self.n_splits

View File

@@ -0,0 +1,684 @@
import numpy as np
import pytest
from sklearn.base import clone
from sklearn.datasets import (
load_breast_cancer,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
balanced_accuracy_score,
f1_score,
fbeta_score,
make_scorer,
recall_score,
)
from sklearn.model_selection import (
FixedThresholdClassifier,
StratifiedShuffleSplit,
TunedThresholdClassifierCV,
)
from sklearn.model_selection._classification_threshold import (
_CurveScorer,
_fit_and_score_over_thresholds,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
def test_curve_scorer():
    """Check the behaviour of the `_CurveScorer` class."""
    X, y = make_classification(random_state=0)
    model = LogisticRegression().fit(X, y)

    def make_curve_scorer(sign, score_kwargs):
        # All scorers below share the scoring function, response method and
        # number of thresholds; only `sign` and `kwargs` vary.
        return _CurveScorer(
            balanced_accuracy_score,
            sign=sign,
            response_method="predict_proba",
            thresholds=10,
            kwargs=score_kwargs,
        )

    scores, thresholds = make_curve_scorer(1, {})(model, X, y)
    assert thresholds.shape == scores.shape
    # The thresholds are probabilities with extreme values close to 0 and 1.
    # They are not exactly 0 and 1 because they are the extremum of the
    # `estimator.predict_proba(X)` values.
    assert 0 <= thresholds.min() <= 0.01
    assert 0.99 <= thresholds.max() <= 1
    # Balanced accuracy should be between 0.5 and 1 when it is not adjusted.
    assert 0.5 <= scores.min() <= 1

    # Passing kwargs to the scorer works.
    scores, thresholds = make_curve_scorer(1, {"adjusted": True})(model, X, y)
    # Adjusted balanced accuracy should be between 0 and 0.5.
    assert 0 <= scores.min() <= 0.5

    # The sign of the score can be inverted, as done for `neg_*` scorers.
    scores, thresholds = make_curve_scorer(-1, {"adjusted": True})(model, X, y)
    assert all(scores <= 0)
def test_curve_scorer_pos_label(global_random_seed):
    """Check that we propagate properly the `pos_label` parameter to the scorer."""
    n_samples = 30
    X, y = make_classification(
        n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed
    )
    estimator = LogisticRegression().fit(X, y)
    # score the curve once with the minority class (1) as positive...
    curve_scorer = _CurveScorer(
        recall_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={"pos_label": 1},
    )
    scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y)
    # ...and once with the majority class (0) as positive
    curve_scorer = _CurveScorer(
        recall_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={"pos_label": 0},
    )
    scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y)
    # Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal.
    assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all()
    # The min-max range for the thresholds is defined by the probabilities of the
    # `pos_label` class (the column of `predict_proba`).
    y_pred = estimator.predict_proba(X)
    assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0])
    assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0])
    assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1])
    assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1])
    # The recall cannot be negative and `pos_label=1` should have a higher recall
    # since there is less samples to be considered.
    assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min()
    assert scores_pos_label_0.max() == pytest.approx(1.0)
    assert scores_pos_label_1.max() == pytest.approx(1.0)
def test_fit_and_score_over_thresholds_curve_scorers():
    """Check that `_fit_and_score_over_thresholds` returns thresholds in ascending order
    for the different accepted curve scorers."""
    X, y = make_classification(n_samples=100, random_state=0)
    half = 50
    indices_train, indices_val = np.arange(half), np.arange(half, 2 * half)
    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    model = LogisticRegression()
    scores, thresholds = _fit_and_score_over_thresholds(
        model,
        X,
        y,
        fit_params={},
        train_idx=indices_train,
        val_idx=indices_val,
        curve_scorer=scorer,
        score_params={},
    )
    # thresholds must come back sorted in non-decreasing order
    assert np.all(np.diff(thresholds) >= 0)
    # scores are returned as an array of values bounded in [0, 1]
    assert isinstance(scores, np.ndarray)
    assert ((scores >= 0) & (scores <= 1)).all()
def test_fit_and_score_over_thresholds_prefit():
    """Check the behaviour with a prefit classifier."""
    X, y = make_classification(n_samples=100, random_state=0)
    # `train_idx is None` to indicate that the classifier is prefit
    train_idx, val_idx = None, np.arange(50, 100)
    classifier = DecisionTreeClassifier(random_state=0).fit(X, y)
    # make sure that the classifier memorized the full dataset such that
    # we get perfect predictions and thus match the expected score
    assert classifier.score(X[val_idx], y[val_idx]) == pytest.approx(1.0)
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=2,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    assert np.all(thresholds[:-1] <= thresholds[1:])
    # with only 2 thresholds on a memorizing tree, the low extreme gives
    # chance level (0.5) and the high extreme the perfect score (1.0)
    assert_allclose(scores, [0.5, 1.0])
@pytest.mark.usefixtures("enable_slep006")
def test_fit_and_score_over_thresholds_sample_weight():
    """Check that we dispatch the sample-weight to fit and score the classifier."""
    X, y = load_iris(return_X_y=True)
    X, y = X[:100], y[:100]  # only 2 classes
    # create a dataset and repeat twice the sample of class #0
    X_repeated, y_repeated = np.vstack([X, X[y == 0]]), np.hstack([y, y[y == 0]])
    # create a sample weight vector that is equivalent to the repeated dataset
    sample_weight = np.ones_like(y)
    sample_weight[:50] *= 2
    classifier = LogisticRegression()
    train_repeated_idx = np.arange(X_repeated.shape[0])
    val_repeated_idx = np.arange(X_repeated.shape[0])
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    # reference run: physically repeated samples, implicit unit weights
    scores_repeated, thresholds_repeated = _fit_and_score_over_thresholds(
        classifier,
        X_repeated,
        y_repeated,
        fit_params={},
        train_idx=train_repeated_idx,
        val_idx=val_repeated_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    train_idx, val_idx = np.arange(X.shape[0]), np.arange(X.shape[0])
    # routed run: original samples with weight 2 on class #0 — must be equivalent
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier.set_fit_request(sample_weight=True),
        X,
        y,
        fit_params={"sample_weight": sample_weight},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer.set_score_request(sample_weight=True),
        score_params={"sample_weight": sample_weight},
    )
    assert_allclose(thresholds_repeated, thresholds)
    assert_allclose(scores_repeated, scores)
@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
def test_fit_and_score_over_thresholds_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    # both parameters carry the same payload; only the container type varies
    fit_params = {key: _convert_container(y, fit_params_type) for key in ("a", "b")}
    clf = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    clf.set_fit_request(a=True, b=True)
    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    # `CheckingClassifier` raises internally if `a`/`b` are not forwarded to `fit`
    _fit_and_score_over_thresholds(
        clf,
        X,
        y,
        fit_params=fit_params,
        train_idx=np.arange(50),
        val_idx=np.arange(50, 100),
        curve_scorer=scorer,
        score_params={},
    )
@pytest.mark.parametrize(
    "data",
    [
        make_classification(n_classes=3, n_clusters_per_class=1, random_state=0),
        make_multilabel_classification(random_state=0),
    ],
)
def test_tuned_threshold_classifier_no_binary(data):
    """Check that we raise an informative error message for non-binary problem."""
    X, y = data
    model = TunedThresholdClassifierCV(LogisticRegression())
    # both multiclass and multilabel targets must be rejected at fit time
    with pytest.raises(ValueError, match="Only binary classification is supported."):
        model.fit(X, y)
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        (
            {"cv": "prefit", "refit": True},
            ValueError,
            "When cv='prefit', refit cannot be True.",
        ),
        (
            {"cv": 10, "refit": False},
            ValueError,
            "When cv has several folds, refit cannot be False.",
        ),
        (
            {"cv": "prefit", "refit": False},
            NotFittedError,
            "`estimator` must be fitted.",
        ),
    ],
)
def test_tuned_threshold_classifier_conflict_cv_refit(params, err_type, err_msg):
    """Check that we raise an informative error message when `cv` and `refit`
    cannot be used together.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    # each invalid (cv, refit) combination must fail at fit time with the
    # dedicated error type and message
    with pytest.raises(err_type, match=err_msg):
        TunedThresholdClassifierCV(LogisticRegression(), **params).fit(X, y)
@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(), SVC(), GradientBoostingClassifier(n_estimators=4)],
)
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "decision_function"]
)
@pytest.mark.parametrize(
    "ThresholdClassifier", [FixedThresholdClassifier, TunedThresholdClassifierCV]
)
def test_threshold_classifier_estimator_response_methods(
    ThresholdClassifier, estimator, response_method
):
    """Check that `TunedThresholdClassifierCV` exposes the same response methods as the
    underlying estimator.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    model = ThresholdClassifier(estimator=estimator)
    # availability of the response method must match both before and after fit
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    model.fit(X, y)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    if hasattr(model, response_method):
        # the wrapper must delegate to the inner estimator, not alter the values
        y_pred_cutoff = getattr(model, response_method)(X)
        y_pred_underlying_estimator = getattr(model.estimator_, response_method)(X)
        assert_allclose(y_pred_cutoff, y_pred_underlying_estimator)
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
def test_tuned_threshold_classifier_without_constraint_value(response_method):
    """Check that `TunedThresholdClassifierCV` is optimizing a given objective
    metric."""
    X, y = load_breast_cancer(return_X_y=True)
    # remove feature to degrade performances
    X = X[:, :5]
    # make the problem completely imbalanced such that the balanced accuracy is low
    indices_pos = np.flatnonzero(y == 1)
    indices_pos = indices_pos[: indices_pos.size // 50]
    indices_neg = np.flatnonzero(y == 0)
    X = np.vstack([X[indices_neg], X[indices_pos]])
    y = np.hstack([y[indices_neg], y[indices_pos]])
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    thresholds = 100
    model = TunedThresholdClassifierCV(
        estimator=lr,
        scoring="balanced_accuracy",
        response_method=response_method,
        thresholds=thresholds,
        store_cv_results=True,
    )
    # tuning the cut-off must improve the optimized metric over the vanilla model
    score_optimized = balanced_accuracy_score(y, model.fit(X, y).predict(X))
    score_baseline = balanced_accuracy_score(y, lr.predict(X))
    assert score_optimized > score_baseline
    # one entry per candidate threshold is stored when `store_cv_results=True`
    assert model.cv_results_["thresholds"].shape == (thresholds,)
    assert model.cv_results_["scores"].shape == (thresholds,)
def test_tuned_threshold_classifier_metric_with_parameter():
    """Check that we can pass a metric with a parameter in addition check that
    `f_beta` with `beta=1` is equivalent to `f1` and different from `f_beta` with
    `beta=2`.
    """
    X, y = load_breast_cancer(return_X_y=True)
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)

    def tune_with(scorer):
        # helper: tune the decision threshold of the prefit pipeline w.r.t. `scorer`
        return TunedThresholdClassifierCV(estimator=lr, scoring=scorer).fit(X, y)

    model_fbeta_1 = tune_with(make_scorer(fbeta_score, beta=1))
    model_fbeta_2 = tune_with(make_scorer(fbeta_score, beta=2))
    model_f1 = tune_with(make_scorer(f1_score))
    # F-beta with beta=1 is F1, so the tuned thresholds must coincide...
    assert model_fbeta_1.best_threshold_ == pytest.approx(model_f1.best_threshold_)
    # ...while beta=2 optimizes a different trade-off and lands elsewhere
    assert model_fbeta_1.best_threshold_ != pytest.approx(
        model_fbeta_2.best_threshold_
    )
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
@pytest.mark.parametrize(
    "metric",
    [
        make_scorer(balanced_accuracy_score),
        make_scorer(f1_score, pos_label="cancer"),
    ],
)
def test_tuned_threshold_classifier_with_string_targets(response_method, metric):
    """Check that targets represented by str are properly managed.
    Also, check with several metrics to be sure that `pos_label` is properly
    dispatched.
    """
    X, y = load_breast_cancer(return_X_y=True)
    # Encode numeric targets by meaningful strings. We purposely designed the class
    # names such that the `pos_label` is the first alphabetically sorted class and thus
    # encoded as 0.
    classes = np.array(["cancer", "healthy"], dtype=object)
    y = classes[y]
    model = TunedThresholdClassifierCV(
        estimator=make_pipeline(StandardScaler(), LogisticRegression()),
        scoring=metric,
        response_method=response_method,
        thresholds=100,
    ).fit(X, y)
    # fitted classes and predictions must be expressed with the string labels
    assert_array_equal(model.classes_, np.sort(classes))
    y_pred = model.predict(X)
    assert_array_equal(np.unique(y_pred), np.sort(classes))
@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("with_sample_weight", [True, False])
def test_tuned_threshold_classifier_refit(with_sample_weight, global_random_seed):
    """Check the behaviour of the `refit` parameter."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(n_samples=100, random_state=0)
    if with_sample_weight:
        # draw weights and take the absolute value in-place to keep them valid
        sample_weight = rng.randn(X.shape[0])
        sample_weight = np.abs(sample_weight, out=sample_weight)
    else:
        sample_weight = None
    # check that `estimator_` is fitted on the full dataset when `refit=True`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model = TunedThresholdClassifierCV(estimator, refit=True).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    estimator.fit(X, y, sample_weight=sample_weight)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
    assert_allclose(model.estimator_.intercept_, estimator.intercept_)
    # check that `estimator_` was not altered when `refit=False` and `cv="prefit"`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    estimator.fit(X, y, sample_weight=sample_weight)
    coef = estimator.coef_.copy()
    model = TunedThresholdClassifierCV(estimator, cv="prefit", refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is estimator
    assert_allclose(model.estimator_.coef_, coef)
    # check that we train `estimator_` on the training split of a given cross-validation
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    cv = [
        (np.arange(50), np.arange(50, 100)),
    ]  # single split
    model = TunedThresholdClassifierCV(estimator, cv=cv, refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    if with_sample_weight:
        # only the training-fold weights should reach the inner estimator
        sw_train = sample_weight[cv[0][0]]
    else:
        sw_train = None
    estimator.fit(X[cv[0][0]], y[cv[0][0]], sample_weight=sw_train)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
@pytest.mark.usefixtures("enable_slep006")
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
def test_tuned_threshold_classifier_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    # same payload under both keys; only the container type varies
    fit_params = {name: _convert_container(y, fit_params_type) for name in ("a", "b")}
    checking_clf = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    checking_clf.set_fit_request(a=True, b=True)
    # `CheckingClassifier` raises internally if `a`/`b` are not forwarded to `fit`
    TunedThresholdClassifierCV(checking_clf).fit(X, y, **fit_params)
@pytest.mark.usefixtures("enable_slep006")
def test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence():
    """Check that removing some samples from the dataset `X` is equivalent to
    passing a `sample_weight` with a factor 0."""
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue
    X = StandardScaler().fit_transform(X)
    # Only use 2 classes and select samples such that 2-fold cross-validation
    # split will lead to an equivalence with a `sample_weight` of 0
    X = np.vstack((X[:40], X[50:90]))
    y = np.hstack((y[:40], y[50:90]))
    # every odd-indexed sample gets weight 0, i.e. should be ignored entirely
    sample_weight = np.zeros_like(y)
    sample_weight[::2] = 1
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model_without_weights = TunedThresholdClassifierCV(estimator, cv=2)
    model_with_weights = clone(model_without_weights)
    model_with_weights.fit(X, y, sample_weight=sample_weight)
    # fitting on only the even-indexed samples must give the same model
    model_without_weights.fit(X[::2], y[::2])
    assert_allclose(
        model_with_weights.estimator_.coef_, model_without_weights.estimator_.coef_
    )
    y_pred_with_weights = model_with_weights.predict_proba(X)
    y_pred_without_weights = model_without_weights.predict_proba(X)
    assert_allclose(y_pred_with_weights, y_pred_without_weights)
def test_tuned_threshold_classifier_thresholds_array():
    """Check that we can pass an array to `thresholds` and it is used as candidate
    threshold internally."""
    X, y = make_classification(random_state=0)
    candidate_thresholds = np.linspace(0, 1, 11)
    model = TunedThresholdClassifierCV(
        LogisticRegression(),
        thresholds=candidate_thresholds,
        response_method="predict_proba",
        store_cv_results=True,
    )
    model.fit(X, y)
    # the user-provided grid must be stored verbatim in the CV results
    assert_allclose(model.cv_results_["thresholds"], candidate_thresholds)
@pytest.mark.parametrize("store_cv_results", [True, False])
def test_tuned_threshold_classifier_store_cv_results(store_cv_results):
    """Check that if `cv_results_` exists depending on `store_cv_results`."""
    X, y = make_classification(random_state=0)
    model = TunedThresholdClassifierCV(
        LogisticRegression(), store_cv_results=store_cv_results
    ).fit(X, y)
    # the attribute is created if and only if `store_cv_results=True`
    assert hasattr(model, "cv_results_") == store_cv_results
def test_tuned_threshold_classifier_cv_float():
    """Check the behaviour when `cv` is set to a float.

    A float `cv` is interpreted as the test size of a single stratified
    shuffle split.
    """
    X, y = make_classification(random_state=0)
    # case where `refit=False` and cv is a float: the underlying estimator will be fit
    # on the training set given by a ShuffleSplit. We check that we get the same model
    # coefficients.
    test_size = 0.3
    estimator = LogisticRegression()
    tuned_model = TunedThresholdClassifierCV(
        estimator, cv=test_size, refit=False, random_state=0
    ).fit(X, y)
    # NOTE: the redundant second `tuned_model.fit(X, y)` that used to follow the
    # chained fit above has been removed — it retrained the model for no reason.
    # reproduce the internal split to fit a reference model on the same train subset
    cv = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=0)
    train_idx, val_idx = next(cv.split(X, y))
    cloned_estimator = clone(estimator).fit(X[train_idx], y[train_idx])
    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
    # case where `refit=True`, then the underlying estimator is fitted on the full
    # dataset.
    tuned_model.set_params(refit=True).fit(X, y)
    cloned_estimator = clone(estimator).fit(X, y)
    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
def test_tuned_threshold_classifier_error_constant_predictor():
    """Check that we raise a ValueError if the underlying classifier returns constant
    probabilities such that we cannot find any threshold.
    """
    X, y = make_classification(random_state=0)
    # a constant dummy yields identical probabilities for every sample
    constant_clf = DummyClassifier(strategy="constant", constant=1)
    model = TunedThresholdClassifierCV(constant_clf, response_method="predict_proba")
    with pytest.raises(
        ValueError, match="The provided estimator makes constant predictions"
    ):
        model.fit(X, y)
@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_fixed_threshold_classifier_equivalence_default(response_method):
    """Check that `FixedThresholdClassifier` has the same behaviour as the vanilla
    classifier.
    """
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    classifier_default_threshold = FixedThresholdClassifier(
        estimator=clone(classifier), response_method=response_method
    )
    classifier_default_threshold.fit(X, y)
    # emulate the response method that should take into account the `pos_label`
    if response_method in ("auto", "predict_proba"):
        # probabilities are cut at 0.5 by default
        y_score = classifier_default_threshold.predict_proba(X)[:, 1]
        threshold = 0.5
    else:  # response_method == "decision_function"
        # decision values are cut at 0.0 by default
        y_score = classifier_default_threshold.decision_function(X)
        threshold = 0.0
    y_pred_lr = (y_score >= threshold).astype(int)
    assert_allclose(classifier_default_threshold.predict(X), y_pred_lr)
@pytest.mark.parametrize(
    "response_method, threshold", [("predict_proba", 0.7), ("decision_function", 2.0)]
)
@pytest.mark.parametrize("pos_label", [0, 1])
def test_fixed_threshold_classifier(response_method, threshold, pos_label):
    """Check that applying `predict` lead to the same prediction as applying the
    threshold to the output of the response method.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    logistic_regression = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(
        estimator=clone(logistic_regression),
        threshold=threshold,
        response_method=response_method,
        pos_label=pos_label,
    ).fit(X, y)
    # check that the underlying estimator is the same
    assert_allclose(model.estimator_.coef_, logistic_regression.coef_)
    # emulate the response method that should take into account the `pos_label`
    if response_method == "predict_proba":
        y_score = model.predict_proba(X)[:, pos_label]
    else:  # response_method == "decision_function"
        # decision values are oriented towards class 1; flip them for pos_label=0
        y_score = model.decision_function(X)
        y_score = y_score if pos_label == 1 else -y_score
    # create a mapping from boolean values to class labels
    map_to_label = np.array([0, 1]) if pos_label == 1 else np.array([1, 0])
    y_pred_lr = map_to_label[(y_score >= threshold).astype(int)]
    assert_allclose(model.predict(X), y_pred_lr)
    # the response methods themselves must stay untouched by the threshold
    for method in ("predict_proba", "predict_log_proba", "decision_function"):
        assert_allclose(
            getattr(model, method)(X), getattr(logistic_regression, method)(X)
        )
        assert_allclose(
            getattr(model.estimator_, method)(X),
            getattr(logistic_regression, method)(X),
        )
@pytest.mark.usefixtures("enable_slep006")
def test_fixed_threshold_classifier_metadata_routing():
    """Check that everything works with metadata routing."""
    X, y = make_classification(random_state=0)
    sample_weight = np.ones_like(y)
    sample_weight[::2] = 2
    # reference: fit the bare classifier with weights routed explicitly
    reference = LogisticRegression().set_fit_request(sample_weight=True)
    reference.fit(X, y, sample_weight=sample_weight)
    # the wrapper must forward `sample_weight` to its inner estimator
    wrapper = FixedThresholdClassifier(estimator=clone(reference))
    wrapper.fit(X, y, sample_weight=sample_weight)
    assert_allclose(wrapper.estimator_.coef_, reference.coef_)
# ---------------------------------------------------------------------------
# NOTE: the lines that stood here ("View File", "@@ -0,0 +1,572 @@") were
# artifacts from a scraped git diff page, not Python code. The content below
# belongs to a separate test module (model_selection curve-display tests).
# ---------------------------------------------------------------------------
import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.model_selection import (
LearningCurveDisplay,
ValidationCurveDisplay,
learning_curve,
validation_curve,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_allclose, assert_array_equal
@pytest.fixture
def data():
    # Iris features/targets shuffled deterministically so that fixed splits
    # used by the tests below are not ordered by class.
    return shuffle(*load_iris(return_X_y=True), random_state=0)
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        ({"std_display_style": "invalid"}, ValueError, "Unknown std_display_style:"),
        ({"score_type": "invalid"}, ValueError, "Unknown score_type:"),
    ],
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_parameters_validation(
    pyplot, data, params, err_type, err_msg, CurveDisplay, specific_params
):
    """Check that we raise a proper error when passing invalid parameters."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    # invalid display options must be rejected at `from_estimator` time
    with pytest.raises(err_type, match=err_msg):
        CurveDisplay.from_estimator(estimator, X, y, **specific_params, **params)
def test_learning_curve_display_default_usage(pyplot, data):
    """Check the default usage of the LearningCurveDisplay class."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    display = LearningCurveDisplay.from_estimator(
        estimator, X, y, train_sizes=train_sizes
    )
    import matplotlib as mpl
    # by default, curves are drawn as lines with a semi-transparent std band
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert isinstance(display.fill_between_, list)
    for fill in display.fill_between_:
        assert isinstance(fill, mpl.collections.PolyCollection)
        assert fill.get_alpha() == 0.5
    # default labelling and legend
    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == "Number of samples in the training set"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]
    # the stored data must match a direct call to `learning_curve`
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    assert_array_equal(display.train_sizes, train_sizes_abs)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
def test_validation_curve_display_default_usage(pyplot, data):
    """Check the default usage of the ValidationCurveDisplay class."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    display = ValidationCurveDisplay.from_estimator(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    import matplotlib as mpl
    # by default, curves are drawn as lines with a semi-transparent std band
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert isinstance(display.fill_between_, list)
    for fill in display.fill_between_:
        assert isinstance(fill, mpl.collections.PolyCollection)
        assert fill.get_alpha() == 0.5
    # the x-axis is labelled with the varied hyper-parameter name
    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == f"{param_name}"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]
    # the stored data must match a direct call to `validation_curve`
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    assert_array_equal(display.param_range, param_range)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_negate_score(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the `negate_score` parameter calling `from_estimator` and
    `plot`.
    """
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
    negate_score = False
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    positive_scores = display.lines_[0].get_data()[1]
    assert (positive_scores >= 0).all()
    assert display.ax_.get_ylabel() == "Score"
    negate_score = True
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    # negating flips the sign of the plotted values and adapts the y-label
    negative_scores = display.lines_[0].get_data()[1]
    assert (negative_scores <= 0).all()
    assert_allclose(negative_scores, -positive_scores)
    assert display.ax_.get_ylabel() == "Negative score"
    negate_score = False
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    assert display.ax_.get_ylabel() == "Score"
    display.plot(negate_score=not negate_score)
    # re-plotting with `negate_score=True` flips the values but keeps the
    # y-label that was chosen at `from_estimator` time
    assert display.ax_.get_ylabel() == "Score"
    assert (display.lines_[0].get_data()[1] < 0).all()
@pytest.mark.parametrize(
    "score_name, ylabel", [(None, "Score"), ("Accuracy", "Accuracy")]
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_score_name(
    pyplot, data, score_name, ylabel, CurveDisplay, specific_params
):
    """Check that we can overwrite the default score name shown on the y-axis."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )
    # the y-axis label follows the requested score name ("Score" when None)
    assert display.ax_.get_ylabel() == ylabel
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )
    # the resolved name is also stored on the display object itself
    assert display.score_name == ylabel
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_learning_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    # reference values computed directly with `learning_curve`
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    # score_type="train": only the train curve is shown
    score_type = "train"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, train_scores.mean(axis=1))
    # score_type="test": only the test curve is shown
    score_type = "test"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, test_scores.mean(axis=1))
    # score_type="both": train and test curves are shown together
    score_type = "both"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]
    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
    assert_array_equal(x_data_train, train_sizes_abs)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, train_sizes_abs)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_validation_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    # reference values computed directly with `validation_curve`
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    # score_type="train": only the train curve is shown
    score_type = "train"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, train_scores.mean(axis=1))
    # score_type="test": only the test curve is shown
    score_type = "test"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, test_scores.mean(axis=1))
    # score_type="both": train and test curves are shown together
    score_type = "both"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]
    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
    assert_array_equal(x_data_train, param_range)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, param_range)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize(
    "CurveDisplay, specific_params, expected_xscale",
    [
        (
            ValidationCurveDisplay,
            {"param_name": "max_depth", "param_range": np.arange(1, 5)},
            "linear",
        ),
        (LearningCurveDisplay, {"train_sizes": np.linspace(0.1, 0.9, num=5)}, "linear"),
        (
            ValidationCurveDisplay,
            {
                "param_name": "max_depth",
                "param_range": np.round(np.logspace(0, 2, num=5)).astype(np.int64),
            },
            "log",
        ),
        (LearningCurveDisplay, {"train_sizes": np.logspace(-1, 0, num=5)}, "log"),
    ],
)
def test_curve_display_xscale_auto(
    pyplot, data, CurveDisplay, specific_params, expected_xscale
):
    """Check the behaviour of the x-axis scaling depending on the data provided."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(estimator, X, y, **specific_params)
    # evenly spaced x values yield a linear axis; log-spaced values a log axis
    assert display.ax_.get_xscale() == expected_xscale
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_std_display_style(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the parameter `std_display_style`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    import matplotlib as mpl

    # std_display_style=None: plain lines only; no error bars, no bands.
    disp = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, std_display_style=None
    )
    assert len(disp.lines_) == 2
    assert all(isinstance(line, mpl.lines.Line2D) for line in disp.lines_)
    assert disp.errorbar_ is None
    assert disp.fill_between_ is None
    _, labels = disp.ax_.get_legend_handles_labels()
    assert len(labels) == 2

    # std_display_style="fill_between": lines plus two shaded std bands.
    disp = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, std_display_style="fill_between"
    )
    assert len(disp.lines_) == 2
    assert all(isinstance(line, mpl.lines.Line2D) for line in disp.lines_)
    assert disp.errorbar_ is None
    assert len(disp.fill_between_) == 2
    assert all(
        isinstance(band, mpl.collections.PolyCollection)
        for band in disp.fill_between_
    )
    _, labels = disp.ax_.get_legend_handles_labels()
    assert len(labels) == 2

    # std_display_style="errorbar": error-bar containers replace the lines.
    disp = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, std_display_style="errorbar"
    )
    assert disp.lines_ is None
    assert len(disp.errorbar_) == 2
    assert all(
        isinstance(eb, mpl.container.ErrorbarContainer) for eb in disp.errorbar_
    )
    assert disp.fill_between_ is None
    _, labels = disp.ax_.get_legend_handles_labels()
    assert len(labels) == 2
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the different plotting keyword arguments: `line_kw`,
    `fill_between_kw`, and `errorbar_kw`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    # With the fill_between style, both the line and the shaded band must
    # honour the requested colors.
    disp = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style="fill_between",
        line_kw={"color": "red"},
        fill_between_kw={"color": "red", "alpha": 1.0},
    )
    assert disp.lines_[0].get_color() == "red"
    assert_allclose(
        disp.fill_between_[0].get_facecolor(),
        [[1.0, 0.0, 0.0, 1.0]],  # RGBA of fully opaque red
    )

    # With the errorbar style, the error-bar line must honour `errorbar_kw`.
    disp = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style="errorbar",
        errorbar_kw={"color": "red"},
    )
    assert disp.errorbar_[0].lines[0].get_color() == "red"
@pytest.mark.parametrize(
    "param_range, xscale",
    [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")],
)
def test_validation_curve_xscale_from_param_range_provided_as_a_list(
    pyplot, data, param_range, xscale
):
    """Check the induced xscale from the provided param_range values."""
    X, y = data
    clf = DecisionTreeClassifier(random_state=0)

    disp = ValidationCurveDisplay.from_estimator(
        clf,
        X,
        y,
        param_name="max_depth",
        param_range=param_range,
    )

    # Positive log-spaced lists induce "log"; ranges spanning zero, "symlog".
    assert disp.ax_.get_xscale() == xscale
@pytest.mark.parametrize(
    "Display, params",
    [
        (LearningCurveDisplay, {}),
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
    ],
)
def test_subclassing_displays(pyplot, data, Display, params):
    """Check that named constructors return the correct type when subclassed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    X, y = data
    clf = DecisionTreeClassifier(random_state=0)

    class DisplaySubclass(Display):
        pass

    # The factory must instantiate the subclass, not the parent display type.
    obj = DisplaySubclass.from_estimator(clf, X, y, **params)
    assert isinstance(obj, DisplaySubclass)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,856 @@
from math import ceil
import numpy as np
import pytest
from scipy.stats import expon, norm, randint
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.model_selection import (
GroupKFold,
GroupShuffleSplit,
HalvingGridSearchCV,
HalvingRandomSearchCV,
KFold,
LeaveOneGroupOut,
LeavePGroupsOut,
ShuffleSplit,
StratifiedKFold,
StratifiedShuffleSplit,
)
from sklearn.model_selection._search_successive_halving import (
_SubsampleMetaSplitter,
_top_k,
)
from sklearn.model_selection.tests.test_search import (
check_cv_results_array_types,
check_cv_results_keys,
)
from sklearn.svm import SVC, LinearSVC
class FastClassifier(DummyClassifier):
    """Dummy classifier that accepts parameters a, b, ... z.

    These parameters don't affect the predictions; they exist only so that
    large parameter grids can be searched quickly in tests.
    """

    # Extend the parent constraints so every single-letter parameter from
    # "a" to "z" is accepted without validation.
    _parameter_constraints: dict = dict(
        DummyClassifier._parameter_constraints,
        **{
            chr(code): "no_validation"  # type: ignore
            for code in range(ord("a"), ord("z") + 1)
        },
    )

    def __init__(
        self, strategy="stratified", random_state=None, constant=None, **kwargs
    ):
        # The a-z kwargs are deliberately ignored.
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def get_params(self, deep=False):
        params = super().get_params(deep=deep)
        # Report a placeholder value for each of the a-z parameters.
        params.update(
            (chr(code), "whatever") for code in range(ord("a"), ord("z") + 1)
        )
        return params
class SometimesFailClassifier(DummyClassifier):
    """Dummy classifier that raises in `fit` and/or `predict` on demand.

    Used to check how the halving searches handle fit/score failures,
    which surface as NaN scores in `cv_results_`.
    """

    def __init__(
        self,
        strategy="stratified",
        random_state=None,
        constant=None,
        n_estimators=10,
        fail_fit=False,
        fail_predict=False,
        a=0,
    ):
        # `fail_fit` / `fail_predict` toggle the simulated failures;
        # `n_estimators` and `a` only serve as searchable parameters.
        self.fail_fit = fail_fit
        self.fail_predict = fail_predict
        self.n_estimators = n_estimators
        self.a = a
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def fit(self, X, y):
        # Simulate a failure during fitting when requested.
        if self.fail_fit:
            raise Exception("fitting failed")
        return super().fit(X, y)

    def predict(self, X):
        # Simulate a failure during prediction when requested.
        if self.fail_predict:
            raise Exception("predict failed")
        return super().predict(X)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.FitFailedWarning")
@pytest.mark.filterwarnings("ignore:Scoring failed:UserWarning")
@pytest.mark.filterwarnings("ignore:One or more of the:UserWarning")
@pytest.mark.parametrize("HalvingSearch", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize("fail_at", ("fit", "predict"))
def test_nan_handling(HalvingSearch, fail_at):
    """Check the selection of the best scores in presence of failure represented by
    NaN values."""
    X, y = make_classification(n_samples=1_000, random_state=0)

    search = HalvingSearch(
        SometimesFailClassifier(),
        {f"fail_{fail_at}": [False, True], "a": range(3)},
        resource="n_estimators",
        max_resources=6,
        min_resources=1,
        factor=2,
    )
    search.fit(X, y)

    # Estimators that failed during fit/predict must always rank lower than
    # the ones for which fit/predict succeeded.
    assert not search.best_params_[f"fail_{fail_at}"]

    scores = search.cv_results_["mean_test_score"]
    ranks = search.cv_results_["rank_test_score"]
    failed = np.isnan(scores)

    # The simulated failures must produce NaN scores.
    assert failed.any()
    nan_ranks = np.unique(ranks[failed])
    # All NaN scores share a single rank, and it is the worst (largest) one.
    assert nan_ranks.shape[0] == 1
    assert (nan_ranks[0] >= ranks).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "aggressive_elimination,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_required_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_remaining_candidates,"
        "expected_n_candidates,"
        "expected_n_resources,"
    ),
    [
        # notice how it loops at the beginning
        # also, the number of candidates evaluated at the last iteration is
        # <= factor
        (True, "limited", 4, 4, 3, 1, [60, 20, 7, 3], [20, 20, 60, 180]),
        # no aggressive elimination: we end up with less iterations, and
        # the number of candidates at the last iter is > factor, which isn't
        # ideal
        (False, "limited", 3, 4, 3, 3, [60, 20, 7], [20, 60, 180]),
        # # When the amount of resource isn't limited, aggressive_elimination
        # # has no effect. Here the default min_resources='exhaust' will take
        # # over.
        (True, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
        (False, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
    ],
)
def test_aggressive_elimination(
    Est,
    aggressive_elimination,
    max_resources,
    expected_n_iterations,
    expected_n_required_iterations,
    expected_n_possible_iterations,
    expected_n_remaining_candidates,
    expected_n_candidates,
    expected_n_resources,
):
    # Check the effect of the aggressive_elimination parameter.
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    max_resources = 180 if max_resources == "limited" else n_samples

    sh = Est(
        FastClassifier(),
        {"a": ("l1", "l2"), "b": list(range(30))},
        aggressive_elimination=aggressive_elimination,
        max_resources=max_resources,
        factor=3,
    )
    sh.set_params(verbose=True)  # just for test coverage
    if Est is HalvingRandomSearchCV:
        # sample as many candidates as the full grid contains
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    assert sh.n_candidates_ == expected_n_candidates
    assert sh.n_remaining_candidates_ == expected_n_remaining_candidates
    # at most `factor` candidates remain after the last elimination round
    assert sh.n_remaining_candidates_ == ceil(sh.n_candidates_[-1] / sh.factor)
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "min_resources,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_resources,"
    ),
    [
        # with enough resources
        ("smallest", "auto", 2, 4, [20, 60]),
        # with enough resources but min_resources set manually
        (50, "auto", 2, 3, [50, 150]),
        # without enough resources, only one iteration can be done
        ("smallest", 30, 1, 1, [20]),
        # with exhaust: use as much resources as possible at the last iter
        ("exhaust", "auto", 2, 2, [333, 999]),
        ("exhaust", 1000, 2, 2, [333, 999]),
        ("exhaust", 999, 2, 2, [333, 999]),
        ("exhaust", 600, 2, 2, [200, 600]),
        ("exhaust", 599, 2, 2, [199, 597]),
        ("exhaust", 300, 2, 2, [100, 300]),
        ("exhaust", 60, 2, 2, [20, 60]),
        ("exhaust", 50, 1, 1, [20]),
        ("exhaust", 20, 1, 1, [20]),
    ],
)
def test_min_max_resources(
    Est,
    min_resources,
    max_resources,
    expected_n_iterations,
    expected_n_possible_iterations,
    expected_n_resources,
):
    # Check how min_resources and max_resources drive the amount of resources
    # allocated at each iteration.
    X, y = make_classification(n_samples=1000, random_state=0)

    sh = Est(
        FastClassifier(),
        {"a": [1, 2], "b": [1, 2, 3]},
        factor=3,
        min_resources=min_resources,
        max_resources=max_resources,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=6)  # same number as with the grid

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    # 6 candidate combinations with factor=3 need 2 elimination rounds.
    assert sh.n_required_iterations_ == 2
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    if min_resources == "exhaust":
        assert sh.n_possible_iterations_ == sh.n_iterations_ == len(sh.n_resources_)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
@pytest.mark.parametrize(
    "max_resources, n_iterations, n_possible_iterations",
    [
        ("auto", 5, 9),  # all resources are used
        (1024, 5, 9),
        (700, 5, 8),
        (512, 5, 8),
        (511, 5, 7),
        (32, 4, 4),
        (31, 3, 3),
        (16, 3, 3),
        (4, 1, 1),  # max_resources == min_resources, only one iteration is
        # possible
    ],
)
def test_n_iterations(Est, max_resources, n_iterations, n_possible_iterations):
    # Check how many iterations are actually run depending on max_resources.
    X, y = make_classification(n_samples=1024, random_state=1)

    sh = Est(
        FastClassifier(),
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        factor=2,
        max_resources=max_resources,
        min_resources=4,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=20)  # same as for HalvingGridSearchCV

    sh.fit(X, y)

    # 20 candidates with factor=2 would require 5 elimination rounds.
    assert sh.n_required_iterations_ == 5
    assert sh.n_iterations_ == n_iterations
    assert sh.n_possible_iterations_ == n_possible_iterations
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_resource_parameter(Est):
    # Check that an estimator parameter can be used as the resource.
    X, y = make_classification(n_samples=1000, random_state=0)
    base_estimator = FastClassifier()

    sh = Est(
        base_estimator,
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        resource="c",
        max_resources=10,
        factor=3,
    )
    sh.fit(X, y)

    assert set(sh.n_resources_) == {1, 3, 9}
    # The resource value must agree between the params dict and the dedicated
    # cv_results_ columns.
    results = sh.cv_results_
    for n_res, params, param_c in zip(
        results["n_resources"], results["params"], results["param_c"]
    ):
        assert n_res == params["c"] == param_c

    # An unknown estimator parameter cannot serve as the resource.
    with pytest.raises(
        ValueError, match="Cannot use resource=1234 which is not supported "
    ):
        sh = HalvingGridSearchCV(
            base_estimator,
            {"a": [1, 2], "b": list(range(10))},
            cv=2,
            resource="1234",
            max_resources=10,
        )
        sh.fit(X, y)

    # The resource cannot simultaneously be part of the searched grid.
    with pytest.raises(
        ValueError,
        match=(
            "Cannot use parameter c as the resource since it is part "
            "of the searched parameters."
        ),
    ):
        sh = HalvingGridSearchCV(
            base_estimator,
            {"a": [1, 2], "b": [1, 2], "c": [1, 3]},
            cv=2,
            resource="c",
            max_resources=10,
        )
        sh.fit(X, y)
@pytest.mark.parametrize(
    "max_resources, n_candidates, expected_n_candidates",
    [
        (512, "exhaust", 128),  # generate exactly as much as needed
        (32, "exhaust", 8),
        (32, 8, 8),
        (32, 7, 7),  # ask for less than what we could
        (32, 9, 9),  # ask for more than 'reasonable'
    ],
)
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    # Check that the number of generated candidates matches expectations.
    X, y = make_classification(n_samples=1024, random_state=0)

    sh = HalvingRandomSearchCV(
        FastClassifier(),
        {"a": norm, "b": norm},
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    sh.fit(X, y)

    assert sh.n_candidates_[0] == expected_n_candidates
    if n_candidates == "exhaust":
        # 'exhaust' must make the last iteration consume the whole budget.
        assert sh.n_resources_[-1] == max_resources
@pytest.mark.parametrize(
    "param_distributions, expected_n_candidates",
    [
        ({"a": [1, 2]}, 2),  # all lists, sample less than n_candidates
        ({"a": randint(1, 3)}, 10),  # not all list, respect n_candidates
    ],
)
def test_random_search_discrete_distributions(
    param_distributions, expected_n_candidates
):
    # When asking for more candidates than possible, the number actually
    # sampled depends on whether the distributions are 'all lists' or not
    # (see ParameterSampler for details). This partly duplicates the
    # ParameterSampler checks, but interaction bugs were discovered during
    # the development of SH.
    X, y = make_classification(n_samples=1024, random_state=0)

    sh = HalvingRandomSearchCV(FastClassifier(), param_distributions, n_candidates=10)
    sh.fit(X, y)

    assert sh.n_candidates_[0] == expected_n_candidates
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"resource": "not_a_parameter"},
            "Cannot use resource=not_a_parameter which is not supported",
        ),
        (
            {"resource": "a", "max_resources": 100},
            "Cannot use parameter a as the resource since it is part of",
        ),
        (
            {"max_resources": "auto", "resource": "b"},
            "resource can only be 'n_samples' when max_resources='auto'",
        ),
        (
            {"min_resources": 15, "max_resources": 14},
            "min_resources_=15 is greater than max_resources_=14",
        ),
        ({"cv": KFold(shuffle=True)}, "must yield consistent folds"),
        ({"cv": ShuffleSplit()}, "must yield consistent folds"),
    ],
)
def test_input_errors(Est, params, expected_error_message):
    # Invalid configurations must raise at fit time with an explicit message.
    X, y = make_classification(100)
    search = Est(FastClassifier(), {"a": [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"n_candidates": "exhaust", "min_resources": "exhaust"},
            "cannot be both set to 'exhaust'",
        ),
    ],
)
def test_input_errors_randomized(params, expected_error_message):
    # Error cases specific to HalvingRandomSearchCV.
    X, y = make_classification(100)
    search = HalvingRandomSearchCV(FastClassifier(), {"a": [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "fraction, subsample_test, expected_train_size, expected_test_size",
    [
        (0.5, True, 40, 10),
        (0.5, False, 40, 20),
        (0.2, True, 16, 4),
        (0.2, False, 16, 20),
    ],
)
def test_subsample_splitter_shapes(
    fraction, subsample_test, expected_train_size, expected_test_size
):
    # Splits produced by _SubsampleMetaSplitter must have the expected sizes.
    n_samples = 100
    X, y = make_classification(n_samples)
    splitter = _SubsampleMetaSplitter(
        base_cv=KFold(5),
        fraction=fraction,
        subsample_test=subsample_test,
        random_state=None,
    )

    for train_idx, test_idx in splitter.split(X, y):
        assert train_idx.shape[0] == expected_train_size
        assert test_idx.shape[0] == expected_test_size
        if subsample_test:
            # Both sides subsampled: together they cover `fraction` of X.
            assert train_idx.shape[0] + test_idx.shape[0] == int(n_samples * fraction)
        else:
            # The test fold keeps the full size produced by the base CV.
            assert test_idx.shape[0] == n_samples // splitter.base_cv.get_n_splits()
@pytest.mark.parametrize("subsample_test", (True, False))
def test_subsample_splitter_determinism(subsample_test):
    # Consistency of _SubsampleMetaSplitter across calls to split():
    # - train sets are allowed to differ (they're always sampled with a
    #   different fraction anyway)
    # - when the test set is not subsampled, it must always be the same; this
    #   is the important check, and is guaranteed by the determinism of the
    #   base_cv.
    # Note: both train and test splits could be made fully reproducible by
    # drawing an int seed in _SubsampleMetaSplitter.__init__.
    X, y = make_classification(100)
    splitter = _SubsampleMetaSplitter(
        base_cv=KFold(5), fraction=0.5, subsample_test=subsample_test, random_state=None
    )

    first_pass = list(splitter.split(X, y, groups=None))
    second_pass = list(splitter.split(X, y, groups=None))

    for (train_a, test_a), (train_b, test_b) in zip(first_pass, second_pass):
        assert not np.all(train_a == train_b)
        if subsample_test:
            assert not np.all(test_a == test_b)
        else:
            assert np.all(test_a == test_b)
            assert np.all(X[test_a] == X[test_b])
@pytest.mark.parametrize(
    "k, itr, expected",
    [
        (1, 0, ["c"]),
        (2, 0, ["a", "c"]),
        (4, 0, ["d", "b", "a", "c"]),
        (10, 0, ["d", "b", "a", "c"]),
        (1, 1, ["e"]),
        (2, 1, ["f", "e"]),
        (10, 1, ["f", "e"]),
        (1, 2, ["i"]),
        (10, 2, ["g", "h", "i"]),
    ],
)
def test_top_k(k, itr, expected):
    # Synthetic (not 'real world') results dict spanning three iterations.
    results = {
        "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2],
        "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9],
        "params": ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
    }
    # _top_k must return the (at most) k best candidates of iteration `itr`.
    assert np.all(_top_k(results, k=k, itr=itr) == expected)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_cv_results(Est):
    # test that the cv_results_ matches correctly the logic of the
    # tournament: in particular that the candidates continued in each
    # successive iteration are those that were best in the previous iteration
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(0)
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()
    # generate random scores: we want to avoid ties, which would otherwise
    # mess with the ordering and make testing harder
    def scorer(est, X, y):
        return rng.rand()
    sh = Est(base_estimator, param_grid, factor=2, scoring=scorer)
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    # non-regression check for
    # https://github.com/scikit-learn/scikit-learn/issues/19203
    assert isinstance(sh.cv_results_["iter"], np.ndarray)
    assert isinstance(sh.cv_results_["n_resources"], np.ndarray)
    cv_results_df = pd.DataFrame(sh.cv_results_)
    # just make sure we don't have ties
    assert len(cv_results_df["mean_test_score"].unique()) == len(cv_results_df)
    # pivot to one row per candidate and one column per iteration
    cv_results_df["params_str"] = cv_results_df["params"].apply(str)
    table = cv_results_df.pivot(
        index="params_str", columns="iter", values="mean_test_score"
    )
    # table looks like something like this:
    # iter                    0      1       2        3   4   5
    # params_str
    # {'a': 'l2', 'b': 23} 0.75    NaN     NaN      NaN NaN NaN
    # {'a': 'l1', 'b': 30} 0.90  0.875     NaN      NaN NaN NaN
    # {'a': 'l1', 'b': 0}  0.75    NaN     NaN      NaN NaN NaN
    # {'a': 'l2', 'b': 3}  0.85  0.925  0.9125  0.90625 NaN NaN
    # {'a': 'l1', 'b': 5}  0.80    NaN     NaN      NaN NaN NaN
    # ...
    # where a NaN indicates that the candidate wasn't evaluated at a given
    # iteration, because it wasn't part of the top-K at some previous
    # iteration. We here make sure that candidates that aren't in the top-k at
    # any given iteration are indeed not evaluated at the subsequent
    # iterations.
    nan_mask = pd.isna(table)
    n_iter = sh.n_iterations_
    for it in range(n_iter - 1):
        already_discarded_mask = nan_mask[it]
        # make sure that if a candidate is already discarded, we don't evaluate
        # it later
        assert (
            already_discarded_mask & nan_mask[it + 1] == already_discarded_mask
        ).all()
        # make sure that the number of discarded candidate is correct
        discarded_now_mask = ~already_discarded_mask & nan_mask[it + 1]
        kept_mask = ~already_discarded_mask & ~discarded_now_mask
        assert kept_mask.sum() == sh.n_candidates_[it + 1]
        # make sure that all discarded candidates have a lower score than the
        # kept candidates
        discarded_max_score = table[it].where(discarded_now_mask).max()
        kept_min_score = table[it].where(kept_mask).min()
        assert discarded_max_score < kept_min_score
    # We now make sure that the best candidate is chosen only from the last
    # iteration.
    # We also make sure this is true even if there were higher scores in
    # earlier rounds (this isn't generally the case, but worth ensuring it's
    # possible).
    last_iter = cv_results_df["iter"].max()
    idx_best_last_iter = cv_results_df[cv_results_df["iter"] == last_iter][
        "mean_test_score"
    ].idxmax()
    idx_best_all_iters = cv_results_df["mean_test_score"].idxmax()
    assert sh.best_params_ == cv_results_df.iloc[idx_best_last_iter]["params"]
    assert (
        cv_results_df.iloc[idx_best_last_iter]["mean_test_score"]
        < cv_results_df.iloc[idx_best_all_iters]["mean_test_score"]
    )
    assert (
        cv_results_df.iloc[idx_best_last_iter]["params"]
        != cv_results_df.iloc[idx_best_all_iters]["params"]
    )
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_base_estimator_inputs(Est):
    # make sure that the base estimators are passed the correct parameters and
    # number of samples at each iteration.
    pd = pytest.importorskip("pandas")
    passed_n_samples_fit = []
    passed_n_samples_predict = []
    passed_params = []
    # Bookkeeping subclass: records, via the closure lists above, the sample
    # counts seen by fit/predict and the parameters set by the search.
    class FastClassifierBookKeeping(FastClassifier):
        def fit(self, X, y):
            passed_n_samples_fit.append(X.shape[0])
            return super().fit(X, y)
        def predict(self, X):
            passed_n_samples_predict.append(X.shape[0])
            return super().predict(X)
        def set_params(self, **params):
            passed_params.append(params)
            return super().set_params(**params)
    n_samples = 1024
    n_splits = 2
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifierBookKeeping()
    sh = Est(
        base_estimator,
        param_grid,
        factor=2,
        cv=n_splits,
        return_train_score=False,
        refit=False,
    )
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    assert len(passed_n_samples_fit) == len(passed_n_samples_predict)
    # train + test sizes add up to the full resource budget of each fit
    passed_n_samples = [
        x + y for (x, y) in zip(passed_n_samples_fit, passed_n_samples_predict)
    ]
    # Lists are of length n_splits * n_iter * n_candidates_at_i.
    # Each chunk of size n_splits corresponds to the n_splits folds for the
    # same candidate at the same iteration, so they contain equal values. We
    # subsample such that the lists are of length n_iter * n_candidates_at_it
    passed_n_samples = passed_n_samples[::n_splits]
    passed_params = passed_params[::n_splits]
    cv_results_df = pd.DataFrame(sh.cv_results_)
    assert len(passed_params) == len(passed_n_samples) == len(cv_results_df)
    # the recorded sample counts must match the per-iteration budgets, and the
    # recorded parameters must match cv_results_ row by row
    uniques, counts = np.unique(passed_n_samples, return_counts=True)
    assert (sh.n_resources_ == uniques).all()
    assert (sh.n_candidates_ == counts).all()
    assert (cv_results_df["params"] == passed_params).all()
    assert (cv_results_df["n_resources"] == passed_n_samples).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_groups_support(Est):
    # A group-aware CV must raise when `groups` is None (the error must
    # propagate through the halving searches) and must accept a provided
    # groups array; non-group CVs never need groups.
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=50, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 50)
    clf = LinearSVC(random_state=0)
    grid = {"C": [1]}

    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(n_splits=3),
        GroupShuffleSplit(random_state=0),
    ]
    for cv in group_cvs:
        search = Est(clf, grid, cv=cv, random_state=0)
        with pytest.raises(
            ValueError, match="The 'groups' parameter should not be None."
        ):
            search.fit(X, y)
        search.fit(X, y, groups=groups)

    for cv in [StratifiedKFold(), StratifiedShuffleSplit(random_state=0)]:
        # Should not raise an error
        Est(clf, grid, cv=cv).fit(X, y)
@pytest.mark.parametrize("SearchCV", [HalvingRandomSearchCV, HalvingGridSearchCV])
def test_min_resources_null(SearchCV):
    """Check that we raise an error if the minimum resources is set to 0."""
    search = SearchCV(FastClassifier(), {"a": [1]}, min_resources="smallest")

    # An empty dataset makes min_resources_ resolve to 0.
    X = np.empty(0).reshape(0, 3)
    with pytest.raises(
        ValueError, match="min_resources_=0: you might have passed an empty dataset X."
    ):
        search.fit(X, [])
@pytest.mark.parametrize("SearchCV", [HalvingGridSearchCV, HalvingRandomSearchCV])
def test_select_best_index(SearchCV):
    """Check the selection strategy of the halving search."""
    # Synthetic (not 'real world') results: the best score of the *last*
    # iteration belongs to candidate 'i', at flat index 8.
    results = {
        "iter": np.array([0, 0, 0, 0, 1, 1, 2, 2, 2]),
        "mean_test_score": np.array([4, 3, 5, 1, 11, 10, 5, 6, 9]),
        "params": np.array(["a", "b", "c", "d", "e", "f", "g", "h", "i"]),
    }
    assert SearchCV._select_best_index(None, None, results) == 8
def test_halving_random_search_list_of_dicts():
    """Check the behaviour of the `HalvingRandomSearchCV` with `param_distribution`
    being a list of dictionary.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)
    # two independent sub-spaces: rbf (continuous C/gamma) and poly (degree)
    params = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]
    # keys that must appear in cv_results_
    param_keys = (
        "param_C",
        "param_degree",
        "param_gamma",
        "param_kernel",
    )
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    # keys specific to successive halving
    extra_keys = ("n_resources", "iter")
    search = HalvingRandomSearchCV(
        SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0
    )
    search.fit(X, y)
    n_candidates = sum(search.n_candidates_)
    cv_results = search.cv_results_
    # Check results structure
    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys)
    expected_cv_results_kinds = {
        "param_C": "f",
        "param_degree": "i",
        "param_gamma": "f",
        "param_kernel": "O",
    }
    check_cv_results_array_types(
        search, param_keys, score_keys, expected_cv_results_kinds
    )
    # candidates drawn from the "poly" dict have C/gamma masked out and a
    # concrete degree value
    assert all(
        (
            cv_results["param_C"].mask[i]
            and cv_results["param_gamma"].mask[i]
            and not cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "poly"
    )
    # conversely, "rbf" candidates have concrete C/gamma and a masked degree
    assert all(
        (
            not cv_results["param_C"].mask[i]
            and not cv_results["param_gamma"].mask[i]
            and cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "rbf"
    )