library packages
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,929 @@
"""
Testing for the partial dependence module.
"""

import numpy as np
import pytest

import sklearn
from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_regressor
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_iris, make_classification, make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import (
    GradientBoostingClassifier,
    GradientBoostingRegressor,
    HistGradientBoostingClassifier,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.exceptions import NotFittedError
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
    _grid_from_X,
    _partial_dependence_brute,
    _partial_dependence_recursion,
)
from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso
from sklearn.metrics import r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
    PolynomialFeatures,
    RobustScaler,
    StandardScaler,
    scale,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree.tests.test_tree import assert_is_subtree
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.fixes import _IS_32BIT
from sklearn.utils.validation import check_random_state

# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]


# (X, y), n_targets  <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(n_samples=50, random_state=0), 1)
multiclass_classification_data = (
    make_classification(
        n_samples=50, n_classes=3, n_clusters_per_class=1, random_state=0
    ),
    3,
)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (
    make_regression(n_samples=50, n_targets=2, random_state=0),
    2,
)

# iris
iris = load_iris()


@pytest.mark.parametrize(
    "Estimator, method, data",
    [
        (GradientBoostingClassifier, "auto", binary_classification_data),
        (GradientBoostingClassifier, "auto", multiclass_classification_data),
        (GradientBoostingClassifier, "brute", binary_classification_data),
        (GradientBoostingClassifier, "brute", multiclass_classification_data),
        (GradientBoostingRegressor, "auto", regression_data),
        (GradientBoostingRegressor, "brute", regression_data),
        (DecisionTreeRegressor, "brute", regression_data),
        (LinearRegression, "brute", regression_data),
        (LinearRegression, "brute", multioutput_regression_data),
        (LogisticRegression, "brute", binary_classification_data),
        (LogisticRegression, "brute", multiclass_classification_data),
        (MultiTaskLasso, "brute", multioutput_regression_data),
    ],
)
@pytest.mark.parametrize("grid_resolution", (5, 10))
@pytest.mark.parametrize("features", ([1], [1, 2]))
@pytest.mark.parametrize("kind", ("average", "individual", "both"))
def test_output_shape(Estimator, method, data, grid_resolution, features, kind):
    # Check that partial_dependence has a consistent output shape for
    # different kinds of estimators:
    # - classifiers with binary and multiclass settings
    # - regressors
    # - multi-task regressors

    est = Estimator()
    if hasattr(est, "n_estimators"):
        est.set_params(n_estimators=2)  # speed up computations

    # n_targets corresponds to the number of classes (1 for binary
    # classification) or to the number of tasks / outputs in multi-task
    # settings. It's equal to 1 for classical regression_data.
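    # As a concrete example of the shape logic checked below: with the
    # multiclass data above (3 classes, 50 samples), features=[1, 2] and
    # grid_resolution=5, kind="average" yields a (3, 5, 5) array while
    # kind="individual" yields a (3, 50, 5, 5) array.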
    (X, y), n_targets = data
    n_instances = X.shape[0]

    est.fit(X, y)
    result = partial_dependence(
        est,
        X=X,
        features=features,
        method=method,
        kind=kind,
        grid_resolution=grid_resolution,
    )
    pdp, axes = result, result["grid_values"]

    expected_pdp_shape = (n_targets, *[grid_resolution for _ in range(len(features))])
    expected_ice_shape = (
        n_targets,
        n_instances,
        *[grid_resolution for _ in range(len(features))],
    )
    if kind == "average":
        assert pdp.average.shape == expected_pdp_shape
    elif kind == "individual":
        assert pdp.individual.shape == expected_ice_shape
    else:  # 'both'
        assert pdp.average.shape == expected_pdp_shape
        assert pdp.individual.shape == expected_ice_shape

    expected_axes_shape = (len(features), grid_resolution)
    assert axes is not None
    assert np.asarray(axes).shape == expected_axes_shape


def test_grid_from_X():
    # tests for _grid_from_X: sanity check for output, and for shapes.

    # Make sure that the grid is a cartesian product of the input (it will use
    # the unique values instead of the percentiles)
    percentiles = (0.05, 0.95)
    grid_resolution = 100
    is_categorical = [False, False]
    X = np.asarray([[1, 2], [3, 4]])
    grid, axes = _grid_from_X(X, percentiles, is_categorical, grid_resolution)
    assert_array_equal(grid, [[1, 2], [1, 4], [3, 2], [3, 4]])
    assert_array_equal(axes, X.T)

    # test the shapes of the returned objects depending on the number of
    # unique values for a feature.
    rng = np.random.RandomState(0)
    grid_resolution = 15

    # n_unique_values > grid_resolution
    X = rng.normal(size=(20, 2))
    grid, axes = _grid_from_X(
        X, percentiles, is_categorical, grid_resolution=grid_resolution
    )
    assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
    assert np.asarray(axes).shape == (2, grid_resolution)

    # n_unique_values < grid_resolution, will use the actual values
    n_unique_values = 12
    X[n_unique_values - 1 :, 0] = 12345
    rng.shuffle(X)  # just to make sure the order is irrelevant
    grid, axes = _grid_from_X(
        X, percentiles, is_categorical, grid_resolution=grid_resolution
    )
    assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
    # axes is a list of arrays of different shapes
    assert axes[0].shape == (n_unique_values,)
    assert axes[1].shape == (grid_resolution,)


@pytest.mark.parametrize(
    "grid_resolution",
    [
        2,  # since n_categories > 2, we should not use quantile resampling
        100,
    ],
)
def test_grid_from_X_with_categorical(grid_resolution):
    """Check that `_grid_from_X` always samples from the categories and does
    not depend on the percentiles.
    """
    pd = pytest.importorskip("pandas")
    percentiles = (0.05, 0.95)
    is_categorical = [True]
    X = pd.DataFrame({"cat_feature": ["A", "B", "C", "A", "B", "D", "E"]})
    grid, axes = _grid_from_X(
        X, percentiles, is_categorical, grid_resolution=grid_resolution
    )
    assert grid.shape == (5, X.shape[1])
    assert axes[0].shape == (5,)


@pytest.mark.parametrize("grid_resolution", [3, 100])
def test_grid_from_X_heterogeneous_type(grid_resolution):
    """Check that `_grid_from_X` always samples from the categories and does
    not depend on the percentiles.
    """
    pd = pytest.importorskip("pandas")
    percentiles = (0.05, 0.95)
    is_categorical = [True, False]
    X = pd.DataFrame(
        {
            "cat": ["A", "B", "C", "A", "B", "D", "E", "A", "B", "D"],
            "num": [1, 1, 1, 2, 5, 6, 6, 6, 6, 8],
        }
    )
    nunique = X.nunique()

    grid, axes = _grid_from_X(
        X, percentiles, is_categorical, grid_resolution=grid_resolution
    )
    if grid_resolution == 3:
        assert grid.shape == (15, 2)
        assert axes[0].shape[0] == nunique["cat"]
        assert axes[1].shape[0] == grid_resolution
    else:
        assert grid.shape == (25, 2)
        assert axes[0].shape[0] == nunique["cat"]
        assert axes[1].shape[0] == nunique["num"]


@pytest.mark.parametrize(
    "grid_resolution, percentiles, err_msg",
    [
        (2, (0, 0.0001), "percentiles are too close"),
        (100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
        (100, 12345, "'percentiles' must be a sequence of 2 elements"),
        (100, (-1, 0.95), r"'percentiles' values must be in \[0, 1\]"),
        (100, (0.05, 2), r"'percentiles' values must be in \[0, 1\]"),
        (100, (0.9, 0.1), r"percentiles\[0\] must be strictly less than"),
        (1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1"),
    ],
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
    X = np.asarray([[1, 2], [3, 4]])
    is_categorical = [False]
    with pytest.raises(ValueError, match=err_msg):
        _grid_from_X(X, percentiles, is_categorical, grid_resolution)


@pytest.mark.parametrize("target_feature", range(5))
@pytest.mark.parametrize(
    "est, method",
    [
        (LinearRegression(), "brute"),
        (GradientBoostingRegressor(random_state=0), "brute"),
        (GradientBoostingRegressor(random_state=0), "recursion"),
        (HistGradientBoostingRegressor(random_state=0), "brute"),
        (HistGradientBoostingRegressor(random_state=0), "recursion"),
    ],
)
def test_partial_dependence_helpers(est, method, target_feature):
    # Check that what is returned by _partial_dependence_brute or
    # _partial_dependence_recursion is equivalent to manually setting a target
    # feature to a given value, and computing the average prediction over all
    # samples.
    # This also checks that the brute and recursion methods give the same
    # output.
    # Note that even on the training set, the brute and the recursion methods
    # aren't always strictly equivalent, in particular when the brute method
    # generates unrealistic samples that have low mass in the joint
    # distribution of the input features, and when some of the features are
    # dependent. Hence the high tolerance on the checks.
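    # For reference, the brute method estimates the partial dependence at a
    # grid value v as pd(v) = mean_i f(X_i with the target feature forced to
    # v), which is exactly what the loop below recomputes by hand.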

    X, y = make_regression(random_state=0, n_features=5, n_informative=5)
    # The 'init' estimator for GBDT (here the average prediction) isn't taken
    # into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
    y = y - y.mean()
    est.fit(X, y)

    # the target feature will be set to 0.5 and then to 123
    features = np.array([target_feature], dtype=np.intp)
    grid = np.array([[0.5], [123]])

    if method == "brute":
        pdp, predictions = _partial_dependence_brute(
            est, grid, features, X, response_method="auto"
        )
    else:
        pdp = _partial_dependence_recursion(est, grid, features)

    mean_predictions = []
    for val in (0.5, 123):
        X_ = X.copy()
        X_[:, target_feature] = val
        mean_predictions.append(est.predict(X_).mean())

    pdp = pdp[0]  # the shape is (1, 2) so make it (2,)

    # allow a greater margin of error with the recursion method
    rtol = 1e-1 if method == "recursion" else 1e-3
    assert np.allclose(pdp, mean_predictions, rtol=rtol)


@pytest.mark.parametrize("seed", range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
    # Make sure that the recursion method gives the same results on a
    # DecisionTreeRegressor and a GradientBoostingRegressor or a
    # RandomForestRegressor with 1 tree and equivalent parameters.

    rng = np.random.RandomState(seed)

    # Purely random dataset to avoid correlated features
    n_samples = 1000
    n_features = 5
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples) * 10

    # The 'init' estimator for GBDT (here the average prediction) isn't taken
    # into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
    y = y - y.mean()

    # set max_depth not too high to avoid splits with the same gain but
    # different features
    max_depth = 5

    tree_seed = 0
    forest = RandomForestRegressor(
        n_estimators=1,
        max_features=None,
        bootstrap=False,
        max_depth=max_depth,
        random_state=tree_seed,
    )
    # The forest will use ensemble.base._set_random_states to set the
    # random_state of the tree sub-estimator. We simulate this here to have
    # equivalent estimators.
    equiv_random_state = check_random_state(tree_seed).randint(np.iinfo(np.int32).max)
    gbdt = GradientBoostingRegressor(
        n_estimators=1,
        learning_rate=1,
        criterion="squared_error",
        max_depth=max_depth,
        random_state=equiv_random_state,
    )
    tree = DecisionTreeRegressor(max_depth=max_depth, random_state=equiv_random_state)

    forest.fit(X, y)
    gbdt.fit(X, y)
    tree.fit(X, y)

    # sanity check: if the trees aren't the same, the PD values won't be equal
    try:
        assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
        assert_is_subtree(tree.tree_, forest[0].tree_)
    except AssertionError:
        # For some reason the trees aren't exactly equal on 32 bits, so the
        # PDs cannot be equal either. See
        # https://github.com/scikit-learn/scikit-learn/issues/8853
        assert _IS_32BIT, "this should only fail on 32 bit platforms"
        return

    grid = rng.randn(50).reshape(-1, 1)
    for f in range(n_features):
        features = np.array([f], dtype=np.intp)

        pdp_forest = _partial_dependence_recursion(forest, grid, features)
        pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
        pdp_tree = _partial_dependence_recursion(tree, grid, features)

        np.testing.assert_allclose(pdp_gbdt, pdp_tree)
        np.testing.assert_allclose(pdp_forest, pdp_tree)


@pytest.mark.parametrize(
    "est",
    (
        GradientBoostingClassifier(random_state=0),
        HistGradientBoostingClassifier(random_state=0),
    ),
)
@pytest.mark.parametrize("target_feature", (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
    # Make sure the recursion method (which implicitly uses decision_function)
    # gives the same result as the brute method with
    # response_method='decision_function'

    X, y = make_classification(n_classes=2, n_clusters_per_class=1, random_state=1)
    assert np.mean(y) == 0.5  # make sure the init estimator predicts 0 anyway

    est.fit(X, y)

    preds_1 = partial_dependence(
        est,
        X,
        [target_feature],
        response_method="decision_function",
        method="recursion",
        kind="average",
    )
    preds_2 = partial_dependence(
        est,
        X,
        [target_feature],
        response_method="decision_function",
        method="brute",
        kind="average",
    )

    assert_allclose(preds_1["average"], preds_2["average"], atol=1e-7)


@pytest.mark.parametrize(
    "est",
    (
        LinearRegression(),
        GradientBoostingRegressor(random_state=0),
        HistGradientBoostingRegressor(
            random_state=0, min_samples_leaf=1, max_leaf_nodes=None, max_iter=1
        ),
        DecisionTreeRegressor(random_state=0),
    ),
)
@pytest.mark.parametrize("power", (1, 2))
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear or
    # quadratic) then the partial dependence for that feature should reflect
    # it.
    # We here fit a linear regression model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.
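    # Since the features are drawn independently, the theoretical partial
    # dependence of the target feature is E[y | x = v] = v ** power, a
    # degree-`power` polynomial in v, hence the near-perfect fit asserted
    # below.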

    rng = np.random.RandomState(0)
    n_samples = 200
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable] ** power

    est.fit(X, y)

    pdp = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000, kind="average"
    )

    new_X = pdp["grid_values"][0].reshape(-1, 1)
    new_y = pdp["average"][0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)

    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))

    assert r2 > 0.99


@pytest.mark.parametrize(
    "Estimator",
    (
        sklearn.tree.DecisionTreeClassifier,
        sklearn.tree.ExtraTreeClassifier,
        sklearn.ensemble.ExtraTreesClassifier,
        sklearn.neighbors.KNeighborsClassifier,
        sklearn.neighbors.RadiusNeighborsClassifier,
        sklearn.ensemble.RandomForestClassifier,
    ),
)
def test_multiclass_multioutput(Estimator):
    # Make sure an error is raised for multiclass-multioutput classifiers

    # make a multiclass-multioutput dataset
    X, y = make_classification(n_classes=3, n_clusters_per_class=1, random_state=0)
    y = np.array([y, y]).T

    est = Estimator()
    est.fit(X, y)

    with pytest.raises(
        ValueError, match="Multiclass-multioutput estimators are not supported"
    ):
        partial_dependence(est, X, [0])


class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
    def fit(self, X, y):
        # simulate that we have some classes
        self.classes_ = [0, 1]
        return self


@pytest.mark.filterwarnings("ignore:A Bunch will be returned")
@pytest.mark.parametrize(
    "estimator, params, err_msg",
    [
        (
            KMeans(random_state=0, n_init="auto"),
            {"features": [0]},
            "'estimator' must be a fitted regressor or classifier",
        ),
        (
            LinearRegression(),
            {"features": [0], "response_method": "predict_proba"},
            "The response_method parameter is ignored for regressors",
        ),
        (
            GradientBoostingClassifier(random_state=0),
            {
                "features": [0],
                "response_method": "predict_proba",
                "method": "recursion",
            },
            "'recursion' method, the response_method must be 'decision_function'",
        ),
        (
            GradientBoostingClassifier(random_state=0),
            {"features": [0], "response_method": "predict_proba", "method": "auto"},
            "'recursion' method, the response_method must be 'decision_function'",
        ),
        (
            LinearRegression(),
            {"features": [0], "method": "recursion", "kind": "individual"},
            "The 'recursion' method only applies when 'kind' is set to 'average'",
        ),
        (
            LinearRegression(),
            {"features": [0], "method": "recursion", "kind": "both"},
            "The 'recursion' method only applies when 'kind' is set to 'average'",
        ),
        (
            LinearRegression(),
            {"features": [0], "method": "recursion"},
            "Only the following estimators support the 'recursion' method:",
        ),
    ],
)
def test_partial_dependence_error(estimator, params, err_msg):
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, **params)


@pytest.mark.parametrize(
    "estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize("features", [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)

    err_msg = "all features must be in"
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, [features])


@pytest.mark.parametrize(
    "estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
    pd = pytest.importorskip("pandas")
    X, y = make_classification(random_state=0)
    df = pd.DataFrame(X)
    estimator.fit(df, y)

    features = ["random"]
    err_msg = "A given column is not a column of the dataframe"
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, df, features)


@pytest.mark.parametrize(
    "estimator", [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
    # check that array-like objects are accepted
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)
    partial_dependence(estimator, list(X), [0], kind="average")


def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init parameter to a GBDT and using
    # the recursion method yields a warning.

    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    with pytest.warns(
        UserWarning, match="Using recursion method with a non-constant init predictor"
    ):
        partial_dependence(gbc, X, [0], method="recursion", kind="average")

    with pytest.warns(
        UserWarning, match="Using recursion method with a non-constant init predictor"
    ):
        partial_dependence(gbc, X, [0], method="recursion", kind="average")


def test_partial_dependence_sample_weight_of_fitted_estimator():
    # Test the near-perfect correlation between the partial dependence and the
    # diagonal when sample weights emphasize the y = x predictions.
    # Non-regression test for #13193
    # TODO: extend to HistGradientBoosting once sample_weight is supported
    N = 1000
    rng = np.random.RandomState(123456)
    mask = rng.randint(2, size=N, dtype=bool)

    x = rng.rand(N)
    # set y = x on mask and y = -x outside
    y = x.copy()
    y[~mask] = -y[~mask]
    X = np.c_[mask, x]
    # sample weights to emphasize data points where y = x
    sample_weight = np.ones(N)
    sample_weight[mask] = 1000.0

    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(X, y, sample_weight=sample_weight)

    pdp = partial_dependence(clf, X, features=[1], kind="average")

    assert np.corrcoef(pdp["average"], pdp["grid_values"])[0, 1] > 0.99


def test_hist_gbdt_sw_not_supported():
    # TODO: remove/fix when PDP supports HGBT with sample weights
    clf = HistGradientBoostingRegressor(random_state=1)
    clf.fit(X, y, sample_weight=np.ones(len(X)))

    with pytest.raises(
        NotImplementedError, match="does not support partial dependence"
    ):
        partial_dependence(clf, X, features=[1])


def test_partial_dependence_pipeline():
    # check that partial_dependence supports pipelines
    iris = load_iris()

    scaler = StandardScaler()
    clf = DummyClassifier(random_state=42)
    pipe = make_pipeline(scaler, clf)

    clf.fit(scaler.fit_transform(iris.data), iris.target)
    pipe.fit(iris.data, iris.target)

    features = 0
    pdp_pipe = partial_dependence(
        pipe, iris.data, features=[features], grid_resolution=10, kind="average"
    )
    pdp_clf = partial_dependence(
        clf,
        scaler.transform(iris.data),
        features=[features],
        grid_resolution=10,
        kind="average",
    )
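    # The pipeline computes the grid in the original feature space while the
    # bare classifier sees standardized data; StandardScaler maps a
    # standardized value z back to the original space via z * scale_ + mean_,
    # which is what the grid_values comparison below checks.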
    assert_allclose(pdp_pipe["average"], pdp_clf["average"])
    assert_allclose(
        pdp_pipe["grid_values"][0],
        pdp_clf["grid_values"][0] * scaler.scale_[features] + scaler.mean_[features],
    )


@pytest.mark.parametrize(
    "estimator",
    [
        LogisticRegression(max_iter=1000, random_state=0),
        GradientBoostingClassifier(random_state=0, n_estimators=5),
    ],
    ids=["estimator-brute", "estimator-recursion"],
)
@pytest.mark.parametrize(
    "preprocessor",
    [
        None,
        make_column_transformer(
            (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
            (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
        ),
        make_column_transformer(
            (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
            remainder="passthrough",
        ),
    ],
    ids=["None", "column-transformer", "column-transformer-passthrough"],
)
@pytest.mark.parametrize(
    "features",
    [[0, 2], [iris.feature_names[i] for i in (0, 2)]],
    ids=["features-integer", "features-string"],
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
    # check that partial_dependence supports dataframes and pipelines
    # including a column transformer
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(scale(iris.data), columns=iris.feature_names)

    pipe = make_pipeline(preprocessor, estimator)
    pipe.fit(df, iris.target)
    pdp_pipe = partial_dependence(
        pipe, df, features=features, grid_resolution=10, kind="average"
    )

    # the column transformer will reorder the columns when transforming, so we
    # map the requested features to their indices after preprocessing to be
    # sure that we are computing the partial dependence of the right columns
    if preprocessor is not None:
        X_proc = clone(preprocessor).fit_transform(df)
        features_clf = [0, 1]
    else:
        X_proc = df
        features_clf = [0, 2]

    clf = clone(estimator).fit(X_proc, iris.target)
    pdp_clf = partial_dependence(
        clf,
        X_proc,
        features=features_clf,
        method="brute",
        grid_resolution=10,
        kind="average",
    )

    assert_allclose(pdp_pipe["average"], pdp_clf["average"])
    if preprocessor is not None:
        scaler = preprocessor.named_transformers_["standardscaler"]
        assert_allclose(
            pdp_pipe["grid_values"][1],
            pdp_clf["grid_values"][1] * scaler.scale_[1] + scaler.mean_[1],
        )
    else:
        assert_allclose(pdp_pipe["grid_values"][1], pdp_clf["grid_values"][1])


@pytest.mark.parametrize(
    "features, expected_pd_shape",
    [
        (0, (3, 10)),
        (iris.feature_names[0], (3, 10)),
        ([0, 2], (3, 10, 10)),
        ([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)),
        ([True, False, True, False], (3, 10, 10)),
    ],
    ids=["scalar-int", "scalar-str", "list-int", "list-str", "mask"],
)
def test_partial_dependence_feature_type(features, expected_pd_shape):
    # check all the possible feature types supported in PDP
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(iris.data, columns=iris.feature_names)

    preprocessor = make_column_transformer(
        (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
        (RobustScaler(), [iris.feature_names[i] for i in (1, 3)]),
    )
    pipe = make_pipeline(
        preprocessor, LogisticRegression(max_iter=1000, random_state=0)
    )
    pipe.fit(df, iris.target)
    pdp_pipe = partial_dependence(
        pipe, df, features=features, grid_resolution=10, kind="average"
    )
    assert pdp_pipe["average"].shape == expected_pd_shape
    assert len(pdp_pipe["grid_values"]) == len(pdp_pipe["average"].shape) - 1


@pytest.mark.parametrize(
    "estimator",
    [
        LinearRegression(),
        LogisticRegression(),
        GradientBoostingRegressor(),
        GradientBoostingClassifier(),
    ],
)
def test_partial_dependence_unfitted(estimator):
    X = iris.data
    preprocessor = make_column_transformer(
        (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
    )
    pipe = make_pipeline(preprocessor, estimator)
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)


@pytest.mark.parametrize(
    "Estimator, data",
    [
        (LinearRegression, multioutput_regression_data),
        (LogisticRegression, binary_classification_data),
    ],
)
def test_kind_average_and_average_of_individual(Estimator, data):
    est = Estimator()
    (X, y), n_targets = data
    est.fit(X, y)

    pdp_avg = partial_dependence(est, X=X, features=[1, 2], kind="average")
    pdp_ind = partial_dependence(est, X=X, features=[1, 2], kind="individual")
    avg_ind = np.mean(pdp_ind["individual"], axis=1)
    assert_allclose(avg_ind, pdp_avg["average"])


@pytest.mark.parametrize(
    "Estimator, data",
    [
        (LinearRegression, multioutput_regression_data),
        (LogisticRegression, binary_classification_data),
    ],
)
def test_partial_dependence_kind_individual_ignores_sample_weight(Estimator, data):
    """Check that `sample_weight` does not have any effect on reported ICE."""
    est = Estimator()
    (X, y), n_targets = data
    sample_weight = np.arange(X.shape[0])
    est.fit(X, y)

    pdp_nsw = partial_dependence(est, X=X, features=[1, 2], kind="individual")
    pdp_sw = partial_dependence(
        est, X=X, features=[1, 2], kind="individual", sample_weight=sample_weight
    )
    assert_allclose(pdp_nsw["individual"], pdp_sw["individual"])
    assert_allclose(pdp_nsw["grid_values"], pdp_sw["grid_values"])


@pytest.mark.parametrize(
    "estimator",
    [
        LinearRegression(),
        LogisticRegression(),
        RandomForestRegressor(),
        GradientBoostingClassifier(),
    ],
)
@pytest.mark.parametrize("non_null_weight_idx", [0, 1, -1])
def test_partial_dependence_non_null_weight_idx(estimator, non_null_weight_idx):
    """Check that if we pass a `sample_weight` of zeros with only one index
    with a sample weight equal to one, then the average `partial_dependence`
    with this `sample_weight` is equal to the individual `partial_dependence`
    of the corresponding index.
    """
    X, y = iris.data, iris.target
    preprocessor = make_column_transformer(
        (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
    )
    pipe = make_pipeline(preprocessor, estimator).fit(X, y)

    sample_weight = np.zeros_like(y)
    sample_weight[non_null_weight_idx] = 1
    pdp_sw = partial_dependence(
        pipe,
        X,
        [2, 3],
        kind="average",
        sample_weight=sample_weight,
        grid_resolution=10,
    )
    pdp_ind = partial_dependence(pipe, X, [2, 3], kind="individual", grid_resolution=10)
    output_dim = 1 if is_regressor(pipe) else len(np.unique(y))
    for i in range(output_dim):
        assert_allclose(
            pdp_ind["individual"][i][non_null_weight_idx],
            pdp_sw["average"][i],
        )


@pytest.mark.parametrize(
    "Estimator, data",
    [
        (LinearRegression, multioutput_regression_data),
        (LogisticRegression, binary_classification_data),
    ],
)
def test_partial_dependence_equivalence_equal_sample_weight(Estimator, data):
    """Check that `sample_weight=None` is equivalent to having equal weights."""

    est = Estimator()
    (X, y), n_targets = data
    est.fit(X, y)

    sample_weight, params = None, {"X": X, "features": [1, 2], "kind": "average"}
    pdp_sw_none = partial_dependence(est, **params, sample_weight=sample_weight)
    sample_weight = np.ones(len(y))
    pdp_sw_unit = partial_dependence(est, **params, sample_weight=sample_weight)
    assert_allclose(pdp_sw_none["average"], pdp_sw_unit["average"])
    sample_weight = 2 * np.ones(len(y))
    pdp_sw_doubling = partial_dependence(est, **params, sample_weight=sample_weight)
    assert_allclose(pdp_sw_none["average"], pdp_sw_doubling["average"])


def test_partial_dependence_sample_weight_size_error():
    """Check that we raise an error when the size of `sample_weight` is not
    consistent with `X` and `y`.
    """
    est = LogisticRegression()
    (X, y), n_targets = binary_classification_data
    sample_weight = np.ones_like(y)
    est.fit(X, y)

    with pytest.raises(ValueError, match="sample_weight.shape =="):
        partial_dependence(
            est, X, features=[0], sample_weight=sample_weight[1:], grid_resolution=10
        )


def test_partial_dependence_sample_weight_with_recursion():
    """Check that we raise an error when `sample_weight` is provided with the
    `"recursion"` method.
    """
    est = RandomForestRegressor()
    (X, y), n_targets = regression_data
    sample_weight = np.ones_like(y)
    est.fit(X, y, sample_weight=sample_weight)

    with pytest.raises(ValueError, match="'recursion' method can only be applied when"):
        partial_dependence(
            est, X, features=[0], method="recursion", sample_weight=sample_weight
        )


def test_mixed_type_categorical():
    """Check that we raise a proper error when a column has mixed types and
    the sorting of `np.unique` will fail."""
    X = np.array(["A", "B", "C", np.nan], dtype=object).reshape(-1, 1)
    y = np.array([0, 1, 0, 1])

    from sklearn.preprocessing import OrdinalEncoder

    clf = make_pipeline(
        OrdinalEncoder(encoded_missing_value=-1),
        LogisticRegression(),
    ).fit(X, y)
    with pytest.raises(ValueError, match="The column #0 contains mixed data types"):
        partial_dependence(clf, X, features=[0])
@@ -0,0 +1,47 @@
import numpy as np
import pytest

from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
from sklearn.utils._testing import _convert_container


@pytest.mark.parametrize(
    "feature_names, array_type, expected_feature_names",
    [
        (None, "array", ["x0", "x1", "x2"]),
        (None, "dataframe", ["a", "b", "c"]),
        (np.array(["a", "b", "c"]), "array", ["a", "b", "c"]),
    ],
)
def test_check_feature_names(feature_names, array_type, expected_feature_names):
    X = np.random.randn(10, 3)
    column_names = ["a", "b", "c"]
    X = _convert_container(X, constructor_name=array_type, columns_name=column_names)
    feature_names_validated = _check_feature_names(X, feature_names)
    assert feature_names_validated == expected_feature_names


def test_check_feature_names_error():
    X = np.random.randn(10, 3)
    feature_names = ["a", "b", "c", "a"]
    msg = "feature_names should not contain duplicates."
    with pytest.raises(ValueError, match=msg):
        _check_feature_names(X, feature_names)


@pytest.mark.parametrize("fx, idx", [(0, 0), (1, 1), ("a", 0), ("b", 1), ("c", 2)])
def test_get_feature_index(fx, idx):
    feature_names = ["a", "b", "c"]
    assert _get_feature_index(fx, feature_names) == idx


@pytest.mark.parametrize(
    "fx, feature_names, err_msg",
    [
        ("a", None, "Cannot plot partial dependence for feature 'a'"),
        ("d", ["a", "b", "c"], "Feature 'd' not in feature_names"),
    ],
)
def test_get_feature_names_error(fx, feature_names, err_msg):
    with pytest.raises(ValueError, match=err_msg):
        _get_feature_index(fx, feature_names)
@@ -0,0 +1,540 @@
import numpy as np
import pytest
from joblib import parallel_backend
from numpy.testing import assert_allclose

from sklearn.compose import ColumnTransformer
from sklearn.datasets import (
    load_diabetes,
    load_iris,
    make_classification,
    make_regression,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    get_scorer,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
from sklearn.utils._testing import _convert_container


@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
@pytest.mark.parametrize("sample_weight", [None, "ones"])
def test_permutation_importance_correlated_feature_regression(
    n_jobs, max_samples, sample_weight
):
    # Make sure that a feature highly correlated to the target has a higher
    # importance
    rng = np.random.RandomState(42)
    n_repeats = 5

    X, y = load_diabetes(return_X_y=True)
    y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)

    X = np.hstack([X, y_with_little_noise])

    weights = np.ones_like(y) if sample_weight == "ones" else sample_weight
    clf = RandomForestRegressor(n_estimators=10, random_state=42)
    clf.fit(X, y)

    result = permutation_importance(
        clf,
        X,
        y,
        sample_weight=weights,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    assert result.importances.shape == (X.shape[1], n_repeats)

    # the feature correlated with y was added as the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])


@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_correlated_feature_regression_pandas(
    n_jobs, max_samples
):
    pd = pytest.importorskip("pandas")

    # Make sure that a feature highly correlated to the target has a higher
    # importance
    rng = np.random.RandomState(42)
    n_repeats = 5

    dataset = load_iris()
    X, y = dataset.data, dataset.target
    y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)

    # Adds a feature correlated with y as the last column
    X = pd.DataFrame(X, columns=dataset.feature_names)
    X["correlated_feature"] = y_with_little_noise

    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X, y)

    result = permutation_importance(
        clf,
        X,
        y,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    assert result.importances.shape == (X.shape[1], n_repeats)

    # the feature correlated with y was added as the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])


@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, max_samples, seed=42):
    # Permutation variable importance should not be affected by the high
    # cardinality bias of traditional feature importances, especially when
    # computed on a held-out test set:
    rng = np.random.RandomState(seed)
    n_repeats = 5
    n_samples = 1000
    n_classes = 5
    n_informative_features = 2
    n_noise_features = 1
    n_features = n_informative_features + n_noise_features

    # Generate a multiclass classification dataset and a set of informative
    # binary features that can be used to predict some classes of y exactly
    # while leaving some classes unexplained to make the problem harder.
    classes = np.arange(n_classes)
    y = rng.choice(classes, size=n_samples)
    X = np.hstack([(y == c).reshape(-1, 1) for c in classes[:n_informative_features]])
    X = X.astype(np.float32)

    # Not all target classes are explained by the binary class indicator
    # features:
    assert n_informative_features < n_classes

    # Add noisy features with high cardinality (numerical) values that can be
    # used to overfit the training data.
    X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
    assert X.shape == (n_samples, n_features)

    # Split the dataset to be able to evaluate on a held-out test set. The
    # test size should be large enough for the importance measurements to be
    # stable:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=rng
    )
    clf = RandomForestClassifier(n_estimators=5, random_state=rng)
    clf.fit(X_train, y_train)

    # Variable importances computed by impurity decrease on the tree node
    # splits often use the noisy features in splits. This can give the
    # misleading impression that high cardinality noisy variables are the
    # most important:
    tree_importances = clf.feature_importances_
    informative_tree_importances = tree_importances[:n_informative_features]
    noisy_tree_importances = tree_importances[n_informative_features:]
    assert informative_tree_importances.max() < noisy_tree_importances.min()

    # Let's check that permutation-based feature importances do not have this
    # problem.
    r = permutation_importance(
        clf,
        X_test,
        y_test,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    assert r.importances.shape == (X.shape[1], n_repeats)

    # Split the importances between informative and noisy features
    informative_importances = r.importances_mean[:n_informative_features]
    noisy_importances = r.importances_mean[n_informative_features:]

    # Because we do not have a binary variable explaining each target class,
    # the RF model will have to use the random variable to make some
    # (overfitting) splits (as max_depth is not set). Therefore the noisy
    # variables will be non-zero but with small values oscillating around
    # zero:
    assert max(np.abs(noisy_importances)) > 1e-7
    assert noisy_importances.max() < 0.05

    # The binary features correlated with y should have a higher importance
    # than the high cardinality noisy features.
    # The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing approximately a bit more than 0.2 of accuracy.
    assert informative_importances.min() > 0.15


def test_permutation_importance_mixed_types():
    rng = np.random.RandomState(42)
    n_repeats = 4

    # Last column is correlated with y
    X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
    y = np.array([0, 1, 0, 1])

    clf = make_pipeline(SimpleImputer(), LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)
    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)

    assert result.importances.shape == (X.shape[1], n_repeats)

    # the feature correlated with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])

    # use another random state
    rng = np.random.RandomState(0)
    result2 = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result2.importances.shape == (X.shape[1], n_repeats)

    assert not np.allclose(result.importances, result2.importances)

    # the feature correlated with y is the last column and should
    # have the highest importance
    assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])


def test_permutation_importance_mixed_types_pandas():
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(42)
    n_repeats = 5

    # Last column is correlated with y
    X = pd.DataFrame({"col1": [1.0, 2.0, 3.0, np.nan], "col2": ["a", "b", "a", "b"]})
    y = np.array([0, 1, 0, 1])

    num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
    preprocess = ColumnTransformer(
        [("num", num_preprocess, ["col1"]), ("cat", OneHotEncoder(), ["col2"])]
    )
    clf = make_pipeline(preprocess, LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)

    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)

    assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])


def test_permutation_importance_linear_regresssion():
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)

    X = scale(X)
    y = scale(y)

    lr = LinearRegression().fit(X, y)

    # this relationship can be computed in closed form
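    # Sketch of the derivation, assuming X and y are standardized as above:
    # permuting column j replaces x_j by an independent copy x'_j, so the
    # residual picks up a term coef_j * (x_j - x'_j) whose variance is
    # 2 * coef_j ** 2. The MSE thus increases by 2 * coef_j ** 2 in
    # expectation, which is the importance reported under the
    # neg_mean_squared_error scorer.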
    expected_importances = 2 * lr.coef_**2
    results = permutation_importance(
        lr, X, y, n_repeats=50, scoring="neg_mean_squared_error"
    )
    assert_allclose(
        expected_importances, results.importances_mean, rtol=1e-1, atol=1e-6
    )


@pytest.mark.parametrize("max_samples", [500, 1.0])
def test_permutation_importance_equivalence_sequential_parallel(max_samples):
    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    # Also tests that max_samples equal to the number of samples is
    # equivalent to 1.0
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(X, y)

    importance_sequential = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=1, max_samples=max_samples
    )

    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    imp_min = importance_sequential["importances"].min()
    imp_max = importance_sequential["importances"].max()
    assert imp_max - imp_min > 0.3

    # Then actually check that parallelism does not impact the results,
    # either with shared memory (threading) or with isolated memory via
    # process-based parallelism using the default backend
    # ('loky' or 'multiprocessing') depending on the joblib version:

    # process-based parallelism (by default):
    importance_processes = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=2
    )
    assert_allclose(
        importance_processes["importances"], importance_sequential["importances"]
    )

    # thread-based parallelism:
    with parallel_backend("threading"):
        importance_threading = permutation_importance(
            lr, X, y, n_repeats=5, random_state=0, n_jobs=2
        )
    assert_allclose(
        importance_threading["importances"], importance_sequential["importances"]
    )


@pytest.mark.parametrize("n_jobs", [None, 1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples):
    # This test checks that the column shuffling logic has the same behavior
    # for both a dataframe and a plain numpy array.
    pd = pytest.importorskip("pandas")

    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    X, y = make_regression(n_samples=100, n_features=5, random_state=0)
    X_df = pd.DataFrame(X)

    # Add a categorical feature that is statistically linked to y:
    binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
    cat_column = binner.fit_transform(y.reshape(-1, 1))

    # Concatenate the extra column to the numpy array: integers will be
    # cast to float values
    X = np.hstack([X, cat_column])
    assert X.dtype.kind == "f"

    # Insert the extra column as a non-numpy-native dtype (while keeping
    # backward compat for old pandas versions):
    if hasattr(pd, "Categorical"):
        cat_column = pd.Categorical(cat_column.ravel())
    else:
        cat_column = cat_column.ravel()
    new_col_idx = len(X_df.columns)
    X_df[new_col_idx] = cat_column
    assert X_df[new_col_idx].dtype == cat_column.dtype

    # Stitch an arbitrary index to the dataframe:
    X_df.index = np.arange(len(X_df)).astype(str)

    rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
    rf.fit(X, y)

    n_repeats = 3
    importance_array = permutation_importance(
        rf,
        X,
        y,
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    imp_min = importance_array["importances"].min()
    imp_max = importance_array["importances"].max()
    assert imp_max - imp_min > 0.3

    # Now check that the importances computed on the dataframe match the
    # values of those computed on the array with the same data.
    importance_dataframe = permutation_importance(
        rf,
        X_df,
        y,
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert_allclose(
        importance_array["importances"], importance_dataframe["importances"]
    )


@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
    # Smoke, non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/15810
    n_samples, n_features = int(5e4), 4
    X, y = make_classification(
        n_samples=n_samples, n_features=n_features, random_state=0
    )
    assert X.nbytes > 1e6  # trigger joblib memmapping

    X = _convert_container(X, input_type)
    clf = DummyClassifier(strategy="prior").fit(X, y)

    # Actual smoke test: should not raise any error:
    n_repeats = 5
    r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)

    # Auxiliary check: DummyClassifier is feature independent:
    # permuting a feature should not change the predictions
    expected_importances = np.zeros((n_features, n_repeats))
    assert_allclose(expected_importances, r.importances)


def test_permutation_importance_sample_weight():
    # Creating data with 2 features and 1000 samples, where the target
    # variable is a linear combination of the two features, such that
    # in half of the samples the impact of feature 1 is twice the impact of
    # feature 2, and vice versa on the other half of the samples.
    rng = np.random.RandomState(1)
    n_samples = 1000
    n_features = 2
    n_half_samples = n_samples // 2
    x = rng.normal(0.0, 0.001, (n_samples, n_features))
    y = np.zeros(n_samples)
    y[:n_half_samples] = 2 * x[:n_half_samples, 0] + x[:n_half_samples, 1]
    y[n_half_samples:] = x[n_half_samples:, 0] + 2 * x[n_half_samples:, 1]

    # Fitting a linear regression with perfect prediction
    lr = LinearRegression(fit_intercept=False)
    lr.fit(x, y)

    # When all samples are weighted with the same weights, the ratio of
    # the two features' importances should equal 1 on expectation (when using
    # mean absolute error as the loss function).
    pi = permutation_importance(
        lr, x, y, random_state=1, scoring="neg_mean_absolute_error", n_repeats=200
    )
    x1_x2_imp_ratio_w_none = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w_none == pytest.approx(1, 0.01)

    # When passing a vector of ones as the sample_weight, the results should
    # be the same as in the case where sample_weight=None.
    w = np.ones(n_samples)
    pi = permutation_importance(
        lr,
        x,
        y,
        random_state=1,
        scoring="neg_mean_absolute_error",
        n_repeats=200,
        sample_weight=w,
    )
    x1_x2_imp_ratio_w_ones = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w_ones == pytest.approx(x1_x2_imp_ratio_w_none, 0.01)

    # When the ratio between the weights of the first half of the samples and
    # the second half of the samples approaches infinity, the ratio of
    # the two features' importances should equal 2 on expectation (when using
    # mean absolute error as the loss function).
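    # Sketch of why the ratio tends to 2: with nearly all the weight on the
    # first half of the samples, the refit model is effectively
    # y = 2 * x1 + x2. Under mean absolute error the loss increase from
    # permuting feature j scales linearly with |coef_j|, so the importance
    # ratio approaches 2 / 1 == 2.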
    w = np.hstack(
        [np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)]
    )
    lr.fit(x, y, w)
    pi = permutation_importance(
        lr,
        x,
        y,
        random_state=1,
        scoring="neg_mean_absolute_error",
        n_repeats=200,
        sample_weight=w,
    )
    x1_x2_imp_ratio_w = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w / x1_x2_imp_ratio_w_none == pytest.approx(2, 0.01)


def test_permutation_importance_no_weights_scoring_function():
    # Creating a scorer function that does not take sample_weight
    def my_scorer(estimator, X, y):
        return 1

    # Creating some data and an estimator for the permutation test
    x = np.array([[1, 2], [3, 4]])
    y = np.array([1, 2])
    w = np.array([1, 1])
    lr = LinearRegression()
    lr.fit(x, y)

    # test that permutation_importance does not raise an error when
    # sample_weight is None
    try:
        permutation_importance(lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1)
    except TypeError:
        pytest.fail(
            "permutation_test raised an error when using a scorer "
            "function that does not accept sample_weight even though "
            "sample_weight was None"
        )

    # test that permutation_importance raises an exception when sample_weight
    # is not None
    with pytest.raises(TypeError):
        permutation_importance(
            lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1, sample_weight=w
        )


@pytest.mark.parametrize(
    "list_single_scorer, multi_scorer",
    [
        (["r2", "neg_mean_squared_error"], ["r2", "neg_mean_squared_error"]),
        (
            ["r2", "neg_mean_squared_error"],
            {
                "r2": get_scorer("r2"),
                "neg_mean_squared_error": get_scorer("neg_mean_squared_error"),
            },
        ),
        (
            ["r2", "neg_mean_squared_error"],
            lambda estimator, X, y: {
                "r2": r2_score(y, estimator.predict(X)),
                "neg_mean_squared_error": -mean_squared_error(y, estimator.predict(X)),
            },
        ),
    ],
)
def test_permutation_importance_multi_metric(list_single_scorer, multi_scorer):
    # Test permutation importance when scoring contains multiple scorers

    # Creating some data and an estimator for the permutation test
    x, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(x, y)

    multi_importance = permutation_importance(
        lr, x, y, random_state=1, scoring=multi_scorer, n_repeats=2
    )
    assert set(multi_importance.keys()) == set(list_single_scorer)

    for scorer in list_single_scorer:
        multi_result = multi_importance[scorer]
        single_result = permutation_importance(
            lr, x, y, random_state=1, scoring=scorer, n_repeats=2
        )

        assert_allclose(multi_result.importances, single_result.importances)


def test_permutation_importance_max_samples_error():
    """Check that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    """
    X = np.array([(1.0, 2.0, 3.0, 4.0)]).T
    y = np.array([0, 1, 0, 1])

    clf = LogisticRegression()
    clf.fit(X, y)

    err_msg = r"max_samples must be <= n_samples"

    with pytest.raises(ValueError, match=err_msg):
        permutation_importance(clf, X, y, max_samples=5)