venv

2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/init.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/init.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_clip.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_clip.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_hilbert_curve.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_hilbert_curve.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_random.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_random.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_sjoin.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_sjoin.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_tools.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/pycache/test_tools.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_clip.py
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_clip.py
@@ -1,28 +1,28 @@
 """Tests for the clip module."""

-
 import numpy as np
+import pandas as pd

 import shapely
 from shapely.geometry import (
-    Polygon,
-    Point,
-    LineString,
-    LinearRing,
    GeometryCollection,
+    LinearRing,
+    LineString,
    MultiPoint,
+    Point,
+    Polygon,
    box,
 )

 import geopandas
 from geopandas import GeoDataFrame, GeoSeries, clip
-
-from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
-import pytest
-
+from geopandas._compat import HAS_PYPROJ
 from geopandas.tools.clip import _mask_is_list_like_rectangle

-pytestmark = pytest.mark.skip_no_sindex
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from pandas.testing import assert_index_equal
+
 mask_variants_single_rectangle = [
    "single_rectangle_gdf",
    "single_rectangle_gdf_list_bounds",
@@ -43,6 +43,14 @@ def point_gdf():
    return gdf


+@pytest.fixture
+def point_gdf2():
+    """Create a point GeoDataFrame."""
+    pts = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
+    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
+    return gdf
+
+
@pytest.fixture
 def pointsoutside_nooverlap_gdf():
    """Create a point GeoDataFrame. Its points are all outside the single
@@ -137,7 +145,7 @@ def two_line_gdf():
@pytest.fixture
 def multi_poly_gdf(donut_geometry):
    """Create a multi-polygon GeoDataFrame."""
-    multi_poly = donut_geometry.unary_union
+    multi_poly = donut_geometry.union_all()
    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
    out_df["attr"] = ["pool"]
    return out_df
@@ -148,7 +156,7 @@ def multi_line(two_line_gdf):
    """Create a multi-line GeoDataFrame.
    This GDF has one multiline and one regular line."""
    # Create a single and multi line object
-    multiline_feat = two_line_gdf.unary_union
+    multiline_feat = two_line_gdf.union_all()
    linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
    out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
    out_df["attr"] = ["road", "stream"]
@@ -158,7 +166,7 @@ def multi_line(two_line_gdf):
@pytest.fixture
 def multi_point(point_gdf):
    """Create a multi-point GeoDataFrame."""
-    multi_point = point_gdf.unary_union
+    multi_point = point_gdf.union_all()
    out_df = GeoDataFrame(
        geometry=GeoSeries(
            [multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
@@ -321,7 +329,7 @@ class TestClipWithSingleRectangleGdf:
        )
        assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
        shape_for_points = (
-            box(*mask) if _mask_is_list_like_rectangle(mask) else mask.unary_union
+            box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
        )
        assert all(clipped.intersects(shape_for_points))

@@ -398,6 +406,7 @@ def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
    assert "GeometryCollection" in clipped.geom_type[0]


+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
 def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        clip(point_gdf, single_rectangle_gdf.to_crs(4326))
@@ -460,3 +469,16 @@ def test_clip_empty_mask(buffered_locations, mask):
    )
    clipped = clip(buffered_locations.geometry, mask)
    assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))
+
+
+def test_clip_sorting(point_gdf2):
+    """Test the sorting kwarg in clip"""
+    bbox = shapely.geometry.box(0, 0, 2, 2)
+    unsorted_clipped_gdf = point_gdf2.clip(bbox)
+    sorted_clipped_gdf = point_gdf2.clip(bbox, sort=True)
+
+    expected_sorted_index = pd.Index([1, 3, 5])
+
+    assert not (sorted(unsorted_clipped_gdf.index) == unsorted_clipped_gdf.index).all()
+    assert (sorted(sorted_clipped_gdf.index) == sorted_clipped_gdf.index).all()
+    assert_index_equal(expected_sorted_index, sorted_clipped_gdf.index)
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_hilbert_curve.py
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_hilbert_curve.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 from shapely.geometry import Point
 from shapely.wkt import loads

--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_random.py
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_random.py
@@ -1,28 +1,46 @@
-import pytest
 import numpy
-import geopandas
-import geopandas._compat as compat

+import geopandas
 from geopandas.tools._random import uniform

-multipolygons = geopandas.read_file(geopandas.datasets.get_path("nybb")).geometry
-polygons = multipolygons.explode(ignore_index=True).geometry
-multilinestrings = multipolygons.boundary
-linestrings = polygons.boundary
-points = multipolygons.centroid
+import pytest
+
+
+@pytest.fixture
+def multipolygons(nybb_filename):
+    return geopandas.read_file(nybb_filename).geometry
+
+
+@pytest.fixture
+def polygons(multipolygons):
+    return multipolygons.explode(ignore_index=True).geometry
+
+
+@pytest.fixture
+def multilinestrings(multipolygons):
+    return multipolygons.boundary
+
+
+@pytest.fixture
+def linestrings(polygons):
+    return polygons.boundary
+
+
+@pytest.fixture
+def points(multipolygons):
+    return multipolygons.centroid


-@pytest.mark.skipif(
-    not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
-    reason="array input in interpolate not implemented for shapely<2",
-)
@pytest.mark.parametrize("size", [10, 100])
@pytest.mark.parametrize(
-    "geom", [multipolygons[0], polygons[0], multilinestrings[0], linestrings[0]]
+    "geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
 )
-def test_uniform(geom, size):
+def test_uniform(geom_fixture, size, request):
+    geom = request.getfixturevalue(geom_fixture)[0]
    sample = uniform(geom, size=size, rng=1)
-    sample_series = geopandas.GeoSeries(sample).explode().reset_index(drop=True)
+    sample_series = (
+        geopandas.GeoSeries(sample).explode(index_parts=True).reset_index(drop=True)
+    )
    assert len(sample_series) == size
    sample_in_geom = sample_series.buffer(0.00000001).sindex.query(
        geom, predicate="intersects"
@@ -30,21 +48,13 @@ def test_uniform(geom, size):
    assert len(sample_in_geom) == size


-@pytest.mark.skipif(
-    not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
-    reason="array input in interpolate not implemented for shapely<2",
-)
-def test_uniform_unsupported():
+def test_uniform_unsupported(points):
    with pytest.warns(UserWarning, match="Sampling is not supported"):
        sample = uniform(points[0], size=10, rng=1)
    assert sample.is_empty


-@pytest.mark.skipif(
-    not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
-    reason="array input in interpolate not implemented for shapely<2",
-)
-def test_uniform_generator():
+def test_uniform_generator(polygons):
    sample = uniform(polygons[0], size=10, rng=1)
    sample2 = uniform(polygons[0], size=10, rng=1)
    assert sample.equals(sample2)
--- a/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_sjoin.py
+++ b/.venv/lib/python3.12/site-packages/geopandas/tools/tests/test_sjoin.py
@@ -3,23 +3,24 @@ from typing import Sequence

 import numpy as np
 import pandas as pd
-import shapely

-from shapely.geometry import Point, Polygon, GeometryCollection
+import shapely
+from shapely.geometry import GeometryCollection, Point, Polygon, box

 import geopandas
 import geopandas._compat as compat
-from geopandas import GeoDataFrame, GeoSeries, read_file, sjoin, sjoin_nearest
-from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from geopandas import (
+    GeoDataFrame,
+    GeoSeries,
+    points_from_xy,
+    read_file,
+    sjoin,
+    sjoin_nearest,
+)

-from pandas.testing import assert_frame_equal, assert_series_equal
 import pytest
-
-
-TEST_NEAREST = compat.USE_SHAPELY_20 or (compat.PYGEOS_GE_010 and compat.USE_PYGEOS)
-
-
-pytestmark = pytest.mark.skip_no_sindex
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal


@pytest.fixture()
@@ -95,6 +96,52 @@ def dfs(request):
    return [request.param, df1, df2, expected]


+@pytest.fixture()
+def dfs_shared_attribute():
+    geo_left = [
+        Point(0, 0),
+        Point(1, 1),
+        Point(2, 2),
+        Point(3, 3),
+        Point(4, 4),
+        Point(5, 5),
+        Point(6, 6),
+        Point(7, 7),
+    ]
+    geo_right = [
+        Point(0, 0),
+        Point(1, 1),
+        Point(2, 2),
+        Point(3, 3),
+        Point(4, 4),
+        Point(5, 5),
+        Point(6, 6),
+        Point(7, 7),
+    ]
+    attr_tracker = ["A", "B", "C", "D", "E", "F", "G", "H"]
+
+    left_gdf = geopandas.GeoDataFrame(
+        {
+            "geometry": geo_left,
+            "attr_tracker": attr_tracker,
+            "duplicate_column": [0, 1, 2, 3, 4, 5, 6, 7],
+            "attr1": [True, True, True, True, True, True, True, True],
+            "attr2": [True, True, True, True, True, True, True, True],
+        }
+    )
+
+    right_gdf = geopandas.GeoDataFrame(
+        {
+            "geometry": geo_right,
+            "duplicate_column": [0, 1, 2, 3, 4, 5, 6, 7],
+            "attr1": [True, True, False, False, True, True, False, False],
+            "attr2": [True, True, False, False, False, False, False, False],
+        }
+    )
+
+    return left_gdf, right_gdf
+
+
 class TestSpatialJoin:
    @pytest.mark.parametrize(
        "how, lsuffix, rsuffix, expected_cols",
@@ -113,6 +160,7 @@ class TestSpatialJoin:
        joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
        assert set(joined.columns) == expected_cols | {"geometry"}

+    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
    @pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
    def test_crs_mismatch(self, dfs):
        index, df1, df2, expected = dfs
@@ -120,31 +168,6 @@ class TestSpatialJoin:
        with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
            sjoin(df1, df2)

-    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
-    @pytest.mark.parametrize("op", ["intersects", "contains", "within"])
-    def test_deprecated_op_param(self, dfs, op):
-        _, df1, df2, _ = dfs
-        with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
-            sjoin(df1, df2, op=op)
-
-    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
-    @pytest.mark.parametrize("op", ["intersects", "contains", "within"])
-    @pytest.mark.parametrize("predicate", ["contains", "within"])
-    def test_deprecated_op_param_nondefault_predicate(self, dfs, op, predicate):
-        _, df1, df2, _ = dfs
-        match = "use the `predicate` parameter instead"
-        if op != predicate:
-            warntype = UserWarning
-            match = (
-                "`predicate` will be overridden by the value of `op`"  # noqa: ISC003
-                + r"(.|\s)*"
-                + match
-            )
-        else:
-            warntype = FutureWarning
-        with pytest.warns(warntype, match=match):
-            sjoin(df1, df2, predicate=predicate, op=op)
-
    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    def test_unknown_kwargs(self, dfs):
        _, df1, df2, _ = dfs
@@ -154,7 +177,6 @@ class TestSpatialJoin:
        ):
            sjoin(df1, df2, extra_param="test")

-    @pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
    @pytest.mark.parametrize(
        "dfs",
        [
@@ -167,12 +189,10 @@ class TestSpatialJoin:
        indirect=True,
    )
    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
-    @pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
-    def test_inner(self, predicate, predicate_kw, dfs):
+    def test_inner(self, predicate, dfs):
        index, df1, df2, expected = dfs

-        res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})
-
+        res = sjoin(df1, df2, how="inner", predicate=predicate)
        exp = expected[predicate].dropna().copy()
        exp = exp.drop("geometry_y", axis=1).rename(columns={"geometry_x": "geometry"})
        exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
@@ -182,7 +202,7 @@ class TestSpatialJoin:
            ].astype("int64")
        if index == "named-index":
            exp[["df1_ix", "df2_ix"]] = exp[["df1_ix", "df2_ix"]].astype("int64")
-            exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
+            exp = exp.set_index("df1_ix")
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_left")
            exp.index.name = None
@@ -192,11 +212,7 @@ class TestSpatialJoin:
            )
            exp.index.names = df1.index.names
        if index == "named-multi-index":
-            exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
-                columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
-            )
-            exp.index.names = df1.index.names
-
+            exp = exp.set_index(["df1_ix1", "df1_ix2"])
        assert_frame_equal(res, exp)

    @pytest.mark.parametrize(
@@ -232,7 +248,7 @@ class TestSpatialJoin:
            res["index_right"] = res["index_right"].astype(float)
        elif index == "named-index":
            exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64")
-            exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
+            exp = exp.set_index("df1_ix")
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_left")
            exp.index.name = None
@@ -242,10 +258,7 @@ class TestSpatialJoin:
            )
            exp.index.names = df1.index.names
        if index == "named-multi-index":
-            exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
-                columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
-            )
-            exp.index.names = df1.index.names
+            exp = exp.set_index(["df1_ix1", "df1_ix2"])

        assert_frame_equal(res, exp)

@@ -348,7 +361,7 @@ class TestSpatialJoin:
            res["index_left"] = res["index_left"].astype(float)
        elif index == "named-index":
            exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
-            exp = exp.set_index("df2_ix").rename(columns={"df1_ix": "index_left"})
+            exp = exp.set_index("df2_ix")
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_right")
            exp = exp.reindex(columns=res.columns)
@@ -359,20 +372,431 @@ class TestSpatialJoin:
            )
            exp.index.names = df2.index.names
        if index == "named-multi-index":
-            exp = exp.set_index(["df2_ix1", "df2_ix2"]).rename(
-                columns={"df1_ix1": "index_left0", "df1_ix2": "index_left1"}
-            )
-            exp.index.names = df2.index.names
+            exp = exp.set_index(["df2_ix1", "df2_ix2"])
+
        if predicate == "within":
            exp = exp.sort_index()

        assert_frame_equal(res, exp, check_index_type=False)

+    @pytest.mark.skipif(not compat.GEOS_GE_310, reason="`dwithin` requires GEOS 3.10")
+    @pytest.mark.parametrize("how", ["inner"])
+    @pytest.mark.parametrize(
+        "geo_left, geo_right, expected_left, expected_right, distance",
+        [
+            (
+                # Distance is number, 2x1
+                [Point(0, 0), Point(1, 1)],
+                [Point(1, 1)],
+                [0, 1],
+                [0, 0],
+                math.sqrt(2),
+            ),
+            # Distance is number, 2x2
+            (
+                [Point(0, 0), Point(1, 1)],
+                [Point(0, 0), Point(1, 1)],
+                [0, 1, 0, 1],
+                [0, 0, 1, 1],
+                math.sqrt(2),
+            ),
+            # Distance is array, matches len(left)
+            (
+                [Point(0, 0), Point(0, 0), Point(-1, -1)],
+                [Point(1, 1)],
+                [1, 2],
+                [0, 0],
+                [0, math.sqrt(2), math.sqrt(8)],
+            ),
+            # Distance is np.array, matches len(left),
+            # inner join sorts the right GeoDataFrame
+            (
+                [Point(0, 0), Point(0, 0), Point(-1, -1)],
+                [Point(1, 1), Point(0.5, 0.5)],
+                [1, 2, 1, 2],
+                [1, 1, 0, 0],
+                np.array([0, math.sqrt(2), math.sqrt(8)]),
+            ),
+        ],
+    )
+    def test_sjoin_dwithin(
+        self,
+        geo_left,
+        geo_right,
+        expected_left: Sequence[int],
+        expected_right: Sequence[int],
+        distance,
+        how,
+    ):
+        left = geopandas.GeoDataFrame({"geometry": geo_left})
+        right = geopandas.GeoDataFrame({"geometry": geo_right})
+        expected_gdf = left.iloc[expected_left].copy()
+        expected_gdf["index_right"] = expected_right
+        joined = sjoin(left, right, how=how, predicate="dwithin", distance=distance)
+        assert_frame_equal(expected_gdf.sort_index(), joined.sort_index())

+    # GH3239
+    @pytest.mark.parametrize(
+        "predicate",
+        [
+            "contains",
+            "contains_properly",
+            "covered_by",
+            "covers",
+            "crosses",
+            "intersects",
+            "touches",
+            "within",
+        ],
+    )
+    def test_sjoin_left_order(self, predicate):
+        # a set of points in random order -> that order should be preserved
+        # with a left join
+        pts = GeoDataFrame(
+            geometry=points_from_xy([0.1, 0.4, 0.3, 0.7], [0.8, 0.6, 0.9, 0.1])
+        )
+        polys = GeoDataFrame(
+            {"id": [1, 2, 3, 4]},
+            geometry=[
+                box(0, 0, 0.5, 0.5),
+                box(0, 0.5, 0.5, 1),
+                box(0.5, 0, 1, 0.5),
+                box(0.5, 0.5, 1, 1),
+            ],
+        )
+
+        joined = sjoin(pts, polys, predicate=predicate, how="left")
+        assert_index_equal(joined.index, pts.index)
+
+    def test_sjoin_shared_attribute(self, naturalearth_lowres, naturalearth_cities):
+        countries = read_file(naturalearth_lowres)
+        cities = read_file(naturalearth_cities)
+        countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
+
+        # Add first letter of country/city as an attribute column to be compared
+        countries["firstLetter"] = countries["country"].astype(str).str[0]
+        cities["firstLetter"] = cities["name"].astype(str).str[0]
+
+        result = sjoin(cities, countries, on_attribute="firstLetter")
+        assert (
+            result["country"].astype(str).str[0] == result["name"].astype(str).str[0]
+        ).all()
+        assert result.shape == (23, 5)
+
+    @pytest.mark.parametrize(
+        "attr1_key_change_dict, attr2_key_change_dict",
+        [
+            pytest.param(
+                {True: "merge", False: "no_merge"},
+                {True: "merge", False: "no_merge"},
+                id="merge on string attributes",
+            ),
+            pytest.param(
+                {True: 2, False: 1},
+                {True: 2, False: 1},
+                id="merge on integer attributes",
+            ),
+            pytest.param(
+                {True: True, False: False},
+                {True: True, False: False},
+                id="merge on boolean attributes",
+            ),
+            pytest.param(
+                {True: True, False: False},
+                {True: "merge", False: "no_merge"},
+                id="merge on mixed attributes",
+            ),
+        ],
+    )
+    def test_sjoin_multiple_attributes_datatypes(
+        self, dfs_shared_attribute, attr1_key_change_dict, attr2_key_change_dict
+    ):
+        left_gdf, right_gdf = dfs_shared_attribute
+        left_gdf["attr1"] = left_gdf["attr1"].map(attr1_key_change_dict)
+        left_gdf["attr2"] = left_gdf["attr2"].map(attr2_key_change_dict)
+        right_gdf["attr1"] = right_gdf["attr1"].map(attr1_key_change_dict)
+        right_gdf["attr2"] = right_gdf["attr2"].map(attr2_key_change_dict)
+
+        joined = sjoin(left_gdf, right_gdf, on_attribute=("attr1", "attr2"))
+        assert (["A", "B"] == joined["attr_tracker"].values).all()
+
+    def test_sjoin_multiple_attributes_check_header(self, dfs_shared_attribute):
+        left_gdf, right_gdf = dfs_shared_attribute
+        joined = sjoin(left_gdf, right_gdf, on_attribute=["attr1"])
+
+        assert (["A", "B", "E", "F"] == joined["attr_tracker"].values).all()
+        assert {"attr2_left", "attr2_right", "attr1"}.issubset(joined.columns)
+        assert "attr1_left" not in joined
+
+    def test_sjoin_error_column_does_not_exist(self, dfs_shared_attribute):
+        left_gdf, right_gdf = dfs_shared_attribute
+        right_gdf_dropped_attr = right_gdf.drop("attr1", axis=1)
+        left_gdf_dropped_attr = left_gdf.drop("attr1", axis=1)
+
+        with pytest.raises(
+            ValueError,
+            match="Expected column attr1 is missing from the right dataframe.",
+        ):
+            sjoin(left_gdf, right_gdf_dropped_attr, on_attribute="attr1")
+
+        with pytest.raises(
+            ValueError,
+            match="Expected column attr1 is missing from the left dataframe.",
+        ):
+            sjoin(left_gdf_dropped_attr, right_gdf, on_attribute="attr1")
+
+        with pytest.raises(
+            ValueError,
+            match="Expected column attr1 is missing from both of the dataframes.",
+        ):
+            sjoin(left_gdf_dropped_attr, right_gdf_dropped_attr, on_attribute="attr1")
+
+    def test_sjoin_error_use_geometry_column(self, dfs_shared_attribute):
+        left_gdf, right_gdf = dfs_shared_attribute
+        with pytest.raises(
+            ValueError,
+            match="Active geometry column cannot be used as an input for "
+            "on_attribute parameter.",
+        ):
+            sjoin(left_gdf, right_gdf, on_attribute="geometry")
+        with pytest.raises(
+            ValueError,
+            match="Active geometry column cannot be used as an input for "
+            "on_attribute parameter.",
+        ):
+            sjoin(left_gdf, right_gdf, on_attribute=["attr1", "geometry"])
+
+
+class TestIndexNames:
+    @pytest.mark.parametrize("how", ["inner", "left", "right"])
+    def test_preserve_index_names(self, how):
+        # preserve names of both left and right index
+        geoms = [Point(1, 1), Point(2, 2)]
+        df1 = GeoDataFrame({"geometry": geoms}, index=pd.Index([1, 2], name="myidx1"))
+        df2 = GeoDataFrame(
+            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx2")
+        )
+        result = sjoin(df1, df2, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {"myidx1": [1, 2], "geometry": geoms, "myidx2": ["a", "b"]}
+            ).set_index("myidx1")
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {"myidx2": ["a", "b"], "myidx1": [1, 2], "geometry": geoms},
+            ).set_index("myidx2")
+        assert_geodataframe_equal(result, expected)
+
+        # but also add suffixes if both left and right have the same index
+        df1.index.name = "myidx"
+        df2.index.name = "myidx"
+        result = sjoin(df1, df2, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
+            ).set_index("myidx_left")
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {"myidx_right": ["a", "b"], "myidx_left": [1, 2], "geometry": geoms},
+            ).set_index("myidx_right")
+        assert_geodataframe_equal(result, expected)
+
+    @pytest.mark.parametrize("how", ["inner", "left", "right"])
+    def test_preserve_index_names_multiindex(self, how):
+        # preserve names of both left and right index
+        geoms = [Point(1, 1), Point(2, 2)]
+        df1 = GeoDataFrame(
+            {"geometry": geoms},
+            index=pd.MultiIndex.from_tuples(
+                [("a", 1), ("b", 2)], names=["myidx1", "level2"]
+            ),
+        )
+        df2 = GeoDataFrame(
+            {"geometry": geoms},
+            index=pd.MultiIndex.from_tuples(
+                [("c", 3), ("d", 4)], names=["myidx2", None]
+            ),
+        )
+        result = sjoin(df1, df2, how=how)
+        expected_base = GeoDataFrame(
+            {
+                "myidx1": ["a", "b"],
+                "level2": [1, 2],
+                "geometry": geoms,
+                "myidx2": ["c", "d"],
+                "index_right1": [3, 4],
+            }
+        )
+        if how in ("inner", "left"):
+            expected = expected_base.set_index(["myidx1", "level2"])
+        else:
+            # right join
+            expected = expected_base.set_index(["myidx2", "index_right1"])
+            # if it was originally None, that is preserved
+            expected.index.names = ["myidx2", None]
+        assert_geodataframe_equal(result, expected)
+
+        # but also add suffixes if both left and right have the same index
+        df1.index.names = ["myidx", "level2"]
+        df2.index.names = ["myidx", None]
+        result = sjoin(df1, df2, how=how)
+        expected_base = GeoDataFrame(
+            {
+                "myidx_left": ["a", "b"],
+                "level2": [1, 2],
+                "geometry": geoms,
+                "myidx_right": ["c", "d"],
+                "index_right1": [3, 4],
+            }
+        )
+        if how in ("inner", "left"):
+            expected = expected_base.set_index(["myidx_left", "level2"])
+        else:
+            # right join
+            expected = expected_base.set_index(["myidx_right", "index_right1"])
+            # if it was originally None, that is preserved
+            expected.index.names = ["myidx_right", None]
+        assert_geodataframe_equal(result, expected)
+
+    @pytest.mark.parametrize("how", ["inner", "left", "right"])
+    def test_duplicate_column_index_name(self, how):
+        # case where a left column and the right index have the same name or the
+        # other way around -> correctly add suffix or preserve index name
+        geoms = [Point(1, 1), Point(2, 2)]
+        df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
+        df2 = GeoDataFrame(
+            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx")
+        )
+        result = sjoin(df1, df2, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
+            )
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
+                index=pd.Index(["a", "b"], name="myidx_right"),
+            )
+        assert_geodataframe_equal(result, expected)
+
+        result = sjoin(df2, df1, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
+                index=pd.Index(["a", "b"], name="myidx_left"),
+            )
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {"myidx_left": ["a", "b"], "myidx_right": [1, 2], "geometry": geoms},
+            )
+        assert_geodataframe_equal(result, expected)
+
+    @pytest.mark.parametrize("how", ["inner", "left", "right"])
+    def test_duplicate_column_index_name_multiindex(self, how):
+        # case where a left column and the right index have the same name or the
+        # other way around -> correctly add suffix or preserve index name
+        geoms = [Point(1, 1), Point(2, 2)]
+        df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
+        df2 = GeoDataFrame(
+            {"geometry": geoms},
+            index=pd.MultiIndex.from_tuples(
+                [("a", 1), ("b", 2)], names=["myidx", "level2"]
+            ),
+        )
+        result = sjoin(df1, df2, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {
+                    "myidx_left": [1, 2],
+                    "geometry": geoms,
+                    "myidx_right": ["a", "b"],
+                    "level2": [1, 2],
+                }
+            )
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
+                index=pd.MultiIndex.from_tuples(
+                    [("a", 1), ("b", 2)], names=["myidx_right", "level2"]
+                ),
+            )
+        assert_geodataframe_equal(result, expected)
+
+        result = sjoin(df2, df1, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
+                index=pd.MultiIndex.from_tuples(
+                    [("a", 1), ("b", 2)], names=["myidx_left", "level2"]
+                ),
+            )
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {
+                    "myidx_left": ["a", "b"],
+                    "level2": [1, 2],
+                    "myidx_right": [1, 2],
+                    "geometry": geoms,
+                },
+            )
+        assert_geodataframe_equal(result, expected)
+
+    @pytest.mark.parametrize("how", ["inner", "left", "right"])
+    def test_conflicting_column_index_name(self, how):
+        # test case where the auto-generated index name conflicts
+        geoms = [Point(1, 1), Point(2, 2)]
+        df1 = GeoDataFrame({"index_right": [1, 2], "geometry": geoms})
+        df2 = GeoDataFrame({"geometry": geoms})
+        with pytest.raises(ValueError, match="'index_right' cannot be a column name"):
+            sjoin(df1, df2, how=how)
+
+    @pytest.mark.parametrize("how", ["inner", "left", "right"])
+    def test_conflicting_column_with_suffix(self, how):
+        # test case where the auto-generated index name conflicts
+        geoms = [Point(1, 1), Point(2, 2)]
+        df1 = GeoDataFrame(
+            {"column": [1, 2], "column_right": ["a", "b"], "geometry": geoms}
+        )
+        df2 = GeoDataFrame({"column": [0.1, 0.2], "geometry": geoms})
+
+        result = sjoin(df1, df2, how=how)
+        if how in ("inner", "left"):
+            expected = GeoDataFrame(
+                {1: [1, 2], 2: ["a", "b"], 3: geoms, 4: [0, 1], 5: [0.1, 0.2]}
+            )
+            expected.columns = [
+                "column_left",
+                "column_right",
+                "geometry",
+                "index_right",
+                "column_right",
+            ]
+        else:
+            # right join
+            expected = GeoDataFrame(
+                {1: [0, 1], 2: [1, 2], 3: ["a", "b"], 4: [0.1, 0.2], 5: geoms}
+            )
+            expected.columns = [
+                "index_left",
+                "column_left",
+                "column_right",
+                "column_right",
+                "geometry",
+            ]
+        expected = expected.set_geometry("geometry")
+        assert_geodataframe_equal(result, expected)
+
+
+@pytest.mark.usefixtures("_setup_class_nybb_filename")
 class TestSpatialJoinNYBB:
    def setup_method(self):
-        nybb_filename = geopandas.datasets.get_path("nybb")
-        self.polydf = read_file(nybb_filename)
+        self.polydf = read_file(self.nybb_filename)
        self.crs = self.polydf.crs
        N = 20
        b = [int(x) for x in self.polydf.total_bounds]
@@ -527,7 +951,7 @@ class TestSpatialJoinNYBB:

    def test_sjoin_empty_geometries(self):
        # https://github.com/geopandas/geopandas/issues/944
-        empty = GeoDataFrame(geometry=[GeometryCollection()] * 3)
+        empty = GeoDataFrame(geometry=[GeometryCollection()] * 3, crs=self.crs)
        df = sjoin(pd.concat([self.pointdf, empty]), self.polydf, how="left")
        assert df.shape == (24, 8)
        df2 = sjoin(self.pointdf, pd.concat([self.polydf, empty]), how="left")
@@ -542,8 +966,8 @@ class TestSpatialJoinNYBB:
        assert sjoin(empty, self.pointdf, how="inner", predicate=predicate).empty
        assert sjoin(empty, self.pointdf, how="left", predicate=predicate).empty

-    def test_empty_sjoin_return_duplicated_columns(self):
-        nybb = geopandas.read_file(geopandas.datasets.get_path("nybb"))
+    def test_empty_sjoin_return_duplicated_columns(self, nybb_filename):
+        nybb = geopandas.read_file(nybb_filename)
        nybb2 = nybb.copy()
        nybb2.geometry = nybb2.translate(200000)  # to get non-overlapping

@@ -553,45 +977,24 @@ class TestSpatialJoinNYBB:
        assert "BoroCode_left" in result.columns


-class TestSpatialJoinNaturalEarth:
-    def setup_method(self):
-        world_path = geopandas.datasets.get_path("naturalearth_lowres")
-        cities_path = geopandas.datasets.get_path("naturalearth_cities")
-        self.world = read_file(world_path)
-        self.cities = read_file(cities_path)
-
-    def test_sjoin_inner(self):
-        # GH637
-        countries = self.world[["geometry", "name"]]
-        countries = countries.rename(columns={"name": "country"})
-        cities_with_country = sjoin(
-            self.cities, countries, how="inner", predicate="intersects"
-        )
-        assert cities_with_country.shape == (213, 4)
+@pytest.fixture
+def world(naturalearth_lowres):
+    return read_file(naturalearth_lowres)


-@pytest.mark.skipif(
-    TEST_NEAREST,
-    reason=("This test can only be run _without_ PyGEOS >= 0.10 installed"),
-)
-def test_no_nearest_all():
-    df1 = geopandas.GeoDataFrame({"geometry": []})
-    df2 = geopandas.GeoDataFrame({"geometry": []})
-    with pytest.raises(
-        NotImplementedError,
-        match="Currently, only PyGEOS >= 0.10.0 or Shapely >= 2.0 supports",
-    ):
-        sjoin_nearest(df1, df2)
+@pytest.fixture
+def cities(naturalearth_cities):
+    return read_file(naturalearth_cities)
+
+
+def test_sjoin_inner(world, cities):
+    # GH637
+    countries = world[["geometry", "name"]]
+    countries = countries.rename(columns={"name": "country"})
+    cities_with_country = sjoin(cities, countries, how="inner", predicate="intersects")
+    assert cities_with_country.shape == (213, 4)


-@pytest.mark.skipif(
-    not TEST_NEAREST,
-    reason=(
-        "PyGEOS >= 0.10.0"
-        " must be installed and activated via the geopandas.compat module to"
-        " test sjoin_nearest"
-    ),
-)
 class TestNearest:
    @pytest.mark.parametrize(
        "how_kwargs", ({}, {"how": "inner"}, {"how": "left"}, {"how": "right"})
@@ -900,10 +1303,10 @@ class TestNearest:
        assert_geodataframe_equal(expected_gdf, joined)

    @pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS")
-    def test_sjoin_nearest_inner(self):
+    def test_sjoin_nearest_inner(self, naturalearth_lowres, naturalearth_cities):
        # check equivalency of left and inner join
-        countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))
-        cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
+        countries = read_file(naturalearth_lowres)
+        cities = read_file(naturalearth_cities)
        countries = countries[["geometry", "name"]].rename(columns={"name": "country"})

        # default: inner and left give the same result
@@ -927,19 +1330,8 @@ class TestNearest:
        result5["index_right"] = result5["index_right"].astype("int64")
        assert_geodataframe_equal(result5, result4, check_like=True)

-    expected_index_uncapped = (
-        [1, 3, 3, 1, 2] if compat.PANDAS_GE_22 else [1, 1, 3, 3, 2]
-    )
-
-    @pytest.mark.skipif(
-        not (compat.USE_SHAPELY_20),
-        reason=(
-            "shapely >= 2.0 is required to run sjoin_nearest"
-            "with parameter `exclusive` set"
-        ),
-    )
    @pytest.mark.parametrize(
-        "max_distance,expected", [(None, expected_index_uncapped), (1.1, [3, 3, 1, 2])]
+        "max_distance,expected", [(None, [1, 3, 3, 1, 2]), (1.1, [3, 3, 1, 2])]
    )
    def test_sjoin_nearest_exclusive(self, max_distance, expected):
        geoms = shapely.points(np.arange(3), np.arange(3))