2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -1,8 +1,8 @@
from .clip import clip
from .geocoding import geocode, reverse_geocode
from .overlay import overlay
from .sjoin import sjoin, sjoin_nearest
from .util import collect
from .clip import clip
__all__ = [
"collect",

View File

@@ -1,6 +1,7 @@
from warnings import warn
import numpy
from shapely.geometry import MultiPoint
from geopandas.array import from_shapely, points_from_xy
@@ -64,7 +65,7 @@ def _uniform_line(geom, size, generator):
"""
fracs = generator.uniform(size=size)
return from_shapely(geom.interpolate(fracs, normalized=True)).unary_union()
return from_shapely(geom.interpolate(fracs, normalized=True)).union_all()
def _uniform_polygon(geom, size, generator):
@@ -80,4 +81,4 @@ def _uniform_polygon(geom, size, generator):
)
valid_samples = batch[batch.sindex.query(geom, predicate="contains")]
candidates.extend(valid_samples)
return GeoSeries(candidates[:size]).unary_union
return GeoSeries(candidates[:size]).union_all()
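Both hunks swap the deprecated `unary_union` property for the `union_all()` method. A minimal sketch of the replacement on a plain GeoSeries (assuming a GeoPandas version where `GeoSeries.union_all` is available):

```python
import geopandas
from shapely.geometry import Point

# collapse several geometries into a single (Multi)geometry
gs = geopandas.GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)])
merged = gs.union_all()      # method call; replaces the deprecated gs.unary_union property
print(merged.geom_type)      # "MultiPoint"
```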

View File

@@ -58,33 +58,27 @@ def _get_C_info():
geos_dir = None
try:
import fiona
import pyogrio
gdal_version = fiona.env.get_gdal_release_name()
gdal_version = pyogrio.__gdal_version_string__
gdal_dir = pyogrio.get_gdal_data_path()
except Exception:
gdal_version = None
try:
import fiona
gdal_dir = fiona.env.GDALDataFinder().search()
except Exception:
gdal_dir = None
if gdal_version is None:
try:
import pyogrio
import fiona
gdal_version = pyogrio.__gdal_version_string__
gdal_dir = None
gdal_version = fiona.env.get_gdal_release_name()
except Exception:
pass
gdal_version = None
try:
# get_gdal_data_path is only available in pyogrio >= 0.4.2
from pyogrio import get_gdal_data_path
import fiona
gdal_dir = get_gdal_data_path()
gdal_dir = fiona.env.GDALDataFinder().search()
except Exception:
pass
gdal_dir = None
blob = [
("GEOS", geos_version),
@@ -114,16 +108,15 @@ def _get_deps_info():
"pyproj",
"shapely",
# optional deps
"fiona",
"pyogrio",
"geoalchemy2",
"geopy",
"matplotlib",
"mapclassify",
"pygeos",
"pyogrio",
"fiona",
"psycopg",
"psycopg2",
"pyarrow",
"rtree",
]
def get_version(module):
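The reordered blocks above make pyogrio the preferred source of GDAL information, with fiona only as a fallback. A condensed sketch of that lookup order (both imports are optional dependencies and may be missing):

```python
def _gdal_info():
    """Return (gdal_version, gdal_data_dir), preferring pyogrio over fiona."""
    gdal_version = gdal_dir = None
    try:
        import pyogrio
        gdal_version = pyogrio.__gdal_version_string__
        gdal_dir = pyogrio.get_gdal_data_path()
    except Exception:
        pass
    if gdal_version is None:
        try:
            import fiona
            gdal_version = fiona.env.get_gdal_release_name()
            gdal_dir = fiona.env.GDALDataFinder().search()
        except Exception:
            pass
    return gdal_version, gdal_dir
```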

View File

@@ -5,11 +5,13 @@ geopandas.clip
A module to clip vector data using GeoPandas.
"""
import warnings
import numpy as np
import pandas.api.types
from shapely.geometry import Polygon, MultiPolygon, box
from shapely.geometry import MultiPolygon, Polygon, box
from geopandas import GeoDataFrame, GeoSeries
from geopandas.array import _check_crs, _crs_mismatch_warn
@@ -21,7 +23,7 @@ def _mask_is_list_like_rectangle(mask):
)
def _clip_gdf_with_mask(gdf, mask):
def _clip_gdf_with_mask(gdf, mask, sort=False):
"""Clip geometry to the polygon/rectangle extent.
Clip an input GeoDataFrame to the polygon extent of the polygon
@@ -35,6 +37,10 @@ def _clip_gdf_with_mask(gdf, mask):
mask : (Multi)Polygon, list-like
Reference polygon/rectangle for clipping.
sort : boolean, default False
If True, the results will be sorted in ascending order using the
geometries' indexes as the primary key.
Returns
-------
GeoDataFrame
@@ -47,7 +53,9 @@ def _clip_gdf_with_mask(gdf, mask):
else:
intersection_polygon = mask
gdf_sub = gdf.iloc[gdf.sindex.query(intersection_polygon, predicate="intersects")]
gdf_sub = gdf.iloc[
gdf.sindex.query(intersection_polygon, predicate="intersects", sort=sort)
]
# For performance reasons points don't need to be intersected with poly
non_point_mask = gdf_sub.geom_type != "Point"
@@ -60,13 +68,13 @@ def _clip_gdf_with_mask(gdf, mask):
if isinstance(gdf_sub, GeoDataFrame):
clipped = gdf_sub.copy()
if clipping_by_rectangle:
clipped.loc[
non_point_mask, clipped._geometry_column_name
] = gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
clipped.loc[non_point_mask, clipped._geometry_column_name] = (
gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
)
else:
clipped.loc[
non_point_mask, clipped._geometry_column_name
] = gdf_sub.geometry.values[non_point_mask].intersection(mask)
clipped.loc[non_point_mask, clipped._geometry_column_name] = (
gdf_sub.geometry.values[non_point_mask].intersection(mask)
)
else:
# GeoSeries
clipped = gdf_sub.copy()
@@ -81,7 +89,7 @@ def _clip_gdf_with_mask(gdf, mask):
return clipped
def clip(gdf, mask, keep_geom_type=False):
def clip(gdf, mask, keep_geom_type=False, sort=False):
"""Clip points, lines, or polygon geometries to the mask extent.
Both layers must be in the same Coordinate Reference System (CRS).
@@ -112,6 +120,9 @@ def clip(gdf, mask, keep_geom_type=False):
If True, return only geometries of original type in case of intersection
resulting in multiple geometry types or GeometryCollections.
If False, return all resulting geometries (potentially mixed-types).
sort : boolean, default False
If True, the results will be sorted in ascending order using the
geometries' indexes as the primary key.
Returns
-------
@@ -185,11 +196,11 @@ def clip(gdf, mask, keep_geom_type=False):
return gdf.iloc[:0]
if isinstance(mask, (GeoDataFrame, GeoSeries)):
combined_mask = mask.geometry.unary_union
combined_mask = mask.geometry.union_all()
else:
combined_mask = mask
clipped = _clip_gdf_with_mask(gdf, combined_mask)
clipped = _clip_gdf_with_mask(gdf, combined_mask, sort=sort)
if keep_geom_type:
geomcoll_concat = (clipped.geom_type == "GeometryCollection").any()
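The new `sort` keyword is forwarded to the spatial-index query, so callers can ask for index-ordered output instead of spatial-index order. A small usage sketch (the coordinates and frame are invented for illustration):

```python
import geopandas
from shapely.geometry import Point, box

# points deliberately stored out of index order relative to their positions
pts = geopandas.GeoDataFrame(geometry=[Point(3, 3), Point(1, 1), Point(2, 2)])
mask = box(0, 0, 4, 4)

clipped = geopandas.clip(pts, mask)                     # order as returned by the spatial index
clipped_sorted = geopandas.clip(pts, mask, sort=True)   # rows sorted by the original index
print(clipped_sorted.index.tolist())                    # [0, 1, 2]
```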

View File

@@ -1,5 +1,5 @@
from collections import defaultdict
import time
from collections import defaultdict
import pandas as pd
@@ -120,8 +120,8 @@ def reverse_geocode(points, provider=None, **kwargs):
def _query(data, forward, provider, throttle_time, **kwargs):
# generic wrapper for calls over lists to geopy Geocoders
from geopy.geocoders.base import GeocoderQueryError
from geopy.geocoders import get_geocoder_for_service
from geopy.geocoders.base import GeocoderQueryError
if forward:
if not isinstance(data, pd.Series):

View File

@@ -4,8 +4,8 @@ from functools import reduce
import numpy as np
import pandas as pd
from geopandas import _compat as compat
from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import PANDAS_GE_30
from geopandas.array import _check_crs, _crs_mismatch_warn
@@ -15,12 +15,15 @@ def _ensure_geometry_column(df):
If another column with that name exists, it will be dropped.
"""
if not df._geometry_column_name == "geometry":
if "geometry" in df.columns:
df.drop("geometry", axis=1, inplace=True)
df.rename(
columns={df._geometry_column_name: "geometry"}, copy=False, inplace=True
)
df.set_geometry("geometry", inplace=True)
if PANDAS_GE_30:
if "geometry" in df.columns:
df = df.drop("geometry", axis=1)
df = df.rename_geometry("geometry")
else:
if "geometry" in df.columns:
df.drop("geometry", axis=1, inplace=True)
df.rename_geometry("geometry", inplace=True)
return df
def _overlay_intersection(df1, df2):
@@ -37,7 +40,7 @@ def _overlay_intersection(df1, df2):
right.reset_index(drop=True, inplace=True)
intersections = left.intersection(right)
poly_ix = intersections.geom_type.isin(["Polygon", "MultiPolygon"])
intersections.loc[poly_ix] = intersections[poly_ix].buffer(0)
intersections.loc[poly_ix] = intersections[poly_ix].make_valid()
# only keep actual intersecting geometries
pairs_intersect = pd.DataFrame({"__idx1": idx1, "__idx2": idx2})
@@ -66,8 +69,8 @@ def _overlay_intersection(df1, df2):
right_index=True,
suffixes=("_1", "_2"),
)
result["__idx1"] = None
result["__idx2"] = None
result["__idx1"] = np.nan
result["__idx2"] = np.nan
return result[
result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
]
@@ -94,10 +97,7 @@ def _overlay_difference(df1, df2):
new_g.append(new)
differences = GeoSeries(new_g, index=df1.index, crs=df1.crs)
poly_ix = differences.geom_type.isin(["Polygon", "MultiPolygon"])
if compat.USE_PYGEOS:
differences.loc[poly_ix] = differences[poly_ix].make_valid()
else:
differences.loc[poly_ix] = differences[poly_ix].buffer(0)
differences.loc[poly_ix] = differences[poly_ix].make_valid()
geom_diff = differences[~differences.is_empty].copy()
dfdiff = df1[~differences.is_empty].copy()
dfdiff[dfdiff._geometry_column_name] = geom_diff
@@ -115,8 +115,8 @@ def _overlay_symmetric_diff(df1, df2):
dfdiff1["__idx2"] = np.nan
dfdiff2["__idx1"] = np.nan
# ensure geometry name (otherwise merge goes wrong)
_ensure_geometry_column(dfdiff1)
_ensure_geometry_column(dfdiff2)
dfdiff1 = _ensure_geometry_column(dfdiff1)
dfdiff2 = _ensure_geometry_column(dfdiff2)
# combine both 'difference' dataframes
dfsym = dfdiff1.merge(
dfdiff2, on=["__idx1", "__idx2"], how="outer", suffixes=("_1", "_2")
@@ -170,7 +170,7 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
which will set keep_geom_type to True but warn upon dropping
geometries.
make_valid : bool, default True
If True, any invalid input geometries are corrected with a call to `buffer(0)`,
If True, any invalid input geometries are corrected with a call to make_valid(),
if False, a `ValueError` is raised if any input geometries are invalid.
Returns
@@ -190,40 +190,40 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
>>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})
>>> geopandas.overlay(df1, df2, how='union')
df1_data df2_data geometry
0 1.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
1 2.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
2 2.0 2.0 POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
3 1.0 NaN POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
4 2.0 NaN MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000...
5 NaN 1.0 MULTIPOLYGON (((2.00000 3.00000, 2.00000 2.000...
6 NaN 2.0 POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....
df1_data df2_data geometry
0 1.0 1.0 POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
1 2.0 1.0 POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
2 2.0 2.0 POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
3 1.0 NaN POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
4 2.0 NaN MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
5 NaN 1.0 MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
6 NaN 2.0 POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))
>>> geopandas.overlay(df1, df2, how='intersection')
df1_data df2_data geometry
0 1 1 POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
1 2 1 POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
2 2 2 POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
df1_data df2_data geometry
0 1 1 POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
1 2 1 POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
2 2 2 POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
>>> geopandas.overlay(df1, df2, how='symmetric_difference')
df1_data df2_data geometry
0 1.0 NaN POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
1 2.0 NaN MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000...
2 NaN 1.0 MULTIPOLYGON (((2.00000 3.00000, 2.00000 2.000...
3 NaN 2.0 POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....
df1_data df2_data geometry
0 1.0 NaN POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
1 2.0 NaN MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
2 NaN 1.0 MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
3 NaN 2.0 POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))
>>> geopandas.overlay(df1, df2, how='difference')
geometry df1_data
0 POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0.... 1
1 MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000... 2
geometry df1_data
0 POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0)) 1
1 MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4... 2
>>> geopandas.overlay(df1, df2, how='identity')
df1_data df2_data geometry
0 1.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
1 2.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
2 2.0 2.0 POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
3 1.0 NaN POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
4 2.0 NaN MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000...
0 1.0 1.0 POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
1 2.0 1.0 POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
2 2.0 2.0 POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
3 1.0 NaN POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
4 2.0 NaN MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
See also
--------
@@ -300,7 +300,7 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
mask = ~df.geometry.is_valid
col = df._geometry_column_name
if make_valid:
df.loc[mask, col] = df.loc[mask, col].buffer(0)
df.loc[mask, col] = df.loc[mask, col].make_valid()
elif mask.any():
raise ValueError(
"You have passed make_valid=False along with "
@@ -362,7 +362,7 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
# level_0 created with above reset_index operation
# and represents the original geometry collections
# TODO avoiding dissolve to call unary_union in this case could further
# TODO avoiding dissolve to call union_all in this case could further
# improve performance (we only need to collect geometries in their
# respective Multi version)
dissolved = exploded.dissolve(by="level_0")
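Several spots above replace the old `buffer(0)` repair trick with the explicit `make_valid()` method. A standalone sketch of what that repair does (assuming Shapely 2.x, where `GeoSeries.make_valid` is available):

```python
import geopandas
from shapely.geometry import Polygon

# a self-intersecting "bowtie" polygon is invalid
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
gs = geopandas.GeoSeries([bowtie])

print(bool(gs.is_valid[0]))          # False
repaired = gs.make_valid()           # replaces the older gs.buffer(0) workaround
print(bool(repaired.is_valid[0]))    # True
```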

View File

@@ -1,11 +1,12 @@
from typing import Optional
import warnings
from functools import partial
from typing import Optional
import numpy as np
import pandas as pd
from geopandas import GeoDataFrame
from geopandas import _compat as compat
from geopandas._compat import PANDAS_GE_30
from geopandas.array import _check_crs, _crs_mismatch_warn
@@ -16,6 +17,8 @@ def sjoin(
predicate="intersects",
lsuffix="left",
rsuffix="right",
distance=None,
on_attribute=None,
**kwargs,
):
"""Spatial join of two GeoDataFrames.
@@ -43,6 +46,16 @@ def sjoin(
Suffix to apply to overlapping column names (left GeoDataFrame).
rsuffix : string, default 'right'
Suffix to apply to overlapping column names (right GeoDataFrame).
distance : number or array_like, optional
Distance(s) around each input geometry within which to query the tree
for the 'dwithin' predicate. If array_like, must be
one-dimensional with length equal to the length of the left GeoDataFrame.
Required if ``predicate='dwithin'``.
on_attribute : string, list or tuple
Column name(s) to join on as an additional join restriction on top
of the spatial predicate. These must be found in both DataFrames.
If set, observations are joined only if the predicate applies
and values in specified columns match.
Examples
--------
@@ -74,12 +87,12 @@ def sjoin(
>>> groceries_w_communities = geopandas.sjoin(groceries, chicago)
>>> groceries_w_communities.head() # doctest: +SKIP
OBJECTID Ycoord Xcoord ... GonorrF GonorrM Tuberc
0 16 41.973266 -87.657073 ... 170.8 468.7 13.6
87 365 41.961707 -87.654058 ... 170.8 468.7 13.6
90 373 41.963131 -87.656352 ... 170.8 468.7 13.6
140 582 41.969131 -87.674882 ... 170.8 468.7 13.6
1 18 41.696367 -87.681315 ... 800.5 741.1 2.6
OBJECTID community geometry
0 16 UPTOWN MULTIPOINT ((-87.65661 41.97321))
1 18 MORGAN PARK MULTIPOINT ((-87.68136 41.69713))
2 22 NEAR WEST SIDE MULTIPOINT ((-87.63918 41.86847))
3 23 NEAR WEST SIDE MULTIPOINT ((-87.65495 41.87783))
4 27 CHATHAM MULTIPOINT ((-87.62715 41.73623))
[5 rows x 95 columns]
See also
@@ -92,40 +105,42 @@ def sjoin(
Every operation in GeoPandas is planar, i.e. the potential third
dimension is not taken into account.
"""
if "op" in kwargs:
op = kwargs.pop("op")
deprecation_message = (
"The `op` parameter is deprecated and will be removed"
" in a future release. Please use the `predicate` parameter"
" instead."
)
if predicate != "intersects" and op != predicate:
override_message = (
"A non-default value for `predicate` was passed"
f' (got `predicate="{predicate}"`'
f' in combination with `op="{op}"`).'
" The value of `predicate` will be overridden by the value of `op`,"
" , which may result in unexpected behavior."
f"\n{deprecation_message}"
)
warnings.warn(override_message, UserWarning, stacklevel=4)
else:
warnings.warn(deprecation_message, FutureWarning, stacklevel=4)
predicate = op
if kwargs:
first = next(iter(kwargs.keys()))
raise TypeError(f"sjoin() got an unexpected keyword argument '{first}'")
_basic_checks(left_df, right_df, how, lsuffix, rsuffix)
on_attribute = _maybe_make_list(on_attribute)
indices = _geom_predicate_query(left_df, right_df, predicate)
_basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=on_attribute)
joined = _frame_join(indices, left_df, right_df, how, lsuffix, rsuffix)
indices = _geom_predicate_query(
left_df, right_df, predicate, distance, on_attribute=on_attribute
)
joined, _ = _frame_join(
left_df,
right_df,
indices,
None,
how,
lsuffix,
rsuffix,
predicate,
on_attribute=on_attribute,
)
return joined
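For context, a hedged usage sketch of the two new `sjoin` keywords added here: `distance` (required for the `'dwithin'` predicate, which itself needs GEOS >= 3.10) and `on_attribute`. The frames and column names are invented for illustration:

```python
import geopandas
from shapely.geometry import Point

left = geopandas.GeoDataFrame(
    {"kind": ["a", "b"], "geometry": [Point(0, 0), Point(5, 5)]}
)
right = geopandas.GeoDataFrame(
    {"kind": ["a"], "geometry": [Point(0.5, 0.5)]}
)

# keep pairs whose geometries lie within 1 unit of each other
near = geopandas.sjoin(left, right, predicate="dwithin", distance=1.0)

# additionally require the non-spatial column "kind" to match on both sides
near_same_kind = geopandas.sjoin(
    left, right, predicate="dwithin", distance=1.0, on_attribute="kind"
)
```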
def _basic_checks(left_df, right_df, how, lsuffix, rsuffix):
def _maybe_make_list(obj):
if isinstance(obj, tuple):
return list(obj)
if obj is not None and not isinstance(obj, list):
return [obj]
return obj
def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
"""Checks the validity of join input parameters.
`how` must be one of the valid options.
@@ -142,6 +157,8 @@ def _basic_checks(left_df, right_df, how, lsuffix, rsuffix):
left index suffix
rsuffix : str
right index suffix
on_attribute : list, default None
list of column names to merge on along with geometry
"""
if not isinstance(left_df, GeoDataFrame):
raise ValueError(
@@ -162,20 +179,28 @@ def _basic_checks(left_df, right_df, how, lsuffix, rsuffix):
if not _check_crs(left_df, right_df):
_crs_mismatch_warn(left_df, right_df, stacklevel=4)
index_left = "index_{}".format(lsuffix)
index_right = "index_{}".format(rsuffix)
# due to GH 352
if any(left_df.columns.isin([index_left, index_right])) or any(
right_df.columns.isin([index_left, index_right])
):
raise ValueError(
"'{0}' and '{1}' cannot be names in the frames being"
" joined".format(index_left, index_right)
)
if on_attribute:
for attr in on_attribute:
if (attr not in left_df) and (attr not in right_df):
raise ValueError(
f"Expected column {attr} is missing from both of the dataframes."
)
if attr not in left_df:
raise ValueError(
f"Expected column {attr} is missing from the left dataframe."
)
if attr not in right_df:
raise ValueError(
f"Expected column {attr} is missing from the right dataframe."
)
if attr in (left_df.geometry.name, right_df.geometry.name):
raise ValueError(
"Active geometry column cannot be used as an input "
"for on_attribute parameter."
)
def _geom_predicate_query(left_df, right_df, predicate):
def _geom_predicate_query(left_df, right_df, predicate, distance, on_attribute=None):
"""Compute geometric comparisons and get matching indices.
Parameters
@@ -184,6 +209,9 @@ def _geom_predicate_query(left_df, right_df, predicate):
right_df : GeoDataFrame
predicate : string
Binary predicate to query.
on_attribute: list, default None
list of column names to merge on along with geometry
Returns
-------
@@ -191,163 +219,292 @@ def _geom_predicate_query(left_df, right_df, predicate):
DataFrame with matching indices in
columns named `_key_left` and `_key_right`.
"""
with warnings.catch_warnings():
# We don't need to show our own warning here
# TODO remove this once the deprecation has been enforced
warnings.filterwarnings(
"ignore", "Generated spatial index is empty", FutureWarning
)
original_predicate = predicate
original_predicate = predicate
if predicate == "within":
# within is implemented as the inverse of contains
# contains is a faster predicate
# see discussion at https://github.com/geopandas/geopandas/pull/1421
predicate = "contains"
sindex = left_df.sindex
input_geoms = right_df.geometry
else:
# all other predicates are symmetric
# keep them the same
sindex = right_df.sindex
input_geoms = left_df.geometry
if predicate == "within":
# within is implemented as the inverse of contains
# contains is a faster predicate
# see discussion at https://github.com/geopandas/geopandas/pull/1421
predicate = "contains"
sindex = left_df.sindex
input_geoms = right_df.geometry
else:
# all other predicates are symmetric
# keep them the same
sindex = right_df.sindex
input_geoms = left_df.geometry
if sindex:
l_idx, r_idx = sindex.query(input_geoms, predicate=predicate, sort=False)
indices = pd.DataFrame({"_key_left": l_idx, "_key_right": r_idx})
l_idx, r_idx = sindex.query(
input_geoms, predicate=predicate, sort=False, distance=distance
)
else:
# when sindex is empty / has no valid geometries
indices = pd.DataFrame(columns=["_key_left", "_key_right"], dtype=float)
l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
if original_predicate == "within":
# within is implemented as the inverse of contains
# flip back the results
indices = indices.rename(
columns={"_key_left": "_key_right", "_key_right": "_key_left"}
r_idx, l_idx = l_idx, r_idx
indexer = np.lexsort((r_idx, l_idx))
l_idx = l_idx[indexer]
r_idx = r_idx[indexer]
if on_attribute:
for attr in on_attribute:
(l_idx, r_idx), _ = _filter_shared_attribute(
left_df, right_df, l_idx, r_idx, attr
)
return l_idx, r_idx
def _reset_index_with_suffix(df, suffix, other):
"""
Equivalent of df.reset_index(), but with adding 'suffix' to auto-generated
column names.
"""
index_original = df.index.names
if PANDAS_GE_30:
df_reset = df.reset_index()
else:
# we already made a copy of the dataframe in _frame_join before getting here
df_reset = df
df_reset.reset_index(inplace=True)
column_names = df_reset.columns.to_numpy(copy=True)
for i, label in enumerate(index_original):
# if the original label was None, add suffix to auto-generated name
if label is None:
new_label = column_names[i]
if "level" in new_label:
# reset_index of MultiIndex gives "level_i" names, preserve the "i"
lev = new_label.split("_")[1]
new_label = f"index_{suffix}{lev}"
else:
new_label = f"index_{suffix}"
# check new label will not be in other dataframe
if new_label in df.columns or new_label in other.columns:
raise ValueError(
"'{0}' cannot be a column name in the frames being"
" joined".format(new_label)
)
column_names[i] = new_label
return df_reset, pd.Index(column_names)
def _process_column_names_with_suffix(
left: pd.Index, right: pd.Index, suffixes, left_df, right_df
):
"""
Add suffixes to overlapping labels (ignoring the geometry column).
This is based on pandas' merge logic at https://github.com/pandas-dev/pandas/blob/
a0779adb183345a8eb4be58b3ad00c223da58768/pandas/core/reshape/merge.py#L2300-L2370
"""
to_rename = left.intersection(right)
if len(to_rename) == 0:
return left, right
lsuffix, rsuffix = suffixes
if not lsuffix and not rsuffix:
raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
def renamer(x, suffix, geometry):
if x in to_rename and x != geometry and suffix is not None:
return f"{x}_{suffix}"
return x
lrenamer = partial(
renamer,
suffix=lsuffix,
geometry=getattr(left_df, "_geometry_column_name", None),
)
rrenamer = partial(
renamer,
suffix=rsuffix,
geometry=getattr(right_df, "_geometry_column_name", None),
)
# TODO retain index name?
left_renamed = pd.Index([lrenamer(lab) for lab in left])
right_renamed = pd.Index([rrenamer(lab) for lab in right])
dups = []
if not left_renamed.is_unique:
# Only warn when duplicates are caused because of suffixes, already duplicated
# columns in origin should not warn
dups = left_renamed[(left_renamed.duplicated()) & (~left.duplicated())].tolist()
if not right_renamed.is_unique:
dups.extend(
right_renamed[(right_renamed.duplicated()) & (~right.duplicated())].tolist()
)
# TODO turn this into an error (pandas has done so as well)
if dups:
warnings.warn(
f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the "
f"result is deprecated and will raise a MergeError in a future version.",
FutureWarning,
stacklevel=4,
)
return indices
return left_renamed, right_renamed
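A toy illustration of the suffixing rule implemented above: labels present in both frames get the left/right suffix, while the geometry column is left alone (column names here are hypothetical):

```python
import pandas as pd

left_cols = pd.Index(["id", "value", "geometry"])
right_cols = pd.Index(["id", "score", "geometry"])
overlap = left_cols.intersection(right_cols)   # contains "id" and "geometry"

def rename(label, suffix, geometry="geometry"):
    return f"{label}_{suffix}" if label in overlap and label != geometry else label

print([rename(c, "left") for c in left_cols])    # ['id_left', 'value', 'geometry']
print([rename(c, "right") for c in right_cols])  # ['id_right', 'score', 'geometry']
```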
def _frame_join(join_df, left_df, right_df, how, lsuffix, rsuffix):
def _restore_index(joined, index_names, index_names_original):
"""
Set back the original index columns, and restore their name as `None`
if they didn't have a name originally.
"""
if PANDAS_GE_30:
joined = joined.set_index(list(index_names))
else:
joined.set_index(list(index_names), inplace=True)
# restore the fact that the index didn't have a name
joined_index_names = list(joined.index.names)
for i, label in enumerate(index_names_original):
if label is None:
joined_index_names[i] = None
joined.index.names = joined_index_names
return joined
def _adjust_indexers(indices, distances, original_length, how, predicate):
"""
The left/right indexers from the query represents an inner join.
For a left or right join, we need to adjust them to include the rows
that would not be present in an inner join.
"""
# the indices represent an inner join, no adjustment needed
if how == "inner":
return indices, distances
l_idx, r_idx = indices
if how == "right":
# re-sort so it is sorted by the right indexer
indexer = np.lexsort((l_idx, r_idx))
l_idx, r_idx = l_idx[indexer], r_idx[indexer]
if distances is not None:
distances = distances[indexer]
# switch order
r_idx, l_idx = l_idx, r_idx
# determine which indices are missing and where they would need to be inserted
idx = np.arange(original_length)
l_idx_missing = idx[~np.isin(idx, l_idx)]
insert_idx = np.searchsorted(l_idx, l_idx_missing)
# for the left indexer, insert those missing indices
l_idx = np.insert(l_idx, insert_idx, l_idx_missing)
# for the right indexer, insert -1 -> to get missing values in pandas' reindexing
r_idx = np.insert(r_idx, insert_idx, -1)
# for the indices, already insert those missing values manually
if distances is not None:
distances = np.insert(distances, insert_idx, np.nan)
if how == "right":
# switch back
l_idx, r_idx = r_idx, l_idx
return (l_idx, r_idx), distances
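The -1 insertion that turns an inner-join indexer into a left-join indexer can be seen in isolation with plain NumPy (a standalone sketch, not the library function itself):

```python
import numpy as np

# inner-join result: left rows 0 and 2 matched right rows 5 and 7; left row 1 had no match
l_idx = np.array([0, 2])
r_idx = np.array([5, 7])
original_length = 3   # number of rows in the left frame

idx = np.arange(original_length)
missing = idx[~np.isin(idx, l_idx)]            # array([1])
insert_at = np.searchsorted(l_idx, missing)    # array([1])

l_idx = np.insert(l_idx, insert_at, missing)   # array([0, 1, 2])
r_idx = np.insert(r_idx, insert_at, -1)        # array([ 5, -1,  7]); -1 later becomes NaN
```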
def _frame_join(
left_df,
right_df,
indices,
distances,
how,
lsuffix,
rsuffix,
predicate,
on_attribute=None,
):
"""Join the GeoDataFrames at the DataFrame level.
Parameters
----------
join_df : DataFrame
Indices and join data returned by the geometric join.
Must have columns `_key_left` and `_key_right`
with integer indices representing the matches
from `left_df` and `right_df` respectively.
Additional columns may be included and will be copied to
the resultant GeoDataFrame.
left_df : GeoDataFrame
right_df : GeoDataFrame
indices : tuple of ndarray
Indices returned by the geometric join. Tuple with integer
indices representing the matches from `left_df` and `right_df`
respectively.
distances : ndarray, optional
Passed through and adapted based on the indices, if needed.
how : string
The type of join to use on the DataFrame level.
lsuffix : string
Suffix to apply to overlapping column names (left GeoDataFrame).
rsuffix : string
Suffix to apply to overlapping column names (right GeoDataFrame).
how : string
The type of join to use on the DataFrame level.
on_attribute: list, default None
list of column names to merge on along with geometry
Returns
-------
GeoDataFrame
Joined GeoDataFrame.
"""
# the spatial index only allows limited (numeric) index types, but an
# index in geopandas may be any arbitrary dtype. so reset both indices now
# and store references to the original indices, to be reaffixed later.
# GH 352
index_left = "index_{}".format(lsuffix)
left_df = left_df.copy(deep=True)
try:
left_index_name = left_df.index.name
left_df.index = left_df.index.rename(index_left)
except TypeError:
index_left = [
"index_{}".format(lsuffix + str(pos))
for pos, ix in enumerate(left_df.index.names)
]
left_index_name = left_df.index.names
left_df.index = left_df.index.rename(index_left)
left_df = left_df.reset_index()
if on_attribute: # avoid renaming or duplicating shared column
right_df = right_df.drop(on_attribute, axis=1)
index_right = "index_{}".format(rsuffix)
right_df = right_df.copy(deep=True)
try:
right_index_name = right_df.index.name
right_df.index = right_df.index.rename(index_right)
except TypeError:
index_right = [
"index_{}".format(rsuffix + str(pos))
for pos, ix in enumerate(right_df.index.names)
]
right_index_name = right_df.index.names
right_df.index = right_df.index.rename(index_right)
right_df = right_df.reset_index()
if how in ("inner", "left"):
right_df = right_df.drop(right_df.geometry.name, axis=1)
else: # how == 'right':
left_df = left_df.drop(left_df.geometry.name, axis=1)
left_df = left_df.copy(deep=False)
left_nlevels = left_df.index.nlevels
left_index_original = left_df.index.names
left_df, left_column_names = _reset_index_with_suffix(left_df, lsuffix, right_df)
right_df = right_df.copy(deep=False)
right_nlevels = right_df.index.nlevels
right_index_original = right_df.index.names
right_df, right_column_names = _reset_index_with_suffix(right_df, rsuffix, left_df)
# if conflicting names in left and right, add suffix
left_column_names, right_column_names = _process_column_names_with_suffix(
left_column_names,
right_column_names,
(lsuffix, rsuffix),
left_df,
right_df,
)
left_df.columns = left_column_names
right_df.columns = right_column_names
left_index = left_df.columns[:left_nlevels]
right_index = right_df.columns[:right_nlevels]
# perform join on the dataframes
if how == "inner":
join_df = join_df.set_index("_key_left")
joined = (
left_df.merge(join_df, left_index=True, right_index=True)
.merge(
right_df.drop(right_df.geometry.name, axis=1),
left_on="_key_right",
right_index=True,
suffixes=("_{}".format(lsuffix), "_{}".format(rsuffix)),
)
.set_index(index_left)
.drop(["_key_right"], axis=1)
)
if isinstance(index_left, list):
joined.index.names = left_index_name
else:
joined.index.name = left_index_name
elif how == "left":
join_df = join_df.set_index("_key_left")
joined = (
left_df.merge(join_df, left_index=True, right_index=True, how="left")
.merge(
right_df.drop(right_df.geometry.name, axis=1),
how="left",
left_on="_key_right",
right_index=True,
suffixes=("_{}".format(lsuffix), "_{}".format(rsuffix)),
)
.set_index(index_left)
.drop(["_key_right"], axis=1)
)
if isinstance(index_left, list):
joined.index.names = left_index_name
else:
joined.index.name = left_index_name
original_length = len(right_df) if how == "right" else len(left_df)
(l_idx, r_idx), distances = _adjust_indexers(
indices, distances, original_length, how, predicate
)
# the `take` method doesn't allow introducing NaNs with -1 indices
# left = left_df.take(l_idx)
# therefore we are using the private _reindex_with_indexers as workaround
new_index = pd.RangeIndex(len(l_idx))
left = left_df._reindex_with_indexers({0: (new_index, l_idx)})
right = right_df._reindex_with_indexers({0: (new_index, r_idx)})
if PANDAS_GE_30:
kwargs = {}
else:
kwargs = dict(copy=False)
joined = pd.concat([left, right], axis=1, **kwargs)
if how in ("inner", "left"):
joined = _restore_index(joined, left_index, left_index_original)
else: # how == 'right':
joined = (
left_df.drop(left_df.geometry.name, axis=1)
.merge(
join_df.merge(
right_df, left_on="_key_right", right_index=True, how="right"
),
left_index=True,
right_on="_key_left",
how="right",
suffixes=("_{}".format(lsuffix), "_{}".format(rsuffix)),
)
.set_index(index_right)
.drop(["_key_left", "_key_right"], axis=1)
.set_geometry(right_df.geometry.name)
)
if isinstance(index_right, list):
joined.index.names = right_index_name
else:
joined.index.name = right_index_name
joined = joined.set_geometry(right_df.geometry.name)
joined = _restore_index(joined, right_index, right_index_original)
return joined
return joined, distances
def _nearest_query(
@@ -357,13 +514,8 @@ def _nearest_query(
how: str,
return_distance: bool,
exclusive: bool,
on_attribute: Optional[list] = None,
):
if not (compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_010)):
raise NotImplementedError(
"Currently, only PyGEOS >= 0.10.0 or Shapely >= 2.0 supports "
"`nearest_all`. " + compat.INSTALL_PYGEOS_ERROR
)
# use the opposite of the join direction for the index
use_left_as_sindex = how == "right"
if use_left_as_sindex:
@@ -393,15 +545,37 @@ def _nearest_query(
distances = distances[sort_order]
else:
l_idx, r_idx = input_idx, tree_idx
join_df = pd.DataFrame(
{"_key_left": l_idx, "_key_right": r_idx, "distances": distances}
)
else:
# when sindex is empty / has no valid geometries
join_df = pd.DataFrame(
columns=["_key_left", "_key_right", "distances"], dtype=float
)
return join_df
l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
if return_distance:
distances = np.array([], dtype=np.float64)
else:
distances = None
if on_attribute:
for attr in on_attribute:
(l_idx, r_idx), shared_attribute_rows = _filter_shared_attribute(
left_df, right_df, l_idx, r_idx, attr
)
distances = distances[shared_attribute_rows]
return (l_idx, r_idx), distances
def _filter_shared_attribute(left_df, right_df, l_idx, r_idx, attribute):
"""
Returns the indices for the left and right dataframe that share the same entry
in the attribute column. Also returns a Boolean `shared_attribute_rows` for rows
with the same entry.
"""
shared_attribute_rows = (
left_df[attribute].iloc[l_idx].values == right_df[attribute].iloc[r_idx].values
)
l_idx = l_idx[shared_attribute_rows]
r_idx = r_idx[shared_attribute_rows]
return (l_idx, r_idx), shared_attribute_rows
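The attribute filter above boils down to a boolean mask over the matched index pairs; a tiny self-contained example of the same idea:

```python
import numpy as np
import pandas as pd

left_attr = pd.Series(["x", "y", "z"])
right_attr = pd.Series(["x", "q"])

l_idx = np.array([0, 1, 2])
r_idx = np.array([0, 0, 1])

# keep only pairs whose attribute values agree on both sides
mask = left_attr.iloc[l_idx].values == right_attr.iloc[r_idx].values  # [True, False, False]
l_idx, r_idx = l_idx[mask], r_idx[mask]   # (array([0]), array([0]))
```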
def sjoin_nearest(
@@ -453,7 +627,6 @@ def sjoin_nearest(
exclusive : bool, default False
If True, the nearest geometries that are equal to the input geometry
will not be returned, default False.
Requires Shapely >= 2.0.
Examples
--------
@@ -466,7 +639,7 @@ def sjoin_nearest(
... ).to_crs(groceries.crs)
>>> chicago.head() # doctest: +SKIP
ComAreaID ... geometry
ComAreaID ... geometry
0 35 ... POLYGON ((-87.60914 41.84469, -87.60915 41.844...
1 36 ... POLYGON ((-87.59215 41.81693, -87.59231 41.816...
2 37 ... POLYGON ((-87.62880 41.80189, -87.62879 41.801...
@@ -475,19 +648,19 @@ def sjoin_nearest(
[5 rows x 87 columns]
>>> groceries.head() # doctest: +SKIP
OBJECTID Ycoord ... Category geometry
0 16 41.973266 ... NaN MULTIPOINT (-87.65661 41.97321)
1 18 41.696367 ... NaN MULTIPOINT (-87.68136 41.69713)
2 22 41.868634 ... NaN MULTIPOINT (-87.63918 41.86847)
3 23 41.877590 ... new MULTIPOINT (-87.65495 41.87783)
4 27 41.737696 ... NaN MULTIPOINT (-87.62715 41.73623)
OBJECTID Ycoord ... Category geometry
0 16 41.973266 ... NaN MULTIPOINT ((-87.65661 41.97321))
1 18 41.696367 ... NaN MULTIPOINT ((-87.68136 41.69713))
2 22 41.868634 ... NaN MULTIPOINT ((-87.63918 41.86847))
3 23 41.877590 ... new MULTIPOINT ((-87.65495 41.87783))
4 27 41.737696 ... NaN MULTIPOINT ((-87.62715 41.73623))
[5 rows x 8 columns]
>>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago)
>>> groceries_w_communities[["Chain", "community", "geometry"]].head(2)
Chain community geometry
0 VIET HOA PLAZA UPTOWN MULTIPOINT (1168268.672 1933554.350)
87 JEWEL OSCO UPTOWN MULTIPOINT (1168837.980 1929246.962)
Chain community geometry
0 VIET HOA PLAZA UPTOWN MULTIPOINT ((1168268.672 1933554.35))
1 COUNTY FAIR FOODS MORGAN PARK MULTIPOINT ((1162302.618 1832900.224))
To include the distances:
@@ -495,10 +668,10 @@ def sjoin_nearest(
>>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago, \
distance_col="distances")
>>> groceries_w_communities[["Chain", "community", \
"distances"]].head(2) # doctest: +SKIP
Chain community distances
0 VIET HOA PLAZA UPTOWN 0.0
87 JEWEL OSCO UPTOWN 0.0
"distances"]].head(2)
Chain community distances
0 VIET HOA PLAZA UPTOWN 0.0
1 COUNTY FAIR FOODS MORGAN PARK 0.0
In the following example, we get multiple groceries for Uptown because all
results are equidistant (in this case zero because they intersect).
@@ -508,7 +681,7 @@ distance_col="distances")
distance_col="distances", how="right")
>>> uptown_results = \
chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
>>> uptown_results[["Chain", "community"]] # doctest: +SKIP
>>> uptown_results[["Chain", "community"]]
Chain community
30 VIET HOA PLAZA UPTOWN
30 JEWEL OSCO UPTOWN
@@ -528,6 +701,7 @@ chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
Every operation in GeoPandas is planar, i.e. the potential third
dimension is not taken into account.
"""
_basic_checks(left_df, right_df, how, lsuffix, rsuffix)
left_df.geometry.values.check_geographic_crs(stacklevel=1)
@@ -535,19 +709,26 @@ chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
return_distance = distance_col is not None
join_df = _nearest_query(
left_df, right_df, max_distance, how, return_distance, exclusive
indices, distances = _nearest_query(
left_df,
right_df,
max_distance,
how,
return_distance,
exclusive,
)
joined, distances = _frame_join(
left_df,
right_df,
indices,
distances,
how,
lsuffix,
rsuffix,
None,
)
if return_distance:
join_df = join_df.rename(columns={"distances": distance_col})
else:
join_df.pop("distances")
joined = _frame_join(join_df, left_df, right_df, how, lsuffix, rsuffix)
if return_distance:
columns = [c for c in joined.columns if c != distance_col] + [distance_col]
joined = joined[columns]
joined[distance_col] = distances
return joined
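With the refactor, `sjoin_nearest` receives the distances straight from the query and appends them as the final column when `distance_col` is given. A brief usage sketch with invented layers:

```python
import geopandas
from shapely.geometry import Point

stops = geopandas.GeoDataFrame({"geometry": [Point(0, 0), Point(3, 3)]})
shops = geopandas.GeoDataFrame(
    {"name": ["a", "b"], "geometry": [Point(1, 0), Point(3, 4)]}
)

nearest = geopandas.sjoin_nearest(stops, shops, distance_col="dist")
print(nearest[["name", "dist"]])
#   name  dist
# 0    a   1.0
# 1    b   1.0
```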

View File

@@ -1,28 +1,28 @@
"""Tests for the clip module."""
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import (
Polygon,
Point,
LineString,
LinearRing,
GeometryCollection,
LinearRing,
LineString,
MultiPoint,
Point,
Polygon,
box,
)
import geopandas
from geopandas import GeoDataFrame, GeoSeries, clip
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
import pytest
from geopandas._compat import HAS_PYPROJ
from geopandas.tools.clip import _mask_is_list_like_rectangle
pytestmark = pytest.mark.skip_no_sindex
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from pandas.testing import assert_index_equal
mask_variants_single_rectangle = [
"single_rectangle_gdf",
"single_rectangle_gdf_list_bounds",
@@ -43,6 +43,14 @@ def point_gdf():
return gdf
@pytest.fixture
def point_gdf2():
"""Create a point GeoDataFrame."""
pts = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
return gdf
@pytest.fixture
def pointsoutside_nooverlap_gdf():
"""Create a point GeoDataFrame. Its points are all outside the single
@@ -137,7 +145,7 @@ def two_line_gdf():
@pytest.fixture
def multi_poly_gdf(donut_geometry):
"""Create a multi-polygon GeoDataFrame."""
multi_poly = donut_geometry.unary_union
multi_poly = donut_geometry.union_all()
out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
out_df["attr"] = ["pool"]
return out_df
@@ -148,7 +156,7 @@ def multi_line(two_line_gdf):
"""Create a multi-line GeoDataFrame.
This GDF has one multiline and one regular line."""
# Create a single and multi line object
multiline_feat = two_line_gdf.unary_union
multiline_feat = two_line_gdf.union_all()
linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
out_df["attr"] = ["road", "stream"]
@@ -158,7 +166,7 @@ def multi_line(two_line_gdf):
@pytest.fixture
def multi_point(point_gdf):
"""Create a multi-point GeoDataFrame."""
multi_point = point_gdf.unary_union
multi_point = point_gdf.union_all()
out_df = GeoDataFrame(
geometry=GeoSeries(
[multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
@@ -321,7 +329,7 @@ class TestClipWithSingleRectangleGdf:
)
assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
shape_for_points = (
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.unary_union
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
)
assert all(clipped.intersects(shape_for_points))
@@ -398,6 +406,7 @@ def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
assert "GeometryCollection" in clipped.geom_type[0]
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
clip(point_gdf, single_rectangle_gdf.to_crs(4326))
@@ -460,3 +469,16 @@ def test_clip_empty_mask(buffered_locations, mask):
)
clipped = clip(buffered_locations.geometry, mask)
assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))
def test_clip_sorting(point_gdf2):
"""Test the sorting kwarg in clip"""
bbox = shapely.geometry.box(0, 0, 2, 2)
unsorted_clipped_gdf = point_gdf2.clip(bbox)
sorted_clipped_gdf = point_gdf2.clip(bbox, sort=True)
expected_sorted_index = pd.Index([1, 3, 5])
assert not (sorted(unsorted_clipped_gdf.index) == unsorted_clipped_gdf.index).all()
assert (sorted(sorted_clipped_gdf.index) == sorted_clipped_gdf.index).all()
assert_index_equal(expected_sorted_index, sorted_clipped_gdf.index)

View File

@@ -1,4 +1,5 @@
import numpy as np
from shapely.geometry import Point
from shapely.wkt import loads

View File

@@ -1,28 +1,46 @@
import pytest
import numpy
import geopandas
import geopandas._compat as compat
import geopandas
from geopandas.tools._random import uniform
multipolygons = geopandas.read_file(geopandas.datasets.get_path("nybb")).geometry
polygons = multipolygons.explode(ignore_index=True).geometry
multilinestrings = multipolygons.boundary
linestrings = polygons.boundary
points = multipolygons.centroid
import pytest
@pytest.fixture
def multipolygons(nybb_filename):
return geopandas.read_file(nybb_filename).geometry
@pytest.fixture
def polygons(multipolygons):
return multipolygons.explode(ignore_index=True).geometry
@pytest.fixture
def multilinestrings(multipolygons):
return multipolygons.boundary
@pytest.fixture
def linestrings(polygons):
return polygons.boundary
@pytest.fixture
def points(multipolygons):
return multipolygons.centroid
@pytest.mark.skipif(
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
reason="array input in interpolate not implemented for shapely<2",
)
@pytest.mark.parametrize("size", [10, 100])
@pytest.mark.parametrize(
"geom", [multipolygons[0], polygons[0], multilinestrings[0], linestrings[0]]
"geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
)
def test_uniform(geom, size):
def test_uniform(geom_fixture, size, request):
geom = request.getfixturevalue(geom_fixture)[0]
sample = uniform(geom, size=size, rng=1)
sample_series = geopandas.GeoSeries(sample).explode().reset_index(drop=True)
sample_series = (
geopandas.GeoSeries(sample).explode(index_parts=True).reset_index(drop=True)
)
assert len(sample_series) == size
sample_in_geom = sample_series.buffer(0.00000001).sindex.query(
geom, predicate="intersects"
@@ -30,21 +48,13 @@ def test_uniform(geom, size):
assert len(sample_in_geom) == size
@pytest.mark.skipif(
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
reason="array input in interpolate not implemented for shapely<2",
)
def test_uniform_unsupported():
def test_uniform_unsupported(points):
with pytest.warns(UserWarning, match="Sampling is not supported"):
sample = uniform(points[0], size=10, rng=1)
assert sample.is_empty
@pytest.mark.skipif(
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
reason="array input in interpolate not implemented for shapely<2",
)
def test_uniform_generator():
def test_uniform_generator(polygons):
sample = uniform(polygons[0], size=10, rng=1)
sample2 = uniform(polygons[0], size=10, rng=1)
assert sample.equals(sample2)
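These tests exercise the private `geopandas.tools._random.uniform` helper; a small reproducibility sketch mirroring `test_uniform_generator` (the square polygon is made up):

```python
from shapely.geometry import Polygon
from geopandas.tools._random import uniform   # private helper used by the tests above

square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])

# the same rng seed yields the same sample, as the test asserts
a = uniform(square, size=10, rng=1)
b = uniform(square, size=10, rng=1)
assert a.equals(b)
```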

View File

@@ -3,23 +3,24 @@ from typing import Sequence
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import Point, Polygon, GeometryCollection
import shapely
from shapely.geometry import GeometryCollection, Point, Polygon, box
import geopandas
import geopandas._compat as compat
from geopandas import GeoDataFrame, GeoSeries, read_file, sjoin, sjoin_nearest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from geopandas import (
GeoDataFrame,
GeoSeries,
points_from_xy,
read_file,
sjoin,
sjoin_nearest,
)
from pandas.testing import assert_frame_equal, assert_series_equal
import pytest
TEST_NEAREST = compat.USE_SHAPELY_20 or (compat.PYGEOS_GE_010 and compat.USE_PYGEOS)
pytestmark = pytest.mark.skip_no_sindex
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
@pytest.fixture()
@@ -95,6 +96,52 @@ def dfs(request):
return [request.param, df1, df2, expected]
@pytest.fixture()
def dfs_shared_attribute():
geo_left = [
Point(0, 0),
Point(1, 1),
Point(2, 2),
Point(3, 3),
Point(4, 4),
Point(5, 5),
Point(6, 6),
Point(7, 7),
]
geo_right = [
Point(0, 0),
Point(1, 1),
Point(2, 2),
Point(3, 3),
Point(4, 4),
Point(5, 5),
Point(6, 6),
Point(7, 7),
]
attr_tracker = ["A", "B", "C", "D", "E", "F", "G", "H"]
left_gdf = geopandas.GeoDataFrame(
{
"geometry": geo_left,
"attr_tracker": attr_tracker,
"duplicate_column": [0, 1, 2, 3, 4, 5, 6, 7],
"attr1": [True, True, True, True, True, True, True, True],
"attr2": [True, True, True, True, True, True, True, True],
}
)
right_gdf = geopandas.GeoDataFrame(
{
"geometry": geo_right,
"duplicate_column": [0, 1, 2, 3, 4, 5, 6, 7],
"attr1": [True, True, False, False, True, True, False, False],
"attr2": [True, True, False, False, False, False, False, False],
}
)
return left_gdf, right_gdf
class TestSpatialJoin:
@pytest.mark.parametrize(
"how, lsuffix, rsuffix, expected_cols",
@@ -113,6 +160,7 @@ class TestSpatialJoin:
joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
assert set(joined.columns) == expected_cols | {"geometry"}
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
@pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
def test_crs_mismatch(self, dfs):
index, df1, df2, expected = dfs
@@ -120,31 +168,6 @@ class TestSpatialJoin:
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
sjoin(df1, df2)
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
def test_deprecated_op_param(self, dfs, op):
_, df1, df2, _ = dfs
with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
sjoin(df1, df2, op=op)
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
@pytest.mark.parametrize("predicate", ["contains", "within"])
def test_deprecated_op_param_nondefault_predicate(self, dfs, op, predicate):
_, df1, df2, _ = dfs
match = "use the `predicate` parameter instead"
if op != predicate:
warntype = UserWarning
match = (
"`predicate` will be overridden by the value of `op`" # noqa: ISC003
+ r"(.|\s)*"
+ match
)
else:
warntype = FutureWarning
with pytest.warns(warntype, match=match):
sjoin(df1, df2, predicate=predicate, op=op)
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
def test_unknown_kwargs(self, dfs):
_, df1, df2, _ = dfs
@@ -154,7 +177,6 @@ class TestSpatialJoin:
):
sjoin(df1, df2, extra_param="test")
@pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
@pytest.mark.parametrize(
"dfs",
[
@@ -167,12 +189,10 @@ class TestSpatialJoin:
indirect=True,
)
@pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
@pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
def test_inner(self, predicate, predicate_kw, dfs):
def test_inner(self, predicate, dfs):
index, df1, df2, expected = dfs
res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})
res = sjoin(df1, df2, how="inner", predicate=predicate)
exp = expected[predicate].dropna().copy()
exp = exp.drop("geometry_y", axis=1).rename(columns={"geometry_x": "geometry"})
exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
@@ -182,7 +202,7 @@ class TestSpatialJoin:
].astype("int64")
if index == "named-index":
exp[["df1_ix", "df2_ix"]] = exp[["df1_ix", "df2_ix"]].astype("int64")
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
exp = exp.set_index("df1_ix")
if index in ["default-index", "string-index"]:
exp = exp.set_index("index_left")
exp.index.name = None
@@ -192,11 +212,7 @@ class TestSpatialJoin:
)
exp.index.names = df1.index.names
if index == "named-multi-index":
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
)
exp.index.names = df1.index.names
exp = exp.set_index(["df1_ix1", "df1_ix2"])
assert_frame_equal(res, exp)
@pytest.mark.parametrize(
@@ -232,7 +248,7 @@ class TestSpatialJoin:
res["index_right"] = res["index_right"].astype(float)
elif index == "named-index":
exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64")
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
exp = exp.set_index("df1_ix")
if index in ["default-index", "string-index"]:
exp = exp.set_index("index_left")
exp.index.name = None
@@ -242,10 +258,7 @@ class TestSpatialJoin:
)
exp.index.names = df1.index.names
if index == "named-multi-index":
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
)
exp.index.names = df1.index.names
exp = exp.set_index(["df1_ix1", "df1_ix2"])
assert_frame_equal(res, exp)
@@ -348,7 +361,7 @@ class TestSpatialJoin:
res["index_left"] = res["index_left"].astype(float)
elif index == "named-index":
exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
exp = exp.set_index("df2_ix").rename(columns={"df1_ix": "index_left"})
exp = exp.set_index("df2_ix")
if index in ["default-index", "string-index"]:
exp = exp.set_index("index_right")
exp = exp.reindex(columns=res.columns)
@@ -359,20 +372,431 @@ class TestSpatialJoin:
)
exp.index.names = df2.index.names
if index == "named-multi-index":
exp = exp.set_index(["df2_ix1", "df2_ix2"]).rename(
columns={"df1_ix1": "index_left0", "df1_ix2": "index_left1"}
)
exp.index.names = df2.index.names
exp = exp.set_index(["df2_ix1", "df2_ix2"])
if predicate == "within":
exp = exp.sort_index()
assert_frame_equal(res, exp, check_index_type=False)
@pytest.mark.skipif(not compat.GEOS_GE_310, reason="`dwithin` requires GEOS 3.10")
@pytest.mark.parametrize("how", ["inner"])
@pytest.mark.parametrize(
"geo_left, geo_right, expected_left, expected_right, distance",
[
(
# Distance is number, 2x1
[Point(0, 0), Point(1, 1)],
[Point(1, 1)],
[0, 1],
[0, 0],
math.sqrt(2),
),
# Distance is number, 2x2
(
[Point(0, 0), Point(1, 1)],
[Point(0, 0), Point(1, 1)],
[0, 1, 0, 1],
[0, 0, 1, 1],
math.sqrt(2),
),
# Distance is array, matches len(left)
(
[Point(0, 0), Point(0, 0), Point(-1, -1)],
[Point(1, 1)],
[1, 2],
[0, 0],
[0, math.sqrt(2), math.sqrt(8)],
),
# Distance is np.array, matches len(left),
# inner join sorts the right GeoDataFrame
(
[Point(0, 0), Point(0, 0), Point(-1, -1)],
[Point(1, 1), Point(0.5, 0.5)],
[1, 2, 1, 2],
[1, 1, 0, 0],
np.array([0, math.sqrt(2), math.sqrt(8)]),
),
],
)
def test_sjoin_dwithin(
self,
geo_left,
geo_right,
expected_left: Sequence[int],
expected_right: Sequence[int],
distance,
how,
):
left = geopandas.GeoDataFrame({"geometry": geo_left})
right = geopandas.GeoDataFrame({"geometry": geo_right})
expected_gdf = left.iloc[expected_left].copy()
expected_gdf["index_right"] = expected_right
joined = sjoin(left, right, how=how, predicate="dwithin", distance=distance)
assert_frame_equal(expected_gdf.sort_index(), joined.sort_index())
# GH3239
@pytest.mark.parametrize(
"predicate",
[
"contains",
"contains_properly",
"covered_by",
"covers",
"crosses",
"intersects",
"touches",
"within",
],
)
def test_sjoin_left_order(self, predicate):
# a set of points in random order -> that order should be preserved
# with a left join
pts = GeoDataFrame(
geometry=points_from_xy([0.1, 0.4, 0.3, 0.7], [0.8, 0.6, 0.9, 0.1])
)
polys = GeoDataFrame(
{"id": [1, 2, 3, 4]},
geometry=[
box(0, 0, 0.5, 0.5),
box(0, 0.5, 0.5, 1),
box(0.5, 0, 1, 0.5),
box(0.5, 0.5, 1, 1),
],
)
joined = sjoin(pts, polys, predicate=predicate, how="left")
assert_index_equal(joined.index, pts.index)
def test_sjoin_shared_attribute(self, naturalearth_lowres, naturalearth_cities):
countries = read_file(naturalearth_lowres)
cities = read_file(naturalearth_cities)
countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
# Add first letter of country/city as an attribute column to be compared
countries["firstLetter"] = countries["country"].astype(str).str[0]
cities["firstLetter"] = cities["name"].astype(str).str[0]
result = sjoin(cities, countries, on_attribute="firstLetter")
assert (
result["country"].astype(str).str[0] == result["name"].astype(str).str[0]
).all()
assert result.shape == (23, 5)
@pytest.mark.parametrize(
"attr1_key_change_dict, attr2_key_change_dict",
[
pytest.param(
{True: "merge", False: "no_merge"},
{True: "merge", False: "no_merge"},
id="merge on string attributes",
),
pytest.param(
{True: 2, False: 1},
{True: 2, False: 1},
id="merge on integer attributes",
),
pytest.param(
{True: True, False: False},
{True: True, False: False},
id="merge on boolean attributes",
),
pytest.param(
{True: True, False: False},
{True: "merge", False: "no_merge"},
id="merge on mixed attributes",
),
],
)
def test_sjoin_multiple_attributes_datatypes(
self, dfs_shared_attribute, attr1_key_change_dict, attr2_key_change_dict
):
left_gdf, right_gdf = dfs_shared_attribute
left_gdf["attr1"] = left_gdf["attr1"].map(attr1_key_change_dict)
left_gdf["attr2"] = left_gdf["attr2"].map(attr2_key_change_dict)
right_gdf["attr1"] = right_gdf["attr1"].map(attr1_key_change_dict)
right_gdf["attr2"] = right_gdf["attr2"].map(attr2_key_change_dict)
joined = sjoin(left_gdf, right_gdf, on_attribute=("attr1", "attr2"))
assert (["A", "B"] == joined["attr_tracker"].values).all()
def test_sjoin_multiple_attributes_check_header(self, dfs_shared_attribute):
left_gdf, right_gdf = dfs_shared_attribute
joined = sjoin(left_gdf, right_gdf, on_attribute=["attr1"])
assert (["A", "B", "E", "F"] == joined["attr_tracker"].values).all()
assert {"attr2_left", "attr2_right", "attr1"}.issubset(joined.columns)
assert "attr1_left" not in joined
def test_sjoin_error_column_does_not_exist(self, dfs_shared_attribute):
left_gdf, right_gdf = dfs_shared_attribute
right_gdf_dropped_attr = right_gdf.drop("attr1", axis=1)
left_gdf_dropped_attr = left_gdf.drop("attr1", axis=1)
with pytest.raises(
ValueError,
match="Expected column attr1 is missing from the right dataframe.",
):
sjoin(left_gdf, right_gdf_dropped_attr, on_attribute="attr1")
with pytest.raises(
ValueError,
match="Expected column attr1 is missing from the left dataframe.",
):
sjoin(left_gdf_dropped_attr, right_gdf, on_attribute="attr1")
with pytest.raises(
ValueError,
match="Expected column attr1 is missing from both of the dataframes.",
):
sjoin(left_gdf_dropped_attr, right_gdf_dropped_attr, on_attribute="attr1")
def test_sjoin_error_use_geometry_column(self, dfs_shared_attribute):
left_gdf, right_gdf = dfs_shared_attribute
with pytest.raises(
ValueError,
match="Active geometry column cannot be used as an input for "
"on_attribute parameter.",
):
sjoin(left_gdf, right_gdf, on_attribute="geometry")
with pytest.raises(
ValueError,
match="Active geometry column cannot be used as an input for "
"on_attribute parameter.",
):
sjoin(left_gdf, right_gdf, on_attribute=["attr1", "geometry"])
class TestIndexNames:
@pytest.mark.parametrize("how", ["inner", "left", "right"])
def test_preserve_index_names(self, how):
# preserve names of both left and right index
geoms = [Point(1, 1), Point(2, 2)]
df1 = GeoDataFrame({"geometry": geoms}, index=pd.Index([1, 2], name="myidx1"))
df2 = GeoDataFrame(
{"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx2")
)
result = sjoin(df1, df2, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{"myidx1": [1, 2], "geometry": geoms, "myidx2": ["a", "b"]}
).set_index("myidx1")
else:
# right join
expected = GeoDataFrame(
{"myidx2": ["a", "b"], "myidx1": [1, 2], "geometry": geoms},
).set_index("myidx2")
assert_geodataframe_equal(result, expected)
# but also add suffixes if both left and right have the same index
df1.index.name = "myidx"
df2.index.name = "myidx"
result = sjoin(df1, df2, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
).set_index("myidx_left")
else:
# right join
expected = GeoDataFrame(
{"myidx_right": ["a", "b"], "myidx_left": [1, 2], "geometry": geoms},
).set_index("myidx_right")
assert_geodataframe_equal(result, expected)
@pytest.mark.parametrize("how", ["inner", "left", "right"])
def test_preserve_index_names_multiindex(self, how):
# preserve names of both left and right index
geoms = [Point(1, 1), Point(2, 2)]
df1 = GeoDataFrame(
{"geometry": geoms},
index=pd.MultiIndex.from_tuples(
[("a", 1), ("b", 2)], names=["myidx1", "level2"]
),
)
df2 = GeoDataFrame(
{"geometry": geoms},
index=pd.MultiIndex.from_tuples(
[("c", 3), ("d", 4)], names=["myidx2", None]
),
)
result = sjoin(df1, df2, how=how)
expected_base = GeoDataFrame(
{
"myidx1": ["a", "b"],
"level2": [1, 2],
"geometry": geoms,
"myidx2": ["c", "d"],
"index_right1": [3, 4],
}
)
if how in ("inner", "left"):
expected = expected_base.set_index(["myidx1", "level2"])
else:
# right join
expected = expected_base.set_index(["myidx2", "index_right1"])
# if it was originally None, that is preserved
expected.index.names = ["myidx2", None]
assert_geodataframe_equal(result, expected)
# but also add suffixes if both left and right have the same index
df1.index.names = ["myidx", "level2"]
df2.index.names = ["myidx", None]
result = sjoin(df1, df2, how=how)
expected_base = GeoDataFrame(
{
"myidx_left": ["a", "b"],
"level2": [1, 2],
"geometry": geoms,
"myidx_right": ["c", "d"],
"index_right1": [3, 4],
}
)
if how in ("inner", "left"):
expected = expected_base.set_index(["myidx_left", "level2"])
else:
# right join
expected = expected_base.set_index(["myidx_right", "index_right1"])
# if it was originally None, that is preserved
expected.index.names = ["myidx_right", None]
assert_geodataframe_equal(result, expected)
@pytest.mark.parametrize("how", ["inner", "left", "right"])
def test_duplicate_column_index_name(self, how):
# case where a left column and the right index have the same name or the
# other way around -> correctly add suffix or preserve index name
geoms = [Point(1, 1), Point(2, 2)]
df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
df2 = GeoDataFrame(
{"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx")
)
result = sjoin(df1, df2, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
)
else:
# right join
expected = GeoDataFrame(
{"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
index=pd.Index(["a", "b"], name="myidx_right"),
)
assert_geodataframe_equal(result, expected)
result = sjoin(df2, df1, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
index=pd.Index(["a", "b"], name="myidx_left"),
)
else:
# right join
expected = GeoDataFrame(
{"myidx_left": ["a", "b"], "myidx_right": [1, 2], "geometry": geoms},
)
assert_geodataframe_equal(result, expected)
@pytest.mark.parametrize("how", ["inner", "left", "right"])
def test_duplicate_column_index_name_multiindex(self, how):
# case where a left column and the right index have the same name or the
# other way around -> correctly add suffix or preserve index name
geoms = [Point(1, 1), Point(2, 2)]
df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
df2 = GeoDataFrame(
{"geometry": geoms},
index=pd.MultiIndex.from_tuples(
[("a", 1), ("b", 2)], names=["myidx", "level2"]
),
)
result = sjoin(df1, df2, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{
"myidx_left": [1, 2],
"geometry": geoms,
"myidx_right": ["a", "b"],
"level2": [1, 2],
}
)
else:
# right join
expected = GeoDataFrame(
{"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
index=pd.MultiIndex.from_tuples(
[("a", 1), ("b", 2)], names=["myidx_right", "level2"]
),
)
assert_geodataframe_equal(result, expected)
result = sjoin(df2, df1, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
index=pd.MultiIndex.from_tuples(
[("a", 1), ("b", 2)], names=["myidx_left", "level2"]
),
)
else:
# right join
expected = GeoDataFrame(
{
"myidx_left": ["a", "b"],
"level2": [1, 2],
"myidx_right": [1, 2],
"geometry": geoms,
},
)
assert_geodataframe_equal(result, expected)
@pytest.mark.parametrize("how", ["inner", "left", "right"])
def test_conflicting_column_index_name(self, how):
# test case where the auto-generated index name conflicts
geoms = [Point(1, 1), Point(2, 2)]
df1 = GeoDataFrame({"index_right": [1, 2], "geometry": geoms})
df2 = GeoDataFrame({"geometry": geoms})
with pytest.raises(ValueError, match="'index_right' cannot be a column name"):
sjoin(df1, df2, how=how)
@pytest.mark.parametrize("how", ["inner", "left", "right"])
def test_conflicting_column_with_suffix(self, how):
# test case where an input column already carries the suffixed name, so the
# result ends up with duplicated column labels
geoms = [Point(1, 1), Point(2, 2)]
df1 = GeoDataFrame(
{"column": [1, 2], "column_right": ["a", "b"], "geometry": geoms}
)
df2 = GeoDataFrame({"column": [0.1, 0.2], "geometry": geoms})
result = sjoin(df1, df2, how=how)
if how in ("inner", "left"):
expected = GeoDataFrame(
{1: [1, 2], 2: ["a", "b"], 3: geoms, 4: [0, 1], 5: [0.1, 0.2]}
)
expected.columns = [
"column_left",
"column_right",
"geometry",
"index_right",
"column_right",
]
else:
# right join
expected = GeoDataFrame(
{1: [0, 1], 2: [1, 2], 3: ["a", "b"], 4: [0.1, 0.2], 5: geoms}
)
expected.columns = [
"index_left",
"column_left",
"column_right",
"column_right",
"geometry",
]
expected = expected.set_geometry("geometry")
assert_geodataframe_equal(result, expected)
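# --- illustrative sketch, not part of the test suite -------------------------
# The TestIndexNames cases above pin down how sjoin disambiguates clashing
# column and index names via the lsuffix/rsuffix arguments (defaults "left" and
# "right"). A minimal sketch with hypothetical data:
def _sketch_suffix_behaviour():
    from shapely.geometry import Point

    from geopandas import GeoDataFrame, sjoin

    pts = GeoDataFrame({"value": [1, 2], "geometry": [Point(0, 0), Point(1, 1)]})
    joined = sjoin(pts, pts, lsuffix="a", rsuffix="b")
    # the clashing "value" columns come back as "value_a" and "value_b", and the
    # joined right-hand index is exposed as "index_b"
    return joined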
@pytest.mark.usefixtures("_setup_class_nybb_filename")
class TestSpatialJoinNYBB:
def setup_method(self):
nybb_filename = geopandas.datasets.get_path("nybb")
self.polydf = read_file(nybb_filename)
self.polydf = read_file(self.nybb_filename)
self.crs = self.polydf.crs
N = 20
b = [int(x) for x in self.polydf.total_bounds]
@@ -527,7 +951,7 @@ class TestSpatialJoinNYBB:
def test_sjoin_empty_geometries(self):
# https://github.com/geopandas/geopandas/issues/944
empty = GeoDataFrame(geometry=[GeometryCollection()] * 3, crs=self.crs)
df = sjoin(pd.concat([self.pointdf, empty]), self.polydf, how="left")
assert df.shape == (24, 8)
df2 = sjoin(self.pointdf, pd.concat([self.polydf, empty]), how="left")
@@ -542,8 +966,8 @@ class TestSpatialJoinNYBB:
assert sjoin(empty, self.pointdf, how="inner", predicate=predicate).empty
assert sjoin(empty, self.pointdf, how="left", predicate=predicate).empty
def test_empty_sjoin_return_duplicated_columns(self, nybb_filename):
nybb = geopandas.read_file(nybb_filename)
nybb2 = nybb.copy()
nybb2.geometry = nybb2.translate(200000) # to get non-overlapping
@@ -553,45 +977,24 @@ class TestSpatialJoinNYBB:
assert "BoroCode_left" in result.columns
@pytest.fixture
def world(naturalearth_lowres):
return read_file(naturalearth_lowres)
@pytest.fixture
def cities(naturalearth_cities):
return read_file(naturalearth_cities)
def test_sjoin_inner(world, cities):
# GH637
countries = world[["geometry", "name"]]
countries = countries.rename(columns={"name": "country"})
cities_with_country = sjoin(cities, countries, how="inner", predicate="intersects")
assert cities_with_country.shape == (213, 4)
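# --- illustrative sketch, not part of the test suite -------------------------
# TestNearest below exercises sjoin_nearest, which matches every left geometry
# to its nearest right geometry rather than requiring a predicate to hold.
# `max_distance` caps the search radius, `distance_col` records the measured
# distance, and `exclusive=True` keeps a geometry from matching an identical
# geometry (useful when joining a frame to itself). Hypothetical data:
def _sketch_sjoin_nearest_usage():
    from shapely.geometry import Point

    from geopandas import GeoDataFrame, sjoin_nearest

    pts = GeoDataFrame(geometry=[Point(0, 0), Point(0, 3)])
    targets = GeoDataFrame(geometry=[Point(0, 1)])
    # both points join to the single target; "dist" holds 1.0 and 2.0
    return sjoin_nearest(pts, targets, distance_col="dist")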
class TestNearest:
@pytest.mark.parametrize(
"how_kwargs", ({}, {"how": "inner"}, {"how": "left"}, {"how": "right"})
@@ -900,10 +1303,10 @@ class TestNearest:
assert_geodataframe_equal(expected_gdf, joined)
@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS")
def test_sjoin_nearest_inner(self, naturalearth_lowres, naturalearth_cities):
# check equivalency of left and inner join
countries = read_file(naturalearth_lowres)
cities = read_file(naturalearth_cities)
countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
# default: inner and left give the same result
@@ -927,19 +1330,8 @@ class TestNearest:
result5["index_right"] = result5["index_right"].astype("int64")
assert_geodataframe_equal(result5, result4, check_like=True)
@pytest.mark.parametrize(
"max_distance,expected", [(None, expected_index_uncapped), (1.1, [3, 3, 1, 2])]
"max_distance,expected", [(None, [1, 3, 3, 1, 2]), (1.1, [3, 3, 1, 2])]
)
def test_sjoin_nearest_exclusive(self, max_distance, expected):
geoms = shapely.points(np.arange(3), np.arange(3))