This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -1,28 +1,28 @@
"""Tests for the clip module."""
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import (
Polygon,
Point,
LineString,
LinearRing,
GeometryCollection,
LinearRing,
LineString,
MultiPoint,
Point,
Polygon,
box,
)
import geopandas
from geopandas import GeoDataFrame, GeoSeries, clip
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
import pytest
from geopandas._compat import HAS_PYPROJ
from geopandas.tools.clip import _mask_is_list_like_rectangle
pytestmark = pytest.mark.skip_no_sindex
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from pandas.testing import assert_index_equal
mask_variants_single_rectangle = [
"single_rectangle_gdf",
"single_rectangle_gdf_list_bounds",
@@ -43,6 +43,14 @@ def point_gdf():
return gdf
@pytest.fixture
def point_gdf2():
    """Return a six-point GeoDataFrame in EPSG:3857.

    The points lie on the diagonal and are listed in non-monotonic
    coordinate order, so the default integer index does not follow the
    coordinate order.
    """
    coords = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
    points = list(map(Point, coords))
    return GeoDataFrame(points, columns=["geometry"], crs="EPSG:3857")
@pytest.fixture
def pointsoutside_nooverlap_gdf():
"""Create a point GeoDataFrame. Its points are all outside the single
@@ -137,7 +145,7 @@ def two_line_gdf():
@pytest.fixture
def multi_poly_gdf(donut_geometry):
    """Create a multi-polygon GeoDataFrame.

    All geometries of ``donut_geometry`` are merged into a single geometry,
    which becomes the only row of an EPSG:3857 frame with an ``attr``
    column holding ``"pool"``.
    """
    # Merge once with union_all(); the unary_union property is deprecated
    # and its result was discarded anyway.
    multi_poly = donut_geometry.union_all()
    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
    out_df["attr"] = ["pool"]
    return out_df
@@ -148,7 +156,7 @@ def multi_line(two_line_gdf):
"""Create a multi-line GeoDataFrame.
This GDF has one multiline and one regular line."""
# Create a single and multi line object
multiline_feat = two_line_gdf.unary_union
multiline_feat = two_line_gdf.union_all()
linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
out_df["attr"] = ["road", "stream"]
@@ -158,7 +166,7 @@ def multi_line(two_line_gdf):
@pytest.fixture
def multi_point(point_gdf):
"""Create a multi-point GeoDataFrame."""
multi_point = point_gdf.unary_union
multi_point = point_gdf.union_all()
out_df = GeoDataFrame(
geometry=GeoSeries(
[multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
@@ -321,7 +329,7 @@ class TestClipWithSingleRectangleGdf:
)
assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
shape_for_points = (
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.unary_union
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
)
assert all(clipped.intersects(shape_for_points))
@@ -398,6 +406,7 @@ def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
assert "GeometryCollection" in clipped.geom_type[0]
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    """Clipping with a mask in a different CRS must emit a UserWarning."""
    reprojected_mask = single_rectangle_gdf.to_crs(4326)
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        clip(point_gdf, reprojected_mask)
@@ -460,3 +469,16 @@ def test_clip_empty_mask(buffered_locations, mask):
)
clipped = clip(buffered_locations.geometry, mask)
assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))
def test_clip_sorting(point_gdf2):
    """Check that ``clip(..., sort=True)`` returns rows in ascending index order."""
    clip_box = shapely.geometry.box(0, 0, 2, 2)
    default_index = point_gdf2.clip(clip_box).index
    ordered_index = point_gdf2.clip(clip_box, sort=True).index

    # Without ``sort`` the index must differ from its sorted form somewhere.
    default_is_sorted = (sorted(default_index) == default_index).all()
    assert not default_is_sorted

    # With ``sort=True`` the index equals its sorted form and the exact labels.
    ordered_is_sorted = (sorted(ordered_index) == ordered_index).all()
    assert ordered_is_sorted
    assert_index_equal(pd.Index([1, 3, 5]), ordered_index)

View File

@@ -1,4 +1,5 @@
import numpy as np
from shapely.geometry import Point
from shapely.wkt import loads

View File

@@ -1,28 +1,46 @@
import pytest
import numpy
import geopandas
import geopandas._compat as compat
import geopandas
from geopandas.tools._random import uniform
multipolygons = geopandas.read_file(geopandas.datasets.get_path("nybb")).geometry
polygons = multipolygons.explode(ignore_index=True).geometry
multilinestrings = multipolygons.boundary
linestrings = polygons.boundary
points = multipolygons.centroid
import pytest
@pytest.fixture
def multipolygons(nybb_filename):
    """Geometry column of the dataset read from ``nybb_filename``."""
    nybb = geopandas.read_file(nybb_filename)
    return nybb.geometry
@pytest.fixture
def polygons(multipolygons):
    """Geometries of ``multipolygons`` exploded into parts under a fresh index."""
    exploded = multipolygons.explode(ignore_index=True)
    return exploded.geometry
@pytest.fixture
def multilinestrings(multipolygons):
    """Boundaries of the ``multipolygons`` geometries."""
    boundaries = multipolygons.boundary
    return boundaries
@pytest.fixture
def linestrings(polygons):
    """Boundaries of the exploded ``polygons`` geometries."""
    boundaries = polygons.boundary
    return boundaries
@pytest.fixture
def points(multipolygons):
    """Centroids of the ``multipolygons`` geometries."""
    centroids = multipolygons.centroid
    return centroids
@pytest.mark.skipif(
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
reason="array input in interpolate not implemented for shapely<2",
)
@pytest.mark.parametrize("size", [10, 100])
@pytest.mark.parametrize(
"geom", [multipolygons[0], polygons[0], multilinestrings[0], linestrings[0]]
"geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
)
def test_uniform(geom, size):
def test_uniform(geom_fixture, size, request):
geom = request.getfixturevalue(geom_fixture)[0]
sample = uniform(geom, size=size, rng=1)
sample_series = geopandas.GeoSeries(sample).explode().reset_index(drop=True)
sample_series = (
geopandas.GeoSeries(sample).explode(index_parts=True).reset_index(drop=True)
)
assert len(sample_series) == size
sample_in_geom = sample_series.buffer(0.00000001).sindex.query(
geom, predicate="intersects"
@@ -30,21 +48,13 @@ def test_uniform(geom, size):
assert len(sample_in_geom) == size
def test_uniform_unsupported(points):
    """Sampling from a point geometry warns and yields an empty result.

    ``points`` is the fixture value (a series of centroids); its first
    geometry is handed to ``uniform``.
    """
    with pytest.warns(UserWarning, match="Sampling is not supported"):
        sample = uniform(points[0], size=10, rng=1)
    assert sample.is_empty
def test_uniform_generator(polygons):
    """Two ``uniform`` calls with the same ``rng`` seed give equal samples.

    ``polygons`` is the fixture value; its first geometry is sampled twice
    with ``rng=1``.
    """
    sample = uniform(polygons[0], size=10, rng=1)
    sample2 = uniform(polygons[0], size=10, rng=1)
    assert sample.equals(sample2)

View File

@@ -3,23 +3,24 @@ from typing import Sequence
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import Point, Polygon, GeometryCollection
import shapely
from shapely.geometry import GeometryCollection, Point, Polygon, box
import geopandas
import geopandas._compat as compat
from geopandas import GeoDataFrame, GeoSeries, read_file, sjoin, sjoin_nearest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from geopandas import (
GeoDataFrame,
GeoSeries,
points_from_xy,
read_file,
sjoin,
sjoin_nearest,
)
from pandas.testing import assert_frame_equal, assert_series_equal
import pytest
TEST_NEAREST = compat.USE_SHAPELY_20 or (compat.PYGEOS_GE_010 and compat.USE_PYGEOS)
pytestmark = pytest.mark.skip_no_sindex
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
@pytest.fixture()
@@ -95,6 +96,52 @@ def dfs(request):
return [request.param, df1, df2, expected]
@pytest.fixture()
def dfs_shared_attribute():
    """Build a pair of eight-row point frames for attribute-aware joins.

    Both frames hold the points (0, 0) .. (7, 7), so row ``i`` of the left
    frame coincides with row ``i`` of the right frame. The left frame has
    ``attr1``/``attr2`` all True plus an ``attr_tracker`` letter per row;
    the right frame sets ``attr1`` True on rows 0, 1, 4, 5 and ``attr2``
    True on rows 0, 1 only.

    Returns
    -------
    tuple of (left_gdf, right_gdf)
    """
    n_rows = 8
    left_gdf = geopandas.GeoDataFrame(
        {
            "geometry": [Point(i, i) for i in range(n_rows)],
            "attr_tracker": list("ABCDEFGH"),
            "duplicate_column": list(range(n_rows)),
            "attr1": [True] * n_rows,
            "attr2": [True] * n_rows,
        }
    )
    right_gdf = geopandas.GeoDataFrame(
        {
            "geometry": [Point(i, i) for i in range(n_rows)],
            "duplicate_column": list(range(n_rows)),
            "attr1": [True, True, False, False] * 2,
            "attr2": [True, True] + [False] * 6,
        }
    )
    return left_gdf, right_gdf
class TestSpatialJoin:
@pytest.mark.parametrize(
"how, lsuffix, rsuffix, expected_cols",
@@ -113,6 +160,7 @@ class TestSpatialJoin:
joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
assert set(joined.columns) == expected_cols | {"geometry"}
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
@pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
def test_crs_mismatch(self, dfs):
index, df1, df2, expected = dfs
@@ -120,31 +168,6 @@ class TestSpatialJoin:
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
sjoin(df1, df2)
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
def test_deprecated_op_param(self, dfs, op):
_, df1, df2, _ = dfs
with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
sjoin(df1, df2, op=op)
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
@pytest.mark.parametrize("predicate", ["contains", "within"])
def test_deprecated_op_param_nondefault_predicate(self, dfs, op, predicate):
_, df1, df2, _ = dfs
match = "use the `predicate` parameter instead"
if op != predicate:
warntype = UserWarning
match = (
"`predicate` will be overridden by the value of `op`" # noqa: ISC003
+ r"(.|\s)*"
+ match
)
else:
warntype = FutureWarning
with pytest.warns(warntype, match=match):
sjoin(df1, df2, predicate=predicate, op=op)
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
def test_unknown_kwargs(self, dfs):
_, df1, df2, _ = dfs
@@ -154,7 +177,6 @@ class TestSpatialJoin:
):
sjoin(df1, df2, extra_param="test")
@pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
@pytest.mark.parametrize(
"dfs",
[
@@ -167,12 +189,10 @@ class TestSpatialJoin:
indirect=True,
)
@pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
@pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
def test_inner(self, predicate, predicate_kw, dfs):
def test_inner(self, predicate, dfs):
index, df1, df2, expected = dfs
res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})
res = sjoin(df1, df2, how="inner", predicate=predicate)
exp = expected[predicate].dropna().copy()
exp = exp.drop("geometry_y", axis=1).rename(columns={"geometry_x": "geometry"})
exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
@@ -182,7 +202,7 @@ class TestSpatialJoin:
].astype("int64")
if index == "named-index":
exp[["df1_ix", "df2_ix"]] = exp[["df1_ix", "df2_ix"]].astype("int64")
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
exp = exp.set_index("df1_ix")
if index in ["default-index", "string-index"]:
exp = exp.set_index("index_left")
exp.index.name = None
@@ -192,11 +212,7 @@ class TestSpatialJoin:
)
exp.index.names = df1.index.names
if index == "named-multi-index":
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
)
exp.index.names = df1.index.names
exp = exp.set_index(["df1_ix1", "df1_ix2"])
assert_frame_equal(res, exp)
@pytest.mark.parametrize(
@@ -232,7 +248,7 @@ class TestSpatialJoin:
res["index_right"] = res["index_right"].astype(float)
elif index == "named-index":
exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64")
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
exp = exp.set_index("df1_ix")
if index in ["default-index", "string-index"]:
exp = exp.set_index("index_left")
exp.index.name = None
@@ -242,10 +258,7 @@ class TestSpatialJoin:
)
exp.index.names = df1.index.names
if index == "named-multi-index":
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
)
exp.index.names = df1.index.names
exp = exp.set_index(["df1_ix1", "df1_ix2"])
assert_frame_equal(res, exp)
@@ -348,7 +361,7 @@ class TestSpatialJoin:
res["index_left"] = res["index_left"].astype(float)
elif index == "named-index":
exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
exp = exp.set_index("df2_ix").rename(columns={"df1_ix": "index_left"})
exp = exp.set_index("df2_ix")
if index in ["default-index", "string-index"]:
exp = exp.set_index("index_right")
exp = exp.reindex(columns=res.columns)
@@ -359,20 +372,431 @@ class TestSpatialJoin:
)
exp.index.names = df2.index.names
if index == "named-multi-index":
exp = exp.set_index(["df2_ix1", "df2_ix2"]).rename(
columns={"df1_ix1": "index_left0", "df1_ix2": "index_left1"}
)
exp.index.names = df2.index.names
exp = exp.set_index(["df2_ix1", "df2_ix2"])
if predicate == "within":
exp = exp.sort_index()
assert_frame_equal(res, exp, check_index_type=False)
@pytest.mark.skipif(not compat.GEOS_GE_310, reason="`dwithin` requires GEOS 3.10")
@pytest.mark.parametrize("how", ["inner"])
@pytest.mark.parametrize(
    "geo_left, geo_right, expected_left, expected_right, distance",
    [
        # Distance is number, 2x1
        (
            [Point(0, 0), Point(1, 1)],
            [Point(1, 1)],
            [0, 1],
            [0, 0],
            math.sqrt(2),
        ),
        # Distance is number, 2x2
        (
            [Point(0, 0), Point(1, 1)],
            [Point(0, 0), Point(1, 1)],
            [0, 1, 0, 1],
            [0, 0, 1, 1],
            math.sqrt(2),
        ),
        # Distance is array, matches len(left)
        (
            [Point(0, 0), Point(0, 0), Point(-1, -1)],
            [Point(1, 1)],
            [1, 2],
            [0, 0],
            [0, math.sqrt(2), math.sqrt(8)],
        ),
        # Distance is np.array, matches len(left),
        # inner join sorts the right GeoDataFrame
        (
            [Point(0, 0), Point(0, 0), Point(-1, -1)],
            [Point(1, 1), Point(0.5, 0.5)],
            [1, 2, 1, 2],
            [1, 1, 0, 0],
            np.array([0, math.sqrt(2), math.sqrt(8)]),
        ),
    ],
)
def test_sjoin_dwithin(
    self,
    geo_left,
    geo_right,
    expected_left: Sequence[int],
    expected_right: Sequence[int],
    distance,
    how,
):
    """Join with ``predicate="dwithin"`` and compare against the expected pairs.

    ``expected_left`` are positional rows of the left frame that must appear
    in the result; ``expected_right`` are the matching ``index_right``
    values. Both sides are index-sorted before comparison.
    """
    left_gdf = geopandas.GeoDataFrame({"geometry": geo_left})
    right_gdf = geopandas.GeoDataFrame({"geometry": geo_right})

    result = sjoin(
        left_gdf, right_gdf, how=how, predicate="dwithin", distance=distance
    )

    wanted = left_gdf.iloc[expected_left].copy()
    wanted["index_right"] = expected_right
    assert_frame_equal(wanted.sort_index(), result.sort_index())
# GH3239
@pytest.mark.parametrize(
    "predicate",
    [
        "contains",
        "contains_properly",
        "covered_by",
        "covers",
        "crosses",
        "intersects",
        "touches",
        "within",
    ],
)
def test_sjoin_left_order(self, predicate):
    """A left join must keep the left frame's row order for every predicate."""
    # points given in arbitrary order; that order is what must survive
    xs = [0.1, 0.4, 0.3, 0.7]
    ys = [0.8, 0.6, 0.9, 0.1]
    pts = GeoDataFrame(geometry=points_from_xy(xs, ys))

    # four quadrant boxes tiling the unit square
    quadrants = [
        box(0, 0, 0.5, 0.5),
        box(0, 0.5, 0.5, 1),
        box(0.5, 0, 1, 0.5),
        box(0.5, 0.5, 1, 1),
    ]
    polys = GeoDataFrame({"id": [1, 2, 3, 4]}, geometry=quadrants)

    joined = sjoin(pts, polys, predicate=predicate, how="left")
    assert_index_equal(joined.index, pts.index)
def test_sjoin_shared_attribute(self, naturalearth_lowres, naturalearth_cities):
    """Join cities to countries only where their names share a first letter."""

    def first_letter(series):
        # first character of each value, after casting to str
        return series.astype(str).str[0]

    cities = read_file(naturalearth_cities)
    countries = read_file(naturalearth_lowres)
    countries = countries[["geometry", "name"]].rename(columns={"name": "country"})

    # Add first letter of country/city as an attribute column to be compared
    countries["firstLetter"] = first_letter(countries["country"])
    cities["firstLetter"] = first_letter(cities["name"])

    result = sjoin(cities, countries, on_attribute="firstLetter")

    letters_match = first_letter(result["country"]) == first_letter(result["name"])
    assert letters_match.all()
    assert result.shape == (23, 5)
@pytest.mark.parametrize(
    "attr1_key_change_dict, attr2_key_change_dict",
    [
        pytest.param(
            {True: "merge", False: "no_merge"},
            {True: "merge", False: "no_merge"},
            id="merge on string attributes",
        ),
        pytest.param(
            {True: 2, False: 1},
            {True: 2, False: 1},
            id="merge on integer attributes",
        ),
        pytest.param(
            {True: True, False: False},
            {True: True, False: False},
            id="merge on boolean attributes",
        ),
        pytest.param(
            {True: True, False: False},
            {True: "merge", False: "no_merge"},
            id="merge on mixed attributes",
        ),
    ],
)
def test_sjoin_multiple_attributes_datatypes(
    self, dfs_shared_attribute, attr1_key_change_dict, attr2_key_change_dict
):
    """Join on two attributes after remapping their boolean values.

    Each parametrization maps the True/False markers of ``attr1`` and
    ``attr2`` to another value type in both frames; only rows "A" and "B"
    may remain after the join.
    """
    left_gdf, right_gdf = dfs_shared_attribute
    remap = {"attr1": attr1_key_change_dict, "attr2": attr2_key_change_dict}
    for frame in (left_gdf, right_gdf):
        for column, mapping in remap.items():
            frame[column] = frame[column].map(mapping)

    joined = sjoin(left_gdf, right_gdf, on_attribute=("attr1", "attr2"))
    assert (["A", "B"] == joined["attr_tracker"].values).all()
def test_sjoin_multiple_attributes_check_header(self, dfs_shared_attribute):
    """Joining on ``attr1`` keeps one ``attr1`` column and suffixes ``attr2``."""
    left_gdf, right_gdf = dfs_shared_attribute
    joined = sjoin(left_gdf, right_gdf, on_attribute=["attr1"])

    matched_rows = joined["attr_tracker"].values
    assert (["A", "B", "E", "F"] == matched_rows).all()

    # the join key is not duplicated; the other shared column gets suffixes
    required_columns = {"attr2_left", "attr2_right", "attr1"}
    assert set(joined.columns) >= required_columns
    assert "attr1_left" not in joined
def test_sjoin_error_column_does_not_exist(self, dfs_shared_attribute):
    """A missing ``on_attribute`` column raises, naming the offending side."""
    left_gdf, right_gdf = dfs_shared_attribute
    left_without = left_gdf.drop("attr1", axis=1)
    right_without = right_gdf.drop("attr1", axis=1)

    # (left frame, right frame, expected error text)
    cases = [
        (
            left_gdf,
            right_without,
            "Expected column attr1 is missing from the right dataframe.",
        ),
        (
            left_without,
            right_gdf,
            "Expected column attr1 is missing from the left dataframe.",
        ),
        (
            left_without,
            right_without,
            "Expected column attr1 is missing from both of the dataframes.",
        ),
    ]
    for left, right, message in cases:
        with pytest.raises(ValueError, match=message):
            sjoin(left, right, on_attribute="attr1")
def test_sjoin_error_use_geometry_column(self, dfs_shared_attribute):
    """Passing the active geometry column as ``on_attribute`` raises."""
    left_gdf, right_gdf = dfs_shared_attribute
    message = (
        "Active geometry column cannot be used as an input for "
        "on_attribute parameter."
    )
    # both the bare name and a list containing it are rejected
    for on_attribute in ("geometry", ["attr1", "geometry"]):
        with pytest.raises(ValueError, match=message):
            sjoin(left_gdf, right_gdf, on_attribute=on_attribute)
class TestIndexNames:
    """Checks on how ``sjoin`` names the result index and index-derived columns."""

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_preserve_index_names(self, how):
        """Named single-level indexes keep their names in the join result.

        When both indexes share one name, the expected frames carry
        ``_left`` / ``_right`` suffixes instead.
        """
        # preserve names of both left and right index
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"geometry": geoms}, index=pd.Index([1, 2], name="myidx1"))
        df2 = GeoDataFrame(
            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx2")
        )
        result = sjoin(df1, df2, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"myidx1": [1, 2], "geometry": geoms, "myidx2": ["a", "b"]}
            ).set_index("myidx1")
        else:
            # right join
            expected = GeoDataFrame(
                {"myidx2": ["a", "b"], "myidx1": [1, 2], "geometry": geoms},
            ).set_index("myidx2")
        assert_geodataframe_equal(result, expected)
        # but also add suffixes if both left and right have the same index
        df1.index.name = "myidx"
        df2.index.name = "myidx"
        result = sjoin(df1, df2, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
            ).set_index("myidx_left")
        else:
            # right join
            expected = GeoDataFrame(
                {"myidx_right": ["a", "b"], "myidx_left": [1, 2], "geometry": geoms},
            ).set_index("myidx_right")
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_preserve_index_names_multiindex(self, how):
        """MultiIndex level names are kept in the join result.

        The unnamed right level is expected as the column ``index_right1``
        for inner/left joins and as an unnamed index level for right joins.
        """
        # preserve names of both left and right index
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame(
            {"geometry": geoms},
            index=pd.MultiIndex.from_tuples(
                [("a", 1), ("b", 2)], names=["myidx1", "level2"]
            ),
        )
        df2 = GeoDataFrame(
            {"geometry": geoms},
            index=pd.MultiIndex.from_tuples(
                [("c", 3), ("d", 4)], names=["myidx2", None]
            ),
        )
        result = sjoin(df1, df2, how=how)
        expected_base = GeoDataFrame(
            {
                "myidx1": ["a", "b"],
                "level2": [1, 2],
                "geometry": geoms,
                "myidx2": ["c", "d"],
                "index_right1": [3, 4],
            }
        )
        if how in ("inner", "left"):
            expected = expected_base.set_index(["myidx1", "level2"])
        else:
            # right join
            expected = expected_base.set_index(["myidx2", "index_right1"])
            # if it was originally None, that is preserved
            expected.index.names = ["myidx2", None]
        assert_geodataframe_equal(result, expected)
        # but also add suffixes if both left and right have the same index
        df1.index.names = ["myidx", "level2"]
        df2.index.names = ["myidx", None]
        result = sjoin(df1, df2, how=how)
        expected_base = GeoDataFrame(
            {
                "myidx_left": ["a", "b"],
                "level2": [1, 2],
                "geometry": geoms,
                "myidx_right": ["c", "d"],
                "index_right1": [3, 4],
            }
        )
        if how in ("inner", "left"):
            expected = expected_base.set_index(["myidx_left", "level2"])
        else:
            # right join
            expected = expected_base.set_index(["myidx_right", "index_right1"])
            # if it was originally None, that is preserved
            expected.index.names = ["myidx_right", None]
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_duplicate_column_index_name(self, how):
        """A column on one side shares its name with the other side's index.

        The join is run in both argument orders; the expected frames use
        ``myidx_left`` / ``myidx_right`` for the clashing names.
        """
        # case where a left column and the right index have the same name or the
        # other way around -> correctly add suffix or preserve index name
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
        df2 = GeoDataFrame(
            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx")
        )
        result = sjoin(df1, df2, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
            )
        else:
            # right join
            expected = GeoDataFrame(
                {"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
                index=pd.Index(["a", "b"], name="myidx_right"),
            )
        assert_geodataframe_equal(result, expected)
        # same clash with the argument order swapped
        result = sjoin(df2, df1, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
                index=pd.Index(["a", "b"], name="myidx_left"),
            )
        else:
            # right join
            expected = GeoDataFrame(
                {"myidx_left": ["a", "b"], "myidx_right": [1, 2], "geometry": geoms},
            )
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_duplicate_column_index_name_multiindex(self, how):
        """A column shares its name with one level of the other side's MultiIndex.

        Only the clashing level name is suffixed in the expected frames; the
        non-clashing ``level2`` name is kept as is.
        """
        # case where a left column and the right index have the same name or the
        # other way around -> correctly add suffix or preserve index name
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
        df2 = GeoDataFrame(
            {"geometry": geoms},
            index=pd.MultiIndex.from_tuples(
                [("a", 1), ("b", 2)], names=["myidx", "level2"]
            ),
        )
        result = sjoin(df1, df2, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {
                    "myidx_left": [1, 2],
                    "geometry": geoms,
                    "myidx_right": ["a", "b"],
                    "level2": [1, 2],
                }
            )
        else:
            # right join
            expected = GeoDataFrame(
                {"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
                index=pd.MultiIndex.from_tuples(
                    [("a", 1), ("b", 2)], names=["myidx_right", "level2"]
                ),
            )
        assert_geodataframe_equal(result, expected)
        # same clash with the argument order swapped
        result = sjoin(df2, df1, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
                index=pd.MultiIndex.from_tuples(
                    [("a", 1), ("b", 2)], names=["myidx_left", "level2"]
                ),
            )
        else:
            # right join
            expected = GeoDataFrame(
                {
                    "myidx_left": ["a", "b"],
                    "level2": [1, 2],
                    "myidx_right": [1, 2],
                    "geometry": geoms,
                },
            )
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_conflicting_column_index_name(self, how):
        """A left column literally named ``index_right`` makes ``sjoin`` raise."""
        # test case where the auto-generated index name conflicts
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"index_right": [1, 2], "geometry": geoms})
        df2 = GeoDataFrame({"geometry": geoms})
        with pytest.raises(ValueError, match="'index_right' cannot be a column name"):
            sjoin(df1, df2, how=how)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_conflicting_column_with_suffix(self, how):
        """A suffixed column name collides with a column that already exists.

        Both frames have ``column`` and the left frame also has
        ``column_right``; the expected result contains two columns named
        ``column_right``.
        """
        # test case where a suffixed column name collides with an existing column
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame(
            {"column": [1, 2], "column_right": ["a", "b"], "geometry": geoms}
        )
        df2 = GeoDataFrame({"column": [0.1, 0.2], "geometry": geoms})
        result = sjoin(df1, df2, how=how)
        # the expected frame is built with placeholder integer keys and renamed
        # afterwards, because a dict cannot hold the duplicate "column_right" key
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {1: [1, 2], 2: ["a", "b"], 3: geoms, 4: [0, 1], 5: [0.1, 0.2]}
            )
            expected.columns = [
                "column_left",
                "column_right",
                "geometry",
                "index_right",
                "column_right",
            ]
        else:
            # right join
            expected = GeoDataFrame(
                {1: [0, 1], 2: [1, 2], 3: ["a", "b"], 4: [0.1, 0.2], 5: geoms}
            )
            expected.columns = [
                "index_left",
                "column_left",
                "column_right",
                "column_right",
                "geometry",
            ]
        # "geometry" only exists after the rename, so mark it active here
        expected = expected.set_geometry("geometry")
        assert_geodataframe_equal(result, expected)
@pytest.mark.usefixtures("_setup_class_nybb_filename")
class TestSpatialJoinNYBB:
def setup_method(self):
nybb_filename = geopandas.datasets.get_path("nybb")
self.polydf = read_file(nybb_filename)
self.polydf = read_file(self.nybb_filename)
self.crs = self.polydf.crs
N = 20
b = [int(x) for x in self.polydf.total_bounds]
@@ -527,7 +951,7 @@ class TestSpatialJoinNYBB:
def test_sjoin_empty_geometries(self):
# https://github.com/geopandas/geopandas/issues/944
empty = GeoDataFrame(geometry=[GeometryCollection()] * 3)
empty = GeoDataFrame(geometry=[GeometryCollection()] * 3, crs=self.crs)
df = sjoin(pd.concat([self.pointdf, empty]), self.polydf, how="left")
assert df.shape == (24, 8)
df2 = sjoin(self.pointdf, pd.concat([self.polydf, empty]), how="left")
@@ -542,8 +966,8 @@ class TestSpatialJoinNYBB:
assert sjoin(empty, self.pointdf, how="inner", predicate=predicate).empty
assert sjoin(empty, self.pointdf, how="left", predicate=predicate).empty
def test_empty_sjoin_return_duplicated_columns(self):
nybb = geopandas.read_file(geopandas.datasets.get_path("nybb"))
def test_empty_sjoin_return_duplicated_columns(self, nybb_filename):
nybb = geopandas.read_file(nybb_filename)
nybb2 = nybb.copy()
nybb2.geometry = nybb2.translate(200000) # to get non-overlapping
@@ -553,45 +977,24 @@ class TestSpatialJoinNYBB:
assert "BoroCode_left" in result.columns
class TestSpatialJoinNaturalEarth:
def setup_method(self):
world_path = geopandas.datasets.get_path("naturalearth_lowres")
cities_path = geopandas.datasets.get_path("naturalearth_cities")
self.world = read_file(world_path)
self.cities = read_file(cities_path)
def test_sjoin_inner(self):
# GH637
countries = self.world[["geometry", "name"]]
countries = countries.rename(columns={"name": "country"})
cities_with_country = sjoin(
self.cities, countries, how="inner", predicate="intersects"
)
assert cities_with_country.shape == (213, 4)
@pytest.fixture
def world(naturalearth_lowres):
    """Frame read from the path given by the ``naturalearth_lowres`` fixture."""
    world_gdf = read_file(naturalearth_lowres)
    return world_gdf
@pytest.mark.skipif(
TEST_NEAREST,
reason=("This test can only be run _without_ PyGEOS >= 0.10 installed"),
)
def test_no_nearest_all():
df1 = geopandas.GeoDataFrame({"geometry": []})
df2 = geopandas.GeoDataFrame({"geometry": []})
with pytest.raises(
NotImplementedError,
match="Currently, only PyGEOS >= 0.10.0 or Shapely >= 2.0 supports",
):
sjoin_nearest(df1, df2)
@pytest.fixture
def cities(naturalearth_cities):
    """Frame read from the path given by the ``naturalearth_cities`` fixture."""
    cities_gdf = read_file(naturalearth_cities)
    return cities_gdf
def test_sjoin_inner(world, cities):
    """Inner-join cities to the countries they intersect and check the shape."""
    # GH637
    countries = world[["geometry", "name"]].rename(columns={"name": "country"})
    cities_with_country = sjoin(
        cities, countries, how="inner", predicate="intersects"
    )
    n_rows, n_cols = cities_with_country.shape
    assert (n_rows, n_cols) == (213, 4)
@pytest.mark.skipif(
not TEST_NEAREST,
reason=(
"PyGEOS >= 0.10.0"
" must be installed and activated via the geopandas.compat module to"
" test sjoin_nearest"
),
)
class TestNearest:
@pytest.mark.parametrize(
"how_kwargs", ({}, {"how": "inner"}, {"how": "left"}, {"how": "right"})
@@ -900,10 +1303,10 @@ class TestNearest:
assert_geodataframe_equal(expected_gdf, joined)
@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS")
def test_sjoin_nearest_inner(self):
def test_sjoin_nearest_inner(self, naturalearth_lowres, naturalearth_cities):
# check equivalency of left and inner join
countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))
cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
countries = read_file(naturalearth_lowres)
cities = read_file(naturalearth_cities)
countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
# default: inner and left give the same result
@@ -927,19 +1330,8 @@ class TestNearest:
result5["index_right"] = result5["index_right"].astype("int64")
assert_geodataframe_equal(result5, result4, check_like=True)
expected_index_uncapped = (
[1, 3, 3, 1, 2] if compat.PANDAS_GE_22 else [1, 1, 3, 3, 2]
)
@pytest.mark.skipif(
not (compat.USE_SHAPELY_20),
reason=(
"shapely >= 2.0 is required to run sjoin_nearest"
"with parameter `exclusive` set"
),
)
@pytest.mark.parametrize(
"max_distance,expected", [(None, expected_index_uncapped), (1.1, [3, 3, 1, 2])]
"max_distance,expected", [(None, [1, 3, 3, 1, 2]), (1.1, [3, 3, 1, 2])]
)
def test_sjoin_nearest_exclusive(self, max_distance, expected):
geoms = shapely.points(np.arange(3), np.arange(3))