venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,28 +1,28 @@
|
||||
"""Tests for the clip module."""
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
from shapely.geometry import (
|
||||
Polygon,
|
||||
Point,
|
||||
LineString,
|
||||
LinearRing,
|
||||
GeometryCollection,
|
||||
LinearRing,
|
||||
LineString,
|
||||
MultiPoint,
|
||||
Point,
|
||||
Polygon,
|
||||
box,
|
||||
)
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, GeoSeries, clip
|
||||
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
import pytest
|
||||
|
||||
from geopandas._compat import HAS_PYPROJ
|
||||
from geopandas.tools.clip import _mask_is_list_like_rectangle
|
||||
|
||||
pytestmark = pytest.mark.skip_no_sindex
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from pandas.testing import assert_index_equal
|
||||
|
||||
mask_variants_single_rectangle = [
|
||||
"single_rectangle_gdf",
|
||||
"single_rectangle_gdf_list_bounds",
|
||||
@@ -43,6 +43,14 @@ def point_gdf():
|
||||
return gdf
|
||||
|
||||
|
||||
@pytest.fixture
def point_gdf2():
    """Return a six-point GeoDataFrame in EPSG:3857.

    The points lie on the x == y diagonal and are deliberately listed out of
    coordinate order: (5, 5), (2, 2), (4, 4), (0, 0), (3, 3), (1, 1).
    """
    coords = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
    geometries = [Point(pair) for pair in coords]
    return GeoDataFrame(geometries, columns=["geometry"], crs="EPSG:3857")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pointsoutside_nooverlap_gdf():
|
||||
"""Create a point GeoDataFrame. Its points are all outside the single
|
||||
@@ -137,7 +145,7 @@ def two_line_gdf():
|
||||
@pytest.fixture
def multi_poly_gdf(donut_geometry):
    """Create a multi-polygon GeoDataFrame.

    The geometries of ``donut_geometry`` are combined with ``union_all()`` and
    stored as a single row in EPSG:3857 whose ``attr`` value is ``"pool"``.
    """
    # The union is computed once through ``union_all()``; the earlier
    # assignment via the deprecated ``unary_union`` attribute was dead code
    # because its result was overwritten immediately.
    multi_poly = donut_geometry.union_all()
    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
    out_df["attr"] = ["pool"]
    return out_df
|
||||
@@ -148,7 +156,7 @@ def multi_line(two_line_gdf):
|
||||
"""Create a multi-line GeoDataFrame.
|
||||
This GDF has one multiline and one regular line."""
|
||||
# Create a single and multi line object
|
||||
multiline_feat = two_line_gdf.unary_union
|
||||
multiline_feat = two_line_gdf.union_all()
|
||||
linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
|
||||
out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
|
||||
out_df["attr"] = ["road", "stream"]
|
||||
@@ -158,7 +166,7 @@ def multi_line(two_line_gdf):
|
||||
@pytest.fixture
|
||||
def multi_point(point_gdf):
|
||||
"""Create a multi-point GeoDataFrame."""
|
||||
multi_point = point_gdf.unary_union
|
||||
multi_point = point_gdf.union_all()
|
||||
out_df = GeoDataFrame(
|
||||
geometry=GeoSeries(
|
||||
[multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
|
||||
@@ -321,7 +329,7 @@ class TestClipWithSingleRectangleGdf:
|
||||
)
|
||||
assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
|
||||
shape_for_points = (
|
||||
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.unary_union
|
||||
box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
|
||||
)
|
||||
assert all(clipped.intersects(shape_for_points))
|
||||
|
||||
@@ -398,6 +406,7 @@ def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
|
||||
assert "GeometryCollection" in clipped.geom_type[0]
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
    """Clipping with a mask converted to EPSG:4326 emits a CRS-mismatch warning."""
    expected_message = "CRS mismatch between the CRS"
    with pytest.warns(UserWarning, match=expected_message):
        mask_other_crs = single_rectangle_gdf.to_crs(4326)
        clip(point_gdf, mask_other_crs)
|
||||
@@ -460,3 +469,16 @@ def test_clip_empty_mask(buffered_locations, mask):
|
||||
)
|
||||
clipped = clip(buffered_locations.geometry, mask)
|
||||
assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))
|
||||
|
||||
|
||||
def test_clip_sorting(point_gdf2):
    """Test the ``sort`` keyword of ``clip``.

    The default result is asserted NOT to be in ascending index order, while
    ``sort=True`` must return exactly the index ``[1, 3, 5]``.
    """
    mask = shapely.geometry.box(0, 0, 2, 2)
    default_index = point_gdf2.clip(mask).index
    sorted_index = point_gdf2.clip(mask, sort=True).index

    assert not (sorted(default_index) == default_index).all()
    assert (sorted(sorted_index) == sorted_index).all()
    assert_index_equal(pd.Index([1, 3, 5]), sorted_index)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import numpy as np
|
||||
|
||||
from shapely.geometry import Point
|
||||
from shapely.wkt import loads
|
||||
|
||||
|
||||
@@ -1,28 +1,46 @@
|
||||
import pytest
|
||||
import numpy
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
|
||||
import geopandas
|
||||
from geopandas.tools._random import uniform
|
||||
|
||||
multipolygons = geopandas.read_file(geopandas.datasets.get_path("nybb")).geometry
|
||||
polygons = multipolygons.explode(ignore_index=True).geometry
|
||||
multilinestrings = multipolygons.boundary
|
||||
linestrings = polygons.boundary
|
||||
points = multipolygons.centroid
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def multipolygons(nybb_filename):
    """Return the geometry column of the file read from ``nybb_filename``."""
    nybb = geopandas.read_file(nybb_filename)
    return nybb.geometry
|
||||
|
||||
|
||||
@pytest.fixture
def polygons(multipolygons):
    """Return ``multipolygons`` exploded with ``ignore_index=True``."""
    exploded = multipolygons.explode(ignore_index=True)
    return exploded.geometry
|
||||
|
||||
|
||||
@pytest.fixture
def multilinestrings(multipolygons):
    """Return the boundaries of the ``multipolygons`` fixture."""
    boundaries = multipolygons.boundary
    return boundaries
|
||||
|
||||
|
||||
@pytest.fixture
def linestrings(polygons):
    """Return the boundaries of the ``polygons`` fixture."""
    boundaries = polygons.boundary
    return boundaries
|
||||
|
||||
|
||||
@pytest.fixture
def points(multipolygons):
    """Return the centroids of the ``multipolygons`` fixture."""
    centroids = multipolygons.centroid
    return centroids
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
|
||||
reason="array input in interpolate not implemented for shapely<2",
|
||||
)
|
||||
@pytest.mark.parametrize("size", [10, 100])
|
||||
@pytest.mark.parametrize(
|
||||
"geom", [multipolygons[0], polygons[0], multilinestrings[0], linestrings[0]]
|
||||
"geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
|
||||
)
|
||||
def test_uniform(geom, size):
|
||||
def test_uniform(geom_fixture, size, request):
|
||||
geom = request.getfixturevalue(geom_fixture)[0]
|
||||
sample = uniform(geom, size=size, rng=1)
|
||||
sample_series = geopandas.GeoSeries(sample).explode().reset_index(drop=True)
|
||||
sample_series = (
|
||||
geopandas.GeoSeries(sample).explode(index_parts=True).reset_index(drop=True)
|
||||
)
|
||||
assert len(sample_series) == size
|
||||
sample_in_geom = sample_series.buffer(0.00000001).sindex.query(
|
||||
geom, predicate="intersects"
|
||||
@@ -30,21 +48,13 @@ def test_uniform(geom, size):
|
||||
assert len(sample_in_geom) == size
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
|
||||
reason="array input in interpolate not implemented for shapely<2",
|
||||
)
|
||||
def test_uniform_unsupported():
|
||||
def test_uniform_unsupported(points):
|
||||
with pytest.warns(UserWarning, match="Sampling is not supported"):
|
||||
sample = uniform(points[0], size=10, rng=1)
|
||||
assert sample.is_empty
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
|
||||
reason="array input in interpolate not implemented for shapely<2",
|
||||
)
|
||||
def test_uniform_generator():
|
||||
def test_uniform_generator(polygons):
|
||||
sample = uniform(polygons[0], size=10, rng=1)
|
||||
sample2 = uniform(polygons[0], size=10, rng=1)
|
||||
assert sample.equals(sample2)
|
||||
|
||||
@@ -3,23 +3,24 @@ from typing import Sequence
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import shapely
|
||||
|
||||
from shapely.geometry import Point, Polygon, GeometryCollection
|
||||
import shapely
|
||||
from shapely.geometry import GeometryCollection, Point, Polygon, box
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, GeoSeries, read_file, sjoin, sjoin_nearest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from geopandas import (
|
||||
GeoDataFrame,
|
||||
GeoSeries,
|
||||
points_from_xy,
|
||||
read_file,
|
||||
sjoin,
|
||||
sjoin_nearest,
|
||||
)
|
||||
|
||||
from pandas.testing import assert_frame_equal, assert_series_equal
|
||||
import pytest
|
||||
|
||||
|
||||
TEST_NEAREST = compat.USE_SHAPELY_20 or (compat.PYGEOS_GE_010 and compat.USE_PYGEOS)
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skip_no_sindex
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@@ -95,6 +96,52 @@ def dfs(request):
|
||||
return [request.param, df1, df2, expected]
|
||||
|
||||
|
||||
@pytest.fixture()
def dfs_shared_attribute():
    """Build the two eight-row point frames used by the ``on_attribute`` tests.

    Both frames hold the points (0, 0) .. (7, 7) and a ``duplicate_column``
    counting 0..7.  In the left frame ``attr1`` and ``attr2`` are True in every
    row and ``attr_tracker`` labels the rows "A".."H".  In the right frame
    ``attr1`` is True for rows 0, 1, 4, 5 and ``attr2`` only for rows 0, 1.

    Returns
    -------
    tuple
        ``(left_gdf, right_gdf)``
    """
    n_rows = 8
    row_ids = list(range(n_rows))

    left_gdf = geopandas.GeoDataFrame(
        {
            "geometry": [Point(i, i) for i in row_ids],
            "attr_tracker": list("ABCDEFGH"),
            "duplicate_column": list(row_ids),
            "attr1": [True] * n_rows,
            "attr2": [True] * n_rows,
        }
    )

    right_gdf = geopandas.GeoDataFrame(
        {
            "geometry": [Point(i, i) for i in row_ids],
            "duplicate_column": list(row_ids),
            "attr1": [True, True, False, False] * 2,
            "attr2": [True, True] + [False] * 6,
        }
    )

    return left_gdf, right_gdf
|
||||
|
||||
|
||||
class TestSpatialJoin:
|
||||
@pytest.mark.parametrize(
|
||||
"how, lsuffix, rsuffix, expected_cols",
|
||||
@@ -113,6 +160,7 @@ class TestSpatialJoin:
|
||||
joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
|
||||
assert set(joined.columns) == expected_cols | {"geometry"}
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
@pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
|
||||
def test_crs_mismatch(self, dfs):
|
||||
index, df1, df2, expected = dfs
|
||||
@@ -120,31 +168,6 @@ class TestSpatialJoin:
|
||||
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
|
||||
sjoin(df1, df2)
|
||||
|
||||
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
|
||||
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
|
||||
def test_deprecated_op_param(self, dfs, op):
|
||||
_, df1, df2, _ = dfs
|
||||
with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
|
||||
sjoin(df1, df2, op=op)
|
||||
|
||||
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
|
||||
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
|
||||
@pytest.mark.parametrize("predicate", ["contains", "within"])
|
||||
def test_deprecated_op_param_nondefault_predicate(self, dfs, op, predicate):
|
||||
_, df1, df2, _ = dfs
|
||||
match = "use the `predicate` parameter instead"
|
||||
if op != predicate:
|
||||
warntype = UserWarning
|
||||
match = (
|
||||
"`predicate` will be overridden by the value of `op`" # noqa: ISC003
|
||||
+ r"(.|\s)*"
|
||||
+ match
|
||||
)
|
||||
else:
|
||||
warntype = FutureWarning
|
||||
with pytest.warns(warntype, match=match):
|
||||
sjoin(df1, df2, predicate=predicate, op=op)
|
||||
|
||||
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
|
||||
def test_unknown_kwargs(self, dfs):
|
||||
_, df1, df2, _ = dfs
|
||||
@@ -154,7 +177,6 @@ class TestSpatialJoin:
|
||||
):
|
||||
sjoin(df1, df2, extra_param="test")
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
|
||||
@pytest.mark.parametrize(
|
||||
"dfs",
|
||||
[
|
||||
@@ -167,12 +189,10 @@ class TestSpatialJoin:
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
|
||||
@pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
|
||||
def test_inner(self, predicate, predicate_kw, dfs):
|
||||
def test_inner(self, predicate, dfs):
|
||||
index, df1, df2, expected = dfs
|
||||
|
||||
res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})
|
||||
|
||||
res = sjoin(df1, df2, how="inner", predicate=predicate)
|
||||
exp = expected[predicate].dropna().copy()
|
||||
exp = exp.drop("geometry_y", axis=1).rename(columns={"geometry_x": "geometry"})
|
||||
exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
|
||||
@@ -182,7 +202,7 @@ class TestSpatialJoin:
|
||||
].astype("int64")
|
||||
if index == "named-index":
|
||||
exp[["df1_ix", "df2_ix"]] = exp[["df1_ix", "df2_ix"]].astype("int64")
|
||||
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
|
||||
exp = exp.set_index("df1_ix")
|
||||
if index in ["default-index", "string-index"]:
|
||||
exp = exp.set_index("index_left")
|
||||
exp.index.name = None
|
||||
@@ -192,11 +212,7 @@ class TestSpatialJoin:
|
||||
)
|
||||
exp.index.names = df1.index.names
|
||||
if index == "named-multi-index":
|
||||
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
|
||||
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
|
||||
)
|
||||
exp.index.names = df1.index.names
|
||||
|
||||
exp = exp.set_index(["df1_ix1", "df1_ix2"])
|
||||
assert_frame_equal(res, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -232,7 +248,7 @@ class TestSpatialJoin:
|
||||
res["index_right"] = res["index_right"].astype(float)
|
||||
elif index == "named-index":
|
||||
exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64")
|
||||
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
|
||||
exp = exp.set_index("df1_ix")
|
||||
if index in ["default-index", "string-index"]:
|
||||
exp = exp.set_index("index_left")
|
||||
exp.index.name = None
|
||||
@@ -242,10 +258,7 @@ class TestSpatialJoin:
|
||||
)
|
||||
exp.index.names = df1.index.names
|
||||
if index == "named-multi-index":
|
||||
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
|
||||
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
|
||||
)
|
||||
exp.index.names = df1.index.names
|
||||
exp = exp.set_index(["df1_ix1", "df1_ix2"])
|
||||
|
||||
assert_frame_equal(res, exp)
|
||||
|
||||
@@ -348,7 +361,7 @@ class TestSpatialJoin:
|
||||
res["index_left"] = res["index_left"].astype(float)
|
||||
elif index == "named-index":
|
||||
exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
|
||||
exp = exp.set_index("df2_ix").rename(columns={"df1_ix": "index_left"})
|
||||
exp = exp.set_index("df2_ix")
|
||||
if index in ["default-index", "string-index"]:
|
||||
exp = exp.set_index("index_right")
|
||||
exp = exp.reindex(columns=res.columns)
|
||||
@@ -359,20 +372,431 @@ class TestSpatialJoin:
|
||||
)
|
||||
exp.index.names = df2.index.names
|
||||
if index == "named-multi-index":
|
||||
exp = exp.set_index(["df2_ix1", "df2_ix2"]).rename(
|
||||
columns={"df1_ix1": "index_left0", "df1_ix2": "index_left1"}
|
||||
)
|
||||
exp.index.names = df2.index.names
|
||||
exp = exp.set_index(["df2_ix1", "df2_ix2"])
|
||||
|
||||
if predicate == "within":
|
||||
exp = exp.sort_index()
|
||||
|
||||
assert_frame_equal(res, exp, check_index_type=False)
|
||||
|
||||
@pytest.mark.skipif(not compat.GEOS_GE_310, reason="`dwithin` requires GEOS 3.10")
@pytest.mark.parametrize("how", ["inner"])
@pytest.mark.parametrize(
    "geo_left, geo_right, expected_left, expected_right, distance",
    [
        # Distance is number, 2x1
        (
            [Point(0, 0), Point(1, 1)],
            [Point(1, 1)],
            [0, 1],
            [0, 0],
            math.sqrt(2),
        ),
        # Distance is number, 2x2
        (
            [Point(0, 0), Point(1, 1)],
            [Point(0, 0), Point(1, 1)],
            [0, 1, 0, 1],
            [0, 0, 1, 1],
            math.sqrt(2),
        ),
        # Distance is array, matches len(left)
        (
            [Point(0, 0), Point(0, 0), Point(-1, -1)],
            [Point(1, 1)],
            [1, 2],
            [0, 0],
            [0, math.sqrt(2), math.sqrt(8)],
        ),
        # Distance is np.array, matches len(left),
        # inner join sorts the right GeoDataFrame
        (
            [Point(0, 0), Point(0, 0), Point(-1, -1)],
            [Point(1, 1), Point(0.5, 0.5)],
            [1, 2, 1, 2],
            [1, 1, 0, 0],
            np.array([0, math.sqrt(2), math.sqrt(8)]),
        ),
    ],
)
def test_sjoin_dwithin(
    self,
    geo_left,
    geo_right,
    expected_left: Sequence[int],
    expected_right: Sequence[int],
    distance,
    how,
):
    """Check ``sjoin`` with ``predicate="dwithin"`` for several distance inputs.

    ``expected_left`` lists the positions of the left rows expected in the
    result and ``expected_right`` the matching ``index_right`` values.  Both
    frames are compared after ``sort_index()``.
    """
    left = geopandas.GeoDataFrame({"geometry": geo_left})
    right = geopandas.GeoDataFrame({"geometry": geo_right})

    joined = sjoin(left, right, how=how, predicate="dwithin", distance=distance)

    expected_gdf = left.iloc[expected_left].copy()
    expected_gdf["index_right"] = expected_right
    assert_frame_equal(expected_gdf.sort_index(), joined.sort_index())
|
||||
|
||||
# GH3239
@pytest.mark.parametrize(
    "predicate",
    [
        "contains",
        "contains_properly",
        "covered_by",
        "covers",
        "crosses",
        "intersects",
        "touches",
        "within",
    ],
)
def test_sjoin_left_order(self, predicate):
    """A left join must return the left rows in their original order.

    The points are given in no particular order; the index of the joined
    frame has to equal the index of the input points for every predicate.
    """
    xs = [0.1, 0.4, 0.3, 0.7]
    ys = [0.8, 0.6, 0.9, 0.1]
    pts = GeoDataFrame(geometry=points_from_xy(xs, ys))

    # four boxes tiling the unit square
    quadrants = [
        box(0, 0, 0.5, 0.5),
        box(0, 0.5, 0.5, 1),
        box(0.5, 0, 1, 0.5),
        box(0.5, 0.5, 1, 1),
    ]
    polys = GeoDataFrame({"id": [1, 2, 3, 4]}, geometry=quadrants)

    joined = sjoin(pts, polys, predicate=predicate, how="left")
    assert_index_equal(joined.index, pts.index)
|
||||
|
||||
def test_sjoin_shared_attribute(self, naturalearth_lowres, naturalearth_cities):
    """Join cities to countries with ``on_attribute="firstLetter"``.

    Every joined row must pair a city and a country starting with the same
    letter, and the result must have shape (23, 5).
    """
    cities = read_file(naturalearth_cities)
    countries = read_file(naturalearth_lowres)[["geometry", "name"]]
    countries = countries.rename(columns={"name": "country"})

    # Add first letter of country/city as an attribute column to be compared
    for frame, name_column in ((countries, "country"), (cities, "name")):
        frame["firstLetter"] = frame[name_column].astype(str).str[0]

    result = sjoin(cities, countries, on_attribute="firstLetter")

    country_initial = result["country"].astype(str).str[0]
    city_initial = result["name"].astype(str).str[0]
    assert (country_initial == city_initial).all()
    assert result.shape == (23, 5)
|
||||
|
||||
@pytest.mark.parametrize(
    "attr1_key_change_dict, attr2_key_change_dict",
    [
        pytest.param(
            {True: "merge", False: "no_merge"},
            {True: "merge", False: "no_merge"},
            id="merge on string attributes",
        ),
        pytest.param(
            {True: 2, False: 1},
            {True: 2, False: 1},
            id="merge on integer attributes",
        ),
        pytest.param(
            {True: True, False: False},
            {True: True, False: False},
            id="merge on boolean attributes",
        ),
        pytest.param(
            {True: True, False: False},
            {True: "merge", False: "no_merge"},
            id="merge on mixed attributes",
        ),
    ],
)
def test_sjoin_multiple_attributes_datatypes(
    self, dfs_shared_attribute, attr1_key_change_dict, attr2_key_change_dict
):
    """Join on two attributes after mapping their values to other datatypes.

    The boolean ``attr1`` / ``attr2`` columns of both frames are mapped through
    the given dictionaries; the join must then keep only the rows tracked as
    "A" and "B".
    """
    left_gdf, right_gdf = dfs_shared_attribute

    relabel = {"attr1": attr1_key_change_dict, "attr2": attr2_key_change_dict}
    for gdf in (left_gdf, right_gdf):
        for column, mapping in relabel.items():
            gdf[column] = gdf[column].map(mapping)

    joined = sjoin(left_gdf, right_gdf, on_attribute=("attr1", "attr2"))
    assert (["A", "B"] == joined["attr_tracker"].values).all()
|
||||
|
||||
def test_sjoin_multiple_attributes_check_header(self, dfs_shared_attribute):
    """Check rows and column names of a join with ``on_attribute=["attr1"]``.

    The rows tracked as "A", "B", "E", "F" must remain.  ``attr1`` must appear
    without a suffix, while ``attr2`` appears as ``attr2_left`` and
    ``attr2_right``.
    """
    left_gdf, right_gdf = dfs_shared_attribute
    joined = sjoin(left_gdf, right_gdf, on_attribute=["attr1"])

    matched_rows = joined["attr_tracker"].values
    assert (["A", "B", "E", "F"] == matched_rows).all()

    expected_columns = {"attr2_left", "attr2_right", "attr1"}
    assert expected_columns.issubset(joined.columns)
    assert "attr1_left" not in joined
|
||||
|
||||
def test_sjoin_error_column_does_not_exist(self, dfs_shared_attribute):
    """``on_attribute`` naming a missing column raises ``ValueError``.

    The message must say whether the column is missing from the right frame,
    the left frame, or both.
    """
    left_gdf, right_gdf = dfs_shared_attribute
    right_without_attr = right_gdf.drop("attr1", axis=1)
    left_without_attr = left_gdf.drop("attr1", axis=1)

    cases = [
        (
            left_gdf,
            right_without_attr,
            "Expected column attr1 is missing from the right dataframe.",
        ),
        (
            left_without_attr,
            right_gdf,
            "Expected column attr1 is missing from the left dataframe.",
        ),
        (
            left_without_attr,
            right_without_attr,
            "Expected column attr1 is missing from both of the dataframes.",
        ),
    ]
    for left, right, message in cases:
        with pytest.raises(ValueError, match=message):
            sjoin(left, right, on_attribute="attr1")
|
||||
|
||||
def test_sjoin_error_use_geometry_column(self, dfs_shared_attribute):
    """Passing the active geometry column to ``on_attribute`` raises ``ValueError``.

    Checked both for the bare column name and for a list that contains it.
    """
    left_gdf, right_gdf = dfs_shared_attribute
    message = (
        "Active geometry column cannot be used as an input for "
        "on_attribute parameter."
    )
    for on_attribute in ("geometry", ["attr1", "geometry"]):
        with pytest.raises(ValueError, match=message):
            sjoin(left_gdf, right_gdf, on_attribute=on_attribute)
|
||||
|
||||
|
||||
class TestIndexNames:
    """Tests for the index names and index-derived column names set by ``sjoin``."""

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_preserve_index_names(self, how):
        """Names of both the left and the right index are preserved."""
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"geometry": geoms}, index=pd.Index([1, 2], name="myidx1"))
        df2 = GeoDataFrame(
            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx2")
        )
        left_indexed = how in ("inner", "left")

        result = sjoin(df1, df2, how=how)
        if left_indexed:
            columns = {"myidx1": [1, 2], "geometry": geoms, "myidx2": ["a", "b"]}
            index_column = "myidx1"
        else:
            # right join
            columns = {"myidx2": ["a", "b"], "myidx1": [1, 2], "geometry": geoms}
            index_column = "myidx2"
        expected = GeoDataFrame(columns).set_index(index_column)
        assert_geodataframe_equal(result, expected)

        # but also add suffixes if both left and right have the same index
        df1.index.name = "myidx"
        df2.index.name = "myidx"
        result = sjoin(df1, df2, how=how)
        if left_indexed:
            columns = {
                "myidx_left": [1, 2],
                "geometry": geoms,
                "myidx_right": ["a", "b"],
            }
            index_column = "myidx_left"
        else:
            # right join
            columns = {
                "myidx_right": ["a", "b"],
                "myidx_left": [1, 2],
                "geometry": geoms,
            }
            index_column = "myidx_right"
        expected = GeoDataFrame(columns).set_index(index_column)
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_preserve_index_names_multiindex(self, how):
        """Level names of both MultiIndexes are preserved."""
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame(
            {"geometry": geoms},
            index=pd.MultiIndex.from_tuples(
                [("a", 1), ("b", 2)], names=["myidx1", "level2"]
            ),
        )
        df2 = GeoDataFrame(
            {"geometry": geoms},
            index=pd.MultiIndex.from_tuples(
                [("c", 3), ("d", 4)], names=["myidx2", None]
            ),
        )

        def expected_frame(left_name, right_name):
            # Expected result for the given first-level names of the two inputs.
            base = GeoDataFrame(
                {
                    left_name: ["a", "b"],
                    "level2": [1, 2],
                    "geometry": geoms,
                    right_name: ["c", "d"],
                    "index_right1": [3, 4],
                }
            )
            if how in ("inner", "left"):
                return base.set_index([left_name, "level2"])
            # right join
            frame = base.set_index([right_name, "index_right1"])
            # if it was originally None, that is preserved
            frame.index.names = [right_name, None]
            return frame

        result = sjoin(df1, df2, how=how)
        assert_geodataframe_equal(result, expected_frame("myidx1", "myidx2"))

        # but also add suffixes if both left and right have the same index
        df1.index.names = ["myidx", "level2"]
        df2.index.names = ["myidx", None]
        result = sjoin(df1, df2, how=how)
        assert_geodataframe_equal(result, expected_frame("myidx_left", "myidx_right"))

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_duplicate_column_index_name(self, how):
        """A column of one frame and the index of the other share a name.

        Covers both directions: the suffix must be added, or the index name
        preserved, as appropriate.
        """
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
        df2 = GeoDataFrame(
            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx")
        )
        left_indexed = how in ("inner", "left")

        result = sjoin(df1, df2, how=how)
        if left_indexed:
            expected = GeoDataFrame(
                {"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
            )
        else:
            # right join
            expected = GeoDataFrame(
                {"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
                index=pd.Index(["a", "b"], name="myidx_right"),
            )
        assert_geodataframe_equal(result, expected)

        # swapped roles: the named index is now on the left
        result = sjoin(df2, df1, how=how)
        if left_indexed:
            expected = GeoDataFrame(
                {"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
                index=pd.Index(["a", "b"], name="myidx_left"),
            )
        else:
            # right join
            expected = GeoDataFrame(
                {"myidx_left": ["a", "b"], "myidx_right": [1, 2], "geometry": geoms},
            )
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_duplicate_column_index_name_multiindex(self, how):
        """A column of one frame and a MultiIndex level of the other share a name.

        Covers both directions: the suffix must be added, or the index name
        preserved, as appropriate.
        """
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
        df2 = GeoDataFrame(
            {"geometry": geoms},
            index=pd.MultiIndex.from_tuples(
                [("a", 1), ("b", 2)], names=["myidx", "level2"]
            ),
        )
        left_indexed = how in ("inner", "left")

        result = sjoin(df1, df2, how=how)
        if left_indexed:
            expected = GeoDataFrame(
                {
                    "myidx_left": [1, 2],
                    "geometry": geoms,
                    "myidx_right": ["a", "b"],
                    "level2": [1, 2],
                }
            )
        else:
            # right join
            expected = GeoDataFrame(
                {"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
                index=pd.MultiIndex.from_tuples(
                    [("a", 1), ("b", 2)], names=["myidx_right", "level2"]
                ),
            )
        assert_geodataframe_equal(result, expected)

        # swapped roles: the MultiIndex is now on the left
        result = sjoin(df2, df1, how=how)
        if left_indexed:
            expected = GeoDataFrame(
                {"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
                index=pd.MultiIndex.from_tuples(
                    [("a", 1), ("b", 2)], names=["myidx_left", "level2"]
                ),
            )
        else:
            # right join
            expected = GeoDataFrame(
                {
                    "myidx_left": ["a", "b"],
                    "level2": [1, 2],
                    "myidx_right": [1, 2],
                    "geometry": geoms,
                },
            )
        assert_geodataframe_equal(result, expected)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_conflicting_column_index_name(self, how):
        """An existing ``index_right`` column conflicts with the generated name."""
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"index_right": [1, 2], "geometry": geoms})
        df2 = GeoDataFrame({"geometry": geoms})

        expected_message = "'index_right' cannot be a column name"
        with pytest.raises(ValueError, match=expected_message):
            sjoin(df1, df2, how=how)

    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_conflicting_column_with_suffix(self, how):
        """An existing ``column_right`` collides with the suffixed ``column``.

        Both frames have a ``column``; the left frame also has a column that is
        already named ``column_right``.  The expected result contains the name
        ``column_right`` twice.
        """
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame(
            {"column": [1, 2], "column_right": ["a", "b"], "geometry": geoms}
        )
        df2 = GeoDataFrame({"column": [0.1, 0.2], "geometry": geoms})

        result = sjoin(df1, df2, how=how)

        # The expected frame is built under positional keys first, because a
        # dict cannot hold the duplicated "column_right" label.
        if how in ("inner", "left"):
            values = [[1, 2], ["a", "b"], geoms, [0, 1], [0.1, 0.2]]
            labels = [
                "column_left",
                "column_right",
                "geometry",
                "index_right",
                "column_right",
            ]
        else:
            # right join
            values = [[0, 1], [1, 2], ["a", "b"], [0.1, 0.2], geoms]
            labels = [
                "index_left",
                "column_left",
                "column_right",
                "column_right",
                "geometry",
            ]
        expected = GeoDataFrame(dict(enumerate(values, start=1)))
        expected.columns = labels
        expected = expected.set_geometry("geometry")
        assert_geodataframe_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("_setup_class_nybb_filename")
|
||||
class TestSpatialJoinNYBB:
|
||||
def setup_method(self):
|
||||
nybb_filename = geopandas.datasets.get_path("nybb")
|
||||
self.polydf = read_file(nybb_filename)
|
||||
self.polydf = read_file(self.nybb_filename)
|
||||
self.crs = self.polydf.crs
|
||||
N = 20
|
||||
b = [int(x) for x in self.polydf.total_bounds]
|
||||
@@ -527,7 +951,7 @@ class TestSpatialJoinNYBB:
|
||||
|
||||
def test_sjoin_empty_geometries(self):
|
||||
# https://github.com/geopandas/geopandas/issues/944
|
||||
empty = GeoDataFrame(geometry=[GeometryCollection()] * 3)
|
||||
empty = GeoDataFrame(geometry=[GeometryCollection()] * 3, crs=self.crs)
|
||||
df = sjoin(pd.concat([self.pointdf, empty]), self.polydf, how="left")
|
||||
assert df.shape == (24, 8)
|
||||
df2 = sjoin(self.pointdf, pd.concat([self.polydf, empty]), how="left")
|
||||
@@ -542,8 +966,8 @@ class TestSpatialJoinNYBB:
|
||||
assert sjoin(empty, self.pointdf, how="inner", predicate=predicate).empty
|
||||
assert sjoin(empty, self.pointdf, how="left", predicate=predicate).empty
|
||||
|
||||
def test_empty_sjoin_return_duplicated_columns(self):
|
||||
nybb = geopandas.read_file(geopandas.datasets.get_path("nybb"))
|
||||
def test_empty_sjoin_return_duplicated_columns(self, nybb_filename):
|
||||
nybb = geopandas.read_file(nybb_filename)
|
||||
nybb2 = nybb.copy()
|
||||
nybb2.geometry = nybb2.translate(200000) # to get non-overlapping
|
||||
|
||||
@@ -553,45 +977,24 @@ class TestSpatialJoinNYBB:
|
||||
assert "BoroCode_left" in result.columns
|
||||
|
||||
|
||||
class TestSpatialJoinNaturalEarth:
|
||||
def setup_method(self):
|
||||
world_path = geopandas.datasets.get_path("naturalearth_lowres")
|
||||
cities_path = geopandas.datasets.get_path("naturalearth_cities")
|
||||
self.world = read_file(world_path)
|
||||
self.cities = read_file(cities_path)
|
||||
|
||||
def test_sjoin_inner(self):
|
||||
# GH637
|
||||
countries = self.world[["geometry", "name"]]
|
||||
countries = countries.rename(columns={"name": "country"})
|
||||
cities_with_country = sjoin(
|
||||
self.cities, countries, how="inner", predicate="intersects"
|
||||
)
|
||||
assert cities_with_country.shape == (213, 4)
|
||||
@pytest.fixture
def world(naturalearth_lowres):
    """Return the frame read from the ``naturalearth_lowres`` path."""
    countries = read_file(naturalearth_lowres)
    return countries
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
TEST_NEAREST,
|
||||
reason=("This test can only be run _without_ PyGEOS >= 0.10 installed"),
|
||||
)
|
||||
def test_no_nearest_all():
|
||||
df1 = geopandas.GeoDataFrame({"geometry": []})
|
||||
df2 = geopandas.GeoDataFrame({"geometry": []})
|
||||
with pytest.raises(
|
||||
NotImplementedError,
|
||||
match="Currently, only PyGEOS >= 0.10.0 or Shapely >= 2.0 supports",
|
||||
):
|
||||
sjoin_nearest(df1, df2)
|
||||
@pytest.fixture
def cities(naturalearth_cities):
    """Return the frame read from the ``naturalearth_cities`` path."""
    places = read_file(naturalearth_cities)
    return places
|
||||
|
||||
|
||||
def test_sjoin_inner(world, cities):
    """GH637: inner ``intersects`` join of cities with countries.

    The joined frame is expected to have shape (213, 4).
    """
    countries = world[["geometry", "name"]].rename(columns={"name": "country"})
    cities_with_country = sjoin(cities, countries, how="inner", predicate="intersects")
    assert cities_with_country.shape == (213, 4)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not TEST_NEAREST,
|
||||
reason=(
|
||||
"PyGEOS >= 0.10.0"
|
||||
" must be installed and activated via the geopandas.compat module to"
|
||||
" test sjoin_nearest"
|
||||
),
|
||||
)
|
||||
class TestNearest:
|
||||
@pytest.mark.parametrize(
|
||||
"how_kwargs", ({}, {"how": "inner"}, {"how": "left"}, {"how": "right"})
|
||||
@@ -900,10 +1303,10 @@ class TestNearest:
|
||||
assert_geodataframe_equal(expected_gdf, joined)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS")
|
||||
def test_sjoin_nearest_inner(self):
|
||||
def test_sjoin_nearest_inner(self, naturalearth_lowres, naturalearth_cities):
|
||||
# check equivalency of left and inner join
|
||||
countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))
|
||||
cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
|
||||
countries = read_file(naturalearth_lowres)
|
||||
cities = read_file(naturalearth_cities)
|
||||
countries = countries[["geometry", "name"]].rename(columns={"name": "country"})
|
||||
|
||||
# default: inner and left give the same result
|
||||
@@ -927,19 +1330,8 @@ class TestNearest:
|
||||
result5["index_right"] = result5["index_right"].astype("int64")
|
||||
assert_geodataframe_equal(result5, result4, check_like=True)
|
||||
|
||||
expected_index_uncapped = (
|
||||
[1, 3, 3, 1, 2] if compat.PANDAS_GE_22 else [1, 1, 3, 3, 2]
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not (compat.USE_SHAPELY_20),
|
||||
reason=(
|
||||
"shapely >= 2.0 is required to run sjoin_nearest"
|
||||
"with parameter `exclusive` set"
|
||||
),
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"max_distance,expected", [(None, expected_index_uncapped), (1.1, [3, 3, 1, 2])]
|
||||
"max_distance,expected", [(None, [1, 3, 3, 1, 2]), (1.1, [3, 3, 1, 2])]
|
||||
)
|
||||
def test_sjoin_nearest_exclusive(self, max_distance, expected):
|
||||
geoms = shapely.points(np.arange(3), np.arange(3))
|
||||
|
||||
Reference in New Issue
Block a user