venv
@@ -1,8 +1,8 @@
+from .clip import clip
 from .geocoding import geocode, reverse_geocode
 from .overlay import overlay
 from .sjoin import sjoin, sjoin_nearest
 from .util import collect
-from .clip import clip

 __all__ = [
     "collect",
[9 binary files changed; contents not shown]
@@ -1,6 +1,7 @@
 from warnings import warn

 import numpy
+
 from shapely.geometry import MultiPoint

 from geopandas.array import from_shapely, points_from_xy
@@ -64,7 +65,7 @@ def _uniform_line(geom, size, generator):
     """

     fracs = generator.uniform(size=size)
-    return from_shapely(geom.interpolate(fracs, normalized=True)).unary_union()
+    return from_shapely(geom.interpolate(fracs, normalized=True)).union_all()


 def _uniform_polygon(geom, size, generator):
@@ -80,4 +81,4 @@ def _uniform_polygon(geom, size, generator):
     )
     valid_samples = batch[batch.sindex.query(geom, predicate="contains")]
     candidates.extend(valid_samples)
-    return GeoSeries(candidates[:size]).unary_union
+    return GeoSeries(candidates[:size]).union_all()
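Note: the hunks above track the GeoPandas rename of the deprecated unary_union to union_all(). A minimal sketch of the replacement, assuming GeoPandas >= 1.0 and illustrative data:

    import geopandas
    from shapely.geometry import Point

    s = geopandas.GeoSeries([Point(0, 0).buffer(1), Point(1, 0).buffer(1)])
    merged = s.union_all()   # replaces the deprecated `s.unary_union` property
    print(merged.geom_type)  # one merged (Multi)Polygon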
@@ -58,33 +58,27 @@ def _get_C_info():
     geos_dir = None

     try:
-        import fiona
+        import pyogrio

-        gdal_version = fiona.env.get_gdal_release_name()
+        gdal_version = pyogrio.__gdal_version_string__
+        gdal_dir = pyogrio.get_gdal_data_path()
     except Exception:
         gdal_version = None
-    try:
-        import fiona
-
-        gdal_dir = fiona.env.GDALDataFinder().search()
-    except Exception:
         gdal_dir = None

     if gdal_version is None:
         try:
-            import pyogrio
+            import fiona

-            gdal_version = pyogrio.__gdal_version_string__
-            gdal_dir = None
+            gdal_version = fiona.env.get_gdal_release_name()
         except Exception:
-            pass
+            gdal_version = None
         try:
-            # get_gdal_data_path is only available in pyogrio >= 0.4.2
-            from pyogrio import get_gdal_data_path
+            import fiona

-            gdal_dir = get_gdal_data_path()
+            gdal_dir = fiona.env.GDALDataFinder().search()
         except Exception:
-            pass
+            gdal_dir = None

     blob = [
         ("GEOS", geos_version),
@@ -114,16 +108,15 @@ def _get_deps_info():
         "pyproj",
         "shapely",
         # optional deps
-        "fiona",
+        "pyogrio",
         "geoalchemy2",
         "geopy",
         "matplotlib",
         "mapclassify",
-        "pygeos",
-        "pyogrio",
+        "fiona",
+        "psycopg",
         "psycopg2",
         "pyarrow",
         "rtree",
     ]

     def get_version(module):
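Note: both helpers feed geopandas.show_versions(), which is the quickest way to see which GDAL/GEOS install was actually picked up after this change:

    import geopandas

    geopandas.show_versions()  # prints system info, GEOS/GDAL/PROJ paths, and dependency versions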
@@ -5,11 +5,13 @@ geopandas.clip
 A module to clip vector data using GeoPandas.

 """

 import warnings

 import numpy as np
+import pandas.api.types
-from shapely.geometry import Polygon, MultiPolygon, box
+
+from shapely.geometry import MultiPolygon, Polygon, box

 from geopandas import GeoDataFrame, GeoSeries
 from geopandas.array import _check_crs, _crs_mismatch_warn
@@ -21,7 +23,7 @@ def _mask_is_list_like_rectangle(mask):
     )


-def _clip_gdf_with_mask(gdf, mask):
+def _clip_gdf_with_mask(gdf, mask, sort=False):
     """Clip geometry to the polygon/rectangle extent.

     Clip an input GeoDataFrame to the polygon extent of the polygon
@@ -35,6 +37,10 @@ def _clip_gdf_with_mask(gdf, mask):
     mask : (Multi)Polygon, list-like
         Reference polygon/rectangle for clipping.

+    sort : boolean, default False
+        If True, the results will be sorted in ascending order using the
+        geometries' indexes as the primary key.
+
     Returns
     -------
     GeoDataFrame
@@ -47,7 +53,9 @@ def _clip_gdf_with_mask(gdf, mask):
     else:
         intersection_polygon = mask

-    gdf_sub = gdf.iloc[gdf.sindex.query(intersection_polygon, predicate="intersects")]
+    gdf_sub = gdf.iloc[
+        gdf.sindex.query(intersection_polygon, predicate="intersects", sort=sort)
+    ]

     # For performance reasons points don't need to be intersected with poly
     non_point_mask = gdf_sub.geom_type != "Point"
@@ -60,13 +68,13 @@ def _clip_gdf_with_mask(gdf, mask):
     if isinstance(gdf_sub, GeoDataFrame):
         clipped = gdf_sub.copy()
         if clipping_by_rectangle:
-            clipped.loc[
-                non_point_mask, clipped._geometry_column_name
-            ] = gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
+            clipped.loc[non_point_mask, clipped._geometry_column_name] = (
+                gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
+            )
         else:
-            clipped.loc[
-                non_point_mask, clipped._geometry_column_name
-            ] = gdf_sub.geometry.values[non_point_mask].intersection(mask)
+            clipped.loc[non_point_mask, clipped._geometry_column_name] = (
+                gdf_sub.geometry.values[non_point_mask].intersection(mask)
+            )
     else:
         # GeoSeries
         clipped = gdf_sub.copy()
@@ -81,7 +89,7 @@ def _clip_gdf_with_mask(gdf, mask):
     return clipped


-def clip(gdf, mask, keep_geom_type=False):
+def clip(gdf, mask, keep_geom_type=False, sort=False):
     """Clip points, lines, or polygon geometries to the mask extent.

     Both layers must be in the same Coordinate Reference System (CRS).
@@ -112,6 +120,9 @@ def clip(gdf, mask, keep_geom_type=False):
         If True, return only geometries of original type in case of intersection
         resulting in multiple geometry types or GeometryCollections.
         If False, return all resulting geometries (potentially mixed-types).
+    sort : boolean, default False
+        If True, the results will be sorted in ascending order using the
+        geometries' indexes as the primary key.

     Returns
     -------
@@ -185,11 +196,11 @@ def clip(gdf, mask, keep_geom_type=False):
         return gdf.iloc[:0]

     if isinstance(mask, (GeoDataFrame, GeoSeries)):
-        combined_mask = mask.geometry.unary_union
+        combined_mask = mask.geometry.union_all()
     else:
         combined_mask = mask

-    clipped = _clip_gdf_with_mask(gdf, combined_mask)
+    clipped = _clip_gdf_with_mask(gdf, combined_mask, sort=sort)

     if keep_geom_type:
         geomcoll_concat = (clipped.geom_type == "GeometryCollection").any()
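Note: the new sort flag propagates from clip() into the spatial-index query. A usage sketch with made-up data (requires a GeoPandas version containing this change):

    import geopandas
    from shapely.geometry import Point, box

    pts = geopandas.GeoDataFrame(geometry=[Point(2, 2), Point(0.5, 0.5), Point(1, 1)])
    clipped = geopandas.clip(pts, box(0, 0, 1.5, 1.5), sort=True)
    print(clipped.index.tolist())  # surviving rows come back in ascending index order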
@@ -1,5 +1,5 @@
-from collections import defaultdict
 import time
+from collections import defaultdict

 import pandas as pd

@@ -120,8 +120,8 @@ def reverse_geocode(points, provider=None, **kwargs):

 def _query(data, forward, provider, throttle_time, **kwargs):
     # generic wrapper for calls over lists to geopy Geocoders
-    from geopy.geocoders.base import GeocoderQueryError
     from geopy.geocoders import get_geocoder_for_service
+    from geopy.geocoders.base import GeocoderQueryError

     if forward:
         if not isinstance(data, pd.Series):
@@ -4,8 +4,8 @@ from functools import reduce
 import numpy as np
 import pandas as pd

-from geopandas import _compat as compat
 from geopandas import GeoDataFrame, GeoSeries
+from geopandas._compat import PANDAS_GE_30
 from geopandas.array import _check_crs, _crs_mismatch_warn


@@ -15,12 +15,15 @@ def _ensure_geometry_column(df):
     If another column with that name exists, it will be dropped.
     """
     if not df._geometry_column_name == "geometry":
-        if "geometry" in df.columns:
-            df.drop("geometry", axis=1, inplace=True)
-        df.rename(
-            columns={df._geometry_column_name: "geometry"}, copy=False, inplace=True
-        )
-        df.set_geometry("geometry", inplace=True)
+        if PANDAS_GE_30:
+            if "geometry" in df.columns:
+                df = df.drop("geometry", axis=1)
+            df = df.rename_geometry("geometry")
+        else:
+            if "geometry" in df.columns:
+                df.drop("geometry", axis=1, inplace=True)
+            df.rename_geometry("geometry", inplace=True)
+    return df


 def _overlay_intersection(df1, df2):
@@ -37,7 +40,7 @@ def _overlay_intersection(df1, df2):
     right.reset_index(drop=True, inplace=True)
     intersections = left.intersection(right)
     poly_ix = intersections.geom_type.isin(["Polygon", "MultiPolygon"])
-    intersections.loc[poly_ix] = intersections[poly_ix].buffer(0)
+    intersections.loc[poly_ix] = intersections[poly_ix].make_valid()

     # only keep actual intersecting geometries
     pairs_intersect = pd.DataFrame({"__idx1": idx1, "__idx2": idx2})
@@ -66,8 +69,8 @@ def _overlay_intersection(df1, df2):
         right_index=True,
         suffixes=("_1", "_2"),
     )
-    result["__idx1"] = None
-    result["__idx2"] = None
+    result["__idx1"] = np.nan
+    result["__idx2"] = np.nan
     return result[
         result.columns.drop(df1.geometry.name).tolist() + [df1.geometry.name]
     ]
@@ -94,10 +97,7 @@ def _overlay_difference(df1, df2):
         new_g.append(new)
     differences = GeoSeries(new_g, index=df1.index, crs=df1.crs)
     poly_ix = differences.geom_type.isin(["Polygon", "MultiPolygon"])
-    if compat.USE_PYGEOS:
-        differences.loc[poly_ix] = differences[poly_ix].make_valid()
-    else:
-        differences.loc[poly_ix] = differences[poly_ix].buffer(0)
+    differences.loc[poly_ix] = differences[poly_ix].make_valid()
     geom_diff = differences[~differences.is_empty].copy()
     dfdiff = df1[~differences.is_empty].copy()
     dfdiff[dfdiff._geometry_column_name] = geom_diff
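Note: several hunks in this file swap the old buffer(0) trick for make_valid(). A minimal sketch of the difference (Shapely >= 2.0 / a recent GeoPandas assumed):

    import geopandas
    from shapely.geometry import Polygon

    bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])  # self-intersecting ring
    s = geopandas.GeoSeries([bowtie])
    print(s.is_valid[0])               # False
    print(s.make_valid().is_valid[0])  # True, repaired without the buffer(0) detour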
@@ -115,8 +115,8 @@ def _overlay_symmetric_diff(df1, df2):
     dfdiff1["__idx2"] = np.nan
     dfdiff2["__idx1"] = np.nan
     # ensure geometry name (otherwise merge goes wrong)
-    _ensure_geometry_column(dfdiff1)
-    _ensure_geometry_column(dfdiff2)
+    dfdiff1 = _ensure_geometry_column(dfdiff1)
+    dfdiff2 = _ensure_geometry_column(dfdiff2)
     # combine both 'difference' dataframes
     dfsym = dfdiff1.merge(
         dfdiff2, on=["__idx1", "__idx2"], how="outer", suffixes=("_1", "_2")
@@ -170,7 +170,7 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
         which will set keep_geom_type to True but warn upon dropping
         geometries.
     make_valid : bool, default True
-        If True, any invalid input geometries are corrected with a call to `buffer(0)`,
+        If True, any invalid input geometries are corrected with a call to make_valid(),
         if False, a `ValueError` is raised if any input geometries are invalid.

     Returns
@@ -190,40 +190,40 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
     >>> df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2_data':[1,2]})

     >>> geopandas.overlay(df1, df2, how='union')
-    df1_data df2_data geometry
-    0 1.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
-    1 2.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
-    2 2.0 2.0 POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
-    3 1.0 NaN POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
-    4 2.0 NaN MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000...
-    5 NaN 1.0 MULTIPOLYGON (((2.00000 3.00000, 2.00000 2.000...
-    6 NaN 2.0 POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....
+    df1_data df2_data geometry
+    0 1.0 1.0 POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
+    1 2.0 1.0 POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
+    2 2.0 2.0 POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
+    3 1.0 NaN POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
+    4 2.0 NaN MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
+    5 NaN 1.0 MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
+    6 NaN 2.0 POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))

     >>> geopandas.overlay(df1, df2, how='intersection')
-    df1_data df2_data geometry
-    0 1 1 POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
-    1 2 1 POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
-    2 2 2 POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
+    df1_data df2_data geometry
+    0 1 1 POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
+    1 2 1 POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
+    2 2 2 POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))

     >>> geopandas.overlay(df1, df2, how='symmetric_difference')
-    df1_data df2_data geometry
-    0 1.0 NaN POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
-    1 2.0 NaN MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000...
-    2 NaN 1.0 MULTIPOLYGON (((2.00000 3.00000, 2.00000 2.000...
-    3 NaN 2.0 POLYGON ((3.00000 5.00000, 5.00000 5.00000, 5....
+    df1_data df2_data geometry
+    0 1.0 NaN POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
+    1 2.0 NaN MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...
+    2 NaN 1.0 MULTIPOLYGON (((2 3, 2 2, 1 2, 1 3, 2 3)), ((3...
+    3 NaN 2.0 POLYGON ((3 5, 5 5, 5 3, 4 3, 4 4, 3 4, 3 5))

     >>> geopandas.overlay(df1, df2, how='difference')
-    geometry df1_data
-    0 POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0.... 1
-    1 MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000... 2
+    geometry df1_data
+    0 POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0)) 1
+    1 MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4... 2

     >>> geopandas.overlay(df1, df2, how='identity')
     df1_data df2_data geometry
-    0 1.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 1.00000, 1....
-    1 2.0 1.0 POLYGON ((2.00000 2.00000, 2.00000 3.00000, 3....
-    2 2.0 2.0 POLYGON ((4.00000 4.00000, 4.00000 3.00000, 3....
-    3 1.0 NaN POLYGON ((2.00000 0.00000, 0.00000 0.00000, 0....
-    4 2.0 NaN MULTIPOLYGON (((3.00000 4.00000, 3.00000 3.000...
+    0 1.0 1.0 POLYGON ((2 2, 2 1, 1 1, 1 2, 2 2))
+    1 2.0 1.0 POLYGON ((2 2, 2 3, 3 3, 3 2, 2 2))
+    2 2.0 2.0 POLYGON ((4 4, 4 3, 3 3, 3 4, 4 4))
+    3 1.0 NaN POLYGON ((2 0, 0 0, 0 2, 1 2, 1 1, 2 1, 2 0))
+    4 2.0 NaN MULTIPOLYGON (((3 4, 3 3, 2 3, 2 4, 3 4)), ((4...

     See also
     --------
@@ -300,7 +300,7 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):
         mask = ~df.geometry.is_valid
         col = df._geometry_column_name
         if make_valid:
-            df.loc[mask, col] = df.loc[mask, col].buffer(0)
+            df.loc[mask, col] = df.loc[mask, col].make_valid()
         elif mask.any():
             raise ValueError(
                 "You have passed make_valid=False along with "
@@ -362,7 +362,7 @@ def overlay(df1, df2, how="intersection", keep_geom_type=None, make_valid=True):

     # level_0 created with above reset_index operation
     # and represents the original geometry collections
-    # TODO avoiding dissolve to call unary_union in this case could further
+    # TODO avoiding dissolve to call union_all in this case could further
     # improve performance (we only need to collect geometries in their
     # respective Multi version)
     dissolved = exploded.dissolve(by="level_0")
@@ -1,11 +1,12 @@
-from typing import Optional
 import warnings
+from functools import partial
+from typing import Optional

 import numpy as np
 import pandas as pd

 from geopandas import GeoDataFrame
-from geopandas import _compat as compat
+from geopandas._compat import PANDAS_GE_30
 from geopandas.array import _check_crs, _crs_mismatch_warn


@@ -16,6 +17,8 @@ def sjoin(
     predicate="intersects",
     lsuffix="left",
     rsuffix="right",
+    distance=None,
+    on_attribute=None,
     **kwargs,
 ):
     """Spatial join of two GeoDataFrames.
@@ -43,6 +46,16 @@ def sjoin(
         Suffix to apply to overlapping column names (left GeoDataFrame).
     rsuffix : string, default 'right'
         Suffix to apply to overlapping column names (right GeoDataFrame).
+    distance : number or array_like, optional
+        Distance(s) around each input geometry within which to query the tree
+        for the 'dwithin' predicate. If array_like, must be
+        one-dimensional with length equal to length of left GeoDataFrame.
+        Required if ``predicate='dwithin'``.
+    on_attribute : string, list or tuple
+        Column name(s) to join on as an additional join restriction on top
+        of the spatial predicate. These must be found in both DataFrames.
+        If set, observations are joined only if the predicate applies
+        and values in specified columns match.

     Examples
     --------
@@ -74,12 +87,12 @@ def sjoin(

     >>> groceries_w_communities = geopandas.sjoin(groceries, chicago)
     >>> groceries_w_communities.head()  # doctest: +SKIP
-    OBJECTID Ycoord Xcoord ... GonorrF GonorrM Tuberc
-    0 16 41.973266 -87.657073 ... 170.8 468.7 13.6
-    87 365 41.961707 -87.654058 ... 170.8 468.7 13.6
-    90 373 41.963131 -87.656352 ... 170.8 468.7 13.6
-    140 582 41.969131 -87.674882 ... 170.8 468.7 13.6
-    1 18 41.696367 -87.681315 ... 800.5 741.1 2.6
+    OBJECTID community geometry
+    0 16 UPTOWN MULTIPOINT ((-87.65661 41.97321))
+    1 18 MORGAN PARK MULTIPOINT ((-87.68136 41.69713))
+    2 22 NEAR WEST SIDE MULTIPOINT ((-87.63918 41.86847))
+    3 23 NEAR WEST SIDE MULTIPOINT ((-87.65495 41.87783))
+    4 27 CHATHAM MULTIPOINT ((-87.62715 41.73623))
     [5 rows x 95 columns]

     See also
@@ -92,40 +105,42 @@ def sjoin(
     Every operation in GeoPandas is planar, i.e. the potential third
     dimension is not taken into account.
     """
-    if "op" in kwargs:
-        op = kwargs.pop("op")
-        deprecation_message = (
-            "The `op` parameter is deprecated and will be removed"
-            " in a future release. Please use the `predicate` parameter"
-            " instead."
-        )
-        if predicate != "intersects" and op != predicate:
-            override_message = (
-                "A non-default value for `predicate` was passed"
-                f' (got `predicate="{predicate}"`'
-                f' in combination with `op="{op}"`).'
-                " The value of `predicate` will be overridden by the value of `op`,"
-                " , which may result in unexpected behavior."
-                f"\n{deprecation_message}"
-            )
-            warnings.warn(override_message, UserWarning, stacklevel=4)
-        else:
-            warnings.warn(deprecation_message, FutureWarning, stacklevel=4)
-        predicate = op
     if kwargs:
         first = next(iter(kwargs.keys()))
         raise TypeError(f"sjoin() got an unexpected keyword argument '{first}'")

-    _basic_checks(left_df, right_df, how, lsuffix, rsuffix)
+    on_attribute = _maybe_make_list(on_attribute)

-    indices = _geom_predicate_query(left_df, right_df, predicate)
+    _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=on_attribute)

-    joined = _frame_join(indices, left_df, right_df, how, lsuffix, rsuffix)
+    indices = _geom_predicate_query(
+        left_df, right_df, predicate, distance, on_attribute=on_attribute
+    )
+
+    joined, _ = _frame_join(
+        left_df,
+        right_df,
+        indices,
+        None,
+        how,
+        lsuffix,
+        rsuffix,
+        predicate,
+        on_attribute=on_attribute,
+    )

     return joined


-def _basic_checks(left_df, right_df, how, lsuffix, rsuffix):
+def _maybe_make_list(obj):
+    if isinstance(obj, tuple):
+        return list(obj)
+    if obj is not None and not isinstance(obj, list):
+        return [obj]
+    return obj
+
+
+def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
     """Checks the validity of join input parameters.

     `how` must be one of the valid options.
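Note: taken together, the hunks above add two options to sjoin(): a distance for the 'dwithin' predicate and an attribute-equality restriction. A usage sketch with made-up frames (requires a GeoPandas version with these parameters and Shapely >= 2.0 for 'dwithin'):

    import geopandas
    from shapely.geometry import Point

    left = geopandas.GeoDataFrame({"zone": ["a", "b"]}, geometry=[Point(0, 0), Point(5, 5)])
    right = geopandas.GeoDataFrame({"zone": ["a", "c"]}, geometry=[Point(0, 1), Point(5, 6)])

    # match geometries within 2 units of each other
    near = geopandas.sjoin(left, right, predicate="dwithin", distance=2)
    # additionally require equal values in the shared "zone" column
    near_same_zone = geopandas.sjoin(left, right, predicate="dwithin", distance=2, on_attribute="zone")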
@@ -142,6 +157,8 @@ def _basic_checks(left_df, right_df, how, lsuffix, rsuffix):
         left index suffix
     rsuffix : str
         right index suffix
+    on_attribute : list, default None
+        list of column names to merge on along with geometry
     """
     if not isinstance(left_df, GeoDataFrame):
         raise ValueError(
@@ -162,20 +179,28 @@ def _basic_checks(left_df, right_df, how, lsuffix, rsuffix):
     if not _check_crs(left_df, right_df):
         _crs_mismatch_warn(left_df, right_df, stacklevel=4)

-    index_left = "index_{}".format(lsuffix)
-    index_right = "index_{}".format(rsuffix)
-
-    # due to GH 352
-    if any(left_df.columns.isin([index_left, index_right])) or any(
-        right_df.columns.isin([index_left, index_right])
-    ):
-        raise ValueError(
-            "'{0}' and '{1}' cannot be names in the frames being"
-            " joined".format(index_left, index_right)
-        )
+    if on_attribute:
+        for attr in on_attribute:
+            if (attr not in left_df) and (attr not in right_df):
+                raise ValueError(
+                    f"Expected column {attr} is missing from both of the dataframes."
+                )
+            if attr not in left_df:
+                raise ValueError(
+                    f"Expected column {attr} is missing from the left dataframe."
+                )
+            if attr not in right_df:
+                raise ValueError(
+                    f"Expected column {attr} is missing from the right dataframe."
+                )
+            if attr in (left_df.geometry.name, right_df.geometry.name):
+                raise ValueError(
+                    "Active geometry column cannot be used as an input "
+                    "for on_attribute parameter."
+                )


-def _geom_predicate_query(left_df, right_df, predicate):
+def _geom_predicate_query(left_df, right_df, predicate, distance, on_attribute=None):
     """Compute geometric comparisons and get matching indices.

     Parameters
@@ -184,6 +209,9 @@ def _geom_predicate_query(left_df, right_df, predicate):
     right_df : GeoDataFrame
     predicate : string
         Binary predicate to query.
+    on_attribute : list, default None
+        list of column names to merge on along with geometry
+

     Returns
     -------
@@ -191,163 +219,292 @@ def _geom_predicate_query(left_df, right_df, predicate):
     DataFrame with matching indices in
     columns named `_key_left` and `_key_right`.
     """
-    with warnings.catch_warnings():
-        # We don't need to show our own warning here
-        # TODO remove this once the deprecation has been enforced
-        warnings.filterwarnings(
-            "ignore", "Generated spatial index is empty", FutureWarning
-        )
-
-        original_predicate = predicate
+    original_predicate = predicate

-        if predicate == "within":
-            # within is implemented as the inverse of contains
-            # contains is a faster predicate
-            # see discussion at https://github.com/geopandas/geopandas/pull/1421
-            predicate = "contains"
-            sindex = left_df.sindex
-            input_geoms = right_df.geometry
-        else:
-            # all other predicates are symmetric
-            # keep them the same
-            sindex = right_df.sindex
-            input_geoms = left_df.geometry
+    if predicate == "within":
+        # within is implemented as the inverse of contains
+        # contains is a faster predicate
+        # see discussion at https://github.com/geopandas/geopandas/pull/1421
+        predicate = "contains"
+        sindex = left_df.sindex
+        input_geoms = right_df.geometry
+    else:
+        # all other predicates are symmetric
+        # keep them the same
+        sindex = right_df.sindex
+        input_geoms = left_df.geometry

     if sindex:
-        l_idx, r_idx = sindex.query(input_geoms, predicate=predicate, sort=False)
-        indices = pd.DataFrame({"_key_left": l_idx, "_key_right": r_idx})
+        l_idx, r_idx = sindex.query(
+            input_geoms, predicate=predicate, sort=False, distance=distance
+        )
     else:
         # when sindex is empty / has no valid geometries
-        indices = pd.DataFrame(columns=["_key_left", "_key_right"], dtype=float)
+        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)

     if original_predicate == "within":
         # within is implemented as the inverse of contains
         # flip back the results
-        indices = indices.rename(
-            columns={"_key_left": "_key_right", "_key_right": "_key_left"}
-        )
-        return indices
+        r_idx, l_idx = l_idx, r_idx
+        indexer = np.lexsort((r_idx, l_idx))
+        l_idx = l_idx[indexer]
+        r_idx = r_idx[indexer]
+
+    if on_attribute:
+        for attr in on_attribute:
+            (l_idx, r_idx), _ = _filter_shared_attribute(
+                left_df, right_df, l_idx, r_idx, attr
+            )
+
+    return l_idx, r_idx
+
+
+def _reset_index_with_suffix(df, suffix, other):
+    """
+    Equivalent of df.reset_index(), but adding 'suffix' to auto-generated
+    column names.
+    """
+    index_original = df.index.names
+    if PANDAS_GE_30:
+        df_reset = df.reset_index()
+    else:
+        # we already made a copy of the dataframe in _frame_join before getting here
+        df_reset = df
+        df_reset.reset_index(inplace=True)
+    column_names = df_reset.columns.to_numpy(copy=True)
+    for i, label in enumerate(index_original):
+        # if the original label was None, add suffix to auto-generated name
+        if label is None:
+            new_label = column_names[i]
+            if "level" in new_label:
+                # reset_index of MultiIndex gives "level_i" names, preserve the "i"
+                lev = new_label.split("_")[1]
+                new_label = f"index_{suffix}{lev}"
+            else:
+                new_label = f"index_{suffix}"
+            # check new label will not be in other dataframe
+            if new_label in df.columns or new_label in other.columns:
+                raise ValueError(
+                    "'{0}' cannot be a column name in the frames being"
+                    " joined".format(new_label)
+                )
+            column_names[i] = new_label
+    return df_reset, pd.Index(column_names)
+
+
+def _process_column_names_with_suffix(
+    left: pd.Index, right: pd.Index, suffixes, left_df, right_df
+):
+    """
+    Add suffixes to overlapping labels (ignoring the geometry column).
+
+    This is based on pandas' merge logic at https://github.com/pandas-dev/pandas/blob/
+    a0779adb183345a8eb4be58b3ad00c223da58768/pandas/core/reshape/merge.py#L2300-L2370
+    """
+    to_rename = left.intersection(right)
+    if len(to_rename) == 0:
+        return left, right
+
+    lsuffix, rsuffix = suffixes
+
+    if not lsuffix and not rsuffix:
+        raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
+
+    def renamer(x, suffix, geometry):
+        if x in to_rename and x != geometry and suffix is not None:
+            return f"{x}_{suffix}"
+        return x
+
+    lrenamer = partial(
+        renamer,
+        suffix=lsuffix,
+        geometry=getattr(left_df, "_geometry_column_name", None),
+    )
+    rrenamer = partial(
+        renamer,
+        suffix=rsuffix,
+        geometry=getattr(right_df, "_geometry_column_name", None),
+    )
+
+    # TODO retain index name?
+    left_renamed = pd.Index([lrenamer(lab) for lab in left])
+    right_renamed = pd.Index([rrenamer(lab) for lab in right])
+
+    dups = []
+    if not left_renamed.is_unique:
+        # Only warn when duplicates are caused because of suffixes, already duplicated
+        # columns in origin should not warn
+        dups = left_renamed[(left_renamed.duplicated()) & (~left.duplicated())].tolist()
+    if not right_renamed.is_unique:
+        dups.extend(
+            right_renamed[(right_renamed.duplicated()) & (~right.duplicated())].tolist()
+        )
+    # TODO turn this into an error (pandas has done so as well)
+    if dups:
+        warnings.warn(
+            f"Passing 'suffixes' which cause duplicate columns {set(dups)} in the "
+            f"result is deprecated and will raise a MergeError in a future version.",
+            FutureWarning,
+            stacklevel=4,
+        )
+
+    return left_renamed, right_renamed


-def _frame_join(join_df, left_df, right_df, how, lsuffix, rsuffix):
+def _restore_index(joined, index_names, index_names_original):
+    """
+    Set back the original index columns, and restore their name as `None`
+    if they didn't have a name originally.
+    """
+    if PANDAS_GE_30:
+        joined = joined.set_index(list(index_names))
+    else:
+        joined.set_index(list(index_names), inplace=True)
+
+    # restore the fact that the index didn't have a name
+    joined_index_names = list(joined.index.names)
+    for i, label in enumerate(index_names_original):
+        if label is None:
+            joined_index_names[i] = None
+    joined.index.names = joined_index_names
+    return joined
+
+
+def _adjust_indexers(indices, distances, original_length, how, predicate):
+    """
+    The left/right indexers from the query represent an inner join.
+    For a left or right join, we need to adjust them to include the rows
+    that would not be present in an inner join.
+    """
+    # the indices represent an inner join, no adjustment needed
+    if how == "inner":
+        return indices, distances
+
+    l_idx, r_idx = indices
+
+    if how == "right":
+        # re-sort so it is sorted by the right indexer
+        indexer = np.lexsort((l_idx, r_idx))
+        l_idx, r_idx = l_idx[indexer], r_idx[indexer]
+        if distances is not None:
+            distances = distances[indexer]
+
+        # switch order
+        r_idx, l_idx = l_idx, r_idx
+
+    # determine which indices are missing and where they would need to be inserted
+    idx = np.arange(original_length)
+    l_idx_missing = idx[~np.isin(idx, l_idx)]
+    insert_idx = np.searchsorted(l_idx, l_idx_missing)
+    # for the left indexer, insert those missing indices
+    l_idx = np.insert(l_idx, insert_idx, l_idx_missing)
+    # for the right indexer, insert -1 -> to get missing values in pandas' reindexing
+    r_idx = np.insert(r_idx, insert_idx, -1)
+    # for the indices, already insert those missing values manually
+    if distances is not None:
+        distances = np.insert(distances, insert_idx, np.nan)
+
+    if how == "right":
+        # switch back
+        l_idx, r_idx = r_idx, l_idx
+
+    return (l_idx, r_idx), distances

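Note: the core trick in _adjust_indexers above is padding the inner-join indexers so that pandas reindexing yields NaN rows for a left join. A toy illustration with plain numpy:

    import numpy as np

    l_idx = np.array([0, 2])                       # left rows that found a match
    r_idx = np.array([5, 7])                       # the matching right rows
    idx = np.arange(4)                             # the left frame originally had 4 rows
    missing = idx[~np.isin(idx, l_idx)]            # rows 1 and 3 had no match
    insert_at = np.searchsorted(l_idx, missing)
    l_idx = np.insert(l_idx, insert_at, missing)   # -> [0, 1, 2, 3]
    r_idx = np.insert(r_idx, insert_at, -1)        # -> [5, -1, 7, -1]; -1 becomes NaN on reindex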
+def _frame_join(
+    left_df,
+    right_df,
+    indices,
+    distances,
+    how,
+    lsuffix,
+    rsuffix,
+    predicate,
+    on_attribute=None,
+):
     """Join the GeoDataFrames at the DataFrame level.

     Parameters
     ----------
-    join_df : DataFrame
-        Indices and join data returned by the geometric join.
-        Must have columns `_key_left` and `_key_right`
-        with integer indices representing the matches
-        from `left_df` and `right_df` respectively.
-        Additional columns may be included and will be copied to
-        the resultant GeoDataFrame.
     left_df : GeoDataFrame
     right_df : GeoDataFrame
+    indices : tuple of ndarray
+        Indices returned by the geometric join. Tuple with integer
+        indices representing the matches from `left_df` and `right_df`
+        respectively.
+    distances : ndarray, optional
+        Passed through and adapted based on the indices, if needed.
+    how : string
+        The type of join to use on the DataFrame level.
     lsuffix : string
         Suffix to apply to overlapping column names (left GeoDataFrame).
     rsuffix : string
         Suffix to apply to overlapping column names (right GeoDataFrame).
-    how : string
-        The type of join to use on the DataFrame level.
+    on_attribute : list, default None
+        list of column names to merge on along with geometry

     Returns
     -------
     GeoDataFrame
         Joined GeoDataFrame.
     """
-    # the spatial index only allows limited (numeric) index types, but an
-    # index in geopandas may be any arbitrary dtype. so reset both indices now
-    # and store references to the original indices, to be reaffixed later.
-    # GH 352
-    index_left = "index_{}".format(lsuffix)
-    left_df = left_df.copy(deep=True)
-    try:
-        left_index_name = left_df.index.name
-        left_df.index = left_df.index.rename(index_left)
-    except TypeError:
-        index_left = [
-            "index_{}".format(lsuffix + str(pos))
-            for pos, ix in enumerate(left_df.index.names)
-        ]
-        left_index_name = left_df.index.names
-        left_df.index = left_df.index.rename(index_left)
-    left_df = left_df.reset_index()
+    if on_attribute:  # avoid renaming or duplicating shared column
+        right_df = right_df.drop(on_attribute, axis=1)

-    index_right = "index_{}".format(rsuffix)
-    right_df = right_df.copy(deep=True)
-    try:
-        right_index_name = right_df.index.name
-        right_df.index = right_df.index.rename(index_right)
-    except TypeError:
-        index_right = [
-            "index_{}".format(rsuffix + str(pos))
-            for pos, ix in enumerate(right_df.index.names)
-        ]
-        right_index_name = right_df.index.names
-        right_df.index = right_df.index.rename(index_right)
-    right_df = right_df.reset_index()
+    if how in ("inner", "left"):
+        right_df = right_df.drop(right_df.geometry.name, axis=1)
+    else:  # how == 'right':
+        left_df = left_df.drop(left_df.geometry.name, axis=1)
+
+    left_df = left_df.copy(deep=False)
+    left_nlevels = left_df.index.nlevels
+    left_index_original = left_df.index.names
+    left_df, left_column_names = _reset_index_with_suffix(left_df, lsuffix, right_df)
+
+    right_df = right_df.copy(deep=False)
+    right_nlevels = right_df.index.nlevels
+    right_index_original = right_df.index.names
+    right_df, right_column_names = _reset_index_with_suffix(right_df, rsuffix, left_df)
+
+    # if conflicting names in left and right, add suffix
+    left_column_names, right_column_names = _process_column_names_with_suffix(
+        left_column_names,
+        right_column_names,
+        (lsuffix, rsuffix),
+        left_df,
+        right_df,
+    )
+    left_df.columns = left_column_names
+    right_df.columns = right_column_names
+    left_index = left_df.columns[:left_nlevels]
+    right_index = right_df.columns[:right_nlevels]

     # perform join on the dataframes
-    if how == "inner":
-        join_df = join_df.set_index("_key_left")
-        joined = (
-            left_df.merge(join_df, left_index=True, right_index=True)
-            .merge(
-                right_df.drop(right_df.geometry.name, axis=1),
-                left_on="_key_right",
-                right_index=True,
-                suffixes=("_{}".format(lsuffix), "_{}".format(rsuffix)),
-            )
-            .set_index(index_left)
-            .drop(["_key_right"], axis=1)
-        )
-        if isinstance(index_left, list):
-            joined.index.names = left_index_name
-        else:
-            joined.index.name = left_index_name
-
-    elif how == "left":
-        join_df = join_df.set_index("_key_left")
-        joined = (
-            left_df.merge(join_df, left_index=True, right_index=True, how="left")
-            .merge(
-                right_df.drop(right_df.geometry.name, axis=1),
-                how="left",
-                left_on="_key_right",
-                right_index=True,
-                suffixes=("_{}".format(lsuffix), "_{}".format(rsuffix)),
-            )
-            .set_index(index_left)
-            .drop(["_key_right"], axis=1)
-        )
-        if isinstance(index_left, list):
-            joined.index.names = left_index_name
-        else:
-            joined.index.name = left_index_name
+    original_length = len(right_df) if how == "right" else len(left_df)
+    (l_idx, r_idx), distances = _adjust_indexers(
+        indices, distances, original_length, how, predicate
+    )
+    # the `take` method doesn't allow introducing NaNs with -1 indices
+    #   left = left_df.take(l_idx)
+    # therefore we are using the private _reindex_with_indexers as workaround
+    new_index = pd.RangeIndex(len(l_idx))
+    left = left_df._reindex_with_indexers({0: (new_index, l_idx)})
+    right = right_df._reindex_with_indexers({0: (new_index, r_idx)})
+    if PANDAS_GE_30:
+        kwargs = {}
+    else:
+        kwargs = dict(copy=False)
+    joined = pd.concat([left, right], axis=1, **kwargs)

+    if how in ("inner", "left"):
+        joined = _restore_index(joined, left_index, left_index_original)
     else:  # how == 'right':
-        joined = (
-            left_df.drop(left_df.geometry.name, axis=1)
-            .merge(
-                join_df.merge(
-                    right_df, left_on="_key_right", right_index=True, how="right"
-                ),
-                left_index=True,
-                right_on="_key_left",
-                how="right",
-                suffixes=("_{}".format(lsuffix), "_{}".format(rsuffix)),
-            )
-            .set_index(index_right)
-            .drop(["_key_left", "_key_right"], axis=1)
-            .set_geometry(right_df.geometry.name)
-        )
-        if isinstance(index_right, list):
-            joined.index.names = right_index_name
-        else:
-            joined.index.name = right_index_name
+        joined = joined.set_geometry(right_df.geometry.name)
+        joined = _restore_index(joined, right_index, right_index_original)

-    return joined
+    return joined, distances


 def _nearest_query(
@@ -357,13 +514,8 @@ def _nearest_query(
     how: str,
     return_distance: bool,
     exclusive: bool,
+    on_attribute: Optional[list] = None,
 ):
-    if not (compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_010)):
-        raise NotImplementedError(
-            "Currently, only PyGEOS >= 0.10.0 or Shapely >= 2.0 supports "
-            "`nearest_all`. " + compat.INSTALL_PYGEOS_ERROR
-        )
-
     # use the opposite of the join direction for the index
     use_left_as_sindex = how == "right"
     if use_left_as_sindex:
@@ -393,15 +545,37 @@ def _nearest_query(
             distances = distances[sort_order]
         else:
             l_idx, r_idx = input_idx, tree_idx
-        join_df = pd.DataFrame(
-            {"_key_left": l_idx, "_key_right": r_idx, "distances": distances}
-        )
     else:
         # when sindex is empty / has no valid geometries
-        join_df = pd.DataFrame(
-            columns=["_key_left", "_key_right", "distances"], dtype=float
-        )
-    return join_df
+        l_idx, r_idx = np.array([], dtype=np.intp), np.array([], dtype=np.intp)
+        if return_distance:
+            distances = np.array([], dtype=np.float64)
+        else:
+            distances = None
+
+    if on_attribute:
+        for attr in on_attribute:
+            (l_idx, r_idx), shared_attribute_rows = _filter_shared_attribute(
+                left_df, right_df, l_idx, r_idx, attr
+            )
+            distances = distances[shared_attribute_rows]
+
+    return (l_idx, r_idx), distances
+
+
+def _filter_shared_attribute(left_df, right_df, l_idx, r_idx, attribute):
+    """
+    Returns the indices for the left and right dataframe that share the same entry
+    in the attribute column. Also returns a Boolean `shared_attribute_rows` for rows
+    with the same entry.
+    """
+    shared_attribute_rows = (
+        left_df[attribute].iloc[l_idx].values == right_df[attribute].iloc[r_idx].values
+    )
+
+    l_idx = l_idx[shared_attribute_rows]
+    r_idx = r_idx[shared_attribute_rows]
+    return (l_idx, r_idx), shared_attribute_rows


 def sjoin_nearest(
@@ -453,7 +627,6 @@ def sjoin_nearest(
     exclusive : bool, default False
         If True, the nearest geometries that are equal to the input geometry
         will not be returned, default False.
-        Requires Shapely >= 2.0.

     Examples
     --------
@@ -466,7 +639,7 @@ def sjoin_nearest(
     ... ).to_crs(groceries.crs)

     >>> chicago.head()  # doctest: +SKIP
     ComAreaID ... geometry
     0 35 ... POLYGON ((-87.60914 41.84469, -87.60915 41.844...
     1 36 ... POLYGON ((-87.59215 41.81693, -87.59231 41.816...
     2 37 ... POLYGON ((-87.62880 41.80189, -87.62879 41.801...
@@ -475,19 +648,19 @@ def sjoin_nearest(
     [5 rows x 87 columns]

     >>> groceries.head()  # doctest: +SKIP
-    OBJECTID Ycoord ... Category geometry
-    0 16 41.973266 ... NaN MULTIPOINT (-87.65661 41.97321)
-    1 18 41.696367 ... NaN MULTIPOINT (-87.68136 41.69713)
-    2 22 41.868634 ... NaN MULTIPOINT (-87.63918 41.86847)
-    3 23 41.877590 ... new MULTIPOINT (-87.65495 41.87783)
-    4 27 41.737696 ... NaN MULTIPOINT (-87.62715 41.73623)
+    OBJECTID Ycoord ... Category geometry
+    0 16 41.973266 ... NaN MULTIPOINT ((-87.65661 41.97321))
+    1 18 41.696367 ... NaN MULTIPOINT ((-87.68136 41.69713))
+    2 22 41.868634 ... NaN MULTIPOINT ((-87.63918 41.86847))
+    3 23 41.877590 ... new MULTIPOINT ((-87.65495 41.87783))
+    4 27 41.737696 ... NaN MULTIPOINT ((-87.62715 41.73623))
     [5 rows x 8 columns]

     >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago)
     >>> groceries_w_communities[["Chain", "community", "geometry"]].head(2)
-    Chain community geometry
-    0 VIET HOA PLAZA UPTOWN MULTIPOINT (1168268.672 1933554.350)
-    87 JEWEL OSCO UPTOWN MULTIPOINT (1168837.980 1929246.962)
+    Chain community geometry
+    0 VIET HOA PLAZA UPTOWN MULTIPOINT ((1168268.672 1933554.35))
+    1 COUNTY FAIR FOODS MORGAN PARK MULTIPOINT ((1162302.618 1832900.224))


     To include the distances:
@@ -495,10 +668,10 @@ def sjoin_nearest(
     >>> groceries_w_communities = geopandas.sjoin_nearest(groceries, chicago, \
distance_col="distances")
     >>> groceries_w_communities[["Chain", "community", \
-"distances"]].head(2)  # doctest: +SKIP
-    Chain community distances
-    0 VIET HOA PLAZA UPTOWN 0.0
-    87 JEWEL OSCO UPTOWN 0.0
+"distances"]].head(2)
+    Chain community distances
+    0 VIET HOA PLAZA UPTOWN 0.0
+    1 COUNTY FAIR FOODS MORGAN PARK 0.0

     In the following example, we get multiple groceries for Uptown because all
     results are equidistant (in this case zero because they intersect).
@@ -508,7 +681,7 @@ distance_col="distances")
     distance_col="distances", how="right")
     >>> uptown_results = \
chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
-    >>> uptown_results[["Chain", "community"]]  # doctest: +SKIP
+    >>> uptown_results[["Chain", "community"]]
     Chain community
     30 VIET HOA PLAZA UPTOWN
     30 JEWEL OSCO UPTOWN
@@ -528,6 +701,7 @@ chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]
     Every operation in GeoPandas is planar, i.e. the potential third
     dimension is not taken into account.
     """
+
     _basic_checks(left_df, right_df, how, lsuffix, rsuffix)

     left_df.geometry.values.check_geographic_crs(stacklevel=1)
@@ -535,19 +709,26 @@ chicago_w_groceries[chicago_w_groceries["community"] == "UPTOWN"]

     return_distance = distance_col is not None

-    join_df = _nearest_query(
-        left_df, right_df, max_distance, how, return_distance, exclusive
+    indices, distances = _nearest_query(
+        left_df,
+        right_df,
+        max_distance,
+        how,
+        return_distance,
+        exclusive,
     )
+
+    joined, distances = _frame_join(
+        left_df,
+        right_df,
+        indices,
+        distances,
+        how,
+        lsuffix,
+        rsuffix,
+        None,
+    )

-    if return_distance:
-        join_df = join_df.rename(columns={"distances": distance_col})
-    else:
-        join_df.pop("distances")
-
-    joined = _frame_join(join_df, left_df, right_df, how, lsuffix, rsuffix)

     if return_distance:
-        columns = [c for c in joined.columns if c != distance_col] + [distance_col]
-        joined = joined[columns]
+        joined[distance_col] = distances

     return joined
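Note: a quick usage sketch of the reworked sjoin_nearest() with made-up data:

    import geopandas
    from shapely.geometry import Point

    pts = geopandas.GeoDataFrame(geometry=[Point(0, 0), Point(3, 3)])
    targets = geopandas.GeoDataFrame({"name": ["x", "y"]}, geometry=[Point(0, 1), Point(3, 4)])

    nearest = geopandas.sjoin_nearest(pts, targets, distance_col="dist")
    print(nearest[["name", "dist"]])  # each point paired with its nearest target and the distance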
[6 binary files changed; contents not shown]
@@ -1,28 +1,28 @@
 """Tests for the clip module."""


 import numpy as np
 import pandas as pd

+import shapely
 from shapely.geometry import (
-    Polygon,
-    Point,
-    LineString,
-    LinearRing,
     GeometryCollection,
+    LinearRing,
+    LineString,
     MultiPoint,
+    Point,
+    Polygon,
     box,
 )

 import geopandas
 from geopandas import GeoDataFrame, GeoSeries, clip

-from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
-import pytest

+from geopandas._compat import HAS_PYPROJ
 from geopandas.tools.clip import _mask_is_list_like_rectangle

-pytestmark = pytest.mark.skip_no_sindex
+import pytest
+from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
+from pandas.testing import assert_index_equal

 mask_variants_single_rectangle = [
     "single_rectangle_gdf",
     "single_rectangle_gdf_list_bounds",
@@ -43,6 +43,14 @@ def point_gdf():
     return gdf


+@pytest.fixture
+def point_gdf2():
+    """Create a point GeoDataFrame."""
+    pts = np.array([[5, 5], [2, 2], [4, 4], [0, 0], [3, 3], [1, 1]])
+    gdf = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857")
+    return gdf
+
+
 @pytest.fixture
 def pointsoutside_nooverlap_gdf():
     """Create a point GeoDataFrame. Its points are all outside the single
@@ -137,7 +145,7 @@ def two_line_gdf():
 @pytest.fixture
 def multi_poly_gdf(donut_geometry):
     """Create a multi-polygon GeoDataFrame."""
-    multi_poly = donut_geometry.unary_union
+    multi_poly = donut_geometry.union_all()
     out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:3857")
     out_df["attr"] = ["pool"]
     return out_df
@@ -148,7 +156,7 @@ def multi_line(two_line_gdf):
     """Create a multi-line GeoDataFrame.
     This GDF has one multiline and one regular line."""
     # Create a single and multi line object
-    multiline_feat = two_line_gdf.unary_union
+    multiline_feat = two_line_gdf.union_all()
     linec = LineString([(2, 1), (3, 1), (4, 1), (5, 2)])
     out_df = GeoDataFrame(geometry=GeoSeries([multiline_feat, linec]), crs="EPSG:3857")
     out_df["attr"] = ["road", "stream"]
@@ -158,7 +166,7 @@ def multi_line(two_line_gdf):
 @pytest.fixture
 def multi_point(point_gdf):
     """Create a multi-point GeoDataFrame."""
-    multi_point = point_gdf.unary_union
+    multi_point = point_gdf.union_all()
     out_df = GeoDataFrame(
         geometry=GeoSeries(
             [multi_point, Point(2, 5), Point(-11, -14), Point(-10, -12)]
@@ -321,7 +329,7 @@ class TestClipWithSingleRectangleGdf:
         )
         assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt
         shape_for_points = (
-            box(*mask) if _mask_is_list_like_rectangle(mask) else mask.unary_union
+            box(*mask) if _mask_is_list_like_rectangle(mask) else mask.union_all()
         )
         assert all(clipped.intersects(shape_for_points))

@@ -398,6 +406,7 @@ def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
     assert "GeometryCollection" in clipped.geom_type[0]


+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
 def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf):
     with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
         clip(point_gdf, single_rectangle_gdf.to_crs(4326))
@@ -460,3 +469,16 @@ def test_clip_empty_mask(buffered_locations, mask):
     )
     clipped = clip(buffered_locations.geometry, mask)
     assert_geoseries_equal(clipped, GeoSeries([], crs="EPSG:3857"))
+
+
+def test_clip_sorting(point_gdf2):
+    """Test the sorting kwarg in clip"""
+    bbox = shapely.geometry.box(0, 0, 2, 2)
+    unsorted_clipped_gdf = point_gdf2.clip(bbox)
+    sorted_clipped_gdf = point_gdf2.clip(bbox, sort=True)
+
+    expected_sorted_index = pd.Index([1, 3, 5])
+
+    assert not (sorted(unsorted_clipped_gdf.index) == unsorted_clipped_gdf.index).all()
+    assert (sorted(sorted_clipped_gdf.index) == sorted_clipped_gdf.index).all()
+    assert_index_equal(expected_sorted_index, sorted_clipped_gdf.index)
@@ -1,4 +1,5 @@
+import numpy as np
+
 from shapely.geometry import Point
 from shapely.wkt import loads

@@ -1,28 +1,46 @@
-import pytest
 import numpy
-import geopandas
-import geopandas._compat as compat

+import geopandas
 from geopandas.tools._random import uniform

-multipolygons = geopandas.read_file(geopandas.datasets.get_path("nybb")).geometry
-polygons = multipolygons.explode(ignore_index=True).geometry
-multilinestrings = multipolygons.boundary
-linestrings = polygons.boundary
-points = multipolygons.centroid
+import pytest
+
+
+@pytest.fixture
+def multipolygons(nybb_filename):
+    return geopandas.read_file(nybb_filename).geometry
+
+
+@pytest.fixture
+def polygons(multipolygons):
+    return multipolygons.explode(ignore_index=True).geometry
+
+
+@pytest.fixture
+def multilinestrings(multipolygons):
+    return multipolygons.boundary
+
+
+@pytest.fixture
+def linestrings(polygons):
+    return polygons.boundary
+
+
+@pytest.fixture
+def points(multipolygons):
+    return multipolygons.centroid


-@pytest.mark.skipif(
-    not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
-    reason="array input in interpolate not implemented for shapely<2",
-)
 @pytest.mark.parametrize("size", [10, 100])
 @pytest.mark.parametrize(
-    "geom", [multipolygons[0], polygons[0], multilinestrings[0], linestrings[0]]
+    "geom_fixture", ["multipolygons", "polygons", "multilinestrings", "linestrings"]
 )
-def test_uniform(geom, size):
+def test_uniform(geom_fixture, size, request):
+    geom = request.getfixturevalue(geom_fixture)[0]
     sample = uniform(geom, size=size, rng=1)
-    sample_series = geopandas.GeoSeries(sample).explode().reset_index(drop=True)
+    sample_series = (
+        geopandas.GeoSeries(sample).explode(index_parts=True).reset_index(drop=True)
+    )
     assert len(sample_series) == size
     sample_in_geom = sample_series.buffer(0.00000001).sindex.query(
         geom, predicate="intersects"
@@ -30,21 +48,13 @@ def test_uniform(geom, size):
     assert len(sample_in_geom) == size


-@pytest.mark.skipif(
-    not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
-    reason="array input in interpolate not implemented for shapely<2",
-)
-def test_uniform_unsupported():
+def test_uniform_unsupported(points):
     with pytest.warns(UserWarning, match="Sampling is not supported"):
         sample = uniform(points[0], size=10, rng=1)
     assert sample.is_empty


-@pytest.mark.skipif(
-    not (compat.USE_PYGEOS or compat.USE_SHAPELY_20),
-    reason="array input in interpolate not implemented for shapely<2",
-)
-def test_uniform_generator():
+def test_uniform_generator(polygons):
     sample = uniform(polygons[0], size=10, rng=1)
     sample2 = uniform(polygons[0], size=10, rng=1)
     assert sample.equals(sample2)
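Note: the tests above call the private sampling helper directly; its signature, per this diff, is uniform(geom, size, rng):

    from shapely.geometry import box
    from geopandas.tools._random import uniform  # private helper under test

    sample = uniform(box(0, 0, 1, 1), size=10, rng=1)  # MultiPoint of 10 uniform samples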
@@ -3,23 +3,24 @@ from typing import Sequence
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import shapely
|
||||
|
||||
from shapely.geometry import Point, Polygon, GeometryCollection
|
||||
import shapely
|
||||
from shapely.geometry import GeometryCollection, Point, Polygon, box
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, GeoSeries, read_file, sjoin, sjoin_nearest
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from geopandas import (
|
||||
GeoDataFrame,
|
||||
GeoSeries,
|
||||
points_from_xy,
|
||||
read_file,
|
||||
sjoin,
|
||||
sjoin_nearest,
|
||||
)
|
||||
|
||||
from pandas.testing import assert_frame_equal, assert_series_equal
|
||||
import pytest
|
||||
|
||||
|
||||
TEST_NEAREST = compat.USE_SHAPELY_20 or (compat.PYGEOS_GE_010 and compat.USE_PYGEOS)
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skip_no_sindex
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@@ -95,6 +96,52 @@ def dfs(request):
|
||||
return [request.param, df1, df2, expected]
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def dfs_shared_attribute():
|
||||
geo_left = [
|
||||
Point(0, 0),
|
||||
Point(1, 1),
|
||||
Point(2, 2),
|
||||
Point(3, 3),
|
||||
Point(4, 4),
|
||||
Point(5, 5),
|
||||
Point(6, 6),
|
||||
Point(7, 7),
|
||||
]
|
||||
geo_right = [
|
||||
Point(0, 0),
|
||||
Point(1, 1),
|
||||
Point(2, 2),
|
||||
Point(3, 3),
|
||||
Point(4, 4),
|
||||
Point(5, 5),
|
||||
Point(6, 6),
|
||||
Point(7, 7),
|
||||
]
|
||||
attr_tracker = ["A", "B", "C", "D", "E", "F", "G", "H"]
|
||||
|
||||
left_gdf = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": geo_left,
|
||||
"attr_tracker": attr_tracker,
|
||||
"duplicate_column": [0, 1, 2, 3, 4, 5, 6, 7],
|
||||
"attr1": [True, True, True, True, True, True, True, True],
|
||||
"attr2": [True, True, True, True, True, True, True, True],
|
||||
}
|
||||
)
|
||||
|
||||
right_gdf = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": geo_right,
|
||||
"duplicate_column": [0, 1, 2, 3, 4, 5, 6, 7],
|
||||
"attr1": [True, True, False, False, True, True, False, False],
|
||||
"attr2": [True, True, False, False, False, False, False, False],
|
||||
}
|
||||
)
|
||||
|
||||
return left_gdf, right_gdf
|
||||
|
||||
|
||||
class TestSpatialJoin:
|
||||
@pytest.mark.parametrize(
|
||||
"how, lsuffix, rsuffix, expected_cols",
|
||||
@@ -113,6 +160,7 @@ class TestSpatialJoin:
|
||||
joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
|
||||
assert set(joined.columns) == expected_cols | {"geometry"}
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
|
||||
@pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
|
||||
def test_crs_mismatch(self, dfs):
|
||||
index, df1, df2, expected = dfs
|
||||
@@ -120,31 +168,6 @@ class TestSpatialJoin:
|
||||
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
|
||||
sjoin(df1, df2)
|
||||
|
||||
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
|
||||
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
|
||||
def test_deprecated_op_param(self, dfs, op):
|
||||
_, df1, df2, _ = dfs
|
||||
with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
|
||||
sjoin(df1, df2, op=op)
|
||||
|
||||
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
|
||||
@pytest.mark.parametrize("op", ["intersects", "contains", "within"])
|
||||
@pytest.mark.parametrize("predicate", ["contains", "within"])
|
||||
def test_deprecated_op_param_nondefault_predicate(self, dfs, op, predicate):
|
||||
_, df1, df2, _ = dfs
|
||||
match = "use the `predicate` parameter instead"
|
||||
if op != predicate:
|
||||
warntype = UserWarning
|
||||
match = (
|
||||
"`predicate` will be overridden by the value of `op`" # noqa: ISC003
|
||||
+ r"(.|\s)*"
|
||||
+ match
|
||||
)
|
||||
else:
|
||||
warntype = FutureWarning
|
||||
with pytest.warns(warntype, match=match):
|
||||
sjoin(df1, df2, predicate=predicate, op=op)
|
||||
|
||||
@pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
|
||||
def test_unknown_kwargs(self, dfs):
|
||||
_, df1, df2, _ = dfs
|
||||
@@ -154,7 +177,6 @@ class TestSpatialJoin:
|
||||
):
|
||||
sjoin(df1, df2, extra_param="test")
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
|
||||
@pytest.mark.parametrize(
|
||||
"dfs",
|
||||
[
|
||||
@@ -167,12 +189,10 @@ class TestSpatialJoin:
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
|
||||
@pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
|
||||
def test_inner(self, predicate, predicate_kw, dfs):
|
||||
def test_inner(self, predicate, dfs):
|
||||
index, df1, df2, expected = dfs
|
||||
|
||||
res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})
|
||||
|
||||
res = sjoin(df1, df2, how="inner", predicate=predicate)
|
||||
exp = expected[predicate].dropna().copy()
|
||||
exp = exp.drop("geometry_y", axis=1).rename(columns={"geometry_x": "geometry"})
|
||||
exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
|
||||
@@ -182,7 +202,7 @@ class TestSpatialJoin:
|
||||
].astype("int64")
|
||||
if index == "named-index":
|
||||
exp[["df1_ix", "df2_ix"]] = exp[["df1_ix", "df2_ix"]].astype("int64")
|
||||
exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
|
||||
exp = exp.set_index("df1_ix")
|
||||
if index in ["default-index", "string-index"]:
|
||||
exp = exp.set_index("index_left")
|
||||
exp.index.name = None
|
||||
@@ -192,11 +212,7 @@ class TestSpatialJoin:
|
||||
)
|
||||
exp.index.names = df1.index.names
|
||||
if index == "named-multi-index":
|
||||
exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
|
||||
columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
|
||||
)
|
||||
exp.index.names = df1.index.names
|
||||
|
||||
exp = exp.set_index(["df1_ix1", "df1_ix2"])
|
||||
assert_frame_equal(res, exp)
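For reference, a small sketch (not part of this commit) of the default output shape this test asserts: an inner sjoin keeps the left frame's columns, materializes the right index as "index_right", and appends the right frame's columns:

pts = GeoDataFrame(geometry=points_from_xy([0.5], [0.5]))
squares = GeoDataFrame({"val": [1]}, geometry=[box(0, 0, 1, 1)])
res = sjoin(pts, squares, how="inner", predicate="intersects")
# list(res.columns) == ["geometry", "index_right", "val"]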
    @pytest.mark.parametrize(
@@ -232,7 +248,7 @@ class TestSpatialJoin:
            res["index_right"] = res["index_right"].astype(float)
        elif index == "named-index":
            exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64")
            exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
            exp = exp.set_index("df1_ix")
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_left")
            exp.index.name = None
@@ -242,10 +258,7 @@ class TestSpatialJoin:
            )
            exp.index.names = df1.index.names
        if index == "named-multi-index":
            exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(
                columns={"df2_ix1": "index_right0", "df2_ix2": "index_right1"}
            )
            exp.index.names = df1.index.names
            exp = exp.set_index(["df1_ix1", "df1_ix2"])

        assert_frame_equal(res, exp)

@@ -348,7 +361,7 @@ class TestSpatialJoin:
            res["index_left"] = res["index_left"].astype(float)
        elif index == "named-index":
            exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
            exp = exp.set_index("df2_ix").rename(columns={"df1_ix": "index_left"})
            exp = exp.set_index("df2_ix")
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_right")
            exp = exp.reindex(columns=res.columns)
@@ -359,20 +372,431 @@ class TestSpatialJoin:
            )
            exp.index.names = df2.index.names
        if index == "named-multi-index":
            exp = exp.set_index(["df2_ix1", "df2_ix2"]).rename(
                columns={"df1_ix1": "index_left0", "df1_ix2": "index_left1"}
            )
            exp.index.names = df2.index.names
            exp = exp.set_index(["df2_ix1", "df2_ix2"])

        if predicate == "within":
            exp = exp.sort_index()

        assert_frame_equal(res, exp, check_index_type=False)

    @pytest.mark.skipif(not compat.GEOS_GE_310, reason="`dwithin` requires GEOS 3.10")
    @pytest.mark.parametrize("how", ["inner"])
    @pytest.mark.parametrize(
        "geo_left, geo_right, expected_left, expected_right, distance",
        [
            (
                # Distance is number, 2x1
                [Point(0, 0), Point(1, 1)],
                [Point(1, 1)],
                [0, 1],
                [0, 0],
                math.sqrt(2),
            ),
            # Distance is number, 2x2
            (
                [Point(0, 0), Point(1, 1)],
                [Point(0, 0), Point(1, 1)],
                [0, 1, 0, 1],
                [0, 0, 1, 1],
                math.sqrt(2),
            ),
            # Distance is array, matches len(left)
            (
                [Point(0, 0), Point(0, 0), Point(-1, -1)],
                [Point(1, 1)],
                [1, 2],
                [0, 0],
                [0, math.sqrt(2), math.sqrt(8)],
            ),
            # Distance is np.array, matches len(left),
            # inner join sorts the right GeoDataFrame
            (
                [Point(0, 0), Point(0, 0), Point(-1, -1)],
                [Point(1, 1), Point(0.5, 0.5)],
                [1, 2, 1, 2],
                [1, 1, 0, 0],
                np.array([0, math.sqrt(2), math.sqrt(8)]),
            ),
        ],
    )
    def test_sjoin_dwithin(
        self,
        geo_left,
        geo_right,
        expected_left: Sequence[int],
        expected_right: Sequence[int],
        distance,
        how,
    ):
        left = geopandas.GeoDataFrame({"geometry": geo_left})
        right = geopandas.GeoDataFrame({"geometry": geo_right})
        expected_gdf = left.iloc[expected_left].copy()
        expected_gdf["index_right"] = expected_right
        joined = sjoin(left, right, how=how, predicate="dwithin", distance=distance)
        assert_frame_equal(expected_gdf.sort_index(), joined.sort_index())
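A standalone sketch (not part of this commit) of the `dwithin` predicate these parametrized cases cover: a pair is kept when the geometries lie within `distance` of each other, and `distance` may be a scalar or an array-like aligned with the left frame:

import math

import geopandas
from shapely.geometry import Point

left = geopandas.GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]})
right = geopandas.GeoDataFrame({"geometry": [Point(1, 1)]})
# Both left points lie within sqrt(2) of (1, 1), so both match right row 0.
joined = geopandas.sjoin(left, right, predicate="dwithin", distance=math.sqrt(2))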
    # GH3239
    @pytest.mark.parametrize(
        "predicate",
        [
            "contains",
            "contains_properly",
            "covered_by",
            "covers",
            "crosses",
            "intersects",
            "touches",
            "within",
        ],
    )
    def test_sjoin_left_order(self, predicate):
        # a set of points in random order -> that order should be preserved
        # with a left join
        pts = GeoDataFrame(
            geometry=points_from_xy([0.1, 0.4, 0.3, 0.7], [0.8, 0.6, 0.9, 0.1])
        )
        polys = GeoDataFrame(
            {"id": [1, 2, 3, 4]},
            geometry=[
                box(0, 0, 0.5, 0.5),
                box(0, 0.5, 0.5, 1),
                box(0.5, 0, 1, 0.5),
                box(0.5, 0.5, 1, 1),
            ],
        )

        joined = sjoin(pts, polys, predicate=predicate, how="left")
        assert_index_equal(joined.index, pts.index)

    def test_sjoin_shared_attribute(self, naturalearth_lowres, naturalearth_cities):
        countries = read_file(naturalearth_lowres)
        cities = read_file(naturalearth_cities)
        countries = countries[["geometry", "name"]].rename(columns={"name": "country"})

        # Add first letter of country/city as an attribute column to be compared
        countries["firstLetter"] = countries["country"].astype(str).str[0]
        cities["firstLetter"] = cities["name"].astype(str).str[0]

        result = sjoin(cities, countries, on_attribute="firstLetter")
        assert (
            result["country"].astype(str).str[0] == result["name"].astype(str).str[0]
        ).all()
        assert result.shape == (23, 5)

    @pytest.mark.parametrize(
        "attr1_key_change_dict, attr2_key_change_dict",
        [
            pytest.param(
                {True: "merge", False: "no_merge"},
                {True: "merge", False: "no_merge"},
                id="merge on string attributes",
            ),
            pytest.param(
                {True: 2, False: 1},
                {True: 2, False: 1},
                id="merge on integer attributes",
            ),
            pytest.param(
                {True: True, False: False},
                {True: True, False: False},
                id="merge on boolean attributes",
            ),
            pytest.param(
                {True: True, False: False},
                {True: "merge", False: "no_merge"},
                id="merge on mixed attributes",
            ),
        ],
    )
    def test_sjoin_multiple_attributes_datatypes(
        self, dfs_shared_attribute, attr1_key_change_dict, attr2_key_change_dict
    ):
        left_gdf, right_gdf = dfs_shared_attribute
        left_gdf["attr1"] = left_gdf["attr1"].map(attr1_key_change_dict)
        left_gdf["attr2"] = left_gdf["attr2"].map(attr2_key_change_dict)
        right_gdf["attr1"] = right_gdf["attr1"].map(attr1_key_change_dict)
        right_gdf["attr2"] = right_gdf["attr2"].map(attr2_key_change_dict)

        joined = sjoin(left_gdf, right_gdf, on_attribute=("attr1", "attr2"))
        assert (["A", "B"] == joined["attr_tracker"].values).all()

    def test_sjoin_multiple_attributes_check_header(self, dfs_shared_attribute):
        left_gdf, right_gdf = dfs_shared_attribute
        joined = sjoin(left_gdf, right_gdf, on_attribute=["attr1"])

        assert (["A", "B", "E", "F"] == joined["attr_tracker"].values).all()
        assert {"attr2_left", "attr2_right", "attr1"}.issubset(joined.columns)
        assert "attr1_left" not in joined

    def test_sjoin_error_column_does_not_exist(self, dfs_shared_attribute):
        left_gdf, right_gdf = dfs_shared_attribute
        right_gdf_dropped_attr = right_gdf.drop("attr1", axis=1)
        left_gdf_dropped_attr = left_gdf.drop("attr1", axis=1)

        with pytest.raises(
            ValueError,
            match="Expected column attr1 is missing from the right dataframe.",
        ):
            sjoin(left_gdf, right_gdf_dropped_attr, on_attribute="attr1")

        with pytest.raises(
            ValueError,
            match="Expected column attr1 is missing from the left dataframe.",
        ):
            sjoin(left_gdf_dropped_attr, right_gdf, on_attribute="attr1")

        with pytest.raises(
            ValueError,
            match="Expected column attr1 is missing from both of the dataframes.",
        ):
            sjoin(left_gdf_dropped_attr, right_gdf_dropped_attr, on_attribute="attr1")

    def test_sjoin_error_use_geometry_column(self, dfs_shared_attribute):
        left_gdf, right_gdf = dfs_shared_attribute
        with pytest.raises(
            ValueError,
            match="Active geometry column cannot be used as an input for "
            "on_attribute parameter.",
        ):
            sjoin(left_gdf, right_gdf, on_attribute="geometry")
        with pytest.raises(
            ValueError,
            match="Active geometry column cannot be used as an input for "
            "on_attribute parameter.",
        ):
            sjoin(left_gdf, right_gdf, on_attribute=["attr1", "geometry"])


class TestIndexNames:
    @pytest.mark.parametrize("how", ["inner", "left", "right"])
    def test_preserve_index_names(self, how):
        # preserve names of both left and right index
        geoms = [Point(1, 1), Point(2, 2)]
        df1 = GeoDataFrame({"geometry": geoms}, index=pd.Index([1, 2], name="myidx1"))
        df2 = GeoDataFrame(
            {"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx2")
        )
        result = sjoin(df1, df2, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"myidx1": [1, 2], "geometry": geoms, "myidx2": ["a", "b"]}
            ).set_index("myidx1")
        else:
            # right join
            expected = GeoDataFrame(
                {"myidx2": ["a", "b"], "myidx1": [1, 2], "geometry": geoms},
            ).set_index("myidx2")
        assert_geodataframe_equal(result, expected)

        # but also add suffixes if both left and right have the same index
        df1.index.name = "myidx"
        df2.index.name = "myidx"
        result = sjoin(df1, df2, how=how)
        if how in ("inner", "left"):
            expected = GeoDataFrame(
                {"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
            ).set_index("myidx_left")
        else:
            # right join
            expected = GeoDataFrame(
                {"myidx_right": ["a", "b"], "myidx_left": [1, 2], "geometry": geoms},
            ).set_index("myidx_right")
        assert_geodataframe_equal(result, expected)
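As a standalone sketch of the behaviour asserted above (illustrative, not part of this commit): both index names survive the join, and a name collision falls back to the default "left"/"right" suffixes:

import geopandas
import pandas as pd
from shapely.geometry import Point

geoms = [Point(1, 1), Point(2, 2)]
df1 = geopandas.GeoDataFrame({"geometry": geoms}, index=pd.Index([1, 2], name="myidx"))
df2 = geopandas.GeoDataFrame({"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx"))
joined = geopandas.sjoin(df1, df2)
# joined.index.name == "myidx_left"; the right index appears as the
# "myidx_right" column.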
@pytest.mark.parametrize("how", ["inner", "left", "right"])
|
||||
def test_preserve_index_names_multiindex(self, how):
|
||||
# preserve names of both left and right index
|
||||
geoms = [Point(1, 1), Point(2, 2)]
|
||||
df1 = GeoDataFrame(
|
||||
{"geometry": geoms},
|
||||
index=pd.MultiIndex.from_tuples(
|
||||
[("a", 1), ("b", 2)], names=["myidx1", "level2"]
|
||||
),
|
||||
)
|
||||
df2 = GeoDataFrame(
|
||||
{"geometry": geoms},
|
||||
index=pd.MultiIndex.from_tuples(
|
||||
[("c", 3), ("d", 4)], names=["myidx2", None]
|
||||
),
|
||||
)
|
||||
result = sjoin(df1, df2, how=how)
|
||||
expected_base = GeoDataFrame(
|
||||
{
|
||||
"myidx1": ["a", "b"],
|
||||
"level2": [1, 2],
|
||||
"geometry": geoms,
|
||||
"myidx2": ["c", "d"],
|
||||
"index_right1": [3, 4],
|
||||
}
|
||||
)
|
||||
if how in ("inner", "left"):
|
||||
expected = expected_base.set_index(["myidx1", "level2"])
|
||||
else:
|
||||
# right join
|
||||
expected = expected_base.set_index(["myidx2", "index_right1"])
|
||||
# if it was originally None, that is preserved
|
||||
expected.index.names = ["myidx2", None]
|
||||
assert_geodataframe_equal(result, expected)
|
||||
|
||||
# but also add suffixes if both left and right have the same index
|
||||
df1.index.names = ["myidx", "level2"]
|
||||
df2.index.names = ["myidx", None]
|
||||
result = sjoin(df1, df2, how=how)
|
||||
expected_base = GeoDataFrame(
|
||||
{
|
||||
"myidx_left": ["a", "b"],
|
||||
"level2": [1, 2],
|
||||
"geometry": geoms,
|
||||
"myidx_right": ["c", "d"],
|
||||
"index_right1": [3, 4],
|
||||
}
|
||||
)
|
||||
if how in ("inner", "left"):
|
||||
expected = expected_base.set_index(["myidx_left", "level2"])
|
||||
else:
|
||||
# right join
|
||||
expected = expected_base.set_index(["myidx_right", "index_right1"])
|
||||
# if it was originally None, that is preserved
|
||||
expected.index.names = ["myidx_right", None]
|
||||
assert_geodataframe_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("how", ["inner", "left", "right"])
|
||||
def test_duplicate_column_index_name(self, how):
|
||||
# case where a left column and the right index have the same name or the
|
||||
# other way around -> correctly add suffix or preserve index name
|
||||
geoms = [Point(1, 1), Point(2, 2)]
|
||||
df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
|
||||
df2 = GeoDataFrame(
|
||||
{"geometry": geoms}, index=pd.Index(["a", "b"], name="myidx")
|
||||
)
|
||||
result = sjoin(df1, df2, how=how)
|
||||
if how in ("inner", "left"):
|
||||
expected = GeoDataFrame(
|
||||
{"myidx_left": [1, 2], "geometry": geoms, "myidx_right": ["a", "b"]}
|
||||
)
|
||||
else:
|
||||
# right join
|
||||
expected = GeoDataFrame(
|
||||
{"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
|
||||
index=pd.Index(["a", "b"], name="myidx_right"),
|
||||
)
|
||||
assert_geodataframe_equal(result, expected)
|
||||
|
||||
result = sjoin(df2, df1, how=how)
|
||||
if how in ("inner", "left"):
|
||||
expected = GeoDataFrame(
|
||||
{"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
|
||||
index=pd.Index(["a", "b"], name="myidx_left"),
|
||||
)
|
||||
else:
|
||||
# right join
|
||||
expected = GeoDataFrame(
|
||||
{"myidx_left": ["a", "b"], "myidx_right": [1, 2], "geometry": geoms},
|
||||
)
|
||||
assert_geodataframe_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("how", ["inner", "left", "right"])
|
||||
def test_duplicate_column_index_name_multiindex(self, how):
|
||||
# case where a left column and the right index have the same name or the
|
||||
# other way around -> correctly add suffix or preserve index name
|
||||
geoms = [Point(1, 1), Point(2, 2)]
|
||||
df1 = GeoDataFrame({"myidx": [1, 2], "geometry": geoms})
|
||||
df2 = GeoDataFrame(
|
||||
{"geometry": geoms},
|
||||
index=pd.MultiIndex.from_tuples(
|
||||
[("a", 1), ("b", 2)], names=["myidx", "level2"]
|
||||
),
|
||||
)
|
||||
result = sjoin(df1, df2, how=how)
|
||||
if how in ("inner", "left"):
|
||||
expected = GeoDataFrame(
|
||||
{
|
||||
"myidx_left": [1, 2],
|
||||
"geometry": geoms,
|
||||
"myidx_right": ["a", "b"],
|
||||
"level2": [1, 2],
|
||||
}
|
||||
)
|
||||
else:
|
||||
# right join
|
||||
expected = GeoDataFrame(
|
||||
{"index_left": [0, 1], "myidx_left": [1, 2], "geometry": geoms},
|
||||
index=pd.MultiIndex.from_tuples(
|
||||
[("a", 1), ("b", 2)], names=["myidx_right", "level2"]
|
||||
),
|
||||
)
|
||||
assert_geodataframe_equal(result, expected)
|
||||
|
||||
result = sjoin(df2, df1, how=how)
|
||||
if how in ("inner", "left"):
|
||||
expected = GeoDataFrame(
|
||||
{"geometry": geoms, "index_right": [0, 1], "myidx_right": [1, 2]},
|
||||
index=pd.MultiIndex.from_tuples(
|
||||
[("a", 1), ("b", 2)], names=["myidx_left", "level2"]
|
||||
),
|
||||
)
|
||||
else:
|
||||
# right join
|
||||
expected = GeoDataFrame(
|
||||
{
|
||||
"myidx_left": ["a", "b"],
|
||||
"level2": [1, 2],
|
||||
"myidx_right": [1, 2],
|
||||
"geometry": geoms,
|
||||
},
|
||||
)
|
||||
assert_geodataframe_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("how", ["inner", "left", "right"])
|
||||
def test_conflicting_column_index_name(self, how):
|
||||
# test case where the auto-generated index name conflicts
|
||||
geoms = [Point(1, 1), Point(2, 2)]
|
||||
df1 = GeoDataFrame({"index_right": [1, 2], "geometry": geoms})
|
||||
df2 = GeoDataFrame({"geometry": geoms})
|
||||
with pytest.raises(ValueError, match="'index_right' cannot be a column name"):
|
||||
sjoin(df1, df2, how=how)
|
||||
|
||||
@pytest.mark.parametrize("how", ["inner", "left", "right"])
|
||||
def test_conflicting_column_with_suffix(self, how):
|
||||
# test case where the auto-generated index name conflicts
|
||||
geoms = [Point(1, 1), Point(2, 2)]
|
||||
df1 = GeoDataFrame(
|
||||
{"column": [1, 2], "column_right": ["a", "b"], "geometry": geoms}
|
||||
)
|
||||
df2 = GeoDataFrame({"column": [0.1, 0.2], "geometry": geoms})
|
||||
|
||||
result = sjoin(df1, df2, how=how)
|
||||
if how in ("inner", "left"):
|
||||
expected = GeoDataFrame(
|
||||
{1: [1, 2], 2: ["a", "b"], 3: geoms, 4: [0, 1], 5: [0.1, 0.2]}
|
||||
)
|
||||
expected.columns = [
|
||||
"column_left",
|
||||
"column_right",
|
||||
"geometry",
|
||||
"index_right",
|
||||
"column_right",
|
||||
]
|
||||
else:
|
||||
# right join
|
||||
expected = GeoDataFrame(
|
||||
{1: [0, 1], 2: [1, 2], 3: ["a", "b"], 4: [0.1, 0.2], 5: geoms}
|
||||
)
|
||||
expected.columns = [
|
||||
"index_left",
|
||||
"column_left",
|
||||
"column_right",
|
||||
"column_right",
|
||||
"geometry",
|
||||
]
|
||||
expected = expected.set_geometry("geometry")
|
||||
assert_geodataframe_equal(result, expected)
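Note that, as the expected frames above encode, sjoin does not deduplicate a pre-existing "column_right" against the suffixed one, so the result carries two "column_right" labels. A hypothetical pre-join rename sidesteps the duplicate:

# df1/df2 as in the test above; "column_right_orig" is an arbitrary new name.
df1 = df1.rename(columns={"column_right": "column_right_orig"})
result = sjoin(df1, df2, how="inner")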
@pytest.mark.usefixtures("_setup_class_nybb_filename")
class TestSpatialJoinNYBB:
    def setup_method(self):
        nybb_filename = geopandas.datasets.get_path("nybb")
        self.polydf = read_file(nybb_filename)
        self.polydf = read_file(self.nybb_filename)
        self.crs = self.polydf.crs
        N = 20
        b = [int(x) for x in self.polydf.total_bounds]
@@ -527,7 +951,7 @@ class TestSpatialJoinNYBB:

    def test_sjoin_empty_geometries(self):
        # https://github.com/geopandas/geopandas/issues/944
        empty = GeoDataFrame(geometry=[GeometryCollection()] * 3)
        empty = GeoDataFrame(geometry=[GeometryCollection()] * 3, crs=self.crs)
        df = sjoin(pd.concat([self.pointdf, empty]), self.polydf, how="left")
        assert df.shape == (24, 8)
        df2 = sjoin(self.pointdf, pd.concat([self.polydf, empty]), how="left")
@@ -542,8 +966,8 @@ class TestSpatialJoinNYBB:
        assert sjoin(empty, self.pointdf, how="inner", predicate=predicate).empty
        assert sjoin(empty, self.pointdf, how="left", predicate=predicate).empty

    def test_empty_sjoin_return_duplicated_columns(self):
        nybb = geopandas.read_file(geopandas.datasets.get_path("nybb"))
    def test_empty_sjoin_return_duplicated_columns(self, nybb_filename):
        nybb = geopandas.read_file(nybb_filename)
        nybb2 = nybb.copy()
        nybb2.geometry = nybb2.translate(200000)  # to get non-overlapping

@@ -553,45 +977,24 @@ class TestSpatialJoinNYBB:
        assert "BoroCode_left" in result.columns


class TestSpatialJoinNaturalEarth:
    def setup_method(self):
        world_path = geopandas.datasets.get_path("naturalearth_lowres")
        cities_path = geopandas.datasets.get_path("naturalearth_cities")
        self.world = read_file(world_path)
        self.cities = read_file(cities_path)

    def test_sjoin_inner(self):
        # GH637
        countries = self.world[["geometry", "name"]]
        countries = countries.rename(columns={"name": "country"})
        cities_with_country = sjoin(
            self.cities, countries, how="inner", predicate="intersects"
        )
        assert cities_with_country.shape == (213, 4)
@pytest.fixture
def world(naturalearth_lowres):
    return read_file(naturalearth_lowres)


@pytest.mark.skipif(
    TEST_NEAREST,
    reason=("This test can only be run _without_ PyGEOS >= 0.10 installed"),
)
def test_no_nearest_all():
    df1 = geopandas.GeoDataFrame({"geometry": []})
    df2 = geopandas.GeoDataFrame({"geometry": []})
    with pytest.raises(
        NotImplementedError,
        match="Currently, only PyGEOS >= 0.10.0 or Shapely >= 2.0 supports",
    ):
        sjoin_nearest(df1, df2)
@pytest.fixture
def cities(naturalearth_cities):
    return read_file(naturalearth_cities)


def test_sjoin_inner(world, cities):
    # GH637
    countries = world[["geometry", "name"]]
    countries = countries.rename(columns={"name": "country"})
    cities_with_country = sjoin(cities, countries, how="inner", predicate="intersects")
    assert cities_with_country.shape == (213, 4)


@pytest.mark.skipif(
    not TEST_NEAREST,
    reason=(
        "PyGEOS >= 0.10.0"
        " must be installed and activated via the geopandas.compat module to"
        " test sjoin_nearest"
    ),
)
class TestNearest:
    @pytest.mark.parametrize(
        "how_kwargs", ({}, {"how": "inner"}, {"how": "left"}, {"how": "right"})
@@ -900,10 +1303,10 @@ class TestNearest:
        assert_geodataframe_equal(expected_gdf, joined)

    @pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS")
    def test_sjoin_nearest_inner(self):
    def test_sjoin_nearest_inner(self, naturalearth_lowres, naturalearth_cities):
        # check equivalency of left and inner join
        countries = read_file(geopandas.datasets.get_path("naturalearth_lowres"))
        cities = read_file(geopandas.datasets.get_path("naturalearth_cities"))
        countries = read_file(naturalearth_lowres)
        cities = read_file(naturalearth_cities)
        countries = countries[["geometry", "name"]].rename(columns={"name": "country"})

        # default: inner and left give the same result
@@ -927,19 +1330,8 @@ class TestNearest:
        result5["index_right"] = result5["index_right"].astype("int64")
        assert_geodataframe_equal(result5, result4, check_like=True)

    expected_index_uncapped = (
        [1, 3, 3, 1, 2] if compat.PANDAS_GE_22 else [1, 1, 3, 3, 2]
    )

    @pytest.mark.skipif(
        not (compat.USE_SHAPELY_20),
        reason=(
            "shapely >= 2.0 is required to run sjoin_nearest "
            "with parameter `exclusive` set"
        ),
    )
    @pytest.mark.parametrize(
        "max_distance,expected", [(None, expected_index_uncapped), (1.1, [3, 3, 1, 2])]
        "max_distance,expected", [(None, [1, 3, 3, 1, 2]), (1.1, [3, 3, 1, 2])]
    )
    def test_sjoin_nearest_exclusive(self, max_distance, expected):
        geoms = shapely.points(np.arange(3), np.arange(3))
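For context, a standalone sketch (not part of this commit) of the `exclusive` flag this truncated test targets: with exclusive=True, a geometry is never matched to itself, so the nearest *other* geometry is returned (requires Shapely >= 2.0):

import numpy as np
import geopandas
import shapely

pts = geopandas.GeoDataFrame(geometry=shapely.points(np.arange(3), np.arange(3)))
# Each point is matched to its nearest neighbour other than itself;
# ties (here, points 0 and 2 relative to point 1) yield multiple rows.
joined = geopandas.sjoin_nearest(pts, pts, exclusive=True)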