258 lines
9.0 KiB
Python
258 lines
9.0 KiB
Python
"""
|
|
geopandas.clip
|
|
==============
|
|
|
|
A module to clip vector data using GeoPandas.
|
|
|
|
"""
|
|
|
|
import warnings
|
|
|
|
import numpy as np
|
|
import pandas.api.types
|
|
|
|
from shapely.geometry import MultiPolygon, Polygon, box
|
|
|
|
from geopandas import GeoDataFrame, GeoSeries
|
|
from geopandas.array import _check_crs, _crs_mismatch_warn
|
|
|
|
|
|
def _mask_is_list_like_rectangle(mask):
|
|
return pandas.api.types.is_list_like(mask) and not isinstance(
|
|
mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)
|
|
)
|
|
|
|
|
|
def _clip_gdf_with_mask(gdf, mask, sort=False):
|
|
"""Clip geometry to the polygon/rectangle extent.
|
|
|
|
Clip an input GeoDataFrame to the polygon extent of the polygon
|
|
parameter.
|
|
|
|
Parameters
|
|
----------
|
|
gdf : GeoDataFrame, GeoSeries
|
|
Dataframe to clip.
|
|
|
|
mask : (Multi)Polygon, list-like
|
|
Reference polygon/rectangle for clipping.
|
|
|
|
sort : boolean, default False
|
|
If True, the results will be sorted in ascending order using the
|
|
geometries' indexes as the primary key.
|
|
|
|
Returns
|
|
-------
|
|
GeoDataFrame
|
|
The returned GeoDataFrame is a clipped subset of gdf
|
|
that intersects with polygon/rectangle.
|
|
"""
|
|
clipping_by_rectangle = _mask_is_list_like_rectangle(mask)
|
|
if clipping_by_rectangle:
|
|
intersection_polygon = box(*mask)
|
|
else:
|
|
intersection_polygon = mask
|
|
|
|
gdf_sub = gdf.iloc[
|
|
gdf.sindex.query(intersection_polygon, predicate="intersects", sort=sort)
|
|
]
|
|
|
|
# For performance reasons points don't need to be intersected with poly
|
|
non_point_mask = gdf_sub.geom_type != "Point"
|
|
|
|
if not non_point_mask.any():
|
|
# only points, directly return
|
|
return gdf_sub
|
|
|
|
# Clip the data with the polygon
|
|
if isinstance(gdf_sub, GeoDataFrame):
|
|
clipped = gdf_sub.copy()
|
|
if clipping_by_rectangle:
|
|
clipped.loc[non_point_mask, clipped._geometry_column_name] = (
|
|
gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask)
|
|
)
|
|
else:
|
|
clipped.loc[non_point_mask, clipped._geometry_column_name] = (
|
|
gdf_sub.geometry.values[non_point_mask].intersection(mask)
|
|
)
|
|
else:
|
|
# GeoSeries
|
|
clipped = gdf_sub.copy()
|
|
if clipping_by_rectangle:
|
|
clipped[non_point_mask] = gdf_sub.values[non_point_mask].clip_by_rect(*mask)
|
|
else:
|
|
clipped[non_point_mask] = gdf_sub.values[non_point_mask].intersection(mask)
|
|
|
|
if clipping_by_rectangle:
|
|
# clip_by_rect might return empty geometry collections in edge cases
|
|
clipped = clipped[~clipped.is_empty]
|
|
return clipped
|
|
|
|
|
|
def clip(gdf, mask, keep_geom_type=False, sort=False):
|
|
"""Clip points, lines, or polygon geometries to the mask extent.
|
|
|
|
Both layers must be in the same Coordinate Reference System (CRS).
|
|
The ``gdf`` will be clipped to the full extent of the clip object.
|
|
|
|
If there are multiple polygons in mask, data from ``gdf`` will be
|
|
clipped to the total boundary of all polygons in mask.
|
|
|
|
If the ``mask`` is list-like with four elements ``(minx, miny, maxx, maxy)``, a
|
|
faster rectangle clipping algorithm will be used. Note that this can lead to
|
|
slightly different results in edge cases, e.g. if a line would be reduced to a
|
|
point, this point might not be returned.
|
|
The geometry is clipped in a fast but possibly dirty way. The output is not
|
|
guaranteed to be valid. No exceptions will be raised for topological errors.
|
|
|
|
Parameters
|
|
----------
|
|
gdf : GeoDataFrame or GeoSeries
|
|
Vector layer (point, line, polygon) to be clipped to mask.
|
|
mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like
|
|
Polygon vector layer used to clip ``gdf``.
|
|
The mask's geometry is dissolved into one geometric feature
|
|
and intersected with ``gdf``.
|
|
If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``,
|
|
``clip`` will use a faster rectangle clipping (:meth:`~GeoSeries.clip_by_rect`),
|
|
possibly leading to slightly different results.
|
|
keep_geom_type : boolean, default False
|
|
If True, return only geometries of original type in case of intersection
|
|
resulting in multiple geometry types or GeometryCollections.
|
|
If False, return all resulting geometries (potentially mixed-types).
|
|
sort : boolean, default False
|
|
If True, the results will be sorted in ascending order using the
|
|
geometries' indexes as the primary key.
|
|
|
|
Returns
|
|
-------
|
|
GeoDataFrame or GeoSeries
|
|
Vector data (points, lines, polygons) from ``gdf`` clipped to
|
|
polygon boundary from mask.
|
|
|
|
See also
|
|
--------
|
|
GeoDataFrame.clip : equivalent GeoDataFrame method
|
|
GeoSeries.clip : equivalent GeoSeries method
|
|
|
|
Examples
|
|
--------
|
|
Clip points (grocery stores) with polygons (the Near West Side community):
|
|
|
|
>>> import geodatasets
|
|
>>> chicago = geopandas.read_file(
|
|
... geodatasets.get_path("geoda.chicago_health")
|
|
... )
|
|
>>> near_west_side = chicago[chicago["community"] == "NEAR WEST SIDE"]
|
|
>>> groceries = geopandas.read_file(
|
|
... geodatasets.get_path("geoda.groceries")
|
|
... ).to_crs(chicago.crs)
|
|
>>> groceries.shape
|
|
(148, 8)
|
|
|
|
>>> nws_groceries = geopandas.clip(groceries, near_west_side)
|
|
>>> nws_groceries.shape
|
|
(7, 8)
|
|
"""
|
|
if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
|
|
raise TypeError(
|
|
"'gdf' should be GeoDataFrame or GeoSeries, got {}".format(type(gdf))
|
|
)
|
|
|
|
mask_is_list_like = _mask_is_list_like_rectangle(mask)
|
|
if (
|
|
not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon))
|
|
and not mask_is_list_like
|
|
):
|
|
raise TypeError(
|
|
"'mask' should be GeoDataFrame, GeoSeries,"
|
|
f"(Multi)Polygon or list-like, got {type(mask)}"
|
|
)
|
|
|
|
if mask_is_list_like and len(mask) != 4:
|
|
raise TypeError(
|
|
"If 'mask' is list-like, it must have four values (minx, miny, maxx, maxy)"
|
|
)
|
|
|
|
if isinstance(mask, (GeoDataFrame, GeoSeries)):
|
|
if not _check_crs(gdf, mask):
|
|
_crs_mismatch_warn(gdf, mask, stacklevel=3)
|
|
|
|
if isinstance(mask, (GeoDataFrame, GeoSeries)):
|
|
box_mask = mask.total_bounds
|
|
elif mask_is_list_like:
|
|
box_mask = mask
|
|
else:
|
|
# Avoid empty tuple returned by .bounds when geometry is empty. A tuple of
|
|
# all nan values is consistent with the behavior of
|
|
# {GeoSeries, GeoDataFrame}.total_bounds for empty geometries.
|
|
# TODO(shapely) can simpely use mask.bounds once relying on Shapely 2.0
|
|
box_mask = mask.bounds if not mask.is_empty else (np.nan,) * 4
|
|
box_gdf = gdf.total_bounds
|
|
if not (
|
|
((box_mask[0] <= box_gdf[2]) and (box_gdf[0] <= box_mask[2]))
|
|
and ((box_mask[1] <= box_gdf[3]) and (box_gdf[1] <= box_mask[3]))
|
|
):
|
|
return gdf.iloc[:0]
|
|
|
|
if isinstance(mask, (GeoDataFrame, GeoSeries)):
|
|
combined_mask = mask.geometry.union_all()
|
|
else:
|
|
combined_mask = mask
|
|
|
|
clipped = _clip_gdf_with_mask(gdf, combined_mask, sort=sort)
|
|
|
|
if keep_geom_type:
|
|
geomcoll_concat = (clipped.geom_type == "GeometryCollection").any()
|
|
geomcoll_orig = (gdf.geom_type == "GeometryCollection").any()
|
|
|
|
new_collection = geomcoll_concat and not geomcoll_orig
|
|
|
|
if geomcoll_orig:
|
|
warnings.warn(
|
|
"keep_geom_type can not be called on a "
|
|
"GeoDataFrame with GeometryCollection.",
|
|
stacklevel=2,
|
|
)
|
|
else:
|
|
polys = ["Polygon", "MultiPolygon"]
|
|
lines = ["LineString", "MultiLineString", "LinearRing"]
|
|
points = ["Point", "MultiPoint"]
|
|
|
|
# Check that the gdf for multiple geom types (points, lines and/or polys)
|
|
orig_types_total = sum(
|
|
[
|
|
gdf.geom_type.isin(polys).any(),
|
|
gdf.geom_type.isin(lines).any(),
|
|
gdf.geom_type.isin(points).any(),
|
|
]
|
|
)
|
|
|
|
# Check how many geometry types are in the clipped GeoDataFrame
|
|
clip_types_total = sum(
|
|
[
|
|
clipped.geom_type.isin(polys).any(),
|
|
clipped.geom_type.isin(lines).any(),
|
|
clipped.geom_type.isin(points).any(),
|
|
]
|
|
)
|
|
|
|
# Check there aren't any new geom types in the clipped GeoDataFrame
|
|
more_types = orig_types_total < clip_types_total
|
|
|
|
if orig_types_total > 1:
|
|
warnings.warn(
|
|
"keep_geom_type can not be called on a mixed type GeoDataFrame.",
|
|
stacklevel=2,
|
|
)
|
|
elif new_collection or more_types:
|
|
orig_type = gdf.geom_type.iloc[0]
|
|
if new_collection:
|
|
clipped = clipped.explode(index_parts=False)
|
|
if orig_type in polys:
|
|
clipped = clipped.loc[clipped.geom_type.isin(polys)]
|
|
elif orig_type in lines:
|
|
clipped = clipped.loc[clipped.geom_type.isin(lines)]
|
|
|
|
return clipped
|