that's too much!

This commit is contained in:
2024-12-19 20:22:56 -08:00
parent 0020a609dd
commit 32cd60e92b
8443 changed files with 1446950 additions and 42 deletions

View File

@@ -0,0 +1,9 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
]
}

View File

@@ -0,0 +1,38 @@
import subprocess
import sys
def test_no_additional_imports():
    """Importing geopandas in a fresh interpreter must not pull in any of
    the blacklisted optional/development dependencies."""
    # test that 'import geopandas' does not import any of the optional or
    # development dependencies
    blacklist = {
        "pytest",
        "py",
        "ipython",
        # fiona actually gets imported if installed (but error suppressed until used)
        # "fiona",
        # "matplotlib", # matplotlib gets imported by pandas, see below
        "mapclassify",
        # 'rtree', # rtree actually gets imported if installed
        "sqlalchemy",
        "psycopg2",
        "geopy",
        "geoalchemy2",
        "matplotlib",
    }
    # run the check in a subprocess so this test module's own imports
    # (pytest etc.) cannot pollute sys.modules
    code = """
import sys
import geopandas

blacklist = {0!r}
mods = blacklist & set(m.split('.')[0] for m in sys.modules)
if mods:
    sys.stderr.write('err: geopandas should not import: {{}}'.format(', '.join(mods)))
sys.exit(len(mods))
""".format(
        blacklist
    )
    call = [sys.executable, "-c", code]
    # non-zero exit code == number of blacklisted modules that leaked in
    returncode = subprocess.run(call).returncode
    assert returncode == 0

View File

@@ -0,0 +1,986 @@
import random
import numpy as np
import pandas as pd
from pyproj import CRS
import shapely
import shapely.affinity
import shapely.geometry
from shapely.geometry.base import CAP_STYLE, JOIN_STYLE, BaseGeometry
import shapely.wkb
import shapely.wkt
try:
from shapely import geos_version
except ImportError:
from shapely._buildcfg import geos_version
import geopandas
from geopandas.array import (
GeometryArray,
from_shapely,
from_wkb,
from_wkt,
points_from_xy,
to_wkb,
to_wkt,
_check_crs,
_crs_mismatch_warn,
)
import geopandas._compat as compat
import pytest
# Module-level fixtures shared by the tests below:
#   T -- GeometryArray of 10 random triangles + one empty polygon + one None
#   P -- GeometryArray of 20 random points + one None
triangle_no_missing = [
    shapely.geometry.Polygon([(random.random(), random.random()) for i in range(3)])
    for _ in range(10)
]
triangles = triangle_no_missing + [shapely.wkt.loads("POLYGON EMPTY"), None]
T = from_shapely(triangles)
points_no_missing = [
    shapely.geometry.Point(random.random(), random.random()) for _ in range(20)
]
points = points_no_missing + [None]
P = from_shapely(points)
def equal_geometries(result, expected):
    """Return True when paired elements match: both None, or geometrically
    equal per shapely's ``equals``."""
    for got, want in zip(result, expected):
        if got is None or want is None:
            # only a None/None pair counts as equal
            if not (got is None and want is None):
                return False
            continue
        if not got.equals(want):
            return False
    return True
def test_points():
    """points_from_xy yields a GeometryArray of Points mirroring x/y."""
    xs = np.arange(10).astype(np.float64)
    ys = np.arange(10).astype(np.float64) ** 2
    arr = points_from_xy(xs, ys)
    assert isinstance(arr, GeometryArray)
    for idx in range(10):
        pt = arr[idx]
        assert isinstance(pt, shapely.geometry.Point)
        assert pt.x == xs[idx]
        assert pt.y == ys[idx]
def test_points_from_xy():
    """Top-level ``geopandas.points_from_xy``: builds 2D/3D point arrays
    from DataFrame columns, Series, numpy arrays and lists; mismatched
    lengths raise ValueError and missing coordinates raise TypeError."""
    # using DataFrame column
    df = pd.DataFrame([{"x": x, "y": x, "z": x} for x in range(10)])
    gs = [shapely.geometry.Point(x, x) for x in range(10)]
    gsz = [shapely.geometry.Point(x, x, x) for x in range(10)]
    geometry1 = geopandas.points_from_xy(df["x"], df["y"])
    geometry2 = geopandas.points_from_xy(df["x"], df["y"], df["z"])
    assert isinstance(geometry1, GeometryArray)
    assert isinstance(geometry2, GeometryArray)
    assert list(geometry1) == gs
    assert list(geometry2) == gsz
    # using Series or numpy arrays or lists
    for s in [pd.Series(range(10)), np.arange(10), list(range(10))]:
        geometry1 = geopandas.points_from_xy(s, s)
        geometry2 = geopandas.points_from_xy(s, s, s)
        assert isinstance(geometry1, GeometryArray)
        assert isinstance(geometry2, GeometryArray)
        assert list(geometry1) == gs
        assert list(geometry2) == gsz
    # using different lengths should throw error.
    # NOTE: each call gets its own `with` block -- inside a single
    # pytest.raises block any statement after the first raising call
    # would never execute (dead code in the original).
    arr_10 = np.arange(10)
    arr_20 = np.arange(20)
    with pytest.raises(ValueError):
        geopandas.points_from_xy(x=arr_10, y=arr_20)
    with pytest.raises(ValueError):
        geopandas.points_from_xy(x=arr_10, y=arr_10, z=arr_20)
    # Using incomplete arguments should throw error
    with pytest.raises(TypeError):
        geopandas.points_from_xy(x=s)
    with pytest.raises(TypeError):
        geopandas.points_from_xy(y=s)
    with pytest.raises(TypeError):
        geopandas.points_from_xy(z=s)
def test_from_shapely():
    """The module-level array T round-trips the triangle fixtures."""
    arr = T
    assert isinstance(arr, GeometryArray)
    assert equal_geometries(arr, triangles)
def test_from_shapely_geo_interface():
    """Objects exposing ``__geo_interface__`` are accepted by from_shapely."""

    class DuckPoint:
        # minimal non-shapely object implementing the geo interface protocol
        def __init__(self, x, y):
            self.x = x
            self.y = y

        @property
        def __geo_interface__(self):
            return {"type": "Point", "coordinates": (self.x, self.y)}

    result = from_shapely([DuckPoint(1.0, 2.0), DuckPoint(3.0, 4.0)])
    expected = from_shapely(
        [shapely.geometry.Point(1.0, 2.0), shapely.geometry.Point(3.0, 4.0)]
    )
    assert all(a.equals(b) for a, b in zip(result, expected))
def test_from_wkb():
    """from_wkb parses lists/arrays of WKB bytes; missing values map to
    None; multi-part geometries survive the round trip."""
    # list
    L_wkb = [p.wkb for p in points_no_missing]
    res = from_wkb(L_wkb)
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))
    # array
    res = from_wkb(np.array(L_wkb, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(v.equals(t) for v, t in zip(res, points_no_missing))
    # missing values
    # TODO(pygeos) does not support empty strings, np.nan, or pd.NA
    missing_values = [None]
    if not (compat.USE_SHAPELY_20 or compat.USE_PYGEOS):
        # older stacks also accept these sentinels as "missing"
        missing_values.extend([b"", np.nan])
        missing_values.append(pd.NA)
    res = from_wkb(missing_values)
    np.testing.assert_array_equal(res, np.full(len(missing_values), None))
    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    res = from_wkb([multi_poly.wkb])
    assert res[0] == multi_poly
def test_from_wkb_hex():
    """from_wkb also accepts hex-encoded WKB strings (list or array)."""
    geometry_hex = ["0101000000CDCCCCCCCCCC1440CDCCCCCCCC0C4A40"]
    for data in (geometry_hex, np.array(geometry_hex, dtype=object)):
        res = from_wkb(data)
        assert isinstance(res, GeometryArray)
def test_to_wkb():
    """to_wkb mirrors shapely's .wkb / .wkb_hex; missing values map to None."""
    P = from_shapely(points_no_missing)
    res = to_wkb(P)
    exp = np.array([p.wkb for p in points_no_missing], dtype=object)
    assert isinstance(res, np.ndarray)
    np.testing.assert_array_equal(res, exp)
    # hex=True returns the hex-encoded variant
    res = to_wkb(P, hex=True)
    exp = np.array([p.wkb_hex for p in points_no_missing], dtype=object)
    assert isinstance(res, np.ndarray)
    np.testing.assert_array_equal(res, exp)
    # missing values
    a = from_shapely([None, points_no_missing[0]])
    res = to_wkb(a)
    assert res[0] is None
@pytest.mark.parametrize("string_type", ["str", "bytes"])
def test_from_wkt(string_type):
    """from_wkt parses WKT given as str or bytes; missing values map to
    None; multi-part geometries survive the round trip."""
    if string_type == "str":
        f = str
    else:

        def f(x):
            return bytes(x, "utf8")

    # list
    L_wkt = [f(p.wkt) for p in points_no_missing]
    res = from_wkt(L_wkt)
    assert isinstance(res, GeometryArray)
    # WKT text has limited precision, so compare with a tolerance
    tol = 0.5 * 10 ** (-6)
    assert all(v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing))
    # array
    res = from_wkt(np.array(L_wkt, dtype=object))
    assert isinstance(res, GeometryArray)
    assert all(v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing))
    # missing values
    # TODO(pygeos) does not support empty strings, np.nan, or pd.NA
    missing_values = [None]
    if not (compat.USE_SHAPELY_20 or compat.USE_PYGEOS):
        missing_values.extend([f(""), np.nan])
        missing_values.append(pd.NA)
    # fixed: this is the WKT test, so exercise from_wkt (was from_wkb)
    res = from_wkt(missing_values)
    np.testing.assert_array_equal(res, np.full(len(missing_values), None))
    # single MultiPolygon
    multi_poly = shapely.geometry.MultiPolygon(
        [shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
    )
    res = from_wkt([f(multi_poly.wkt)])
    assert res[0] == multi_poly
def test_to_wkt():
    """to_wkt mirrors shapely's .wkt; missing values map to None."""
    arr = from_shapely(points_no_missing)
    res = to_wkt(arr, rounding_precision=-1)
    exp = np.array([p.wkt for p in points_no_missing], dtype=object)
    assert isinstance(res, np.ndarray)
    np.testing.assert_array_equal(res, exp)
    # missing values
    with_na = from_shapely([None, points_no_missing[0]])
    assert to_wkt(with_na)[0] is None
def test_data():
arr = from_shapely(points_no_missing)
with pytest.warns(DeprecationWarning):
np_arr = arr.data
assert isinstance(np_arr, np.ndarray)
if compat.USE_PYGEOS:
np_arr2 = arr.to_numpy()
assert isinstance(np_arr2[0], BaseGeometry)
np_arr3 = np.asarray(arr)
assert isinstance(np_arr3[0], BaseGeometry)
else:
assert arr.to_numpy() is np_arr
assert np.asarray(arr) is np_arr
def test_as_array():
arr = from_shapely(points_no_missing)
np_arr1 = np.asarray(arr)
np_arr2 = arr.to_numpy()
assert np_arr1[0] == arr[0]
np.testing.assert_array_equal(np_arr1, np_arr2)
@pytest.mark.parametrize(
"attr,args",
[
("contains", ()),
("covers", ()),
("crosses", ()),
("disjoint", ()),
("geom_equals", ()),
("intersects", ()),
("overlaps", ()),
("touches", ()),
("within", ()),
("geom_equals_exact", (0.1,)),
("geom_almost_equals", (3,)),
],
)
def test_predicates_vector_scalar(attr, args):
na_value = False
point = points[0]
tri = triangles[0]
for other in [point, tri, shapely.geometry.Polygon()]:
result = getattr(T, attr)(other, *args)
assert isinstance(result, np.ndarray)
assert result.dtype == bool
expected = [
getattr(tri, attr if "geom" not in attr else attr[5:])(other, *args)
if tri is not None
else na_value
for tri in triangles
]
assert result.tolist() == expected
# TODO other is missing
@pytest.mark.parametrize(
"attr,args",
[
("contains", ()),
("covers", ()),
("crosses", ()),
("disjoint", ()),
("geom_equals", ()),
("intersects", ()),
("overlaps", ()),
("touches", ()),
("within", ()),
("geom_equals_exact", (0.1,)),
("geom_almost_equals", (3,)),
],
)
def test_predicates_vector_vector(attr, args):
na_value = False
empty_value = True if attr == "disjoint" else False
A = (
[shapely.geometry.Polygon(), None]
+ [
shapely.geometry.Polygon(
[(random.random(), random.random()) for i in range(3)]
)
for _ in range(100)
]
+ [None]
)
B = [
shapely.geometry.Polygon([(random.random(), random.random()) for i in range(3)])
for _ in range(100)
] + [shapely.geometry.Polygon(), None, None]
vec_A = from_shapely(A)
vec_B = from_shapely(B)
result = getattr(vec_A, attr)(vec_B, *args)
assert isinstance(result, np.ndarray)
assert result.dtype == bool
expected = []
for a, b in zip(A, B):
if a is None or b is None:
expected.append(na_value)
elif a.is_empty or b.is_empty:
expected.append(empty_value)
else:
expected.append(
getattr(a, attr if "geom" not in attr else attr[5:])(b, *args)
)
assert result.tolist() == expected
@pytest.mark.parametrize(
"attr",
[
"boundary",
"centroid",
"convex_hull",
"envelope",
"exterior",
# 'interiors',
],
)
def test_unary_geo(attr):
na_value = None
result = getattr(T, attr)
expected = [getattr(t, attr) if t is not None else na_value for t in triangles]
assert equal_geometries(result, expected)
@pytest.mark.parametrize("attr", ["representative_point"])
def test_unary_geo_callable(attr):
na_value = None
result = getattr(T, attr)()
expected = [getattr(t, attr)() if t is not None else na_value for t in triangles]
assert equal_geometries(result, expected)
@pytest.mark.parametrize(
"attr", ["difference", "symmetric_difference", "union", "intersection"]
)
def test_binary_geo_vector(attr):
na_value = None
quads = [shapely.geometry.Polygon(), None]
while len(quads) < 12:
geom = shapely.geometry.Polygon(
[(random.random(), random.random()) for i in range(4)]
)
if geom.is_valid:
quads.append(geom)
Q = from_shapely(quads)
result = getattr(T, attr)(Q)
expected = [
getattr(t, attr)(q) if t is not None and q is not None else na_value
for t, q in zip(triangles, quads)
]
assert equal_geometries(result, expected)
@pytest.mark.parametrize(
"attr", ["difference", "symmetric_difference", "union", "intersection"]
)
def test_binary_geo_scalar(attr):
na_value = None
quads = []
while len(quads) < 1:
geom = shapely.geometry.Polygon(
[(random.random(), random.random()) for i in range(4)]
)
if geom.is_valid:
quads.append(geom)
q = quads[0]
for other in [q, shapely.geometry.Polygon()]:
result = getattr(T, attr)(other)
expected = [
getattr(t, attr)(other) if t is not None else na_value for t in triangles
]
assert equal_geometries(result, expected)
@pytest.mark.parametrize(
    "attr",
    [
        "is_closed",
        "is_valid",
        "is_empty",
        "is_simple",
        "has_z",
        # for is_ring we raise a warning about the value for Polygon changing
        pytest.param(
            "is_ring",
            marks=[
                pytest.mark.filterwarnings("ignore:is_ring:FutureWarning"),
            ],
        ),
    ],
)
def test_unary_predicates(attr):
    """Boolean unary predicates match shapely element-wise; missing
    geometries yield False.  Several GEOS / shapely / PyGEOS version
    quirks are special-cased below."""
    na_value = False
    if attr == "is_simple" and geos_version < (3, 8) and not compat.USE_PYGEOS:
        # poly.is_simple raises an error for empty polygon for GEOS < 3.8
        with pytest.raises(Exception):  # noqa: B017
            T.is_simple
        # fall back to the fixture without the empty/missing entries
        vals = triangle_no_missing
        V = from_shapely(vals)
    else:
        vals = triangles
        V = T
    result = getattr(V, attr)
    if attr == "is_simple" and geos_version < (3, 8):
        # poly.is_simple raises an error for empty polygon for GEOS < 3.8
        # with shapely, pygeos always returns False for all GEOS versions
        expected = [
            getattr(t, attr) if t is not None and not t.is_empty else na_value
            for t in vals
        ]
    elif attr == "is_ring":
        # is_ring is evaluated on the polygon's exterior ring here
        expected = [
            getattr(t.exterior, attr)
            if t is not None and t.exterior is not None
            else na_value
            for t in vals
        ]
        # empty Linearring.is_ring gives False with Shapely < 2.0
        if compat.USE_PYGEOS and not compat.SHAPELY_GE_20:
            expected[-2] = True
    elif (
        attr == "is_closed"
        and compat.USE_PYGEOS
        and compat.SHAPELY_GE_182
        and not compat.SHAPELY_GE_20
    ):
        # In shapely 1.8.2, is_closed was changed to return always True for
        # Polygon/MultiPolygon, while PyGEOS returns always False
        expected = [False] * len(vals)
    else:
        expected = [getattr(t, attr) if t is not None else na_value for t in vals]
    assert result.tolist() == expected
# for is_ring we raise a warning about the value for Polygon changing
@pytest.mark.filterwarnings("ignore:is_ring:FutureWarning")
def test_is_ring():
g = [
shapely.geometry.LinearRing([(0, 0), (1, 1), (1, -1)]),
shapely.geometry.LineString([(0, 0), (1, 1), (1, -1)]),
shapely.geometry.LineString([(0, 0), (1, 1), (1, -1), (0, 0)]),
shapely.geometry.Polygon([(0, 0), (1, 1), (1, -1)]),
shapely.wkt.loads("POLYGON EMPTY"),
None,
]
expected = [True, False, True, True, True, False]
if not compat.USE_PYGEOS and not compat.SHAPELY_GE_20:
# empty polygon is_ring gives False with Shapely < 2.0
expected[-2] = False
result = from_shapely(g).is_ring
assert result.tolist() == expected
@pytest.mark.parametrize("attr", ["area", "length"])
def test_unary_float(attr):
na_value = np.nan
result = getattr(T, attr)
assert isinstance(result, np.ndarray)
assert result.dtype == np.dtype("float64")
expected = [getattr(t, attr) if t is not None else na_value for t in triangles]
np.testing.assert_allclose(result, expected)
def test_geom_types():
    """geom_type: 'Polygon' for every triangle entry, None for missing."""
    kinds = T.geom_type
    # NOTE(review): the original comment claimed the empty polygon has
    # GeometryCollection type, yet the assertion expects 'Polygon' for it
    # (only the trailing None entry differs) -- confirm which is intended
    assert list(kinds) == ["Polygon"] * (len(T) - 1) + [None]
def test_geom_types_null_mixed():
geoms = [
shapely.geometry.Polygon([(0, 0), (0, 1), (1, 1)]),
None,
shapely.geometry.Point(0, 1),
]
G = from_shapely(geoms)
cat = G.geom_type
assert list(cat) == ["Polygon", None, "Point"]
def test_binary_distance():
attr = "distance"
na_value = np.nan
# also use nan for empty
# vector - vector
result = P[: len(T)].distance(T[::-1])
expected = [
getattr(p, attr)(t)
if not ((t is None or t.is_empty) or (p is None or p.is_empty))
else na_value
for t, p in zip(triangles[::-1], points)
]
np.testing.assert_allclose(result, expected)
# vector - scalar
p = points[0]
result = T.distance(p)
expected = [
getattr(t, attr)(p) if not (t is None or t.is_empty) else na_value
for t in triangles
]
np.testing.assert_allclose(result, expected)
# other is empty
result = T.distance(shapely.geometry.Polygon())
expected = [na_value] * len(T)
np.testing.assert_allclose(result, expected)
# TODO other is None
def test_binary_relate():
attr = "relate"
na_value = None
# vector - vector
result = getattr(P[: len(T)], attr)(T[::-1])
expected = [
getattr(p, attr)(t) if t is not None and p is not None else na_value
for t, p in zip(triangles[::-1], points)
]
assert list(result) == expected
# vector - scalar
p = points[0]
result = getattr(T, attr)(p)
expected = [getattr(t, attr)(p) if t is not None else na_value for t in triangles]
assert list(result) == expected
@pytest.mark.parametrize("normalized", [True, False])
def test_binary_project(normalized):
na_value = np.nan
lines = (
[None]
+ [
shapely.geometry.LineString(
[(random.random(), random.random()) for _ in range(2)]
)
for _ in range(len(P) - 2)
]
+ [None]
)
L = from_shapely(lines)
result = L.project(P, normalized=normalized)
expected = [
line.project(p, normalized=normalized)
if line is not None and p is not None
else na_value
for p, line in zip(points, lines)
]
np.testing.assert_allclose(result, expected)
@pytest.mark.parametrize("cap_style", [CAP_STYLE.round, CAP_STYLE.square])
@pytest.mark.parametrize("join_style", [JOIN_STYLE.round, JOIN_STYLE.bevel])
@pytest.mark.parametrize("resolution", [16, 25])
def test_buffer(resolution, cap_style, join_style):
if compat.USE_PYGEOS:
# TODO(pygeos) need to further investigate why this test fails
if cap_style == 1 and join_style == 3:
pytest.skip("failing TODO")
na_value = None
expected = [
p.buffer(0.1, resolution=resolution, cap_style=cap_style, join_style=join_style)
if p is not None
else na_value
for p in points
]
result = P.buffer(
0.1, resolution=resolution, cap_style=cap_style, join_style=join_style
)
assert equal_geometries(expected, result)
dist = np.array([0.1] * len(P))
result = P.buffer(
dist, resolution=resolution, cap_style=cap_style, join_style=join_style
)
assert equal_geometries(expected, result)
def test_simplify():
    """simplify(tolerance) matches shapely's per-geometry simplify."""
    # buffered triangles give curved outlines that simplify non-trivially
    buffered = [
        shapely.geometry.Polygon(
            [(random.random(), random.random()) for _ in range(3)]
        ).buffer(10)
        for _ in range(10)
    ]
    arr = from_shapely(buffered)
    res = arr.simplify(1)
    exp = [g.simplify(1) for g in buffered]
    assert all(a.equals(b) for a, b in zip(exp, res))
def test_unary_union():
    """unary_union dissolves the whole array into a single geometry."""
    # two triangles that together tile the unit square
    halves = [
        shapely.geometry.Polygon([(0, 0), (0, 1), (1, 1)]),
        shapely.geometry.Polygon([(0, 0), (1, 0), (1, 1)]),
    ]
    merged = from_shapely(halves).unary_union()
    square = shapely.geometry.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    assert merged.equals(square)
@pytest.mark.parametrize(
"attr, arg",
[
("affine_transform", ([0, 1, 1, 0, 0, 0],)),
("translate", ()),
("rotate", (10,)),
("scale", ()),
("skew", ()),
],
)
def test_affinity_methods(attr, arg):
result = getattr(T, attr)(*arg)
expected = [
getattr(shapely.affinity, attr)(t, *arg) if not (t is None or t.is_empty) else t
for t in triangles
]
assert equal_geometries(result, expected)
# def test_coords():
# L = T.exterior.coords
# assert L == [tuple(t.exterior.coords) for t in triangles]
def test_coords_x_y():
na_value = np.nan
result = P.x
expected = [p.x if p is not None else na_value for p in points]
np.testing.assert_allclose(result, expected)
result = P.y
expected = [p.y if p is not None else na_value for p in points]
np.testing.assert_allclose(result, expected)
def test_bounds():
result = T.bounds
expected = [
t.bounds if not (t is None or t.is_empty) else [np.nan] * 4 for t in triangles
]
np.testing.assert_allclose(result, expected)
# additional check for one empty / missing
for geom in [None, shapely.geometry.Polygon()]:
E = from_shapely([geom])
result = E.bounds
assert result.ndim == 2
assert result.dtype == "float64"
np.testing.assert_allclose(result, np.array([[np.nan] * 4]))
# empty array (https://github.com/geopandas/geopandas/issues/1195)
E = from_shapely([])
result = E.bounds
assert result.shape == (0, 4)
assert result.dtype == "float64"
def test_total_bounds():
result = T.total_bounds
bounds = np.array(
[t.bounds if not (t is None or t.is_empty) else [np.nan] * 4 for t in triangles]
)
expected = np.array(
[
np.nanmin(bounds[:, 0]), # minx
np.nanmin(bounds[:, 1]), # miny
np.nanmax(bounds[:, 2]), # maxx
np.nanmax(bounds[:, 3]), # maxy
]
)
np.testing.assert_allclose(result, expected)
# additional check for empty array or one empty / missing
for geoms in [[], [None], [shapely.geometry.Polygon()]]:
E = from_shapely(geoms)
result = E.total_bounds
assert result.ndim == 1
assert result.dtype == "float64"
np.testing.assert_allclose(result, np.array([np.nan] * 4))
def test_getitem():
    """Indexing semantics: boolean mask, fancy list and slice return a
    GeometryArray; a scalar index returns the shapely geometry itself."""
    pts = [shapely.geometry.Point(i, i) for i in range(10)]
    arr = from_shapely(pts)
    masked = arr[arr.area > 0.3]
    assert isinstance(masked, GeometryArray)
    fancy = arr[[1, 3, 5]]
    assert len(fancy) == 3
    assert isinstance(fancy, GeometryArray)
    assert [p.x for p in fancy] == [1, 3, 5]
    sliced = arr[1::2]
    assert len(sliced) == 5
    # fixed: the original re-asserted the fancy-indexed result here;
    # the slice result is what needs checking
    assert isinstance(sliced, GeometryArray)
    assert [p.x for p in sliced] == [1, 3, 5, 7, 9]
    scalar = arr[1]
    assert isinstance(scalar, shapely.geometry.Point)
    assert scalar.equals(pts[1])
@pytest.mark.parametrize(
"item",
[
geopandas.GeoDataFrame(
geometry=[shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])]
),
geopandas.GeoSeries(
[shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])]
),
np.array([shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])]),
[shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])],
shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
],
)
def test_setitem(item):
points = [shapely.geometry.Point(i, i) for i in range(10)]
P = from_shapely(points)
P[[0]] = item
assert isinstance(P[0], shapely.geometry.Polygon)
def test_equality_ops():
with pytest.raises(ValueError):
P[:5] == P[:7]
a1 = from_shapely([points[1], points[2], points[3]])
a2 = from_shapely([points[1], points[0], points[3]])
res = a1 == a2
assert res.tolist() == [True, False, True]
res = a1 != a2
assert res.tolist() == [False, True, False]
# check the correct expansion of list-like geometry
multi_poly = shapely.geometry.MultiPolygon(
[shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
)
a3 = from_shapely([points[1], points[2], points[3], multi_poly])
res = a3 == multi_poly
assert res.tolist() == [False, False, False, True]
def test_dir():
assert "contains" in dir(P)
assert "data" in dir(P)
def test_chaining():
# contains will give False for empty / missing
T = from_shapely(triangle_no_missing)
assert T.contains(T.centroid).all()
def test_pickle():
    """GeometryArray survives a pickle round trip, including the empty
    polygon and missing entries at the tail of T."""
    import pickle

    restored = pickle.loads(pickle.dumps(T))
    assert restored[-1] is None
    assert restored[-2].is_empty
    assert T[:-2].geom_equals(restored[:-2]).all()
def test_raise_on_bad_sizes():
with pytest.raises(ValueError) as info:
T.contains(P)
assert "lengths" in str(info.value).lower()
assert "12" in str(info.value)
assert "21" in str(info.value)
def test_buffer_single_multipolygon():
# https://github.com/geopandas/geopandas/issues/1130
multi_poly = shapely.geometry.MultiPolygon(
[shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
)
arr = from_shapely([multi_poly])
result = arr.buffer(1)
expected = [multi_poly.buffer(1)]
equal_geometries(result, expected)
result = arr.buffer(np.array([1]))
equal_geometries(result, expected)
def test_astype_multipolygon():
# https://github.com/geopandas/geopandas/issues/1145
multi_poly = shapely.geometry.MultiPolygon(
[shapely.geometry.box(0, 0, 1, 1), shapely.geometry.box(3, 3, 4, 4)]
)
arr = from_shapely([multi_poly])
result = arr.astype(str)
assert isinstance(result[0], str)
assert result[0] == multi_poly.wkt
# astype(object) does not convert to string
result = arr.astype(object)
assert isinstance(result[0], shapely.geometry.base.BaseGeometry)
# astype(np_dtype) honors the dtype
result = arr.astype(np.dtype("U10"))
assert result.dtype == np.dtype("U10")
assert result[0] == multi_poly.wkt[:10]
def test_check_crs():
    """_check_crs: equal CRS -> True; mismatch -> False, unless one side
    has no CRS and allow_none is set."""
    with_crs = T.copy()
    with_crs.crs = 4326
    assert _check_crs(with_crs, T) is False
    assert _check_crs(with_crs, with_crs) is True
    assert _check_crs(with_crs, T, allow_none=True) is True
def test_crs_mismatch_warn():
t1 = T.copy()
t2 = T.copy()
t1.crs = 4326
t2.crs = 3857
# two different CRS
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
_crs_mismatch_warn(t1, t2)
# left None
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
_crs_mismatch_warn(T, t2)
# right None
with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
_crs_mismatch_warn(t1, T)
@pytest.mark.parametrize("NA", [None, np.nan])
def test_isna(NA):
t1 = T.copy()
t1[0] = NA
assert t1[0] is None
def test_isna_pdNA():
t1 = T.copy()
t1[0] = pd.NA
assert t1[0] is None
def test_shift_has_crs():
t = T.copy()
t.crs = 4326
assert t.shift(1).crs == t.crs
assert t.shift(0).crs == t.crs
assert t.shift(-1).crs == t.crs
def test_unique_has_crs():
t = T.copy()
t.crs = 4326
assert t.unique().crs == t.crs
class TestEstimateUtmCrs:
    """estimate_utm_crs(): selecting the best-matching UTM CRS."""

    def setup_method(self):
        # two Manhattan-area landmarks in lon/lat (WGS84)
        self.esb = shapely.geometry.Point(-73.9847, 40.7484)
        self.sol = shapely.geometry.Point(-74.0446, 40.6893)
        self.landmarks = from_shapely([self.esb, self.sol], crs="epsg:4326")

    def test_estimate_utm_crs__geographic(self):
        # EPSG:32618 is WGS84 / UTM zone 18N; the NAD83 datum name
        # selects the corresponding EPSG:26918
        assert self.landmarks.estimate_utm_crs() == CRS("EPSG:32618")
        assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918")

    def test_estimate_utm_crs__projected(self):
        # the result must not depend on the input CRS (here web mercator)
        assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == CRS(
            "EPSG:32618"
        )

    def test_estimate_utm_crs__antimeridian(self):
        # points near the antimeridian still resolve to a single UTM zone
        antimeridian = from_shapely(
            [
                shapely.geometry.Point(1722483.900174921, 5228058.6143420935),
                shapely.geometry.Point(4624385.494808555, 8692574.544944234),
            ],
            crs="EPSG:3851",
        )
        assert antimeridian.estimate_utm_crs() == CRS("EPSG:32760")

    def test_estimate_utm_crs__out_of_bounds(self):
        # polar geometry outside UTM coverage raises
        with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
            from_shapely(
                [shapely.geometry.Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326"
            ).estimate_utm_crs()

    def test_estimate_utm_crs__missing_crs(self):
        # a source CRS is required
        with pytest.raises(RuntimeError, match="crs must be set"):
            from_shapely(
                [shapely.geometry.Polygon([(0, 90), (1, 90), (2, 90)])]
            ).estimate_utm_crs()

View File

@@ -0,0 +1,30 @@
import pytest
from geopandas._compat import import_optional_dependency
def test_import_optional_dependency_present():
    """A present dependency is returned as the module object itself."""
    # pandas is not optional, but we know it is present
    mod = import_optional_dependency("pandas")
    assert mod is not None
    import pandas as pd

    # must be the very module a normal import yields
    assert mod == pd
def test_import_optional_dependency_absent():
    """Missing dependency raises ImportError; `extra` customizes message."""
    cases = [
        ({}, "Missing optional dependency 'foo'"),
        ({"extra": "foo is required"}, "foo is required"),
    ]
    for kwargs, pattern in cases:
        with pytest.raises(ImportError, match=pattern):
            import_optional_dependency("foo", **kwargs)
@pytest.mark.parametrize(
    "bad_import", [["foo"], 0, False, True, {}, {"foo"}, {"foo": "bar"}]
)
def test_import_optional_dependency_invalid(bad_import):
    """Non-string module names are rejected with ValueError."""
    with pytest.raises(ValueError, match="Invalid module name"):
        import_optional_dependency(bad_import)

View File

@@ -0,0 +1,47 @@
import geopandas
import pytest
def test_options():
    """Options object: repr content, attribute surface, unknown names raise."""
    assert "display_precision: " in repr(geopandas.options)
    expected_opts = {"display_precision", "use_pygeos", "io_engine"}
    assert set(dir(geopandas.options)) == expected_opts
    # both reading and writing an unknown option is an AttributeError
    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option
    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option = 10
def test_options_display_precision():
    """display_precision accepts non-negative ints or None; rejects others."""
    assert geopandas.options.display_precision is None
    geopandas.options.display_precision = 5
    assert geopandas.options.display_precision == 5
    for bad in ("abc", -1):
        with pytest.raises(ValueError):
            geopandas.options.display_precision = bad
    # restore the default so later tests see a clean state
    geopandas.options.display_precision = None
def test_options_io_engine():
    """io_engine accepts a known engine name or None; rejects others."""
    assert geopandas.options.io_engine is None
    geopandas.options.io_engine = "pyogrio"
    assert geopandas.options.io_engine == "pyogrio"
    for bad in ("abc", -1):
        with pytest.raises(ValueError):
            geopandas.options.io_engine = bad
    # restore the default so later tests see a clean state
    geopandas.options.io_engine = None

View File

@@ -0,0 +1,733 @@
import random
import numpy as np
import pandas as pd
import pyproj
import pytest
from shapely.geometry import Point, Polygon, LineString
from geopandas import GeoSeries, GeoDataFrame, points_from_xy, datasets, read_file
from geopandas.array import from_shapely, from_wkb, from_wkt, GeometryArray
from geopandas.testing import assert_geodataframe_equal
def _create_df(x, y=None, crs=None):
    """Build a GeoDataFrame of points at (x, y) with two derived columns.

    Parameters
    ----------
    x : sequence of x coordinates
    y : sequence of y coordinates, defaults to ``x`` when None
    crs : optional CRS forwarded to GeoDataFrame
    """
    # fixed: `y = y or x` raises for numpy arrays (ambiguous truth value)
    # and silently replaces an empty sequence; test identity against None
    y = x if y is None else y
    x = np.asarray(x)
    y = np.asarray(y)
    return GeoDataFrame(
        {"geometry": points_from_xy(x, y), "value1": x + y, "value2": x * y}, crs=crs
    )
def df_epsg26918():
    """Return a 10-point test frame in EPSG:26918 (NAD83 / UTM zone 18N)."""
    # EPSG:26918
    # Center coordinates
    # -1683723.64 6689139.23
    return _create_df(
        x=range(-1683723, -1683723 + 10, 1),
        y=range(6689139, 6689139 + 10, 1),
        crs="epsg:26918",
    )
def test_to_crs_transform():
df = df_epsg26918()
lonlat = df.to_crs(epsg=4326)
utm = lonlat.to_crs(epsg=26918)
assert_geodataframe_equal(df, utm, check_less_precise=True)
def test_to_crs_transform__missing_data():
# https://github.com/geopandas/geopandas/issues/1573
df = df_epsg26918()
df.loc[3, "geometry"] = None
lonlat = df.to_crs(epsg=4326)
utm = lonlat.to_crs(epsg=26918)
assert_geodataframe_equal(df, utm, check_less_precise=True)
def test_to_crs_transform__empty_data():
df = df_epsg26918().iloc[:0]
lonlat = df.to_crs(epsg=4326)
utm = lonlat.to_crs(epsg=26918)
assert_geodataframe_equal(df, utm, check_less_precise=True)
def test_to_crs_inplace():
df = df_epsg26918()
lonlat = df.to_crs(epsg=4326)
df.to_crs(epsg=4326, inplace=True)
assert_geodataframe_equal(df, lonlat, check_less_precise=True)
def test_to_crs_geo_column_name():
# Test to_crs() with different geometry column name (GH#339)
df = df_epsg26918()
df = df.rename(columns={"geometry": "geom"})
df.set_geometry("geom", inplace=True)
lonlat = df.to_crs(epsg=4326)
utm = lonlat.to_crs(epsg=26918)
assert lonlat.geometry.name == "geom"
assert utm.geometry.name == "geom"
assert_geodataframe_equal(df, utm, check_less_precise=True)
def test_to_crs_dimension_z():
# preserve z dimension
arr = points_from_xy([1, 2], [2, 3], [3, 4], crs=4326)
assert arr.has_z.all()
result = arr.to_crs(epsg=3857)
assert result.has_z.all()
def test_to_crs_dimension_mixed():
s = GeoSeries([Point(1, 2), LineString([(1, 2, 3), (4, 5, 6)])], crs=2056)
result = s.to_crs(epsg=4326)
assert not result[0].is_empty
assert result.has_z.tolist() == [False, True]
roundtrip = result.to_crs(epsg=2056)
# TODO replace with assert_geoseries_equal once we expose tolerance keyword
# assert_geoseries_equal(roundtrip, s, check_less_precise=True)
for a, b in zip(roundtrip, s):
np.testing.assert_allclose(a.coords[:], b.coords[:], atol=0.01)
# -----------------------------------------------------------------------------
# Test different supported formats for CRS specification
@pytest.fixture(
params=[
4326,
"epsg:4326",
pytest.param(
{"init": "epsg:4326"},
),
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs",
{"proj": "latlong", "ellps": "WGS84", "datum": "WGS84", "no_defs": True},
],
ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg4326(request):
if isinstance(request.param, int):
return {"epsg": request.param}
return {"crs": request.param}
@pytest.fixture(
params=[
26918,
"epsg:26918",
pytest.param(
{"init": "epsg:26918", "no_defs": True},
),
"+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ",
{"proj": "utm", "zone": 18, "datum": "NAD83", "units": "m", "no_defs": True},
],
ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg26918(request):
if isinstance(request.param, int):
return {"epsg": request.param}
return {"crs": request.param}
@pytest.mark.filterwarnings("ignore:'\\+init:DeprecationWarning")
@pytest.mark.filterwarnings("ignore:'\\+init:FutureWarning")
def test_transform2(epsg4326, epsg26918):
# with PROJ >= 7, the transformation using EPSG code vs proj4 string is
# slightly different due to use of grid files or not -> turn off network
# to not use grid files at all for this test
pyproj.network.set_network_enabled(False)
df = df_epsg26918()
lonlat = df.to_crs(**epsg4326)
utm = lonlat.to_crs(**epsg26918)
# can't check for CRS equality, as the formats differ although representing
# the same CRS
assert_geodataframe_equal(df, utm, check_less_precise=True, check_crs=False)
def test_crs_axis_order__always_xy():
df = GeoDataFrame(geometry=[Point(-1683723, 6689139)], crs="epsg:26918")
lonlat = df.to_crs("epsg:4326")
test_lonlat = GeoDataFrame(
geometry=[Point(-110.1399901, 55.1350011)], crs="epsg:4326"
)
assert_geodataframe_equal(lonlat, test_lonlat, check_less_precise=True)
def test_skip_exact_same():
    """Re-projecting to the identical CRS leaves the data unchanged."""
    original = df_epsg26918()
    reprojected = original.to_crs(original.crs)
    assert_geodataframe_equal(original, reprojected, check_less_precise=True)
# Test CRS on GeometryArray level
class TestGeometryArrayCRS:
    """CRS propagation through GeometryArray, GeoSeries and GeoDataFrame.

    Verifies that a CRS set on one container is carried over (or correctly
    rejected) by constructors, setters, geometric operations, and
    pandas-level operations such as slicing, concat and merge.
    """

    def setup_method(self):
        # Two reference CRS objects used throughout the assertions.
        self.osgb = pyproj.CRS(27700)
        self.wgs = pyproj.CRS(4326)
        self.geoms = [Point(0, 0), Point(1, 1)]
        self.polys = [
            Polygon([(random.random(), random.random()) for i in range(3)])
            for _ in range(10)
        ]
        self.arr = from_shapely(self.polys, crs=27700)

    def test_array(self):
        """CRS can be assigned to, or passed into, a GeometryArray."""
        arr = from_shapely(self.geoms)
        arr.crs = 27700
        assert arr.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        assert arr.crs == self.osgb

        arr = GeometryArray(arr)
        assert arr.crs == self.osgb

        # an explicit crs= on the constructor takes priority
        arr = GeometryArray(arr, crs=4326)
        assert arr.crs == self.wgs

    def test_series(self):
        """GeoSeries exposes the CRS of its underlying array."""
        s = GeoSeries(crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        # manually change CRS
        s.crs = 4326
        assert s.crs == self.wgs
        assert s.values.crs == self.wgs

        s = GeoSeries(self.geoms, crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(arr)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        with pytest.raises(
            ValueError,
            match="CRS mismatch between CRS of the passed geometries and 'crs'",
        ):
            s = GeoSeries(arr, crs=4326)
        # the failed construction above must leave the earlier series untouched
        assert s.crs == self.osgb

    def test_dataframe(self):
        """GeoDataFrame picks up CRS from geometry input and validates 'crs'."""
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame(geometry=arr)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame(geometry=s)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # different passed CRS than array CRS is now an error
        match_str = "CRS mismatch between CRS of the passed geometries and 'crs'"
        with pytest.raises(ValueError, match=match_str):
            df = GeoDataFrame(geometry=s, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame(geometry=s, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame(df, crs=4326).crs

        # manually change CRS
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame(geometry=s)
        df.crs = 4326
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        # a crs without an actual geometry column is rejected
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            GeoDataFrame(self.geoms, columns=["geom"], crs=27700)
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            GeoDataFrame(crs=27700)

        df = GeoDataFrame(self.geoms, columns=["geom"])
        df = df.set_geometry("geom", crs=27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.geom.crs == self.osgb
        assert df.geom.values.crs == self.osgb

        df = GeoDataFrame(geometry=self.geoms, crs=27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # new geometry with set CRS has priority over GDF CRS
        df = GeoDataFrame(geometry=self.geoms, crs=27700)
        df = df.set_geometry(self.geoms, crs=4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame()
        df = df.set_geometry(s)
        assert df._geometry_column_name == "geometry"
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame()
        df = df.set_geometry(arr)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        arr = from_shapely(self.geoms, crs=4326)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr})
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        # geometry column name None on init
        df = GeoDataFrame({"geometry": [0, 1]})
        with pytest.raises(
            ValueError,
            match="Assigning CRS to a GeoDataFrame without a geometry",
        ):
            df.crs = 27700

        # geometry column without geometry
        df = GeoDataFrame({"geometry": [Point(0, 1)]}).assign(geometry=[0])
        with pytest.raises(
            ValueError,
            match="Assigning CRS to a GeoDataFrame without an active geometry",
        ):
            df.crs = 27700
        with pytest.raises(
            AttributeError,
            match="The CRS attribute of a GeoDataFrame without an active",
        ):
            assert df.crs == self.osgb

    def test_dataframe_getitem_without_geometry_column(self):
        """Column subsets that drop the active geometry lose CRS access."""
        df = GeoDataFrame({"col": range(10)}, geometry=self.arr)
        df["geom2"] = df.geometry.centroid
        subset = df[["col", "geom2"]]
        with pytest.raises(
            AttributeError,
            match="The CRS attribute of a GeoDataFrame without an active",
        ):
            assert subset.crs == self.osgb

    def test_dataframe_setitem(self):
        """Setting the geometry column adopts that column's CRS."""
        # new geometry CRS has priority over GDF CRS
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = s
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = arr
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # test to_crs case (GH1960)
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        df["geometry"] = df["geometry"].to_crs(27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # test changing geometry crs not in the geometry column doesn't change the crs
        arr = from_shapely(self.geoms)
        df = GeoDataFrame(
            {"col1": [1, 2], "geometry": arr, "other_geom": arr}, crs=4326
        )
        df["other_geom"] = from_shapely(self.geoms, crs=27700)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df["geometry"].crs == self.wgs
        assert df["other_geom"].crs == self.osgb

    def test_dataframe_setitem_without_geometry_column(self):
        """Replacing geometry with non-geometry data drops the frame CRS."""
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)

        # override geometry with non geometry
        with pytest.warns(UserWarning):
            df["geometry"] = 1

        # assigning a list of geometry object doesn't have cached access to 4326
        df["geometry"] = self.geoms
        assert df.crs is None

    @pytest.mark.parametrize(
        "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])]
    )
    def test_scalar(self, scalar):
        """A CRS can be assigned after broadcasting a scalar geometry."""
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = scalar
        df.crs = 4326
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_crs_with_no_geom_fails(self):
        """Assigning a CRS to a geometry-less frame raises ValueError."""
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            df = GeoDataFrame()
            df.crs = 4326

    def test_read_file(self):
        """read_file propagates the file CRS down to the array level."""
        nybb_filename = datasets.get_path("nybb")
        df = read_file(nybb_filename)
        assert df.crs == pyproj.CRS(2263)
        assert df.geometry.crs == pyproj.CRS(2263)
        assert df.geometry.values.crs == pyproj.CRS(2263)

    def test_multiple_geoms(self):
        """Each geometry column keeps its own CRS."""
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.col1.crs == self.wgs
        assert df.col1.values.crs == self.wgs

    def test_multiple_geoms_set_geom(self):
        """set_geometry switches the frame CRS to the new active column."""
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df = df.set_geometry("col1")
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        assert df["geometry"].crs == self.osgb
        assert df["geometry"].values.crs == self.osgb

    def test_assign_cols(self):
        """Assigned columns keep (or lack) their own CRS independently."""
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df["geom2"] = s
        df["geom3"] = s.values
        df["geom4"] = from_shapely(self.geoms)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.geom2.crs == self.wgs
        assert df.geom2.values.crs == self.wgs
        assert df.geom3.crs == self.wgs
        assert df.geom3.values.crs == self.wgs
        assert df.geom4.crs is None
        assert df.geom4.values.crs is None

    def test_copy(self):
        """copy() preserves CRS at array, series and frame level."""
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])

        arr_copy = arr.copy()
        assert arr_copy.crs == arr.crs

        s_copy = s.copy()
        assert s_copy.crs == s.crs
        assert s_copy.values.crs == s.values.crs

        df_copy = df.copy()
        assert df_copy.crs == df.crs
        assert df_copy.geometry.crs == df.geometry.crs
        assert df_copy.geometry.values.crs == df.geometry.values.crs
        assert df_copy.col1.crs == df.col1.crs
        assert df_copy.col1.values.crs == df.col1.values.crs

    def test_rename(self):
        """rename/rename_geometry keep the CRS of the renamed columns."""
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df = df.rename(columns={"geometry": "geom"}).set_geometry("geom")
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        df = df.rename_geometry("geom2")
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        df = df.rename(columns={"col1": "column1"})
        assert df.column1.crs == self.wgs
        assert df.column1.values.crs == self.wgs

    def test_geoseries_to_crs(self):
        """to_crs reprojects only the active geometry column."""
        s = GeoSeries(self.geoms, crs=27700)
        s = s.to_crs(4326)
        assert s.crs == self.wgs
        assert s.values.crs == self.wgs

        df = GeoDataFrame(geometry=s)
        assert df.crs == self.wgs
        df = df.to_crs(27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # make sure that only active geometry is transformed
        arr = from_shapely(self.geoms, crs=4326)
        df["col1"] = arr
        df = df.to_crs(3857)
        assert df.col1.crs == self.wgs
        assert df.col1.values.crs == self.wgs

    def test_array_to_crs(self):
        """GeometryArray.to_crs updates the array CRS."""
        arr = from_shapely(self.geoms, crs=27700)
        arr = arr.to_crs(4326)
        assert arr.crs == self.wgs

    def test_from_shapely(self):
        """from_shapely accepts a crs keyword."""
        arr = from_shapely(self.geoms, crs=27700)
        assert arr.crs == self.osgb

    def test_from_wkb(self):
        """from_wkb accepts a crs keyword."""
        L_wkb = [p.wkb for p in self.geoms]
        arr = from_wkb(L_wkb, crs=27700)
        assert arr.crs == self.osgb

    def test_from_wkt(self):
        """from_wkt accepts a crs keyword."""
        L_wkt = [p.wkt for p in self.geoms]
        arr = from_wkt(L_wkt, crs=27700)
        assert arr.crs == self.osgb

    def test_points_from_xy(self):
        """points_from_xy accepts a crs keyword."""
        df = pd.DataFrame([{"x": x, "y": x, "z": x} for x in range(10)])
        arr = points_from_xy(df["x"], df["y"], crs=27700)
        assert arr.crs == self.osgb

    # setting CRS in GeoSeries should not set it in passed array without CRS
    def test_original(self):
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        assert arr.crs is None
        assert s.crs == self.osgb

    def test_ops(self):
        """Unary geometric operations preserve the CRS."""
        arr = self.arr
        bound = arr.boundary
        assert bound.crs == self.osgb
        cent = arr.centroid
        assert cent.crs == self.osgb
        hull = arr.convex_hull
        assert hull.crs == self.osgb
        envelope = arr.envelope
        assert envelope.crs == self.osgb
        exterior = arr.exterior
        assert exterior.crs == self.osgb
        representative_point = arr.representative_point()
        assert representative_point.crs == self.osgb

    def test_binary_ops(self):
        """Binary geometric operations preserve the CRS."""
        arr = self.arr
        # build 10 random valid quads to combine with self.arr
        quads = []
        while len(quads) < 10:
            geom = Polygon([(random.random(), random.random()) for i in range(4)])
            if geom.is_valid:
                quads.append(geom)
        arr2 = from_shapely(quads, crs=27700)

        difference = arr.difference(arr2)
        assert difference.crs == self.osgb
        intersection = arr.intersection(arr2)
        assert intersection.crs == self.osgb
        symmetric_difference = arr.symmetric_difference(arr2)
        assert symmetric_difference.crs == self.osgb
        union = arr.union(arr2)
        assert union.crs == self.osgb

    def test_other(self):
        """buffer/interpolate/simplify preserve the CRS."""
        arr = self.arr
        buffer = arr.buffer(5)
        assert buffer.crs == self.osgb
        interpolate = arr.exterior.interpolate(0.1)
        assert interpolate.crs == self.osgb
        simplify = arr.simplify(5)
        assert simplify.crs == self.osgb

    @pytest.mark.parametrize(
        "attr, arg",
        [
            ("affine_transform", ([0, 1, 1, 0, 0, 0],)),
            ("translate", ()),
            ("rotate", (10,)),
            ("scale", ()),
            ("skew", ()),
        ],
    )
    def test_affinity_methods(self, attr, arg):
        """Affine transformations preserve the CRS."""
        result = getattr(self.arr, attr)(*arg)
        assert result.crs == self.osgb

    def test_slice(self):
        """iloc slicing preserves the CRS of every geometry column."""
        s = GeoSeries(self.arr, crs=27700)
        assert s.iloc[1:].values.crs == self.osgb

        df = GeoDataFrame({"col1": self.arr}, geometry=s)
        assert df.iloc[1:].geometry.values.crs == self.osgb
        assert df.iloc[1:].col1.values.crs == self.osgb

    def test_concat(self):
        """pd.concat preserves per-column CRS."""
        s = GeoSeries(self.arr, crs=27700)
        assert pd.concat([s, s]).values.crs == self.osgb

        df = GeoDataFrame({"col1": from_shapely(self.geoms, crs=4326)}, geometry=s)
        assert pd.concat([df, df]).geometry.values.crs == self.osgb
        assert pd.concat([df, df]).col1.values.crs == self.wgs

    def test_merge(self):
        """merge keeps per-column CRS and the active geometry's CRS."""
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame({"col1": s}, geometry=arr)
        df2 = GeoDataFrame({"col2": s}, geometry=arr).rename_geometry("geom")
        merged = df.merge(df2, left_index=True, right_index=True)
        assert merged.col1.values.crs == self.wgs
        assert merged.geometry.values.crs == self.osgb
        assert merged.col2.values.crs == self.wgs
        assert merged.geom.values.crs == self.osgb
        assert merged.crs == self.osgb

    # make sure that geometry column from list has CRS (__setitem__)
    def test_setitem_geometry(self):
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
        df["geometry"] = list(df.geometry)
        assert df.geometry.values.crs == self.osgb

        df2 = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
        df2["geometry"] = from_shapely(self.geoms, crs=4326)
        assert df2.geometry.values.crs == self.wgs

    def test_astype(self):
        """astype on a non-geometry column keeps the frame CRS."""
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
        df2 = df.astype({"col1": str})
        assert df2.crs == self.osgb

    def test_apply(self):
        s = GeoSeries(self.arr)
        assert s.crs == 27700

        # apply preserves the CRS if the result is a GeoSeries
        result = s.apply(lambda x: x.centroid)
        assert result.crs == 27700

    def test_apply_geodataframe(self):
        df = GeoDataFrame({"col1": [0, 1]}, geometry=self.geoms, crs=27700)
        assert df.crs == 27700

        # apply preserves the CRS if the result is a GeoDataFrame
        result = df.apply(lambda col: col, axis=0)
        assert result.crs == 27700
        result = df.apply(lambda row: row, axis=1)
        assert result.crs == 27700
class TestSetCRS:
    """Behaviour of set_crs on naive and CRS-carrying objects."""

    @pytest.mark.parametrize(
        "constructor",
        [
            lambda geoms, crs: GeoSeries(geoms, crs=crs),
            lambda geoms, crs: GeoDataFrame(geometry=geoms, crs=crs),
        ],
        ids=["geoseries", "geodataframe"],
    )
    def test_set_crs(self, constructor):
        """set_crs assigns, refuses to override, or overrides when explicit."""
        naive = constructor([Point(0, 0), Point(1, 1)], crs=None)
        assert naive.crs is None

        # by default returns a copy
        result = naive.set_crs(crs="EPSG:4326")
        assert result.crs == "EPSG:4326"
        assert naive.crs is None

        result = naive.set_crs(epsg=4326)
        assert result.crs == "EPSG:4326"
        assert naive.crs is None

        # with inplace=True
        result = naive.set_crs(crs="EPSG:4326", inplace=True)
        assert result is naive
        assert result.crs == naive.crs == "EPSG:4326"

        # raise for non-naive when crs would be overridden
        non_naive = constructor([Point(0, 0), Point(1, 1)], crs="EPSG:4326")
        assert non_naive.crs == "EPSG:4326"
        with pytest.raises(ValueError, match="already has a CRS"):
            non_naive.set_crs("EPSG:3857")

        # allow for equal crs
        result = non_naive.set_crs("EPSG:4326")
        assert result.crs == "EPSG:4326"

        # replace with allow_override=True
        result = non_naive.set_crs("EPSG:3857", allow_override=True)
        assert non_naive.crs == "EPSG:4326"
        assert result.crs == "EPSG:3857"

        result = non_naive.set_crs("EPSG:3857", allow_override=True, inplace=True)
        assert non_naive.crs == "EPSG:3857"
        assert result.crs == "EPSG:3857"

        # raise error when no crs is passed
        with pytest.raises(ValueError):
            naive.set_crs(crs=None, epsg=None)

View File

@@ -0,0 +1,12 @@
from geopandas import GeoDataFrame, read_file
from geopandas.datasets import get_path
import pytest
@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"]
)
def test_read_paths(test_dataset):
    """Bundled dataset paths load as GeoDataFrames (module is deprecated)."""
    with pytest.warns(FutureWarning, match="The geopandas.dataset module is"):
        loaded = read_file(get_path(test_dataset))
    assert isinstance(loaded, GeoDataFrame)

View File

@@ -0,0 +1,91 @@
from textwrap import dedent
from geopandas._decorator import doc
# NOTE: the docstring below is itself the fixture under test -- @doc fills in
# the {method}/{operation} placeholders and the tests compare the rendered
# text, so its wording must not be edited.
@doc(method="cumsum", operation="sum")
def cumsum(whatever):
    """
    This is the {method} method.

    It computes the cumulative {operation}.
    """
    ...
# Inherits cumsum's docstring as the template and appends an Examples section.
@doc(
    cumsum,
    dedent(
        """
        Examples
        --------

        >>> cumavg([1, 2, 3])
        2
        """
    ),
    method="cumavg",
    operation="average",
)
def cumavg(whatever):
    ...
# Reuses cumsum's docstring template, re-rendered for cummax.
@doc(cumsum, method="cummax", operation="maximum")
def cummax(whatever):
    ...
# Template is inherited transitively: cummin -> cummax -> cumsum.
@doc(cummax, method="cummin", operation="minimum")
def cummin(whatever):
    ...
def test_docstring_formatting():
    """Placeholders are substituted into the decorated docstring."""
    docstr = dedent(
        """
        This is the cumsum method.

        It computes the cumulative sum.
        """
    )
    assert cumsum.__doc__ == docstr
def test_docstring_appending():
    """An extra docstring passed to @doc is appended to the template."""
    docstr = dedent(
        """
        This is the cumavg method.

        It computes the cumulative average.

        Examples
        --------

        >>> cumavg([1, 2, 3])
        2
        """
    )
    assert cumavg.__doc__ == docstr
def test_doc_template_from_func():
    """The template can be taken from an already-decorated function."""
    docstr = dedent(
        """
        This is the cummax method.

        It computes the cumulative maximum.
        """
    )
    assert cummax.__doc__ == docstr
def test_inherit_doc_template():
    """Templates chain: cummin inherits via cummax from cumsum."""
    docstr = dedent(
        """
        This is the cummin method.

        It computes the cumulative minimum.
        """
    )
    assert cummin.__doc__ == docstr

View File

@@ -0,0 +1,350 @@
import warnings
import numpy as np
import pandas as pd
import geopandas
from geopandas import GeoDataFrame, read_file
from pandas.testing import assert_frame_equal
import pytest
from geopandas._compat import PANDAS_GE_15, PANDAS_GE_20
from geopandas.testing import assert_geodataframe_equal, geom_almost_equals
@pytest.fixture
def nybb_polydf():
    """NYC boroughs with a renamed geometry column and a grouping key."""
    path = geopandas.datasets.get_path("nybb")
    polydf = (
        read_file(path)[["geometry", "BoroName", "BoroCode"]]
        .rename(columns={"geometry": "myshapes"})
        .set_geometry("myshapes")
    )
    # Manhattan and Bronx (rows 3-4) get key 6, every other borough key 5.
    polydf["manhattan_bronx"] = 5
    polydf.loc[3:4, "manhattan_bronx"] = 6
    polydf["BoroCode"] = polydf["BoroCode"].astype("int64")
    return polydf
@pytest.fixture
def merged_shapes(nybb_polydf):
    """Expected dissolve result: one unary_union geometry per group key."""
    unions = {
        5: nybb_polydf.loc[0:2].geometry.unary_union,
        6: nybb_polydf.loc[3:4].geometry.unary_union,
    }
    return GeoDataFrame(
        {"myshapes": list(unions.values())},
        geometry="myshapes",
        index=pd.Index(list(unions.keys()), name="manhattan_bronx"),
        crs=nybb_polydf.crs,
    )
@pytest.fixture
def first(merged_shapes):
    """Expected dissolve result when aggregating with 'first'."""
    expected = merged_shapes.copy()
    expected["BoroName"] = ["Staten Island", "Manhattan"]
    expected["BoroCode"] = [5, 1]
    return expected
@pytest.fixture
def expected_mean(merged_shapes):
    """Expected dissolve result for aggfunc='mean' (numeric columns only)."""
    expected = merged_shapes.copy()
    expected["BoroCode"] = [4, 1.5]
    return expected
def test_geom_dissolve(nybb_polydf, first):
    """dissolve keeps the custom geometry column name and merges the shapes."""
    result = nybb_polydf.dissolve("manhattan_bronx")
    assert result.geometry.name == "myshapes"
    assert geom_almost_equals(result, first)
def test_dissolve_retains_existing_crs(nybb_polydf):
    """A frame with a CRS keeps it through dissolve."""
    assert nybb_polydf.crs is not None
    assert nybb_polydf.dissolve("manhattan_bronx").crs is not None
def test_dissolve_retains_nonexisting_crs(nybb_polydf):
    """A frame without a CRS still has none after dissolve."""
    nybb_polydf.crs = None
    assert nybb_polydf.dissolve("manhattan_bronx").crs is None
def test_first_dissolve(nybb_polydf, first):
    """The default aggregation ('first') matches the expected frame exactly."""
    result = nybb_polydf.dissolve("manhattan_bronx")
    assert_frame_equal(first, result, check_column_type=False)
def test_mean_dissolve(nybb_polydf, first, expected_mean):
    """aggfunc='mean' averages numeric columns on every pandas version.

    Numeric-only handling of the string column differs per version:
    pandas <1.5 drops it silently, 1.5.x warns, and >=2.0 needs an
    explicit ``numeric_only=True`` -- hence the three branches.
    """
    if not PANDAS_GE_15:
        test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
        test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
    elif PANDAS_GE_15 and not PANDAS_GE_20:
        # both dissolve calls must happen inside the warns context
        with pytest.warns(FutureWarning, match=".*used in dissolve is deprecated.*"):
            test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
            test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
    else:  # pandas 2.0
        test = nybb_polydf.dissolve(
            "manhattan_bronx", aggfunc="mean", numeric_only=True
        )
        # for non pandas "mean", numeric only cannot be applied. Drop columns manually
        test2 = nybb_polydf.drop(columns=["BoroName"]).dissolve(
            "manhattan_bronx", aggfunc=np.mean
        )
    assert_frame_equal(expected_mean, test, check_column_type=False)
    assert_frame_equal(expected_mean, test2, check_column_type=False)
@pytest.mark.skipif(not PANDAS_GE_15 or PANDAS_GE_20, reason="warning for pandas 1.5.x")
def test_mean_dissolve_warning_capture(nybb_polydf, first, expected_mean):
    """On pandas 1.5.x, numeric-only aggfuncs warn while 'first' stays silent."""
    with pytest.warns(FutureWarning, match=".*used in dissolve is deprecated.*"):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")

    # "first" has no numeric-only semantics, so it must not warn at all;
    # escalate every warning to an error to prove it.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="first")
def test_dissolve_emits_other_warnings(nybb_polydf):
    """Warnings raised inside a user-supplied aggfunc must propagate."""
    # we only do something special for pandas 1.5.x, but expect this
    # test to be true on any version
    def sum_and_warn(group):
        warnings.warn("foo") # noqa: B028
        if PANDAS_GE_20:
            return group.sum(numeric_only=False)
        else:
            return group.sum()

    with pytest.warns(UserWarning, match="foo"):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc=sum_and_warn)
def test_multicolumn_dissolve(nybb_polydf, first):
    """Dissolving by two columns yields a MultiIndex result."""
    multi = nybb_polydf.copy()
    multi["dup_col"] = multi.manhattan_bronx
    result = multi.dissolve(["manhattan_bronx", "dup_col"], aggfunc="first")

    expected = first.copy()
    expected["dup_col"] = expected.index
    expected = expected.set_index([expected.index, "dup_col"])

    assert_frame_equal(result, expected, check_column_type=False)
def test_reset_index(nybb_polydf, first):
    """as_index=False leaves the group key as a regular column."""
    result = nybb_polydf.dissolve("manhattan_bronx", as_index=False)
    assert_frame_equal(first.reset_index(), result, check_column_type=False)
def test_dissolve_none(nybb_polydf):
    """by=None dissolves the whole frame into a single row."""
    result = nybb_polydf.dissolve(by=None)

    geom_col = nybb_polydf.geometry.name
    expected = GeoDataFrame(
        {
            geom_col: [nybb_polydf.geometry.unary_union],
            "BoroName": ["Staten Island"],
            "BoroCode": [5],
            "manhattan_bronx": [5],
        },
        geometry=geom_col,
        crs=nybb_polydf.crs,
    )
    assert_frame_equal(expected, result, check_column_type=False)
def test_dissolve_none_mean(nybb_polydf):
    """by=None with mean aggregation averages the numeric columns."""
    result = nybb_polydf.dissolve(aggfunc="mean", numeric_only=True)

    geom_col = nybb_polydf.geometry.name
    expected = GeoDataFrame(
        {
            geom_col: [nybb_polydf.geometry.unary_union],
            "BoroCode": [3.0],
            "manhattan_bronx": [5.4],
        },
        geometry=geom_col,
        crs=nybb_polydf.crs,
    )
    assert_frame_equal(expected, result, check_column_type=False)
def test_dissolve_level():
    """dissolve(level=...) groups by MultiIndex level(s), by name or position."""
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2, 2],
            "b": [3, 4, 4, 4],
            "c": [3, 4, 5, 6],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    ).set_index(["a", "b", "c"])

    expected_a = geopandas.GeoDataFrame(
        {
            "a": [1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index("a")
    expected_b = geopandas.GeoDataFrame(
        {
            "b": [3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2, 3 3)"]
            ),
        }
    ).set_index("b")
    expected_ab = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2],
            "b": [3, 4, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index(["a", "b"])

    # `level` accepts a position, a name, or a list of either
    assert_frame_equal(expected_a, gdf.dissolve(level=0))
    assert_frame_equal(expected_a, gdf.dissolve(level="a"))
    assert_frame_equal(expected_b, gdf.dissolve(level=1))
    assert_frame_equal(expected_b, gdf.dissolve(level="b"))
    assert_frame_equal(expected_ab, gdf.dissolve(level=[0, 1]))
    assert_frame_equal(expected_ab, gdf.dissolve(level=["a", "b"]))
def test_dissolve_sort():
    """sort=False preserves first-appearance order of the group keys."""
    gdf = geopandas.GeoDataFrame(
        {
            "a": [2, 1, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )

    unsorted_expected = geopandas.GeoDataFrame(
        {
            "a": [2, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2)"]
            ),
        }
    ).set_index("a")

    # default sorts the index; sort=False keeps encounter order
    assert_frame_equal(unsorted_expected.sort_index(), gdf.dissolve("a"))
    assert_frame_equal(unsorted_expected, gdf.dissolve("a", sort=False))
def test_dissolve_categorical():
    """`observed` controls whether unobserved categorical combos appear."""
    gdf = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 1, 1, 2],
            "to_agg": [1, 2, 3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    )

    # when observed=False we get an additional observation
    # that wasn't in the original data
    expected_gdf_observed_false = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 2, 1, 2],
            "geometry": geopandas.array.from_wkt(
                [
                    "MULTIPOINT (0 0, 1 1)",
                    None,
                    "POINT (2 2)",
                    "POINT (3 3)",
                ]
            ),
            "to_agg": [1, None, 3, 4],
        }
    ).set_index(["cat", "noncat"])

    # when observed=True we do not get any additional observations
    expected_gdf_observed_true = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "b", "b"]),
            "noncat": [1, 1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
            "to_agg": [1, 3, 4],
        }
    ).set_index(["cat", "noncat"])

    assert_frame_equal(expected_gdf_observed_false, gdf.dissolve(["cat", "noncat"]))
    assert_frame_equal(
        expected_gdf_observed_true, gdf.dissolve(["cat", "noncat"], observed=True)
    )
def test_dissolve_dropna():
    """dropna=False keeps a NaN group key; the default drops it."""
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, None],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )

    with_na = geopandas.GeoDataFrame(
        {
            "a": [1.0, np.nan],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)"]
            ),
        }
    ).set_index("a")
    assert_frame_equal(with_na, gdf.dissolve("a", dropna=False))

    without_na = geopandas.GeoDataFrame(
        {
            "a": [1.0],
            "geometry": geopandas.array.from_wkt(["MULTIPOINT (0 0, 1 1)"]),
        }
    ).set_index("a")
    assert_frame_equal(without_na, gdf.dissolve("a"))
def test_dissolve_dropna_warn(nybb_polydf):
    """Default parameters must not emit the dropna-unsupported warning."""
    with warnings.catch_warnings(record=True) as captured:
        nybb_polydf.dissolve()
    assert not any(
        "dropna kwarg is not supported" in str(w.message) for w in captured
    )
def test_dissolve_multi_agg(nybb_polydf, merged_shapes):
    """A dict-of-lists aggfunc yields MultiIndex columns with no warnings."""
    merged_shapes[("BoroCode", "min")] = [3, 1]
    merged_shapes[("BoroCode", "max")] = [5, 2]
    merged_shapes[("BoroName", "count")] = [3, 2]

    with warnings.catch_warnings(record=True) as captured:
        result = nybb_polydf.dissolve(
            by="manhattan_bronx",
            aggfunc={
                "BoroCode": ["min", "max"],
                "BoroName": "count",
            },
        )

    assert_geodataframe_equal(result, merged_shapes)
    assert len(captured) == 0

View File

@@ -0,0 +1,942 @@
import geopandas as gpd
import numpy as np
import pandas as pd
import pytest
from packaging.version import Version
# Skip this whole module when the optional explore() dependencies are missing.
folium = pytest.importorskip("folium")
branca = pytest.importorskip("branca")
matplotlib = pytest.importorskip("matplotlib")
mapclassify = pytest.importorskip("mapclassify")
geodatasets = pytest.importorskip("geodatasets")

from matplotlib import cm
from matplotlib import colors
from branca.colormap import StepColormap

# Version flags for behaviour that changed in branca/folium releases.
BRANCA_05 = Version(branca.__version__) > Version("0.4.2")
FOLIUM_G_014 = Version(folium.__version__) > Version("0.14.0")
class TestExplore:
def setup_method(self):
self.nybb = gpd.read_file(gpd.datasets.get_path("nybb"))
self.world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
self.cities = gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
self.chicago = gpd.read_file(geodatasets.get_path("geoda.chicago_commpop"))
self.world["range"] = range(len(self.world))
self.missing = self.world.copy()
np.random.seed(42)
self.missing.loc[np.random.choice(self.missing.index, 40), "continent"] = np.nan
self.missing.loc[np.random.choice(self.missing.index, 40), "pop_est"] = np.nan
def _fetch_map_string(self, m):
out = m._parent.render()
out_str = "".join(out.split())
return out_str
def test_simple_pass(self):
"""Make sure default pass"""
self.nybb.explore()
self.world.explore()
self.cities.explore()
self.world.geometry.explore()
def test_choropleth_pass(self):
"""Make sure default choropleth pass"""
self.world.explore(column="pop_est")
def test_map_settings_default(self):
"""Check default map settings"""
m = self.world.explore()
assert m.location == [
pytest.approx(-3.1774349999999956, rel=1e-6),
pytest.approx(2.842170943040401e-14, rel=1e-6),
]
assert m.options["zoom"] == 10
assert m.options["zoomControl"] is True
assert m.position == "relative"
assert m.height == (100.0, "%")
assert m.width == (100.0, "%")
assert m.left == (0, "%")
assert m.top == (0, "%")
assert m.global_switches.no_touch is False
assert m.global_switches.disable_3d is False
assert "openstreetmap" in m.to_dict()["children"].keys()
def test_map_settings_custom(self):
"""Check custom map settings"""
m = self.nybb.explore(
zoom_control=False,
width=200,
height=200,
)
assert m.location == [
pytest.approx(40.70582377450201, rel=1e-6),
pytest.approx(-73.9778006856748, rel=1e-6),
]
assert m.options["zoom"] == 10
assert m.options["zoomControl"] is False
assert m.height == (200.0, "px")
assert m.width == (200.0, "px")
# custom XYZ tiles
m = self.nybb.explore(
zoom_control=False,
width=200,
height=200,
tiles="https://mt1.google.com/vt/lyrs=m&x={x}&y={y}&z={z}",
attr="Google",
)
out_str = self._fetch_map_string(m)
s = '"https://mt1.google.com/vt/lyrs=m\\u0026x={x}\\u0026y={y}\\u0026z={z}"'
assert s in out_str
assert '"attribution":"Google"' in out_str
m = self.nybb.explore(location=(40, 5))
assert m.location == [40, 5]
assert m.options["zoom"] == 10
m = self.nybb.explore(zoom_start=8)
assert m.location == [
pytest.approx(40.70582377450201, rel=1e-6),
pytest.approx(-73.9778006856748, rel=1e-6),
]
assert m.options["zoom"] == 8
m = self.nybb.explore(location=(40, 5), zoom_start=8)
assert m.location == [40, 5]
assert m.options["zoom"] == 8
def test_simple_color(self):
"""Check color settings"""
# single named color
m = self.nybb.explore(color="red")
out_str = self._fetch_map_string(m)
assert '"fillColor":"red"' in out_str
# list of colors
colors = ["#333333", "#367324", "#95824f", "#fcaa00", "#ffcc33"]
m2 = self.nybb.explore(color=colors)
out_str = self._fetch_map_string(m2)
for c in colors:
assert f'"fillColor":"{c}"' in out_str
# column of colors
df = self.nybb.copy()
df["colors"] = colors
m3 = df.explore(color="colors")
out_str = self._fetch_map_string(m3)
for c in colors:
assert f'"fillColor":"{c}"' in out_str
# line GeoSeries
m4 = self.nybb.boundary.explore(color="red")
out_str = self._fetch_map_string(m4)
assert '"fillColor":"red"' in out_str
def test_choropleth_linear(self):
"""Check choropleth colors"""
# default cmap
m = self.nybb.explore(column="Shape_Leng")
out_str = self._fetch_map_string(m)
assert 'color":"#440154"' in out_str
assert 'color":"#fde725"' in out_str
assert 'color":"#50c46a"' in out_str
assert 'color":"#481467"' in out_str
assert 'color":"#3d4e8a"' in out_str
# named cmap
m = self.nybb.explore(column="Shape_Leng", cmap="PuRd")
out_str = self._fetch_map_string(m)
assert 'color":"#f7f4f9"' in out_str
assert 'color":"#67001f"' in out_str
assert 'color":"#d31760"' in out_str
assert 'color":"#f0ecf5"' in out_str
assert 'color":"#d6bedc"' in out_str
def test_choropleth_mapclassify(self):
    """Choropleth colors under mapclassify classification schemes."""
    # quantiles
    rendered = self._fetch_map_string(
        self.nybb.explore(column="Shape_Leng", scheme="quantiles")
    )
    for hex_code in ("#21918c", "#3b528b", "#5ec962", "#fde725", "#440154"):
        assert f'color":"{hex_code}"' in rendered
    # head/tail breaks
    rendered = self._fetch_map_string(
        self.world.explore(column="pop_est", scheme="headtailbreaks")
    )
    for hex_code in ("#3b528b", "#21918c", "#5ec962", "#fde725", "#440154"):
        assert f'"fillColor":"{hex_code}"' in rendered
    # custom number of classes
    rendered = self._fetch_map_string(
        self.world.explore(column="pop_est", scheme="naturalbreaks", k=3)
    )
    for hex_code in ("#21918c", "#fde725", "#440154"):
        assert f'"fillColor":"{hex_code}"' in rendered
    # UserDefined bins override the default k
    rendered = self._fetch_map_string(
        self.chicago.explore(
            column="POP2010",
            scheme="UserDefined",
            classification_kwds={"bins": [25000, 50000, 75000, 100000]},
        )
    )
    for hex_code in ("#fde725", "#35b779", "#31688e", "#440154"):
        assert f'"fillColor":"{hex_code}"' in rendered
def test_categorical(self):
    """Categorical maps: auto-detection, forcing, Categorical dtype, cmaps."""
    # string columns are detected as categorical automatically
    rendered = self._fetch_map_string(self.world.explore(column="continent"))
    continent_colors = {
        "Europe": "#9467bd",
        "NorthAmerica": "#c49c94",
        "Africa": "#1f77b4",
        "Asia": "#98df8a",
        "Antarctica": "#ff7f0e",
        "SouthAmerica": "#9edae5",
        "Oceania": "#7f7f7f",
        "Sevenseas(openocean)": "#dbdb8d",
    }
    for continent, hex_code in continent_colors.items():
        assert f'color":"{hex_code}","continent":"{continent}"' in rendered
    # a numeric column forced to categorical
    rendered = self._fetch_map_string(
        self.nybb.explore(column="BoroCode", categorical=True)
    )
    for hex_code in ("#9edae5", "#c7c7c7", "#8c564b", "#1f77b4", "#98df8a"):
        assert f'color":"{hex_code}"' in rendered
    # pandas.Categorical dtype
    frame = self.world.copy()
    frame["categorical"] = pd.Categorical(frame["name"])
    rendered = self._fetch_map_string(frame.explore(column="categorical"))
    for hex_code in np.apply_along_axis(colors.to_hex, 1, cm.tab20(range(20))):
        assert f'"fillColor":"{hex_code}"' in rendered
    # a named colormap applied to categories
    rendered = self._fetch_map_string(
        self.nybb.explore(column="BoroName", cmap="Set1")
    )
    for hex_code in ("#999999", "#a65628", "#4daf4a", "#e41a1c", "#ff7f00"):
        assert f'color":"{hex_code}"' in rendered
    # an explicit list of colors
    palette = ["#333432", "#3b6e8c", "#bc5b4f", "#8fa37e", "#efc758"]
    rendered = self._fetch_map_string(
        self.nybb.explore(column="BoroName", cmap=palette)
    )
    for hex_code in palette:
        assert f'"fillColor":"{hex_code}"' in rendered
    # a list shorter than the number of categories is repeated
    palette = ["#333432", "#3b6e8c"]
    rendered = self._fetch_map_string(
        self.nybb.explore(column="BoroName", cmap=palette)
    )
    for hex_code in palette:
        assert f'"fillColor":"{hex_code}"' in rendered
    # an unknown cmap name is rejected
    with pytest.raises(ValueError, match="'cmap' is invalid."):
        self.nybb.explore(column="BoroName", cmap="nonsense")
def test_categories(self):
    """Explicit 'categories' ordering, and its conflict with Categorical."""
    m = self.nybb[["BoroName", "geometry"]].explore(
        column="BoroName",
        categories=["Brooklyn", "Staten Island", "Queens", "Bronx", "Manhattan"],
    )
    rendered = self._fetch_map_string(m)
    # colors follow the order of the passed categories
    borough_colors = {
        "Bronx": "#c7c7c7",
        "Manhattan": "#9edae5",
        "Brooklyn": "#1f77b4",
        "StatenIsland": "#98df8a",
        "Queens": "#8c564b",
    }
    for borough, hex_code in borough_colors.items():
        assert f'"{borough}","__folium_color":"{hex_code}"' in rendered
    # 'categories' cannot be combined with a Categorical-dtype column
    frame = self.nybb.copy()
    frame["categorical"] = pd.Categorical(frame["BoroName"])
    with pytest.raises(ValueError, match="Cannot specify 'categories'"):
        frame.explore("categorical", categories=["Brooklyn", "Staten Island"])
def test_bool(self):
    """Boolean columns (numpy bool and pandas extension bool) map to categories."""
    frame = self.nybb.copy()
    frame["bool"] = [True, False, True, False, True]
    frame["bool_extension"] = pd.array([True, False, True, False, True])
    # both dtypes must produce the same categorical coloring
    for column in ("bool", "bool_extension"):
        rendered = self._fetch_map_string(frame.explore(column))
        assert '"__folium_color":"#9edae5","bool":true' in rendered
        assert '"__folium_color":"#1f77b4","bool":false' in rendered
def test_string(self):
    """Columns of pandas 'string' extension dtype plot as categories."""
    frame = self.nybb.copy()
    frame["string"] = pd.array([1, 2, 3, 4, 5], dtype="string")
    rendered = self._fetch_map_string(frame.explore("string"))
    assert '"__folium_color":"#9edae5","string":"5"' in rendered
def test_column_values(self):
    """
    Check that the dataframe plot method returns same values with an
    input string (column in df), pd.Series, or np.array
    """
    column_array = np.array(self.world["pop_est"])
    by_name = self.world.explore(column="pop_est")
    by_array = self.world.explore(column=column_array)
    by_series = self.world.explore(column=self.world["pop_est"])
    assert by_name.location == by_array.location == by_series.location
    # tooltip/popup fields are identical regardless of how column is passed
    fields = 'fields=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
    aliases = 'aliases=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
    for column in (column_array, self.world["pop_est"]):
        rendered = self._fetch_map_string(
            self.world.explore(column=column, tooltip=True, popup=True)
        )
        assert fields in rendered
        assert aliases in rendered
    # GeoDataframe and the given array have different number of rows
    with pytest.raises(ValueError, match="different number of rows"):
        self.world.explore(column=np.array([1, 2, 3]))
def test_no_crs(self):
    """Geometries without a CRS get no background tiles."""
    naive = self.world.copy()
    naive.crs = None
    m = naive.explore()
    assert "openstreetmap" not in m.to_dict()["children"].keys()
def test_style_kwds(self):
    """Style keywords"""
    # static style keywords are merged into every feature's style
    m = self.world.explore(
        style_kwds={"fillOpacity": 0.1, "weight": 0.5, "fillColor": "orange"}
    )
    out_str = self._fetch_map_string(m)
    assert '"fillColor":"orange","fillOpacity":0.1,"weight":0.5' in out_str
    # style keywords combine with a choropleth column
    m = self.world.explore(column="pop_est", style_kwds={"color": "black"})
    assert '"color":"black"' in self._fetch_map_string(m)
    # custom style_function - geopandas/issues/2350
    m = self.world.explore(
        style_kwds={
            "style_function": lambda x: {
                "fillColor": "red"
                if x["properties"]["gdp_md_est"] < 10**6
                else "green",
                "color": "black"
                if x["properties"]["gdp_md_est"] < 10**6
                else "white",
            }
        }
    )
    # two lines with formatting instructions from style_function.
    # make sure each passes test
    # (scan the rendered JS for the generated `return {...}` lines, strip
    # whitespace, and require each to carry one of the two color pairs)
    assert all(
        ('"fillColor":"green"' in t and '"color":"white"' in t)
        or ('"fillColor":"red"' in t and '"color":"black"' in t)
        for t in [
            "".join(line.split())
            for line in m._parent.render().split("\n")
            if "return" in line and "color" in line
        ]
    )
    # style function has to be callable
    with pytest.raises(ValueError, match="'style_function' has to be a callable"):
        self.world.explore(style_kwds={"style_function": "not callable"})
def test_tooltip(self):
    """Tooltip/popup: defaults, booleans, names, lists, ints, and kwds."""
    # default with no tooltip or popup
    m = self.world.explore()
    assert "GeoJsonTooltip" in str(m.to_dict())
    assert "GeoJsonPopup" not in str(m.to_dict())
    # True
    m = self.world.explore(tooltip=True, popup=True)
    assert "GeoJsonTooltip" in str(m.to_dict())
    assert "GeoJsonPopup" in str(m.to_dict())
    out_str = self._fetch_map_string(m)
    assert (
        'fields=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
        in out_str
    )
    assert (
        'aliases=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
        in out_str
    )
    # True choropleth
    m = self.world.explore(column="pop_est", tooltip=True, popup=True)
    assert "GeoJsonTooltip" in str(m.to_dict())
    assert "GeoJsonPopup" in str(m.to_dict())
    out_str = self._fetch_map_string(m)
    assert (
        'fields=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
        in out_str
    )
    assert (
        'aliases=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
        in out_str
    )
    # single column
    m = self.world.explore(tooltip="pop_est", popup="iso_a3")
    out_str = self._fetch_map_string(m)
    assert 'fields=["pop_est"]' in out_str
    assert 'aliases=["pop_est"]' in out_str
    assert 'fields=["iso_a3"]' in out_str
    assert 'aliases=["iso_a3"]' in out_str
    # list
    m = self.world.explore(
        tooltip=["pop_est", "continent"], popup=["iso_a3", "gdp_md_est"]
    )
    out_str = self._fetch_map_string(m)
    assert 'fields=["pop_est","continent"]' in out_str
    assert 'aliases=["pop_est","continent"]' in out_str
    assert 'fields=["iso_a3","gdp_md_est"' in out_str
    assert 'aliases=["iso_a3","gdp_md_est"]' in out_str
    # number (an int n selects the first n columns)
    m = self.world.explore(tooltip=2, popup=2)
    out_str = self._fetch_map_string(m)
    assert 'fields=["pop_est","continent"]' in out_str
    assert 'aliases=["pop_est","continent"]' in out_str
    # keywords tooltip
    m = self.world.explore(
        tooltip=True,
        popup=False,
        tooltip_kwds={"aliases": [0, 1, 2, 3, 4, 5], "sticky": False},
    )
    out_str = self._fetch_map_string(m)
    assert (
        'fields=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
        in out_str
    )
    assert "aliases=[0,1,2,3,4,5]" in out_str
    assert '"sticky":false' in out_str
    # keywords popup
    m = self.world.explore(
        tooltip=False,
        popup=True,
        popup_kwds={"aliases": [0, 1, 2, 3, 4, 5]},
    )
    out_str = self._fetch_map_string(m)
    assert (
        'fields=["pop_est","continent","name","iso_a3","gdp_md_est","range"]'
        in out_str
    )
    assert "aliases=[0,1,2,3,4,5]" in out_str
    assert "<th>${aliases[i]" in out_str
    # no labels (the <th> alias header row is omitted)
    m = self.world.explore(
        tooltip=True,
        popup=True,
        tooltip_kwds={"labels": False},
        popup_kwds={"labels": False},
    )
    out_str = self._fetch_map_string(m)
    assert "<th>${aliases[i]" not in out_str
    # named index (index name appears among the tooltip fields)
    gdf = self.nybb.set_index("BoroName")
    m = gdf.explore()
    out_str = self._fetch_map_string(m)
    assert "BoroName" in out_str
def test_escape_special_characters(self):
    """Curly braces in attribute values survive the GeoJSON templating."""
    frame = self.world.copy()
    frame["name"] = """{{{what a mess}}} they are so different."""
    rendered = self._fetch_map_string(frame.explore())
    for token in ("{{{", "}}}"):
        assert token in rendered
def test_default_markers(self):
    """Points default to small filled CircleMarkers; marker_kwds override."""
    rendered = self._fetch_map_string(self.cities.explore())
    for token in ('"radius":2', '"fill":true', "CircleMarker(latlng,opts)"):
        assert token in rendered
    rendered = self._fetch_map_string(
        self.cities.explore(marker_kwds={"radius": 5, "fill": False})
    )
    for token in ('"radius":5', '"fill":false', "CircleMarker(latlng,opts)"):
        assert token in rendered
def test_custom_markers(self):
    """marker_type accepts names and folium objects, rejects anything else."""
    cases = [
        # (marker_type, marker_kwds, expected substring in rendered map)
        ("marker", {"icon": folium.Icon(icon="star")}, ""","icon":"star","""),
        ("circle", {"fill_color": "red"}, ""","fillColor":"red","""),
        (
            folium.Circle(
                radius=4, fill_color="orange", fill_opacity=0.4, color="black", weight=1
            ),
            None,
            ""","color":"black",""",
        ),
        ("circle_marker", {"radius": 10}, ""","radius":10,"""),
    ]
    for marker_type, marker_kwds, expected in cases:
        kwargs = {"marker_type": marker_type}
        if marker_kwds is not None:
            kwargs["marker_kwds"] = marker_kwds
        assert expected in self._fetch_map_string(self.cities.explore(**kwargs))
    # unsupported marker names raise
    with pytest.raises(
        ValueError,
        match="Only 'marker', 'circle', and 'circle_marker' are supported",
    ):
        self.cities.explore(marker_type="dummy")
def test_vmin_vmax(self):
    """vmin/vmax stretch the colormap normalization beyond the data range."""
    df = self.world.copy()
    df["range"] = range(len(df))
    m = df.explore("range", vmin=-100, vmax=1000)
    out_str = self._fetch_map_string(m)
    assert 'case"176":return{"color":"#3b528b","fillColor":"#3b528b"' in out_str
    assert 'case"119":return{"color":"#414287","fillColor":"#414287"' in out_str
    assert 'case"3":return{"color":"#482173","fillColor":"#482173"' in out_str
    # test 0
    df2 = self.nybb.copy()
    df2["values"] = df2["BoroCode"] * 10.0
    m = df2[df2["values"] >= 30].explore("values", vmin=0)
    out_str = self._fetch_map_string(m)
    # folium >= 0.14 numbers the generated JS switch cases differently
    if FOLIUM_G_014:
        assert 'case"0":return{"color":"#fde725","fillColor":"#fde725"' in out_str
        assert 'case"1":return{"color":"#7ad151","fillColor":"#7ad151"' in out_str
        assert 'default:return{"color":"#22a884","fillColor":"#22a884"' in out_str
    else:
        assert 'case"1":return{"color":"#7ad151","fillColor":"#7ad151"' in out_str
        assert 'case"2":return{"color":"#22a884","fillColor":"#22a884"' in out_str
        assert 'default:return{"color":"#fde725","fillColor":"#fde725"' in out_str
    # negative values with vmax pinned at 0
    df2["values_negative"] = df2["BoroCode"] * -10.0
    m = df2[df2["values_negative"] <= 30].explore("values_negative", vmax=0)
    out_str = self._fetch_map_string(m)
    assert 'case"1":return{"color":"#414487","fillColor":"#414487"' in out_str
    assert 'case"2":return{"color":"#2a788e","fillColor":"#2a788e"' in out_str
def test_missing_vals(self):
    """NaN values get a null fill by default, or the missing_kwds color."""
    for column in ("continent", "pop_est"):
        rendered = self._fetch_map_string(self.missing.explore(column))
        assert '"fillColor":null' in rendered
        rendered = self._fetch_map_string(
            self.missing.explore(column, missing_kwds={"color": "red"})
        )
        assert '"fillColor":"red"' in rendered
def test_categorical_legend(self):
    """Legend entries for categorical maps, including missing values."""
    rendered = self._fetch_map_string(self.world.explore("continent", legend=True))
    legend_items = [
        "#1f77b4'></span>Africa",
        "#ff7f0e'></span>Antarctica",
        "#98df8a'></span>Asia",
        "#9467bd'></span>Europe",
        "#c49c94'></span>NorthAmerica",
        "#7f7f7f'></span>Oceania",
        "#dbdb8d'></span>Sevenseas(openocean)",
        "#9edae5'></span>SouthAmerica",
    ]
    for item in legend_items:
        assert item in rendered
    # missing values get their own legend entry
    rendered = self._fetch_map_string(
        self.missing.explore("continent", legend=True, missing_kwds={"color": "red"})
    )
    assert "red'></span>NaN" in rendered
def test_colorbar(self):
    """Branca colorbar legend: caption, missing values, and class scaling."""
    def quoted_in(find, s):
        # branca changed quote style between versions; accept either
        return find in s or find.replace("'", '"') in s
    m = self.world.explore("range", legend=True)
    out_str = self._fetch_map_string(m)
    assert "attr(\"id\",'legend')" in out_str
    assert quoted_in("text('range')", out_str)
    # custom caption via legend_kwds
    m = self.world.explore(
        "range", legend=True, legend_kwds={"caption": "my_caption"}
    )
    out_str = self._fetch_map_string(m)
    assert "attr(\"id\",'legend')" in out_str
    assert quoted_in("text('my_caption')", out_str)
    # missing values get their own legend entry
    m = self.missing.explore("pop_est", legend=True, missing_kwds={"color": "red"})
    out_str = self._fetch_map_string(m)
    assert "red'></span>NaN" in out_str
    # do not scale legend
    m = self.world.explore(
        "pop_est",
        legend=True,
        legend_kwds={"scale": False},
        scheme="Headtailbreaks",
    )
    out_str = self._fetch_map_string(m)
    # scale=False: each class color appears an equal number of times
    assert out_str.count("#440154ff") == 100
    assert out_str.count("#3b528bff") == 100
    assert out_str.count("#21918cff") == 100
    assert out_str.count("#5ec962ff") == 100
    assert out_str.count("#fde725ff") == 100
    # scale legend accordingly
    m = self.world.explore(
        "pop_est",
        legend=True,
        scheme="Headtailbreaks",
    )
    out_str = self._fetch_map_string(m)
    assert out_str.count("#440154ff") == 16
    assert out_str.count("#3b528bff") == 50
    assert out_str.count("#21918cff") == 138
    assert out_str.count("#5ec962ff") == 290
    assert out_str.count("#fde725ff") == 6
    # discrete cmap
    m = self.world.explore("pop_est", legend=True, cmap="Pastel2")
    out_str = self._fetch_map_string(m)
    assert out_str.count("b3e2cdff") == 63
    assert out_str.count("fdcdacff") == 62
    assert out_str.count("cbd5e8ff") == 63
    assert out_str.count("f4cae4ff") == 62
    assert out_str.count("e6f5c9ff") == 62
    assert out_str.count("fff2aeff") == 63
    assert out_str.count("f1e2ccff") == 62
    assert out_str.count("ccccccff") == 63
@pytest.mark.skipif(not BRANCA_05, reason="requires branca >= 0.5.0")
def test_colorbar_max_labels(self):
    """legend_kwds={'max_labels': n} limits the colorbar tick labels."""
    import re
    # linear
    m = self.world.explore("pop_est", legend_kwds={"max_labels": 3})
    out_str = self._fetch_map_string(m)
    # extract the d3 tickValues([...]) call from the rendered JS
    tick_str = re.search(r"tickValues\(\[[\',\,\.,0-9]*\]\)", out_str).group(0)
    assert (
        tick_str.replace(",''", "")
        == "tickValues([140.0,471386328.07843137,942772516.1568627])"
    )
    # scheme (suppressed labels are rendered as empty strings)
    m = self.world.explore(
        "pop_est", scheme="headtailbreaks", legend_kwds={"max_labels": 3}
    )
    out_str = self._fetch_map_string(m)
    assert "tickValues([140.0,'',184117213.1818182,'',1382066377.0,''])" in out_str
    # short cmap
    m = self.world.explore("pop_est", legend_kwds={"max_labels": 3}, cmap="tab10")
    out_str = self._fetch_map_string(m)
    tick_str = re.search(r"tickValues\(\[[\',\,\.,0-9]*\]\)", out_str).group(0)
    assert (
        tick_str
        == "tickValues([140.0,'','','',559086084.0,'','','',1118172028.0,'','',''])"
    )
def test_xyzservices_providers(self):
    """xyzservices provider objects can be passed directly as tiles."""
    xyzservices = pytest.importorskip("xyzservices")
    rendered = self._fetch_map_string(
        self.nybb.explore(tiles=xyzservices.providers.CartoDB.PositronNoLabels)
    )
    expected = [
        '"https://a.basemaps.cartocdn.com/light_nolabels/{z}/{x}/{y}{r}.png"',
        'attribution":"\\u0026copy;\\u003cahref=\\"https://www.openstreetmap.org',
        '"maxNativeZoom":20,"maxZoom":20,"minZoom":0',
    ]
    for token in expected:
        assert token in rendered
def test_xyzservices_query_name(self):
    """A free-form provider name string is resolved via xyzservices."""
    pytest.importorskip("xyzservices")
    rendered = self._fetch_map_string(
        self.nybb.explore(tiles="CartoDB Positron No Labels")
    )
    expected = [
        '"https://a.basemaps.cartocdn.com/light_nolabels/{z}/{x}/{y}{r}.png"',
        'attribution":"\\u0026copy;\\u003cahref=\\"https://www.openstreetmap.org',
        '"maxNativeZoom":20,"maxZoom":20,"minZoom":0',
    ]
    for token in expected:
        assert token in rendered
def test_xyzservices_providers_min_zoom_override(self):
    """min_zoom overrides the provider's default minimum zoom."""
    xyzservices = pytest.importorskip("xyzservices")
    rendered = self._fetch_map_string(
        self.nybb.explore(
            tiles=xyzservices.providers.CartoDB.PositronNoLabels, min_zoom=3
        )
    )
    assert '"maxNativeZoom":20,"maxZoom":20,"minZoom":3' in rendered
def test_xyzservices_providers_max_zoom_override(self):
    """max_zoom overrides the provider's default maximum zoom."""
    xyzservices = pytest.importorskip("xyzservices")
    rendered = self._fetch_map_string(
        self.nybb.explore(
            tiles=xyzservices.providers.CartoDB.PositronNoLabels, max_zoom=12
        )
    )
    assert '"maxNativeZoom":12,"maxZoom":12,"minZoom":0' in rendered
def test_xyzservices_providers_both_zooms_override(self):
    """min_zoom and max_zoom can be overridden at the same time."""
    xyzservices = pytest.importorskip("xyzservices")
    rendered = self._fetch_map_string(
        self.nybb.explore(
            tiles=xyzservices.providers.CartoDB.PositronNoLabels,
            min_zoom=3,
            max_zoom=12,
        )
    )
    assert '"maxNativeZoom":12,"maxZoom":12,"minZoom":3' in rendered
def test_linearrings(self):
    """Exterior LinearRings render as one LineString per ring."""
    exterior_rings = self.nybb.explode(index_parts=True).exterior
    rendered = self._fetch_map_string(exterior_rings.explore())
    assert rendered.count("LineString") == len(exterior_rings)
def test_mapclassify_categorical_legend(self):
    """Non-colorbar (categorical-style) legend for classified maps."""
    m = self.missing.explore(
        column="pop_est",
        legend=True,
        scheme="naturalbreaks",
        missing_kwds={"color": "red", "label": "missing"},
        legend_kwds={"colorbar": False, "interval": True},
    )
    out_str = self._fetch_map_string(m)
    # interval=True renders closed/open interval notation per class
    strings = [
        "[140.00,21803000.00]",
        "(21803000.00,66834405.00]",
        "(66834405.00,163046161.00]",
        "(163046161.00,328239523.00]",
        "(328239523.00,1397715000.00]",
        "missing",
    ]
    for s in strings:
        assert s in out_str
    # interval=False
    m = self.missing.explore(
        column="pop_est",
        legend=True,
        scheme="naturalbreaks",
        missing_kwds={"color": "red", "label": "missing"},
        legend_kwds={"colorbar": False, "interval": False},
    )
    out_str = self._fetch_map_string(m)
    strings = [
        ">140.00,21803000.00",
        ">21803000.00,66834405.00",
        ">66834405.00,163046161.00",
        ">163046161.00,328239523.00",
        ">328239523.00,1397715000.00",
        "missing",
    ]
    for s in strings:
        assert s in out_str
    # custom labels replace the interval text entirely
    m = self.world.explore(
        column="pop_est",
        legend=True,
        scheme="naturalbreaks",
        k=5,
        legend_kwds={"colorbar": False, "labels": ["s", "m", "l", "xl", "xxl"]},
    )
    out_str = self._fetch_map_string(m)
    strings = [">s<", ">m<", ">l<", ">xl<", ">xxl<"]
    for s in strings:
        assert s in out_str
    # fmt controls the number formatting of the class edges
    m = self.missing.explore(
        column="pop_est",
        legend=True,
        scheme="naturalbreaks",
        missing_kwds={"color": "red", "label": "missing"},
        legend_kwds={"colorbar": False, "fmt": "{:.0f}"},
    )
    out_str = self._fetch_map_string(m)
    strings = [
        ">140,21803000",
        ">21803000,66834405",
        ">66834405,163046161",
        ">163046161,328239523",
        ">328239523,1397715000",
        "missing",
    ]
    for s in strings:
        assert s in out_str
def test_given_m(self):
    """Geometry is drawn onto a given folium.Map without altering it."""
    existing_map = folium.Map()
    self.nybb.explore(m=existing_map, tooltip=False, highlight=False)
    rendered = self._fetch_map_string(existing_map)
    # all five boroughs were added to the existing map
    assert rendered.count("BoroCode") == 5
    # the pre-existing map settings are untouched
    assert existing_map.options["zoom"] == 1
def test_highlight(self):
    """Hover highlight styling: default opacity and custom keywords."""
    rendered = self._fetch_map_string(self.nybb.explore(highlight=True))
    assert '"fillOpacity":0.75' in rendered
    rendered = self._fetch_map_string(
        self.nybb.explore(
            highlight=True, highlight_kwds={"fillOpacity": 1, "color": "red"}
        )
    )
    assert '{"color":"red","fillOpacity":1}' in rendered
def test_custom_colormaps(self):
    """cmap accepts a branca StepColormap, a callable, or an mpl Colormap."""
    # branca StepColormap
    step = StepColormap(["green", "yellow", "red"], vmin=0, vmax=100000000)
    m = self.world.explore("pop_est", cmap=step, tooltip=["name"], legend=True)
    strings = [
        'fillColor":"#008000ff"',  # Green
        '"fillColor":"#ffff00ff"',  # Yellow
        '"fillColor":"#ff0000ff"',  # Red
    ]
    out_str = self._fetch_map_string(m)
    for s in strings:
        assert s in out_str
    # exact counts pin the per-feature class assignment
    assert out_str.count("008000ff") == 304
    assert out_str.count("ffff00ff") == 188
    assert out_str.count("ff0000ff") == 191
    # Using custom function colormap
    def my_color_function(field):
        """Maps low values to green and high values to red."""
        if field > 100000000:
            return "#ff0000"
        else:
            return "#008000"
    m = self.world.explore("pop_est", cmap=my_color_function, legend=False)
    strings = [
        '"color":"#ff0000","fillColor":"#ff0000"',
        '"color":"#008000","fillColor":"#008000"',
    ]
    for s in strings:
        assert s in self._fetch_map_string(m)
    # matplotlib.Colormap
    cmap = colors.ListedColormap(["red", "green", "blue", "white", "black"])
    m = self.nybb.explore("BoroName", cmap=cmap)
    strings = [
        '"fillColor":"#ff0000"',  # Red
        '"fillColor":"#008000"',  # Green
        '"fillColor":"#0000ff"',  # Blue
        '"fillColor":"#ffffff"',  # White
        '"fillColor":"#000000"',  # Black
    ]
    out_str = self._fetch_map_string(m)
    for s in strings:
        assert s in out_str
def test_multiple_geoseries(self):
    """
    Additional GeoSeries need to be removed as they cannot be converted to GeoJSON
    """
    # work on a copy: the original assigned to self.nybb directly, mutating
    # the shared fixture and leaking extra geometry columns into other tests
    # (every other mutating test in this class uses .copy() as well)
    gdf = self.nybb.copy()
    gdf["boundary"] = gdf.boundary
    gdf["centroid"] = gdf.centroid
    # smoke test: explore() must not raise with multiple geometry columns
    gdf.explore()
def test_map_kwds(self):
    """folium/leaflet Map() options can be passed through map_kwds."""
    def assert_options(folium_map):
        # all three options must appear in the rendered leaflet setup
        rendered = self._fetch_map_string(folium_map)
        assert "zoomControl:false" in rendered
        assert "dragging:false" in rendered
        assert "scrollWheelZoom:false" in rendered
    assert_options(
        self.world.explore(
            zoom_control=False,
            map_kwds={"dragging": False, "scrollWheelZoom": False},
        )
    )
    # zoom_control has a dedicated keyword and may not appear in map_kwds
    with pytest.raises(
        ValueError, match="'zoom_control' cannot be specified in 'map_kwds'"
    ):
        self.world.explore(
            map_kwds={
                "dragging": False,
                "scrollWheelZoom": False,
                "zoom_control": False,
            }
        )

View File

@@ -0,0 +1,588 @@
"""
This file contains a minimal set of tests for compliance with the extension
array interface test suite (by inheriting the pandas test suite), and should
contain no other tests.
Other tests (eg related to the spatial functionality or integration
with GeoSeries/GeoDataFrame) should be added to test_array.py and others.
The tests in this file are inherited from the BaseExtensionTests, and only
minimal tweaks should be applied to get the tests passing (by overwriting a
parent method).
A set of fixtures are defined to provide data for the tests (the fixtures
expected to be available to pytest by the inherited pandas tests).
"""
import operator
import numpy as np
from numpy.testing import assert_array_equal
import pandas as pd
from pandas.testing import assert_series_equal
from pandas.tests.extension import base as extension_tests
import shapely.geometry
from shapely.geometry import Point
from geopandas.array import GeometryArray, GeometryDtype, from_shapely
from geopandas._compat import ignore_shapely2_warnings, SHAPELY_GE_20, PANDAS_GE_15
import pytest
# -----------------------------------------------------------------------------
# Compat with extension tests in older pandas versions
# -----------------------------------------------------------------------------
# Reusable markers for skipping inherited pandas tests that do not apply
# to geometry arrays.
not_yet_implemented = pytest.mark.skip(reason="Not yet implemented")
no_minmax = pytest.mark.skip(reason="Min/max not supported")
requires_shapely2 = pytest.mark.skipif(
    not SHAPELY_GE_20, reason="Requires hashable geometries"
)
# -----------------------------------------------------------------------------
# Required fixtures
# -----------------------------------------------------------------------------
@pytest.fixture
def dtype():
    """A fixture providing the ExtensionDtype to validate."""
    # GeometryDtype is the registered pandas extension dtype for geometries
    return GeometryDtype()
def make_data():
    """Build a length-100 GeometryArray of distinct points (i, i)."""
    values = np.empty(100, dtype=object)
    # assign element-wise so shapely objects are not unpacked into coords
    with ignore_shapely2_warnings():
        values[:] = [Point(coord, coord) for coord in range(100)]
    return from_shapely(values)
@pytest.fixture
def data():
    """Length-100 array for this type.
    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    # make_data produces distinct, non-missing Point(i, i) geometries
    return make_data()
@pytest.fixture
def data_for_twos():
    """Length-2 array in which all the elements are two."""
    # not meaningful for geometries (there is no numeric "two"); inherited
    # tests requesting this fixture error out / are skipped upstream
    raise NotImplementedError
@pytest.fixture
def data_missing():
    """Length-2 array with [NA, Valid]"""
    # None is the missing-value sentinel for GeometryArray
    return from_shapely([None, Point(1, 1)])
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture giving 'data' and 'data_missing'"""
    return data if request.param == "data" else data_missing
@pytest.fixture
def data_repeated(data):
    """
    Generate many datasets.
    Parameters
    ----------
    data : fixture implementing `data`
    Returns
    -------
    Callable[[int], Generator]:
        A callable that takes a `count` argument and
        returns a generator yielding `count` datasets.
    """
    def make(count):
        # each iteration yields the same underlying array object
        return (data for _ in range(count))
    return make
@pytest.fixture
def data_for_sorting():
    """Length-3 array with a known sort order.
    This should be three items [B, C, A] with A < B < C.
    """
    ordered_as_b_c_a = [Point(0, 1), Point(1, 1), Point(0, 0)]
    return from_shapely(ordered_as_b_c_a)
@pytest.fixture
def data_missing_for_sorting():
    """Length-3 array with a known sort order.
    This should be three items [B, NA, A] with A < B and NA missing.
    """
    ordered_as_b_na_a = [Point(1, 2), None, Point(0, 0)]
    return from_shapely(ordered_as_b_na_a)
@pytest.fixture
def na_cmp():
    """Binary operator for comparing NA values.
    Should return a function of two arguments that returns
    True if both arguments are (scalar) NA for your type.
    GeometryArray uses ``None`` as its scalar NA, so compare by identity.
    """
    return lambda x, y: x is None and y is None
@pytest.fixture
def na_value():
    """The scalar missing value for this type. Default 'None'"""
    # GeometryArray represents missing geometries as None
    return None
@pytest.fixture
def data_for_grouping():
    """Data for factorization, grouping, and unique tests.
    Expected to be like [B, B, NA, NA, A, A, B, C]
    Where A < B < C and NA is missing
    """
    a = Point(0, 0)
    b = Point(1, 1)
    c = Point(2, 2)
    return from_shapely([b, b, None, None, a, a, b, c])
@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Whether to box the data in a Series"""
    # runs each inherited test on the raw array and on pd.Series(array)
    return request.param
@pytest.fixture(
    params=[
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: pd.Series([1] * len(x)),
        lambda x: x,
    ],
    ids=["scalar", "list", "series", "object"],
)
def groupby_apply_op(request):
    """
    Functions to test groupby.apply().

    The ids name the kind of object each callable returns for a group.
    """
    return request.param
@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Boolean fixture to support Series and Series.to_frame() comparison testing.
    """
    # True runs the inherited test on a one-column DataFrame instead
    return request.param
@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Boolean fixture to support arr and Series(arr) comparison testing.
    """
    # True wraps the array in a pd.Series before comparing
    return request.param
@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Boolean fixture to support comparison testing of ExtensionDtype array
    and numpy array.
    """
    # True converts through np.array before the comparison
    return request.param
@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
    """
    Parametrized fixture giving method parameters 'ffill' and 'bfill' for
    Series.fillna(method=<method>) testing.
    """
    # note: the method-based fillna tests are skipped in TestMissing below
    return request.param
@pytest.fixture(params=[True, False])
def as_array(request):
    """
    Boolean fixture to support ExtensionDtype _from_sequence method testing.
    """
    # True feeds _from_sequence a numpy array instead of a list
    return request.param
@pytest.fixture
def invalid_scalar(data):
    """
    A scalar that *cannot* be held by this ExtensionArray.
    The default should work for most subclasses, but is not guaranteed.
    If the array can hold any item (i.e. object dtype), then use pytest.skip.
    """
    # a bare object instance can never be interpreted as a geometry
    return object.__new__(object)
# Fixtures defined in pandas/conftest.py that are also needed: defining them
# here instead of importing for compatibility
@pytest.fixture(
    params=["sum", "max", "min", "mean", "prod", "std", "var", "median", "kurt", "skew"]
)
def all_numeric_reductions(request):
    """
    Fixture for numeric reduction names

    None of these apply to geometries; TestReduce below inherits the
    no-reduce variant of the pandas base tests.
    """
    return request.param
@pytest.fixture(params=["all", "any"])
def all_boolean_reductions(request):
    """
    Fixture for boolean reduction names
    """
    # any/all behaviour itself is covered in test_pandas_methods
    return request.param
# only == and != are supported for GeometryArray
# @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
@pytest.fixture(params=["__eq__", "__ne__"])
def all_compare_operators(request):
    """
    Fixture for dunder names for common compare operations

    Only ``__eq__`` and ``__ne__`` are parametrized here; ordering
    comparisons are not defined for geometries.
    """
    return request.param
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Simple fixture for testing keys in sorting methods.
    Tests None (no key) and the identity key.
    """
    # consumed by the inherited pandas sorting tests
    return request.param
# -----------------------------------------------------------------------------
# Inherited tests
# -----------------------------------------------------------------------------
class TestDtype(extension_tests.BaseDtypeTests):
    # additional tests
    def test_array_type_with_arg(self, data, dtype):
        # the dtype must advertise GeometryArray as its array type
        assert dtype.construct_array_type() is GeometryArray
    def test_registry(self, data, dtype):
        # astype("geometry") resolves the dtype through the pandas registry
        s = pd.Series(np.asarray(data), dtype=object)
        result = s.astype("geometry")
        assert isinstance(result.array, GeometryArray)
        expected = pd.Series(data)
        assert_series_equal(result, expected)
class TestInterface(extension_tests.BaseInterfaceTests):
    def test_array_interface(self, data):
        # we are overriding this base test because the creation of `expected`
        # potentially doesn't work for shapely geometries
        # TODO can be removed with Shapely 2.0
        result = np.array(data)
        assert result[0] == data[0]
        result = np.array(data, dtype=object)
        # expected = np.array(list(data), dtype=object)
        # fill element-wise so shapely objects are not iterated into coords
        expected = np.empty(len(data), dtype=object)
        with ignore_shapely2_warnings():
            expected[:] = list(data)
        assert_array_equal(result, expected)
    def test_contains(self, data, data_missing):
        # overridden due to the inconsistency between
        # GeometryDtype.na_value = np.nan
        # and None being used as NA in array
        # ensure data without missing values
        data = data[~data.isna()]
        # first elements are non-missing
        assert data[0] in data
        assert data_missing[0] in data_missing
        assert None in data_missing
        assert None not in data
        # a different NA sentinel must not be considered contained
        assert pd.NaT not in data_missing
class TestConstructors(extension_tests.BaseConstructorsTests):
    # inherited pandas constructor tests pass unchanged
    pass
class TestReshaping(extension_tests.BaseReshapingTests):
    # inherited pandas reshaping tests pass unchanged
    pass
class TestGetitem(extension_tests.BaseGetitemTests):
    # inherited pandas __getitem__ tests pass unchanged
    pass
class TestSetitem(extension_tests.BaseSetitemTests):
    # inherited pandas __setitem__ tests pass unchanged
    pass
class TestMissing(extension_tests.BaseMissingTests):
    def test_fillna_series(self, data_missing):
        # overridden to use geometry-valued fill values/series
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)
        # Fill with a scalar
        result = ser.fillna(fill_value)
        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
        assert_series_equal(result, expected)
        # Fill with a series
        filler = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(1, 1),
                    shapely.geometry.Point(2, 2),
                ],
            )
        )
        result = ser.fillna(filler)
        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
        assert_series_equal(result, expected)
        # Fill with a series not affecting the missing values
        # (filler index [10, 11] does not align with ser's [0, 1])
        filler = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(2, 2),
                    shapely.geometry.Point(1, 1),
                ]
            ),
            index=[10, 11],
        )
        result = ser.fillna(filler)
        assert_series_equal(result, ser)
        # More `GeoSeries.fillna` testcases are in
        # `geopandas\tests\test_pandas_methods.py::test_fillna_scalar`
        # and `geopandas\tests\test_pandas_methods.py::test_fillna_series`.
    @pytest.mark.skip("fillna method not supported")
    def test_fillna_limit_pad(self, data_missing):
        pass
    @pytest.mark.skip("fillna method not supported")
    def test_fillna_limit_backfill(self, data_missing):
        pass
    @pytest.mark.skip("fillna method not supported")
    def test_fillna_series_method(self, data_missing, method):
        pass
    @pytest.mark.skip("fillna method not supported")
    def test_fillna_no_op_returns_copy(self, data):
        pass
class TestReduce(extension_tests.BaseNoReduceTests):
    """Geometry arrays support no numeric reductions; boolean ones live elsewhere."""

    @pytest.mark.skip("boolean reduce (any/all) tested in test_pandas_methods")
    def test_reduce_series_boolean(self):
        pass
# Dunder names fed to the all_arithmetic_operators fixture below.
# __sub__/__rsub__ are deliberately excluded (see the fixture docstring).
_all_arithmetic_operators = [
    "__add__",
    "__radd__",
    # '__sub__', '__rsub__',
    "__mul__",
    "__rmul__",
    "__floordiv__",
    "__rfloordiv__",
    "__truediv__",
    "__rtruediv__",
    "__pow__",
    "__rpow__",
    "__mod__",
    "__rmod__",
]
@pytest.fixture(params=_all_arithmetic_operators)
def all_arithmetic_operators(request):
    """
    Fixture for dunder names for common arithmetic operations
    Adapted to exclude __sub__, as this is implemented as "difference".
    """
    # Shadows the pandas-provided fixture of the same name.
    return request.param
# an inherited test from pandas creates a Series from a list of geometries, which
# triggers the warning from Shapely, out of control of GeoPandas, so ignoring here
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestArithmeticOps(extension_tests.BaseArithmeticOpsTests):
    """Inherited arithmetic-op tests; divmod/add combinations don't apply."""

    @pytest.mark.skip(reason="not applicable")
    def test_divmod_series_array(self, data, data_for_twos):
        pass

    @pytest.mark.skip(reason="not applicable")
    def test_add_series_with_extension_array(self, data):
        pass
# an inherited test from pandas creates a Series from a list of geometries, which
# triggers the warning from Shapely, out of control of GeoPandas, so ignoring here
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestComparisonOps(extension_tests.BaseComparisonOpsTests):
    """Comparison-op tests checked against Series.combine as the reference."""

    def _compare_other(self, s, data, op_name, other):
        # e.g. "__eq__" -> operator.eq; compare the vectorized result with
        # the element-wise combine() of the same operator.
        op = getattr(operator, op_name.strip("_"))
        result = op(s, other)
        expected = s.combine(other, op)
        assert_series_equal(result, expected)

    def test_compare_scalar(self, data, all_compare_operators):
        op_name = all_compare_operators
        s = pd.Series(data)
        self._compare_other(s, data, op_name, data[0])

    def test_compare_array(self, data, all_compare_operators):
        op_name = all_compare_operators
        s = pd.Series(data)
        # Broadcast the first element so every comparison has a counterpart.
        other = pd.Series([data[0]] * len(data))
        self._compare_other(s, data, op_name, other)
class TestMethods(extension_tests.BaseMethodsTests):
    """Inherited method tests; unsupported reductions/ops are skipped."""

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        pass

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    def test_value_counts_with_normalize(self, data):
        pass

    @requires_shapely2
    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        super().test_sort_values_frame(data_for_sorting, ascending)

    @pytest.mark.skip(reason="searchsorted not supported")
    def test_searchsorted(self, data_for_sorting, as_series):
        pass

    @not_yet_implemented
    def test_combine_le(self):
        pass

    @pytest.mark.skip(reason="addition not supported")
    def test_combine_add(self):
        pass

    @not_yet_implemented
    def test_fillna_length_mismatch(self, data_missing):
        msg = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg):
            data_missing.fillna(data_missing.take([1]))

    @no_minmax
    def test_argmin_argmax(self):
        pass

    @no_minmax
    def test_argmin_argmax_empty_array(self):
        pass

    @no_minmax
    def test_argmin_argmax_all_na(self):
        pass

    @no_minmax
    def test_argreduce_series(self):
        pass

    @no_minmax
    def test_argmax_argmin_no_skipna_notimplemented(self):
        pass
class TestCasting(extension_tests.BaseCastingTests):
    """Run the inherited casting tests unchanged."""

    pass
class TestGroupby(extension_tests.BaseGroupbyTests):
    """Inherited groupby tests; several require Shapely 2 (see decorator)."""

    @requires_shapely2
    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super().test_groupby_extension_agg(as_index, data_for_grouping)

    @requires_shapely2
    def test_groupby_extension_transform(self, data_for_grouping):
        super().test_groupby_extension_transform(data_for_grouping)

    @requires_shapely2
    @pytest.mark.parametrize(
        "op",
        [
            lambda x: 1,
            lambda x: [1] * len(x),
            lambda x: pd.Series([1] * len(x)),
            lambda x: x,
        ],
        ids=["scalar", "list", "series", "object"],
    )
    def test_groupby_extension_apply(self, data_for_grouping, op):
        super().test_groupby_extension_apply(data_for_grouping, op)
class TestPrinting(extension_tests.BasePrintingTests):
    """Run the inherited repr/printing tests unchanged."""

    pass
# Parsing (e.g. from strings) is not implemented for geometry arrays yet.
@not_yet_implemented
class TestParsing(extension_tests.BaseParsingTests):
    pass

View File

@@ -0,0 +1,171 @@
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
from geopandas.tools import geocode, reverse_geocode
from geopandas.tools.geocoding import _prepare_geocode_result
from geopandas.tests.util import assert_geoseries_equal, mock
from pandas.testing import assert_series_equal
from geopandas.testing import assert_geodataframe_equal
import pytest
geopy = pytest.importorskip("geopy")
class ForwardMock(mock.MagicMock):
    """
    Stand-in for a forward geocoding callable.

    Every call returns the address it was handed together with a
    coordinate pair (p, p + 0.5), where p starts at 0.0 and grows by
    one on each invocation.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Counter driving the fake coordinates; advanced on every call.
        self._n = 0.0

    def __call__(self, *args, **kwargs):
        address = args[0]
        coords = (self._n, self._n + 0.5)
        self._n += 1
        # MagicMock.__call__ records the call and hands back return_value.
        self.return_value = address, coords
        return super().__call__(*args, **kwargs)
class ReverseMock(mock.MagicMock):
    """
    Stand-in for a reverse geocoding callable.

    Every call returns the string 'address{p}' together with the point
    it was handed, where p starts at 0 and grows by one on each
    invocation.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Integer counter baked into the fake address label.
        self._n = 0

    def __call__(self, *args, **kwargs):
        label = "address{0}".format(self._n)
        self._n += 1
        # MagicMock.__call__ records the call and hands back return_value.
        self.return_value = label, args[0]
        return super().__call__(*args, **kwargs)
@pytest.fixture
def locations():
    """Two hard-coded street addresses used as forward-geocoding input."""
    return ["260 Broadway, New York, NY", "77 Massachusetts Ave, Cambridge, MA"]
@pytest.fixture
def points():
    """Two hard-coded Points used as reverse-geocoding input."""
    return [Point(-71.0597732, 42.3584308), Point(-77.0365305, 38.8977332)]
def test_prepare_result():
    """_prepare_geocode_result builds a 4326 GeoDataFrame with lon/lat swapped."""
    # Calls _prepare_result with sample results from the geocoder call
    # loop
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    p1 = Point(-23.4, 56.7)
    d = {"a": ("address0", p0.coords[0]), "b": ("address1", p1.coords[0])}
    df = _prepare_geocode_result(d)
    assert type(df) is GeoDataFrame
    assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df
    coords = df.loc["a"]["geometry"].coords[0]
    test = p0.coords[0]
    # Output from the df should be lon/lat
    assert coords[0] == pytest.approx(test[1])
    assert coords[1] == pytest.approx(test[0])
    coords = df.loc["b"]["geometry"].coords[0]
    test = p1.coords[0]
    assert coords[0] == pytest.approx(test[1])
    assert coords[1] == pytest.approx(test[0])
def test_prepare_result_none():
    """A (None, None) geocoder result becomes an empty geometry + None address."""
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    d = {"a": ("address0", p0.coords[0]), "b": (None, None)}
    df = _prepare_geocode_result(d)
    assert type(df) is GeoDataFrame
    assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df
    row = df.loc["b"]
    # The shapely.geometry.Point() is actually a GeometryCollection, and thus
    # gets converted to that in conversion to pygeos. When converting back
    # on access, you now get a GeometryCollection object instead of Point,
    # which has no coords
    # see https://github.com/Toblerity/Shapely/issues/742/#issuecomment-545296708
    # TODO we should probably replace this with a missing value instead of point?
    # assert len(row["geometry"].coords) == 0
    assert row["geometry"].is_empty
    assert row["address"] is None
# Both a missing result (None) and an empty (None, None) tuple should yield
# the same empty-geometry row.
@pytest.mark.parametrize("geocode_result", (None, (None, None)))
def test_prepare_geocode_result_when_result_is(geocode_result):
    result = {0: geocode_result}
    expected_output = GeoDataFrame(
        {"geometry": [Point()], "address": [None]},
        crs="EPSG:4326",
    )
    output = _prepare_geocode_result(result)
    assert_geodataframe_equal(output, expected_output)
def test_bad_provider_forward():
    """An unknown provider name raises geopy's GeocoderNotFound on geocode()."""
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(["cambridge, ma"], "badprovider")
def test_bad_provider_reverse():
    """An unknown provider name raises GeocoderNotFound on reverse_geocode()."""
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        reverse_geocode([Point(0, 0)], "badprovider")
def test_forward(locations, points):
    """geocode() accepts both a provider name and class; results match the mock."""
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        # Patch the geocoder so no network calls are made; ForwardMock yields
        # (address, (p, p + 0.5)) with p incrementing per call.
        with mock.patch("geopy.geocoders.Photon.geocode", ForwardMock()) as m:
            g = geocode(locations, provider=provider, timeout=2)
            assert len(locations) == m.call_count
        n = len(locations)
        assert isinstance(g, GeoDataFrame)
        # Mock returns lat/lon = (p, p + 0.5); expected Points are lon/lat.
        expected = GeoSeries(
            [Point(float(x) + 0.5, float(x)) for x in range(n)], crs="EPSG:4326"
        )
        assert_geoseries_equal(expected, g["geometry"])
        assert_series_equal(g["address"], pd.Series(locations, name="address"))
def test_reverse(locations, points):
    """reverse_geocode() round-trips the points and numbers the mock addresses."""
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        # Patch the geocoder so no network calls are made; ReverseMock yields
        # ('address{p}', point) with p incrementing per call.
        with mock.patch("geopy.geocoders.Photon.reverse", ReverseMock()) as m:
            g = reverse_geocode(points, provider=provider, timeout=2)
            assert len(points) == m.call_count
        assert isinstance(g, GeoDataFrame)
        expected = GeoSeries(points, crs="EPSG:4326")
        assert_geoseries_equal(expected, g["geometry"])
        address = pd.Series(
            ["address" + str(x) for x in range(len(points))], name="address"
        )
        assert_series_equal(g["address"], address)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,579 @@
import json
import os
import random
import re
import shutil
import tempfile
import warnings
import numpy as np
from numpy.testing import assert_array_equal
import pandas as pd
from pandas.testing import assert_index_equal
from pyproj import CRS
from shapely.geometry import (
GeometryCollection,
LineString,
MultiLineString,
MultiPoint,
MultiPolygon,
Point,
Polygon,
)
from shapely.geometry.base import BaseGeometry
from geopandas import GeoSeries, GeoDataFrame, read_file, datasets, clip
from geopandas._compat import ignore_shapely2_warnings
from geopandas.array import GeometryArray, GeometryDtype
from geopandas.testing import assert_geoseries_equal, geom_almost_equals
from geopandas.tests.util import geom_equals
from pandas.testing import assert_series_equal
import pytest
class TestSeries:
    """Behavioural tests for GeoSeries: alignment, CRS, I/O, slicing, WKB/WKT."""

    def setup_method(self):
        # Shared fixtures: two triangles (t1, t2), the unit square (sq),
        # label-indexed copies (a1, a2), two landmark points, and two lines.
        self.tempdir = tempfile.mkdtemp()
        self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g1 = GeoSeries([self.t1, self.sq])
        self.g2 = GeoSeries([self.sq, self.t1])
        self.g3 = GeoSeries([self.t1, self.t2])
        self.g3.crs = "epsg:4326"
        self.g4 = GeoSeries([self.t2, self.t1])
        self.na = GeoSeries([self.t1, self.t2, Polygon()])
        self.na_none = GeoSeries([self.t1, self.t2, None])
        self.a1 = self.g1.copy()
        self.a1.index = ["A", "B"]
        self.a2 = self.g2.copy()
        self.a2.index = ["B", "C"]
        self.esb = Point(-73.9847, 40.7484)
        self.sol = Point(-74.0446, 40.6893)
        self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326")
        self.l1 = LineString([(0, 0), (0, 1), (1, 1)])
        self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g5 = GeoSeries([self.l1, self.l2])

    def teardown_method(self):
        # Remove the temp dir used by the file round-trip tests.
        shutil.rmtree(self.tempdir)

    def test_copy(self):
        gc = self.g3.copy()
        assert type(gc) is GeoSeries
        assert self.g3.name == gc.name
        assert self.g3.crs == gc.crs

    def test_in(self):
        # `in` membership is geometry-based.
        assert self.t1 in self.g1
        assert self.sq in self.g1
        assert self.t1 in self.a1
        assert self.t2 in self.g3
        assert self.sq not in self.g3
        assert 5 not in self.g3

    def test_align(self):
        # a1 has index A/B, a2 has B/C: only B overlaps after alignment.
        a1, a2 = self.a1.align(self.a2)
        assert isinstance(a1, GeoSeries)
        assert isinstance(a2, GeoSeries)
        assert a2["A"] is None
        assert a1["B"].equals(a2["B"])
        assert a1["C"] is None

    def test_align_crs(self):
        # Each side keeps its own CRS through align().
        a1 = self.a1
        a1.crs = "epsg:4326"
        a2 = self.a2
        a2.crs = "epsg:31370"
        res1, res2 = a1.align(a2)
        assert res1.crs == "epsg:4326"
        assert res2.crs == "epsg:31370"
        a2.crs = None
        res1, res2 = a1.align(a2)
        assert res1.crs == "epsg:4326"
        assert res2.crs is None

    def test_align_mixed(self):
        # Aligning against a plain pandas Series must still work.
        a1 = self.a1
        s2 = pd.Series([1, 2], index=["B", "C"])
        res1, res2 = a1.align(s2)
        exp2 = pd.Series([np.nan, 1, 2], index=["A", "B", "C"])
        assert_series_equal(res2, exp2)

    def test_warning_if_not_aligned(self):
        # GH-816
        # Test that warning is issued when operating on non-aligned series
        # _series_op
        with pytest.warns(UserWarning, match="The indices .+ different"):
            self.a1.contains(self.a2)
        # _geo_op
        with pytest.warns(UserWarning, match="The indices .+ different"):
            self.a1.union(self.a2)

    def test_no_warning_if_aligned(self):
        # GH-816
        # Test that warning is not issued when operating on aligned series
        a1, a2 = self.a1.align(self.a2)
        with warnings.catch_warnings(record=True) as record:
            a1.contains(a2)  # _series_op, explicitly aligned
            self.g1.intersects(self.g2)  # _series_op, implicitly aligned
            a2.union(a1)  # _geo_op, explicitly aligned
            self.g2.intersection(self.g1)  # _geo_op, implicitly aligned
        user_warnings = [w for w in record if w.category is UserWarning]
        assert not user_warnings, user_warnings[0].message

    def test_geom_equals(self):
        assert np.all(self.g1.geom_equals(self.g1))
        assert_array_equal(self.g1.geom_equals(self.sq), [False, True])

    def test_geom_equals_align(self):
        with pytest.warns(UserWarning, match="The indices .+ different"):
            a = self.a1.geom_equals(self.a2, align=True)
        exp = pd.Series([False, True, False], index=["A", "B", "C"])
        assert_series_equal(a, exp)
        a = self.a1.geom_equals(self.a2, align=False)
        exp = pd.Series([False, False], index=["A", "B"])
        assert_series_equal(a, exp)

    def test_geom_almost_equals(self):
        # TODO: test decimal parameter
        # geom_almost_equals() is deprecated and must warn.
        with pytest.warns(FutureWarning, match=re.escape("The 'geom_almost_equals()'")):
            assert np.all(self.g1.geom_almost_equals(self.g1))
            assert_array_equal(self.g1.geom_almost_equals(self.sq), [False, True])
            assert_array_equal(
                self.a1.geom_almost_equals(self.a2, align=True), [False, True, False]
            )
            assert_array_equal(
                self.a1.geom_almost_equals(self.a2, align=False), [False, False]
            )

    def test_geom_equals_exact(self):
        # TODO: test tolerance parameter
        assert np.all(self.g1.geom_equals_exact(self.g1, 0.001))
        assert_array_equal(self.g1.geom_equals_exact(self.sq, 0.001), [False, True])
        assert_array_equal(
            self.a1.geom_equals_exact(self.a2, 0.001, align=True), [False, True, False]
        )
        assert_array_equal(
            self.a1.geom_equals_exact(self.a2, 0.001, align=False), [False, False]
        )

    def test_equal_comp_op(self):
        s = GeoSeries([Point(x, x) for x in range(3)])
        res = s == Point(1, 1)
        exp = pd.Series([False, True, False])
        assert_series_equal(res, exp)

    def test_to_file(self):
        """Test to_file and from_file"""
        tempfilename = os.path.join(self.tempdir, "test.shp")
        self.g3.to_file(tempfilename)
        # Read layer back in?
        s = GeoSeries.from_file(tempfilename)
        assert all(self.g3.geom_equals(s))
        # TODO: compare crs

    def test_to_json(self):
        """
        Test whether GeoSeries.to_json works and returns an actual json file.
        """
        json_str = self.g3.to_json()
        json.loads(json_str)
        # TODO : verify the output is a valid GeoJSON.

    def test_representative_point(self):
        assert np.all(self.g1.contains(self.g1.representative_point()))
        assert np.all(self.g2.contains(self.g2.representative_point()))
        assert np.all(self.g3.contains(self.g3.representative_point()))
        assert np.all(self.g4.contains(self.g4.representative_point()))

    def test_transform(self):
        # Round-trip through UTM 18N must preserve the geometry.
        utm18n = self.landmarks.to_crs(epsg=26918)
        lonlat = utm18n.to_crs(epsg=4326)
        assert geom_almost_equals(self.landmarks, lonlat)
        with pytest.raises(ValueError):
            # g1 has no CRS set, so reprojection must fail.
            self.g1.to_crs(epsg=4326)
        with pytest.raises(ValueError):
            self.landmarks.to_crs(crs=None, epsg=None)

    def test_estimate_utm_crs__geographic(self):
        assert self.landmarks.estimate_utm_crs() == CRS("EPSG:32618")
        assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918")

    def test_estimate_utm_crs__projected(self):
        assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == CRS(
            "EPSG:32618"
        )

    def test_estimate_utm_crs__out_of_bounds(self):
        # A polygon at the pole has no valid UTM zone.
        with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
            GeoSeries(
                [Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326"
            ).estimate_utm_crs()

    def test_estimate_utm_crs__missing_crs(self):
        with pytest.raises(RuntimeError, match="crs must be set"):
            GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])]).estimate_utm_crs()

    def test_fillna(self):
        # default is to fill with empty geometry
        na = self.na_none.fillna()
        assert isinstance(na[2], BaseGeometry)
        assert na[2].is_empty
        assert geom_equals(self.na_none[:2], na[:2])
        # XXX: method works inconsistently for different pandas versions
        # self.na_none.fillna(method='backfill')

    def test_coord_slice(self):
        """Test CoordinateSlicer"""
        # need some better test cases
        assert geom_equals(self.g3, self.g3.cx[:, :])
        assert geom_equals(self.g3[[True, False]], self.g3.cx[0.9:, :0.1])
        assert geom_equals(self.g3[[False, True]], self.g3.cx[0:0.1, 0.9:1.0])

    def test_coord_slice_with_zero(self):
        # Test that CoordinateSlice correctly handles zero slice (#GH477).
        gs = GeoSeries([Point(x, x) for x in range(-3, 4)])
        assert geom_equals(gs.cx[:0, :0], gs.loc[:3])
        assert geom_equals(gs.cx[:, :0], gs.loc[:3])
        assert geom_equals(gs.cx[:0, :], gs.loc[:3])
        assert geom_equals(gs.cx[0:, 0:], gs.loc[3:])
        assert geom_equals(gs.cx[0:, :], gs.loc[3:])
        assert geom_equals(gs.cx[:, 0:], gs.loc[3:])

    def test_geoseries_geointerface(self):
        assert self.g1.__geo_interface__["type"] == "FeatureCollection"
        assert len(self.g1.__geo_interface__["features"]) == self.g1.shape[0]

    def test_proj4strings(self):
        # As string
        reprojected = self.g3.to_crs("+proj=utm +zone=30")
        reprojected_back = reprojected.to_crs(epsg=4326)
        assert geom_almost_equals(self.g3, reprojected_back)
        # As dict
        reprojected = self.g3.to_crs({"proj": "utm", "zone": "30"})
        reprojected_back = reprojected.to_crs(epsg=4326)
        assert geom_almost_equals(self.g3, reprojected_back)
        # Set to equivalent string, convert, compare to original
        copy = self.g3.copy()
        copy.crs = "epsg:4326"
        reprojected = copy.to_crs({"proj": "utm", "zone": "30"})
        reprojected_back = reprojected.to_crs(epsg=4326)
        assert geom_almost_equals(self.g3, reprojected_back)
        # Conversions by different format
        reprojected_string = self.g3.to_crs("+proj=utm +zone=30")
        reprojected_dict = self.g3.to_crs({"proj": "utm", "zone": "30"})
        assert geom_almost_equals(reprojected_string, reprojected_dict)

    def test_from_wkb(self):
        assert_geoseries_equal(self.g1, GeoSeries.from_wkb([self.t1.wkb, self.sq.wkb]))

    def test_from_wkb_series(self):
        # Index of the input Series is preserved.
        s = pd.Series([self.t1.wkb, self.sq.wkb], index=[1, 2])
        expected = self.g1.copy()
        expected.index = pd.Index([1, 2])
        assert_geoseries_equal(expected, GeoSeries.from_wkb(s))

    def test_from_wkb_series_with_index(self):
        # An explicit `index` reindexes (here dropping the row labelled 2).
        index = [0]
        s = pd.Series([self.t1.wkb, self.sq.wkb], index=[0, 2])
        expected = self.g1.reindex(index)
        assert_geoseries_equal(expected, GeoSeries.from_wkb(s, index=index))

    def test_from_wkt(self):
        assert_geoseries_equal(self.g1, GeoSeries.from_wkt([self.t1.wkt, self.sq.wkt]))

    def test_from_wkt_series(self):
        s = pd.Series([self.t1.wkt, self.sq.wkt], index=[1, 2])
        expected = self.g1.copy()
        expected.index = pd.Index([1, 2])
        assert_geoseries_equal(expected, GeoSeries.from_wkt(s))

    def test_from_wkt_series_with_index(self):
        index = [0]
        s = pd.Series([self.t1.wkt, self.sq.wkt], index=[0, 2])
        expected = self.g1.reindex(index)
        assert_geoseries_equal(expected, GeoSeries.from_wkt(s, index=index))

    def test_to_wkb(self):
        assert_series_equal(pd.Series([self.t1.wkb, self.sq.wkb]), self.g1.to_wkb())
        assert_series_equal(
            pd.Series([self.t1.wkb_hex, self.sq.wkb_hex]), self.g1.to_wkb(hex=True)
        )

    def test_to_wkt(self):
        assert_series_equal(pd.Series([self.t1.wkt, self.sq.wkt]), self.g1.to_wkt())

    @pytest.mark.skip_no_sindex
    def test_clip(self):
        # GeoSeries.clip must match the top-level clip() function.
        left = read_file(datasets.get_path("naturalearth_cities"))
        world = read_file(datasets.get_path("naturalearth_lowres"))
        south_america = world[world["continent"] == "South America"]
        expected = clip(left.geometry, south_america)
        result = left.geometry.clip(south_america)
        assert_geoseries_equal(result, expected)

    def test_from_xy_points(self):
        x = self.landmarks.x.values
        y = self.landmarks.y.values
        index = self.landmarks.index.tolist()
        crs = self.landmarks.crs
        assert_geoseries_equal(
            self.landmarks, GeoSeries.from_xy(x, y, index=index, crs=crs)
        )
        # original index is kept when x/y are themselves Series
        assert_geoseries_equal(
            self.landmarks,
            GeoSeries.from_xy(self.landmarks.x, self.landmarks.y, crs=crs),
        )

    def test_from_xy_points_w_z(self):
        index_values = [5, 6, 7]
        x = pd.Series([0, -1, 2], index=index_values)
        y = pd.Series([8, 3, 1], index=index_values)
        z = pd.Series([5, -6, 7], index=index_values)
        expected = GeoSeries(
            [Point(0, 8, 5), Point(-1, 3, -6), Point(2, 1, 7)], index=index_values
        )
        assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z))

    def test_from_xy_points_unequal_index(self):
        # Mismatched x/y indexes: an explicit index wins, otherwise a
        # default RangeIndex is produced.
        x = self.landmarks.x
        y = self.landmarks.y
        y.index = -np.arange(len(y))
        crs = self.landmarks.crs
        assert_geoseries_equal(
            self.landmarks, GeoSeries.from_xy(x, y, index=x.index, crs=crs)
        )
        unindexed_landmarks = self.landmarks.copy()
        unindexed_landmarks.reset_index(inplace=True, drop=True)
        assert_geoseries_equal(
            unindexed_landmarks,
            GeoSeries.from_xy(x, y, crs=crs),
        )

    def test_from_xy_points_indexless(self):
        x = np.array([0.0, 3.0])
        y = np.array([2.0, 5.0])
        z = np.array([-1.0, 4.0])
        expected = GeoSeries([Point(0, 2, -1), Point(3, 5, 4)])
        assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z))
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_missing_values():
    """None/np.nan normalize to missing; empty geometries stay non-missing."""
    s = GeoSeries([Point(1, 1), None, np.nan, GeometryCollection(), Polygon()])
    # construction -> missing values get normalized to None
    assert s[1] is None
    assert s[2] is None
    assert s[3].is_empty
    assert s[4].is_empty
    # isna / is_empty
    assert s.isna().tolist() == [False, True, True, False, False]
    assert s.is_empty.tolist() == [False, False, False, True, True]
    assert s.notna().tolist() == [True, False, False, True, True]
    # fillna defaults to fill with empty geometry -> no missing values anymore
    assert not s.fillna().isna().any()
    # dropna drops the missing values
    assert not s.dropna().isna().any()
    assert len(s.dropna()) == 3
def test_isna_empty_geoseries():
    # ensure that isna() result for empty GeoSeries has the correct bool dtype
    s = GeoSeries([])
    result = s.isna()
    assert_series_equal(result, pd.Series([], dtype="bool"))
def test_geoseries_crs():
    # setting a non-EPSG authority CRS string must round-trip via pyproj
    gs = GeoSeries()
    gs.crs = "IGNF:ETRS89UTM28"
    assert gs.crs.to_authority() == ("IGNF", "ETRS89UTM28")
# -----------------------------------------------------------------------------
# # Constructor tests
# -----------------------------------------------------------------------------
def check_geoseries(s):
    """Assert that *s* is a fully-formed GeoSeries (geometry, dtype, values)."""
    checks = (
        (s, GeoSeries),
        (s.geometry, GeoSeries),
        (s.dtype, GeometryDtype),
        (s.values, GeometryArray),
    )
    for obj, expected_type in checks:
        assert isinstance(obj, expected_type)
class TestConstructor:
    """GeoSeries construction tests: scalars, sequences, empties, conversions."""

    def test_constructor(self):
        s = GeoSeries([Point(x, x) for x in range(3)])
        check_geoseries(s)

    def test_single_geom_constructor(self):
        # A single geometry scalar is broadcast over the (optional) index.
        p = Point(1, 2)
        line = LineString([(2, 3), (4, 5), (5, 6)])
        poly = Polygon(
            [(0, 0), (1, 0), (1, 1), (0, 1)], [[(0.1, 0.1), (0.9, 0.1), (0.9, 0.9)]]
        )
        mp = MultiPoint([(1, 2), (3, 4), (5, 6)])
        mline = MultiLineString([[(1, 2), (3, 4), (5, 6)], [(7, 8), (9, 10)]])
        poly2 = Polygon(
            [(0, 0), (0, -1), (-1, -1), (-1, 0)],
            [[(-0.1, -0.1), (-0.1, -0.5), (-0.5, -0.5), (-0.5, -0.1)]],
        )
        mpoly = MultiPolygon([poly, poly2])
        geoms = [p, line, poly, mp, mline, mpoly]
        index = ["a", "b", "c", "d"]
        for g in geoms:
            gs = GeoSeries(g)
            assert len(gs) == 1
            # accessing elements no longer give identical objects
            assert gs.iloc[0].equals(g)
            gs = GeoSeries(g, index=index)
            assert len(gs) == len(index)
            for x in gs:
                assert x.equals(g)

    def test_non_geometry_raises(self):
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries([True, False, True])
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries(["a", "b", "c"])
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries([[1, 2], [3, 4]])

    def test_empty(self):
        s = GeoSeries([])
        check_geoseries(s)
        s = GeoSeries()
        check_geoseries(s)

    def test_data_is_none(self):
        s = GeoSeries(index=range(3))
        check_geoseries(s)

    def test_empty_array(self):
        # with empty data that have an explicit dtype, we use the fallback or
        # not depending on the dtype
        # dtypes that can never hold geometry-like data
        for arr in [
            np.array([], dtype="bool"),
            np.array([], dtype="int64"),
            np.array([], dtype="float32"),
            # this gets converted to object dtype by pandas
            # np.array([], dtype="str"),
        ]:
            with pytest.raises(
                TypeError, match="Non geometry data passed to GeoSeries"
            ):
                GeoSeries(arr)
        # dtypes that can potentially hold geometry-like data (object) or
        # can come from empty data (float64)
        for arr in [
            np.array([], dtype="object"),
            np.array([], dtype="float64"),
            np.array([], dtype="str"),
        ]:
            with warnings.catch_warnings(record=True) as record:
                s = GeoSeries(arr)
            assert not record
            assert isinstance(s, GeoSeries)

    def test_from_series(self):
        # A pandas object Series of geometries converts with name and index
        # preserved.
        shapes = [
            Polygon([(random.random(), random.random()) for _ in range(3)])
            for _ in range(10)
        ]
        with ignore_shapely2_warnings():
            # the warning here is not suppressed by GeoPandas, as this is a pure
            # pandas construction call
            s = pd.Series(shapes, index=list("abcdefghij"), name="foo")
        g = GeoSeries(s)
        check_geoseries(g)
        assert [a.equals(b) for a, b in zip(s, g)]
        assert s.name == g.name
        assert s.index is g.index

    # GH 1216
    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
    def test_reset_index(self, name, crs):
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])],
            name=name,
            crs=crs,
        )
        s = s.explode(index_parts=True)
        df = s.reset_index()
        assert type(df) == GeoDataFrame
        # name None -> 0, otherwise name preserved
        assert df.geometry.name == (name if name is not None else 0)
        assert df.crs == s.crs

    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
    def test_to_frame(self, name, crs):
        s = GeoSeries([Point(0, 0), Point(1, 1)], name=name, crs=crs)
        df = s.to_frame()
        assert type(df) == GeoDataFrame
        # name None -> 0, otherwise name preserved
        expected_name = name if name is not None else 0
        assert df.geometry.name == expected_name
        assert df._geometry_column_name == expected_name
        assert df.crs == s.crs
        # if name is provided to to_frame, it should override
        df2 = s.to_frame(name="geom")
        # BUGFIX: this previously re-checked `df`; the second frame is df2.
        assert type(df2) == GeoDataFrame
        assert df2.geometry.name == "geom"
        assert df2.crs == s.crs

    def test_explode_without_multiindex(self):
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
        )
        s = s.explode(index_parts=False)
        # outer index repeated per exploded part, no inner level
        expected_index = pd.Index([0, 0, 1, 1, 1])
        assert_index_equal(s.index, expected_index)

    def test_explode_ignore_index(self):
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
        )
        s = s.explode(ignore_index=True)
        expected_index = pd.Index(range(len(s)))
        assert_index_equal(s.index, expected_index)
        # index_parts is ignored if ignore_index=True
        s = s.explode(index_parts=True, ignore_index=True)
        assert_index_equal(s.index, expected_index)

View File

@@ -0,0 +1,183 @@
import warnings
import pandas as pd
import pytest
from geopandas.testing import assert_geodataframe_equal
from pandas.testing import assert_index_equal
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
class TestMerging:
def setup_method(self):
self.gseries = GeoSeries([Point(i, i) for i in range(3)])
self.series = pd.Series([1, 2, 3])
self.gdf = GeoDataFrame({"geometry": self.gseries, "values": range(3)})
self.df = pd.DataFrame({"col1": [1, 2, 3], "col2": [0.1, 0.2, 0.3]})
def _check_metadata(self, gdf, geometry_column_name="geometry", crs=None):
assert gdf._geometry_column_name == geometry_column_name
assert gdf.crs == crs
def test_merge(self):
res = self.gdf.merge(self.df, left_on="values", right_on="col1")
# check result is a GeoDataFrame
assert isinstance(res, GeoDataFrame)
# check geometry property gives GeoSeries
assert isinstance(res.geometry, GeoSeries)
# check metadata
self._check_metadata(res)
# test that crs and other geometry name are preserved
self.gdf.crs = "epsg:4326"
self.gdf = self.gdf.rename(columns={"geometry": "points"}).set_geometry(
"points"
)
res = self.gdf.merge(self.df, left_on="values", right_on="col1")
assert isinstance(res, GeoDataFrame)
assert isinstance(res.geometry, GeoSeries)
self._check_metadata(res, "points", self.gdf.crs)
def test_concat_axis0(self):
# frame
res = pd.concat([self.gdf, self.gdf])
assert res.shape == (6, 2)
assert isinstance(res, GeoDataFrame)
assert isinstance(res.geometry, GeoSeries)
self._check_metadata(res)
exp = GeoDataFrame(pd.concat([pd.DataFrame(self.gdf), pd.DataFrame(self.gdf)]))
assert_geodataframe_equal(exp, res)
# series
res = pd.concat([self.gdf.geometry, self.gdf.geometry])
assert res.shape == (6,)
assert isinstance(res, GeoSeries)
assert isinstance(res.geometry, GeoSeries)
def test_concat_axis0_crs(self):
# CRS not set for both GeoDataFrame
res = pd.concat([self.gdf, self.gdf])
self._check_metadata(res)
# CRS set for both GeoDataFrame, same CRS
res1 = pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")])
self._check_metadata(res1, crs="epsg:4326")
# CRS not set for one GeoDataFrame, but set for the other GeoDataFrame
with pytest.warns(
UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
):
res2 = pd.concat([self.gdf, self.gdf.set_crs("epsg:4326")])
self._check_metadata(res2, crs="epsg:4326")
# CRS set for both GeoDataFrame, different CRS
with pytest.raises(
ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
):
pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")])
# CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
# same CRS
with pytest.warns(
UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
):
res3 = pd.concat(
[self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")]
)
self._check_metadata(res3, crs="epsg:4326")
# CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
# different CRS
with pytest.raises(
ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
):
pd.concat(
[self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")]
)
def test_concat_axis0_unaligned_cols(self):
# https://github.com/geopandas/geopandas/issues/2679
gdf = self.gdf.set_crs("epsg:4326").assign(
geom=self.gdf.geometry.set_crs("epsg:4327")
)
both_geom_cols = gdf[["geom", "geometry"]]
single_geom_col = gdf[["geometry"]]
with warnings.catch_warnings():
warnings.simplefilter("error")
pd.concat([both_geom_cols, single_geom_col])
# Check order of mismatch doesn't matter
with warnings.catch_warnings():
warnings.simplefilter("error")
pd.concat([single_geom_col, both_geom_cols])
# Side effect of this fix, explicitly provided all none geoseries
# will not be warned for (ideally this would still warn)
explicit_all_none_case = gdf[["geometry"]].assign(
geom=GeoSeries([None for _ in range(len(gdf))])
)
with warnings.catch_warnings():
warnings.simplefilter("error")
pd.concat([both_geom_cols, explicit_all_none_case])
# Check concat with partially None col is not affected by the special casing
# for all None no CRS handling
with pytest.warns(
UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
):
partial_none_case = self.gdf[["geometry"]]
partial_none_case.iloc[0] = None
pd.concat([single_geom_col, partial_none_case])
def test_concat_axis1(self):
    """Column-wise concat of a GeoDataFrame with a DataFrame keeps the
    geospatial type and the left frame's geometry metadata."""
    combined = pd.concat([self.gdf, self.df], axis=1)
    # All columns from both inputs are present.
    assert combined.shape == (3, 4)
    # Result is still geospatial with a usable active geometry column.
    assert isinstance(combined, GeoDataFrame)
    assert isinstance(combined.geometry, GeoSeries)
    self._check_metadata(combined)
def test_concat_axis1_multiple_geodataframes(self):
    """Column-wise concat fails loudly when it would duplicate the active
    geometry column name; distinct geometry names are allowed."""
    # https://github.com/geopandas/geopandas/issues/1230
    # Expect that concat should fail gracefully if duplicate column names belonging
    # to geometry columns are introduced.
    expected_err = (
        "GeoDataFrame does not support multiple columns using the geometry"
        " column name 'geometry'"
    )
    with pytest.raises(ValueError, match=expected_err):
        pd.concat([self.gdf, self.gdf], axis=1)
    # Check case is handled if custom geometry column name is used
    df2 = self.gdf.rename_geometry("geom")
    expected_err2 = (
        "Concat operation has resulted in multiple columns using the geometry "
        "column name 'geom'."
    )
    with pytest.raises(ValueError, match=expected_err2):
        pd.concat([df2, df2], axis=1)
    # Check that two geometry columns is fine, if they have different names
    res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1)
    # check metadata comes from first df
    self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326")
@pytest.mark.filterwarnings("ignore:Accessing CRS")
def test_concat_axis1_geoseries(self):
    """Column-wise concat of two GeoSeries yields a GeoDataFrame with no
    active geometry column set.

    Note this is not consistent with concat([gdf, gdf], axis=1) where the
    left metadata is set on the result. This is deliberate for now.
    """
    gseries2 = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")
    result = pd.concat([gseries2, self.gseries], axis=1)
    assert type(result) is GeoDataFrame
    assert result._geometry_column_name is None
    assert_index_equal(pd.Index([0, 1]), result.columns)
    # A named GeoSeries contributes its name as the column label.
    gseries2.name = "foo"
    result2 = pd.concat([gseries2, self.gseries], axis=1)
    assert type(result2) is GeoDataFrame
    # Bug fix: this previously re-asserted on ``result`` (already checked
    # above), leaving the second concat's metadata unverified.
    assert result2._geometry_column_name is None
    assert_index_equal(pd.Index(["foo", 0]), result2.columns)

View File

@@ -0,0 +1,372 @@
import pandas as pd
import pyproj
import pytest
from shapely.geometry import Point
import numpy as np
from geopandas import GeoDataFrame, GeoSeries
# Projected (EPSG:27700, OSGB) and geographic (EPSG:4326, WGS 84) CRSs used
# for the two geometry columns built by the fixtures below.
crs_osgb = pyproj.CRS(27700)
crs_wgs = pyproj.CRS(4326)
# Number of rows in the ``df`` fixture.
N = 10
@pytest.fixture(params=["geometry", "point"])
def df(request):
    """GeoDataFrame with two value columns, an active geometry column whose
    name is parametrized ("geometry" or "point"), and a second geometry
    column "geometry2" carrying a different CRS."""
    geo_name = request.param
    df = GeoDataFrame(
        [
            {
                "value1": x + y,
                "value2": x * y,
                geo_name: Point(x, y),  # rename this col in tests
            }
            for x, y in zip(range(N), range(N))
        ],
        crs=crs_wgs,
        geometry=geo_name,
    )
    # want geometry2 to be a GeoSeries not Series, test behaviour of non geom col
    df["geometry2"] = df[geo_name].set_crs(crs_osgb, allow_override=True)
    return df
@pytest.fixture
def df2():
    """For constructor_sliced tests.

    Three geometry columns (only "geometry" is the active one) plus a
    numeric column and an all-NaN column.
    """
    return GeoDataFrame(
        {
            "geometry": GeoSeries([Point(x, x) for x in range(3)]),
            "geometry2": GeoSeries([Point(x, x) for x in range(3)]),
            "geometry3": GeoSeries([Point(x, x) for x in range(3)]),
            "value": [1, 2, 1],
            "value_nan": np.nan,
        }
    )
def _check_metadata_gdf(gdf, geo_name="geometry", crs=crs_wgs):
    """Assert the active geometry column name and CRS of a GeoDataFrame."""
    assert geo_name == gdf._geometry_column_name
    assert geo_name == gdf.geometry.name
    assert crs == gdf.crs
def _check_metadata_gs(gs, name="geometry", crs=crs_wgs):
    """Assert the name and CRS of a GeoSeries."""
    assert name == gs.name
    assert crs == gs.crs
def assert_object(result, expected_type, geo_name="geometry", crs=crs_wgs):
    """Assert ``result`` has exactly ``expected_type`` and valid geo metadata.

    For a GeoDataFrame the active geometry column must be ``geo_name`` with
    CRS ``crs`` (``geo_name=None`` is rejected here — use
    ``assert_obj_no_active_geo_col`` for frames whose active geometry column
    is unset/invalid). For a GeoSeries, name and CRS are checked. Plain
    pandas objects get a type check only.
    """
    assert type(result) is expected_type
    if expected_type is GeoDataFrame:
        assert geo_name is not None
        _check_metadata_gdf(result, geo_name=geo_name, crs=crs)
    elif expected_type is GeoSeries:
        _check_metadata_gs(result, name=geo_name, crs=crs)
def assert_obj_no_active_geo_col(result, expected_type, geo_colname=None):
    """
    Helper method to make tests easier to read. Checks result is of the
    expected type and that accessing ``result.geometry.name`` raises,
    corresponding to ``_geometry_column_name`` being in an invalid state
    (either None, or a column no longer present).
    The expected error message distinguishes the two invalid states
    (geometry column never set vs. previously-set column now missing).
    We also assert that ``_geometry_column_name == geo_colname``.
    """
    if expected_type == GeoDataFrame:
        if geo_colname is None:
            assert result._geometry_column_name is None
        else:
            assert geo_colname == result._geometry_column_name
        # Message prefix differs by invalid state; match= treats it as a regex,
        # hence the escaped parenthesis in the second variant.
        if result._geometry_column_name is None:
            msg = (
                "You are calling a geospatial method on the GeoDataFrame, "
                "but the active"
            )
        else:
            msg = (
                "You are calling a geospatial method on the GeoDataFrame, but "
                r"the active geometry column \("
                rf"'{result._geometry_column_name}'\) is not present"
            )
        with pytest.raises(AttributeError, match=msg):
            result.geometry.name  # be explicit that geometry is invalid here
    else:
        raise NotImplementedError()
def test_getitem(df):
    """__getitem__ keeps GeoDataFrame/GeoSeries types where the selection
    still contains geometry data; the active column survives only if kept."""
    active = df.geometry.name
    # Frame selections without any geometry -> plain DataFrame
    assert_object(df[["value1", "value2"]], pd.DataFrame)
    assert_object(df[["value1"]], pd.DataFrame)
    # Frame selections retaining the active geometry column
    assert_object(df[[active, "geometry2"]], GeoDataFrame, active)
    assert_object(df[[active]], GeoDataFrame, active)
    # Dropping the active column but keeping another geometry column keeps the
    # type, with no valid active geometry column.
    assert_obj_no_active_geo_col(df[["geometry2", "value1"]], GeoDataFrame, active)
    assert_obj_no_active_geo_col(df[["geometry2"]], GeoDataFrame, active)
    # Single-column selections
    assert_object(df[active], GeoSeries, active)
    assert_object(df["geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"], pd.Series)
def test_loc(df):
    """Label-based (.loc) column selection follows the same
    geometry-retention rules as ``__getitem__``."""
    geo_name = df.geometry.name
    assert_object(df.loc[:, ["value1", "value2"]], pd.DataFrame)
    assert_object(df.loc[:, [geo_name, "geometry2"]], GeoDataFrame, geo_name)
    assert_object(df.loc[:, [geo_name]], GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(
        df.loc[:, ["geometry2", "value1"]], GeoDataFrame, geo_name
    )
    assert_obj_no_active_geo_col(df.loc[:, ["geometry2"]], GeoDataFrame, geo_name)
    assert_object(df.loc[:, ["value1"]], pd.DataFrame)
    # Series
    assert_object(df.loc[:, geo_name], GeoSeries, geo_name)
    assert_object(df.loc[:, "geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.loc[:, "value1"], pd.Series)
def test_iloc(df):
    """Positional (.iloc) selection; the fixture's column order is
    value1, value2, <active geometry>, geometry2."""
    geo_name = df.geometry.name
    assert_object(df.iloc[:, 0:2], pd.DataFrame)
    assert_object(df.iloc[:, 2:4], GeoDataFrame, geo_name)
    assert_object(df.iloc[:, [2]], GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(df.iloc[:, [3, 0]], GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(df.iloc[:, [3]], GeoDataFrame, geo_name)
    assert_object(df.iloc[:, [0]], pd.DataFrame)
    # Series
    assert_object(df.iloc[:, 2], GeoSeries, geo_name)
    assert_object(df.iloc[:, 3], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.iloc[:, 0], pd.Series)
def test_squeeze(df):
    """squeeze() on a one-geometry-column frame returns a GeoSeries with
    name and CRS intact."""
    active = df.geometry.name
    squeezed_active = df[[active]].squeeze()
    assert_object(squeezed_active, GeoSeries, active)
    squeezed_other = df[["geometry2"]].squeeze()
    assert_object(squeezed_other, GeoSeries, "geometry2", crs=crs_osgb)
def test_to_frame(df):
    """GeoSeries.to_frame() promotes to a GeoDataFrame with matching
    metadata; a plain Series stays a DataFrame."""
    active = df.geometry.name
    assert_object(df[active].to_frame(), GeoDataFrame, active, crs=df[active].crs)
    assert_object(df["geometry2"].to_frame(), GeoDataFrame, "geometry2", crs=crs_osgb)
    assert_object(df["value1"].to_frame(), pd.DataFrame)
def test_reindex(df):
    """Column reindexing keeps the GeoDataFrame only while geometry columns
    survive; row reindexing always preserves the type."""
    geo_name = df.geometry.name
    assert_object(df.reindex(columns=["value1", "value2"]), pd.DataFrame)
    assert_object(df.reindex(columns=[geo_name, "geometry2"]), GeoDataFrame, geo_name)
    assert_object(df.reindex(columns=[geo_name]), GeoDataFrame, geo_name)
    # a not-yet-existing column alongside the active geometry is fine
    assert_object(df.reindex(columns=["new_col", geo_name]), GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(
        df.reindex(columns=["geometry2", "value1"]), GeoDataFrame, geo_name
    )
    assert_obj_no_active_geo_col(
        df.reindex(columns=["geometry2"]), GeoDataFrame, geo_name
    )
    assert_object(df.reindex(columns=["value1"]), pd.DataFrame)
    # reindexing the rows always preserves the GeoDataFrame
    assert_object(df.reindex(index=[0, 1, 20]), GeoDataFrame, geo_name)
    # reindexing both rows and columns
    assert_object(
        df.reindex(index=[0, 1, 20], columns=[geo_name]), GeoDataFrame, geo_name
    )
    assert_object(df.reindex(index=[0, 1, 20], columns=["value1"]), pd.DataFrame)
def test_drop(df):
    """drop() demotes to a plain DataFrame only when no geometry columns
    remain; dropping just the active column leaves an invalid active state."""
    active = df.geometry.name
    # All geometry columns dropped -> plain DataFrame
    assert_object(df.drop(columns=[active, "geometry2"]), pd.DataFrame)
    assert_object(df.drop(columns=["geometry2", "value2", active]), pd.DataFrame)
    # Active geometry column kept -> GeoDataFrame with intact metadata
    assert_object(df.drop(columns=["value1", "value2"]), GeoDataFrame, active)
    assert_object(
        df.drop(columns=["value1", "value2", "geometry2"]), GeoDataFrame, active
    )
    # Active column dropped while another geometry column survives
    assert_obj_no_active_geo_col(
        df.drop(columns=[active, "value2"]), GeoDataFrame, active
    )
    assert_obj_no_active_geo_col(
        df.drop(columns=["value1", "value2", active]), GeoDataFrame, active
    )
def test_apply(df):
    """apply with an identity function preserves geospatial types by the
    same rules as selection; a geometry->str function demotes to Series."""
    geo_name = df.geometry.name

    def identity(x):
        return x

    # axis = 0
    assert_object(df[["value1", "value2"]].apply(identity), pd.DataFrame)
    assert_object(df[[geo_name, "geometry2"]].apply(identity), GeoDataFrame, geo_name)
    assert_object(df[[geo_name]].apply(identity), GeoDataFrame, geo_name)
    res = df[["geometry2", "value1"]].apply(identity)
    assert_obj_no_active_geo_col(res, GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(
        df[["geometry2"]].apply(identity), GeoDataFrame, geo_name
    )
    assert_object(df[["value1"]].apply(identity), pd.DataFrame)
    # axis = 0, Series
    assert_object(df[geo_name].apply(identity), GeoSeries, geo_name)
    assert_object(df["geometry2"].apply(identity), GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"].apply(identity), pd.Series)
    # axis = 0, Series, no longer geometry
    assert_object(df[geo_name].apply(lambda x: str(x)), pd.Series)
    assert_object(df["geometry2"].apply(lambda x: str(x)), pd.Series)
    # axis = 1
    assert_object(df[["value1", "value2"]].apply(identity, axis=1), pd.DataFrame)
    assert_object(
        df[[geo_name, "geometry2"]].apply(identity, axis=1), GeoDataFrame, geo_name
    )
    assert_object(df[[geo_name]].apply(identity, axis=1), GeoDataFrame, geo_name)
    # TODO below should be a GeoDataFrame to be consistent with new getitem logic
    # leave as follow up as quite complicated
    # FrameColumnApply.series_generator returns object dtypes Series, so will have
    # patch result of apply
    assert_object(df[["geometry2", "value1"]].apply(identity, axis=1), pd.DataFrame)
    assert_object(df[["value1"]].apply(identity, axis=1), pd.DataFrame)
def test_apply_axis1_secondary_geo_cols(df):
    """Row-wise apply over only a non-active geometry column keeps the
    GeoDataFrame type but loses the active geometry column."""
    active = df.geometry.name

    def passthrough(row):
        return row

    assert_obj_no_active_geo_col(
        df[["geometry2"]].apply(passthrough, axis=1), GeoDataFrame, active
    )
def test_expanddim_in_apply():
    """Expanding a GeoSeries to two numeric columns inside apply yields a
    plain DataFrame, not a GeoDataFrame."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    pts = GeoSeries.from_xy([0, 1], [0, 1])
    expanded = pts.apply(lambda p: pd.Series([p.x, p.y]))
    assert_object(expanded, pd.DataFrame)
def test_expandim_in_groupby_aggregate_multiple_funcs():
    """Multi-function groupby aggregation of a GeoSeries: a geometry-valued
    output column yields a GeoDataFrame without an active geometry column;
    all-scalar outputs yield a plain DataFrame."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    # There are two calls to _constructor_expanddim here
    # SeriesGroupBy._aggregate_multiple_funcs() and
    # SeriesGroupBy._wrap_series_output() len(output) > 1
    s = GeoSeries.from_xy([0, 1, 2], [0, 1, 3])

    def union(s):
        return s.unary_union

    def total_area(s):
        return s.area.sum()

    grouped = s.groupby([0, 1, 0])
    agg = grouped.agg([total_area, union])
    assert_obj_no_active_geo_col(agg, GeoDataFrame, geo_colname=None)
    result = grouped.agg([union, total_area])
    assert_obj_no_active_geo_col(result, GeoDataFrame, geo_colname=None)
    assert_object(grouped.agg([total_area, total_area]), pd.DataFrame)
    assert_object(grouped.agg([total_area]), pd.DataFrame)
def test_expanddim_in_unstack():
    """unstack() of a MultiIndex GeoSeries yields a GeoDataFrame with no
    active geometry column, regardless of the series name."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    s = GeoSeries.from_xy(
        [0, 1, 2],
        [0, 1, 3],
        index=pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "a")]),
    )
    unstack = s.unstack()
    expected_geo_name = None
    assert_obj_no_active_geo_col(unstack, GeoDataFrame, geo_colname=expected_geo_name)
    # https://github.com/geopandas/geopandas/issues/2486
    s.name = "geometry"
    unstack = s.unstack()
    assert_obj_no_active_geo_col(unstack, GeoDataFrame, expected_geo_name)
# indexing / constructor_sliced tests
# Column subsets used to parametrize the row-slicing tests below; mixes the
# active geometry column, a secondary geometry column, and value columns.
test_case_column_sets = [
    ["geometry"],
    ["geometry2"],
    ["geometry", "geometry2"],
    # non active geo col case
    ["geometry", "value"],
    ["geometry", "value_nan"],
    ["geometry2", "value"],
    ["geometry2", "value_nan"],
]
@pytest.mark.parametrize(
    "column_set",
    test_case_column_sets,
    ids=[", ".join(i) for i in test_case_column_sets],
)
def test_constructor_sliced_row_slices(df2, column_set):
    """Row slices of a GeoDataFrame are plain Series, never GeoSeries."""
    # https://github.com/geopandas/geopandas/issues/2282
    df_subset = df2[column_set]
    assert isinstance(df_subset, GeoDataFrame)
    res = df_subset.loc[0]
    # row slices shouldn't be GeoSeries, even if they have a geometry col
    # (``is`` asserts exact type identity; ``==`` on types is the E721 smell)
    assert type(res) is pd.Series
    if "geometry" in column_set:
        assert not isinstance(res.geometry, pd.Series)
        assert res.geometry == Point(0, 0)
def test_constructor_sliced_column_slices(df2):
    """iloc column slices of geometry dtype come back as GeoSeries; row
    slices stay plain pandas Series."""
    # Note loc doesn't use _constructor_sliced so it's not tested here
    geo_idx = df2.columns.get_loc("geometry")
    sub = df2.head(1)
    # column slices should be GeoSeries if of geometry type
    # (``is`` asserts exact type identity; ``==`` on types is the E721 smell)
    assert type(sub.iloc[:, geo_idx]) is GeoSeries
    assert type(sub.iloc[[0], geo_idx]) is GeoSeries
    sub = df2.head(2)
    assert type(sub.iloc[:, geo_idx]) is GeoSeries
    assert type(sub.iloc[[0, 1], geo_idx]) is GeoSeries
    # check iloc row slices are pd.Series instead
    assert type(df2.iloc[0, :]) is pd.Series
def test_constructor_sliced_in_pandas_methods(df2):
    """Pandas reductions over a GeoDataFrame produce plain pandas Series."""
    # constructor sliced is used in many places, checking a sample of non
    # geometry cases are sensible
    # (``is`` asserts exact type identity; ``==`` on types is the E721 smell)
    assert type(df2.count()) is pd.Series
    # drop the secondary geometry columns as not hashable
    hashable_test_df = df2.drop(columns=["geometry2", "geometry3"])
    assert type(hashable_test_df.duplicated()) is pd.Series
    assert type(df2.quantile(numeric_only=True)) is pd.Series
    assert type(df2.memory_usage()) is pd.Series

View File

@@ -0,0 +1,881 @@
import os
import numpy as np
import pandas as pd
from shapely.geometry import Point, Polygon, LineString, GeometryCollection, box
import geopandas
from geopandas import GeoDataFrame, GeoSeries, overlay, read_file
from geopandas._compat import PANDAS_GE_20
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
import pytest
try:
    from fiona.errors import DriverError
except ImportError:
    # fiona is optional; define a stand-in exception so the
    # ``except DriverError`` clauses below stay valid (they then simply
    # never match).
    class DriverError(Exception):
        pass
# Directory holding the saved expected overlay results used by these tests.
DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data", "overlay")

# All overlay tests require a spatial-index backend; skip the module without one.
pytestmark = pytest.mark.skip_no_sindex
@pytest.fixture
def dfs(request):
    """Two one-attribute GeoDataFrames of partially overlapping 2x2 squares."""
    polys_left = GeoSeries(
        [
            Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
            Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
        ]
    )
    polys_right = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    left = GeoDataFrame({"col1": [1, 2], "geometry": polys_left})
    right = GeoDataFrame({"col2": [1, 2], "geometry": polys_right})
    return left, right
@pytest.fixture(params=["default-index", "int-index", "string-index"])
def dfs_index(request, dfs):
    """Variants of the ``dfs`` frames with non-default indices, to check that
    overlay is insensitive to the input index."""
    df1, df2 = dfs
    if request.param == "int-index":
        df1.index = [1, 2]
        df2.index = [0, 2]
    if request.param == "string-index":
        # NOTE(review): only df1 gets a string index (df2 keeps its default),
        # presumably to exercise a mixed-index case — confirm intended.
        df1.index = ["row1", "row2"]
    return df1, df2
@pytest.fixture(
    params=["union", "intersection", "difference", "symmetric_difference", "identity"]
)
def how(request):
    """Overlay operation name, parametrized over every supported mode."""
    return request.param
@pytest.fixture(params=[True, False])
def keep_geom_type(request):
    """Boolean flag forwarded to ``overlay(keep_geom_type=...)``."""
    return request.param
def test_overlay(dfs_index, how):
    """
    Basic overlay test with small dummy example dataframes (from docs).
    Results obtained using QGIS 2.16 (Vector -> Geoprocessing Tools ->
    Intersection / Union / ...), saved to GeoJSON
    """
    df1, df2 = dfs_index
    result = overlay(df1, df2, how=how)

    # construction of result
    def _read(name):
        """Read a saved expected result, dropping the CRS and widening any
        int32 columns to int64 (presumably reader/platform-dependent integer
        width — confirm)."""
        expected = read_file(
            os.path.join(DATA, "polys", "df1_df2-{0}.geojson".format(name))
        )
        expected.crs = None
        for col in expected.columns[expected.dtypes == "int32"]:
            expected[col] = expected[col].astype("int64")
        return expected

    if how == "identity":
        # identity == intersection plus the left frame's difference part
        expected_intersection = _read("intersection")
        expected_difference = _read("difference")
        expected = pd.concat(
            [expected_intersection, expected_difference], ignore_index=True, sort=False
        )
        expected["col1"] = expected["col1"].astype(float)
    else:
        expected = _read(how)

    # TODO needed adaptations to result
    if how == "union":
        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
    elif how == "difference":
        result = result.reset_index(drop=True)

    assert_geodataframe_equal(result, expected, check_column_type=False)

    # for difference also reversed
    if how == "difference":
        result = overlay(df2, df1, how=how)
        result = result.reset_index(drop=True)
        expected = _read("difference-inverse")
        assert_geodataframe_equal(result, expected, check_column_type=False)
@pytest.mark.filterwarnings("ignore:GeoSeries crs mismatch:UserWarning")
def test_overlay_nybb(how):
    """Overlay of the nybb boroughs with saved buffered circles, compared
    against QGIS reference outputs (working around several known QGIS
    defects in the saved files)."""
    polydf = read_file(geopandas.datasets.get_path("nybb"))
    # The circles have been constructed and saved at the time the expected
    # results were created (exact output of buffer algorithm can slightly
    # change over time -> use saved ones)
    # # construct circles dataframe
    # N = 10
    # b = [int(x) for x in polydf.total_bounds]
    # polydf2 = GeoDataFrame(
    #     [
    #         {"geometry": Point(x, y).buffer(10000), "value1": x + y, "value2": x - y}
    #         for x, y in zip(
    #             range(b[0], b[2], int((b[2] - b[0]) / N)),
    #             range(b[1], b[3], int((b[3] - b[1]) / N)),
    #         )
    #     ],
    #     crs=polydf.crs,
    # )
    polydf2 = read_file(os.path.join(DATA, "nybb_qgis", "polydf2.shp"))
    result = overlay(polydf, polydf2, how=how)
    cols = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area", "value1", "value2"]
    if how == "difference":
        # 'difference' keeps only the left frame's columns
        cols = cols[:-2]
    # expected result
    if how == "identity":
        # read union one, further down below we take the appropriate subset
        expected = read_file(os.path.join(DATA, "nybb_qgis", "qgis-union.shp"))
    else:
        expected = read_file(
            os.path.join(DATA, "nybb_qgis", "qgis-{0}.shp".format(how))
        )
    # The result of QGIS for 'union' contains incorrect geometries:
    # 24 is a full original circle overlapping with unioned geometries, and
    # 27 is a completely duplicated row)
    if how == "union":
        expected = expected.drop([24, 27])
        expected.reset_index(inplace=True, drop=True)
    # Eliminate observations without geometries (issue from QGIS)
    expected = expected[expected.is_valid]
    expected.reset_index(inplace=True, drop=True)
    if how == "identity":
        expected = expected[expected.BoroCode.notnull()].copy()
    # Order GeoDataFrames
    expected = expected.sort_values(cols).reset_index(drop=True)
    # TODO needed adaptations to result
    result = result.sort_values(cols).reset_index(drop=True)
    if how in ("union", "identity"):
        # concat < 0.23 sorts, so changes the order of the columns
        # but at least we ensure 'geometry' is the last column
        assert result.columns[-1] == "geometry"
        assert len(result.columns) == len(expected.columns)
        result = result.reindex(columns=expected.columns)
    # the ordering of the spatial index results causes slight deviations
    # in the resultant geometries for multipolygons
    # for more details on the discussion, see:
    # https://github.com/geopandas/geopandas/pull/1338
    # https://github.com/geopandas/geopandas/issues/1337
    # Temporary workaround below:
    # simplify multipolygon geometry comparison
    # since the order of the constituent polygons depends on
    # the ordering of spatial indexing results, we cannot
    # compare symmetric_difference results directly when the
    # resultant geometry is a multipolygon
    # first, check that all bounds and areas are approx equal
    # this is a very rough check for multipolygon equality
    kwargs = {}
    pd.testing.assert_series_equal(
        result.geometry.area, expected.geometry.area, **kwargs
    )
    pd.testing.assert_frame_equal(
        result.geometry.bounds, expected.geometry.bounds, **kwargs
    )
    # There are two cases where the multipolygon have a different number
    # of sub-geometries -> not solved by normalize (and thus drop for now)
    if how == "symmetric_difference":
        expected.loc[9, "geometry"] = None
        result.loc[9, "geometry"] = None
    if how == "union":
        expected.loc[24, "geometry"] = None
        result.loc[24, "geometry"] = None
    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_crs=False,
        check_column_type=False,
        check_less_precise=True,
    )
def test_overlay_overlap(how):
    """
    Overlay test with overlapping geometries in both dataframes.
    Test files are created with::

        import geopandas
        from geopandas import GeoSeries, GeoDataFrame
        from shapely.geometry import Point, Polygon, LineString

        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)

        df1 = GeoDataFrame({'geometry': s1, 'col1':[1,2]})
        df2 = GeoDataFrame({'geometry': s2, 'col2':[1, 2]})

        ax = df1.plot(alpha=0.5)
        df2.plot(alpha=0.5, ax=ax, color='C1')

        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
                    driver='GeoJSON')
        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
                    driver='GeoJSON')

    and then overlay results are obtained from using QGIS 2.16
    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
    saved to GeoJSON.
    """
    if how == "identity":
        # No QGIS reference output exists for 'identity'. Skip up front
        # rather than after computing the (discarded) overlay, and call
        # pytest.skip() directly — it raises by itself, so the previous
        # ``raise pytest.skip()`` was redundant.
        pytest.skip("no expected result saved for how='identity'")
    df1 = read_file(os.path.join(DATA, "overlap", "df1_overlap.geojson"))
    df2 = read_file(os.path.join(DATA, "overlap", "df2_overlap.geojson"))
    result = overlay(df1, df2, how=how)
    expected = read_file(
        os.path.join(DATA, "overlap", "df1_df2_overlap-{0}.geojson".format(how))
    )
    if how == "union":
        # the QGIS result has the last row duplicated, so removing this
        expected = expected.iloc[:-1]
    # TODO needed adaptations to result
    result = result.reset_index(drop=True)
    if how == "union":
        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_column_type=False,
        check_less_precise=True,
    )
@pytest.mark.parametrize("other_geometry", [False, True])
def test_geometry_not_named_geometry(dfs, how, other_geometry):
    """Overlay works when the active geometry column has a custom name on
    either side; for 'difference' the left frame's column name is kept."""
    # Issue #306
    # Add points and flip names
    df1, df2 = dfs
    df3 = df1.copy()
    df3 = df3.rename(columns={"geometry": "polygons"})
    df3 = df3.set_geometry("polygons")
    if other_geometry:
        # add a plain (non-active) column literally named "geometry"
        df3["geometry"] = df1.centroid.geometry
    assert df3.geometry.name == "polygons"
    res1 = overlay(df1, df2, how=how)
    res2 = overlay(df3, df2, how=how)
    assert df3.geometry.name == "polygons"
    if how == "difference":
        # in case of 'difference', column names of left frame are preserved
        assert res2.geometry.name == "polygons"
        if other_geometry:
            assert "geometry" in res2.columns
            assert_geoseries_equal(
                res2["geometry"], df3["geometry"], check_series_type=False
            )
            res2 = res2.drop(["geometry"], axis=1)
        # normalize back to the default name so res2 can be compared to res1
        res2 = res2.rename(columns={"polygons": "geometry"})
        res2 = res2.set_geometry("geometry")
    # TODO if existing column is overwritten -> geometry not last column
    if other_geometry and how == "intersection":
        res2 = res2.reindex(columns=res1.columns)
    assert_geodataframe_equal(res1, res2)
    # same exercise with a custom geometry name on the right frame
    df4 = df2.copy()
    df4 = df4.rename(columns={"geometry": "geom"})
    df4 = df4.set_geometry("geom")
    if other_geometry:
        df4["geometry"] = df2.centroid.geometry
    assert df4.geometry.name == "geom"
    res1 = overlay(df1, df2, how=how)
    res2 = overlay(df1, df4, how=how)
    assert_geodataframe_equal(res1, res2)
def test_bad_how(dfs):
    """An unknown ``how`` value is rejected with a ValueError."""
    left, right = dfs
    with pytest.raises(ValueError):
        overlay(left, right, how="spandex")
def test_duplicate_column_name(dfs, how):
    """Clashing attribute column names get _1/_2 suffixes in the output."""
    if how == "difference":
        pytest.skip("Difference uses columns from one df only.")
    left, right = dfs
    renamed_right = right.rename(columns={"col2": "col1"})
    res = overlay(left, renamed_right, how=how)
    assert "col1_1" in res.columns
    assert "col1_2" in res.columns
def test_geoseries_warning(dfs):
    """Passing a bare GeoSeries instead of a GeoDataFrame is unsupported."""
    # Issue #305
    left, right = dfs
    with pytest.raises(NotImplementedError):
        overlay(left, right.geometry, how="union")
def test_preserve_crs(dfs, how):
    """Overlay output inherits the (shared) CRS of its inputs, or None."""
    left, right = dfs
    # No CRS on either input -> no CRS on the result
    assert overlay(left, right, how=how).crs is None
    # Matching CRS on both inputs is carried through
    crs = "epsg:4326"
    left.crs = crs
    right.crs = crs
    assert overlay(left, right, how=how).crs == crs
def test_crs_mismatch(dfs, how):
    """Differing CRSs on the two inputs trigger a mismatch warning."""
    left, right = dfs
    left.crs = 4326
    right.crs = 3857
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        overlay(left, right, how=how)
def test_empty_intersection(dfs):
    """Disjoint inputs intersect to an empty frame with combined columns."""
    left, _ = dfs
    far_away = GeoSeries(
        [
            Polygon([(-1, -1), (-3, -1), (-3, -3), (-1, -3)]),
            Polygon([(-3, -3), (-5, -3), (-5, -5), (-3, -5)]),
        ]
    )
    disjoint = GeoDataFrame({"geometry": far_away, "col3": [1, 2]})
    expected = GeoDataFrame([], columns=["col1", "col3", "geometry"])
    result = overlay(left, disjoint)
    assert_geodataframe_equal(result, expected, check_dtype=False)
def test_correct_index(dfs):
    """Overlay must reset the index of its intermediate results correctly."""
    # GH883 - case where the index was not properly reset
    df1, df2 = dfs
    polys3 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": polys3, "col3": [1, 2, 3]})
    # expected intersections of df3 with the dfs fixture's right frame
    i1 = Polygon([(1, 1), (1, 3), (3, 3), (3, 1), (1, 1)])
    i2 = Polygon([(3, 3), (3, 5), (5, 5), (5, 3), (3, 3)])
    expected = GeoDataFrame(
        [[1, 1, i1], [3, 2, i2]], columns=["col3", "col2", "geometry"]
    )
    result = overlay(df3, df2, keep_geom_type=True)
    assert_geodataframe_equal(result, expected)
def test_warn_on_keep_geom_type(dfs):
    """The default keep_geom_type=None warns when overlay produces mixed
    geometry types."""
    # Only the right frame of the fixture is used; the left one was an
    # unused local in the original.
    _, df2 = dfs
    polys3 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": polys3})

    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
        overlay(df2, df3, keep_geom_type=None)
@pytest.mark.parametrize(
    "geom_types", ["polys", "poly_line", "poly_point", "line_poly", "point_poly"]
)
def test_overlay_strict(how, keep_geom_type, geom_types):
    """
    Test of mixed geometry types on input and output. Expected results initially
    generated using following snippet.

        polys1 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df1 = gpd.GeoDataFrame({'col1': [1, 2], 'geometry': polys1})
        polys2 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df2 = gpd.GeoDataFrame({'geometry': polys2, 'col2': [1, 2, 3]})
        lines1 = gpd.GeoSeries([LineString([(2, 0), (2, 4), (6, 4)]),
                                LineString([(0, 3), (6, 3)])])
        df3 = gpd.GeoDataFrame({'col3': [1, 2], 'geometry': lines1})
        points1 = gpd.GeoSeries([Point((2, 2)),
                                 Point((3, 3))])
        df4 = gpd.GeoDataFrame({'col4': [1, 2], 'geometry': points1})

        params=["union", "intersection", "difference", "symmetric_difference",
                "identity"]
        stricts = [True, False]

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df2, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('polys_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df3, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_line_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df4, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_point_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
    """
    polys1 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})
    polys2 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df2 = GeoDataFrame({"geometry": polys2, "col2": [1, 2, 3]})
    lines1 = GeoSeries(
        [LineString([(2, 0), (2, 4), (6, 4)]), LineString([(0, 3), (6, 3)])]
    )
    df3 = GeoDataFrame({"col3": [1, 2], "geometry": lines1})
    points1 = GeoSeries([Point((2, 2)), Point((3, 3))])
    df4 = GeoDataFrame({"col4": [1, 2], "geometry": points1})
    # pick the left/right operand combination for the parametrized case
    if geom_types == "polys":
        result = overlay(df1, df2, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_line":
        result = overlay(df1, df3, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_point":
        result = overlay(df1, df4, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "line_poly":
        result = overlay(df3, df1, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "point_poly":
        result = overlay(df4, df1, how=how, keep_geom_type=keep_geom_type)
    # A missing expected file means the combination produces an empty result;
    # the reader's error type depends on the I/O backend (see excepts below).
    try:
        expected = read_file(
            os.path.join(
                DATA,
                "strict",
                "{t}_{h}_{s}.geojson".format(t=geom_types, h=how, s=keep_geom_type),
            )
        )
        # the order depends on the spatial index used
        # so we sort the resultant dataframes to get a consistent order
        # independently of the spatial index implementation
        assert all(expected.columns == result.columns), "Column name mismatch"
        cols = list(set(result.columns) - {"geometry"})
        expected = expected.sort_values(cols, axis=0).reset_index(drop=True)
        result = result.sort_values(cols, axis=0).reset_index(drop=True)
        assert_geodataframe_equal(
            result,
            expected,
            normalize=True,
            check_column_type=False,
            check_less_precise=True,
            check_crs=False,
            check_dtype=False,
        )
    except DriverError:  # fiona >= 1.8
        assert result.empty
    except OSError:  # fiona < 1.8
        assert result.empty
    except RuntimeError:  # pyogrio.DataSourceError
        assert result.empty
def test_mixed_geom_error():
    """A frame mixing geometry types with keep_geom_type=True is rejected."""
    squares = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    polys_df = GeoDataFrame({"col1": [1, 2], "geometry": squares})
    mixed_geoms = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    mixed_df = GeoDataFrame({"col1": [1, 2], "geometry": mixed_geoms})
    with pytest.raises(NotImplementedError):
        overlay(polys_df, mixed_df, keep_geom_type=True)
def test_keep_geom_type_error():
    """A GeometryCollection input with keep_geom_type=True raises TypeError."""
    collection = GeoSeries(
        GeometryCollection(
            [
                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
            ]
        )
    )
    collection_df = GeoDataFrame({"col1": [2], "geometry": collection})
    squares = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    squares_df = GeoDataFrame({"col1": [1, 2], "geometry": squares})
    with pytest.raises(TypeError):
        overlay(collection_df, squares_df, keep_geom_type=True)
def test_keep_geom_type_geometry_collection():
    """keep_geom_type reduces GeometryCollection results to the input type;
    leaving it at the None default does the same but warns."""
    # GH 1581
    df1 = read_file(os.path.join(DATA, "geom_type", "df1.geojson"))
    df2 = read_file(os.path.join(DATA, "geom_type", "df2.geojson"))
    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
        intersection = overlay(df1, df2, keep_geom_type=None)
    assert len(intersection) == 1
    assert (intersection.geom_type == "Polygon").all()
    intersection = overlay(df1, df2, keep_geom_type=True)
    assert len(intersection) == 1
    assert (intersection.geom_type == "Polygon").all()
    intersection = overlay(df1, df2, keep_geom_type=False)
    assert len(intersection) == 1
    assert (intersection.geom_type == "GeometryCollection").all()
def test_keep_geom_type_geometry_collection2():
    """Lower-dimension intersection pieces (points/lines) are dropped with
    keep_geom_type=True and kept (inside GeometryCollections) otherwise."""
    polys1 = [
        box(0, 0, 1, 1),
        box(1, 1, 3, 3).union(box(1, 3, 5, 5)),
    ]
    polys2 = [
        box(0, 0, 1, 1),
        box(3, 1, 4, 2).union(box(4, 1, 5, 4)),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})

    result1 = overlay(df1, df2, keep_geom_type=True)
    expected1 = GeoDataFrame(
        {
            "left": [0, 1],
            "right": [0, 1],
            "geometry": [box(0, 0, 1, 1), box(4, 3, 5, 4)],
        }
    )
    assert_geodataframe_equal(result1, expected1)

    # With keep_geom_type=False, the point touch and mixed collection survive
    result1 = overlay(df1, df2, keep_geom_type=False)
    expected1 = GeoDataFrame(
        {
            "left": [0, 1, 1],
            "right": [0, 0, 1],
            "geometry": [
                box(0, 0, 1, 1),
                Point(1, 1),
                GeometryCollection([box(4, 3, 5, 4), LineString([(3, 1), (3, 2)])]),
            ],
        }
    )
    assert_geodataframe_equal(result1, expected1)
def test_keep_geom_type_geomcoll_different_types():
    """Collections consisting only of lower-dimension pieces are dropped
    entirely with keep_geom_type=True and kept as-is with False."""
    polys1 = [box(0, 1, 1, 3), box(10, 10, 12, 12)]
    polys2 = [
        Polygon([(1, 0), (3, 0), (3, 3), (1, 3), (1, 2), (2, 2), (2, 1), (1, 1)]),
        box(11, 11, 13, 13),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})

    result1 = overlay(df1, df2, keep_geom_type=True)
    expected1 = GeoDataFrame(
        {
            "left": [1],
            "right": [1],
            "geometry": [box(11, 11, 12, 12)],
        }
    )
    assert_geodataframe_equal(result1, expected1)

    result2 = overlay(df1, df2, keep_geom_type=False)
    expected2 = GeoDataFrame(
        {
            "left": [0, 1],
            "right": [0, 1],
            "geometry": [
                GeometryCollection([LineString([(1, 2), (1, 3)]), Point(1, 1)]),
                box(11, 11, 12, 12),
            ],
        }
    )
    assert_geodataframe_equal(result2, expected2)
def test_keep_geom_type_geometry_collection_difference():
    """keep_geom_type filtering also applies to how='difference'.

    Regression test for https://github.com/geopandas/geopandas/issues/2163.
    """
    # GH 2163
    polys1 = [
        box(0, 0, 1, 1),
        box(1, 1, 2, 2),
    ]

    # the tiny sliver in the second geometry may be converted to a
    # linestring during the overlay process due to floating point errors
    # on some platforms
    polys2 = [
        box(0, 0, 1, 1),
        box(1, 1, 2, 3).union(box(2, 2, 3, 2.00000000000000001)),
    ]

    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})

    result1 = overlay(df2, df1, keep_geom_type=True, how="difference")
    expected1 = GeoDataFrame(
        {
            "right": [1],
            "geometry": [box(1, 2, 2, 3)],
        },
    )
    assert_geodataframe_equal(result1, expected1)
@pytest.mark.parametrize("make_valid", [True, False])
def test_overlap_make_valid(make_valid):
    """make_valid=True repairs invalid input (self-intersecting bowtie);
    make_valid=False raises a ValueError that counts invalid geometries."""
    bowtie = Polygon([(1, 1), (9, 9), (9, 1), (1, 9), (1, 1)])
    assert not bowtie.is_valid
    fixed_bowtie = bowtie.buffer(0)  # buffer(0) is the classic validity fix
    assert fixed_bowtie.is_valid

    df1 = GeoDataFrame({"col1": ["region"], "geometry": GeoSeries([box(0, 0, 10, 10)])})
    df_bowtie = GeoDataFrame(
        {"col1": ["invalid", "valid"], "geometry": GeoSeries([bowtie, fixed_bowtie])}
    )
    if make_valid:
        df_overlay_bowtie = overlay(df1, df_bowtie, make_valid=make_valid)
        assert df_overlay_bowtie.at[0, "geometry"].equals(fixed_bowtie)
        assert df_overlay_bowtie.at[1, "geometry"].equals(fixed_bowtie)
    else:
        with pytest.raises(ValueError, match="1 invalid input geometries"):
            overlay(df1, df_bowtie, make_valid=make_valid)
def test_empty_overlay_return_non_duplicated_columns():
    """An empty overlay result still carries suffixed (_1/_2), de-duplicated
    column names from both inputs."""
    nybb = geopandas.read_file(geopandas.datasets.get_path("nybb"))
    nybb2 = nybb.copy()
    # shift far away so there is guaranteed to be no intersection
    nybb2.geometry = nybb2.translate(20000000)

    result = geopandas.overlay(nybb, nybb2)

    expected = GeoDataFrame(
        columns=[
            "BoroCode_1",
            "BoroName_1",
            "Shape_Leng_1",
            "Shape_Area_1",
            "BoroCode_2",
            "BoroName_2",
            "Shape_Leng_2",
            "Shape_Area_2",
            "geometry",
        ],
        crs=nybb.crs,
    )
    assert_geodataframe_equal(result, expected, check_dtype=False)
def test_non_overlapping(how):
    """Overlay of two disjoint polygons for every `how` mode.

    `how` presumably comes from a parametrized fixture defined earlier in
    this file (not visible in this chunk) — TODO confirm.
    """
    p1 = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
    p2 = Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])
    df1 = GeoDataFrame({"col1": [1], "geometry": [p1]})
    df2 = GeoDataFrame({"col2": [2], "geometry": [p2]})
    result = overlay(df1, df2, how=how)

    if how == "intersection":
        # empty-result index dtype changed in pandas 2.0
        if PANDAS_GE_20:
            index = None
        else:
            index = pd.Index([], dtype="object")
        expected = GeoDataFrame(
            {
                "col1": np.array([], dtype="int64"),
                "col2": np.array([], dtype="int64"),
                "geometry": [],
            },
            index=index,
        )
    elif how == "union":
        expected = GeoDataFrame(
            {
                "col1": [1, np.nan],
                "col2": [np.nan, 2],
                "geometry": [p1, p2],
            }
        )
    elif how == "identity":
        expected = GeoDataFrame(
            {
                "col1": [1.0],
                "col2": [np.nan],
                "geometry": [p1],
            }
        )
    elif how == "symmetric_difference":
        expected = GeoDataFrame(
            {
                "col1": [1, np.nan],
                "col2": [np.nan, 2],
                "geometry": [p1, p2],
            }
        )
    elif how == "difference":
        expected = GeoDataFrame(
            {
                "col1": [1],
                "geometry": [p1],
            }
        )

    assert_geodataframe_equal(result, expected)
def test_no_intersection():
    """Bounding boxes overlap but the geometries themselves do not:
    intersection must come back empty with all columns present."""
    # overlapping bounds but non-overlapping geometries
    gs = GeoSeries([Point(x, x).buffer(0.1) for x in range(3)])
    gdf1 = GeoDataFrame({"foo": ["a", "b", "c"]}, geometry=gs)
    gdf2 = GeoDataFrame({"bar": ["1", "3", "5"]}, geometry=gs.translate(1))

    expected = GeoDataFrame(columns=["foo", "bar", "geometry"])
    result = overlay(gdf1, gdf2, how="intersection")
    assert_geodataframe_equal(result, expected, check_index_type=False)
class TestOverlayWikiExample:
    """Overlay checked against the classic two-rectangle textbook example.

    Layer A is box(0, 2, 6, 6), layer B is box(4, 0, 10, 4); every `how`
    mode is compared against the hand-constructed expected geometries.
    """

    def setup_method(self):
        """Build the two input layers and the expected result per mode."""
        self.layer_a = GeoDataFrame(geometry=[box(0, 2, 6, 6)])

        self.layer_b = GeoDataFrame(geometry=[box(4, 0, 10, 4)])

        self.intersection = GeoDataFrame(geometry=[box(4, 2, 6, 4)])

        self.union = GeoDataFrame(
            geometry=[
                box(4, 2, 6, 4),
                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
            ]
        )

        self.a_difference_b = GeoDataFrame(
            geometry=[Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)])]
        )

        self.b_difference_a = GeoDataFrame(
            geometry=[
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)])
            ]
        )

        self.symmetric_difference = GeoDataFrame(
            geometry=[
                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
            ]
        )

        self.a_identity_b = GeoDataFrame(
            geometry=[
                box(4, 2, 6, 4),
                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
            ]
        )

        self.b_identity_a = GeoDataFrame(
            geometry=[
                box(4, 2, 6, 4),
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
            ]
        )

    def test_intersection(self):
        df_result = overlay(self.layer_a, self.layer_b, how="intersection")
        assert df_result.geom_equals(self.intersection).bool()

    def test_union(self):
        df_result = overlay(self.layer_a, self.layer_b, how="union")
        assert_geodataframe_equal(df_result, self.union)

    def test_a_difference_b(self):
        df_result = overlay(self.layer_a, self.layer_b, how="difference")
        assert_geodataframe_equal(df_result, self.a_difference_b)

    def test_b_difference_a(self):
        # difference is asymmetric, so check both argument orders
        df_result = overlay(self.layer_b, self.layer_a, how="difference")
        assert_geodataframe_equal(df_result, self.b_difference_a)

    def test_symmetric_difference(self):
        df_result = overlay(self.layer_a, self.layer_b, how="symmetric_difference")
        assert_geodataframe_equal(df_result, self.symmetric_difference)

    def test_a_identity_b(self):
        df_result = overlay(self.layer_a, self.layer_b, how="identity")
        assert_geodataframe_equal(df_result, self.a_identity_b)

    def test_b_identity_a(self):
        # identity is asymmetric as well
        df_result = overlay(self.layer_b, self.layer_a, how="identity")
        assert_geodataframe_equal(df_result, self.b_identity_a)

View File

@@ -0,0 +1,860 @@
import os
from packaging.version import Version
import warnings
import numpy as np
from numpy.testing import assert_array_equal
import pandas as pd
import shapely
from shapely.geometry import Point, GeometryCollection, LineString, LinearRing
import geopandas
from geopandas import GeoDataFrame, GeoSeries
import geopandas._compat as compat
from geopandas.array import from_shapely
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from pandas.testing import assert_frame_equal, assert_series_equal
import pytest
@pytest.fixture
def s():
    """Fixture: GeoSeries of three points on the diagonal (0,0), (1,1), (2,2)."""
    return GeoSeries([Point(i, i) for i in range(3)])
@pytest.fixture
def df():
    """Fixture: GeoDataFrame with three diagonal points and two int64 columns."""
    return GeoDataFrame(
        {
            "geometry": [Point(x, x) for x in range(3)],
            "value1": np.arange(3, dtype="int64"),
            "value2": np.array([1, 2, 1], dtype="int64"),
        }
    )
def test_repr(s, df):
    """Text and HTML reprs should surface the WKT of point geometries."""
    for obj in (s, df):
        assert "POINT" in repr(obj)
    assert "POINT" in df._repr_html_()
def test_repr_boxed_display_precision():
    """Default repr precision is 5 decimals for geographic-looking
    coordinates and 3 for projected-looking ones, and is overridable via
    geopandas.options.display_precision.

    NOTE(review): the global display_precision option is left at 9 at the
    end of this test and never restored — this may leak into later tests;
    confirm whether a reset/option-context is needed.
    """
    # geographic coordinates
    p1 = Point(10.123456789, 50.123456789)
    p2 = Point(4.123456789, 20.123456789)
    s1 = GeoSeries([p1, p2, None])
    assert "POINT (10.12346 50.12346)" in repr(s1)

    # geographic coordinates 4326
    s3 = GeoSeries([p1, p2], crs=4326)
    assert "POINT (10.12346 50.12346)" in repr(s3)

    # projected coordinates
    p1 = Point(3000.123456789, 3000.123456789)
    p2 = Point(4000.123456789, 4000.123456789)
    s2 = GeoSeries([p1, p2, None])
    assert "POINT (3000.123 3000.123)" in repr(s2)

    # projected geographic coordinate
    s4 = GeoSeries([p1, p2], crs=3857)
    assert "POINT (3000.123 3000.123)" in repr(s4)

    geopandas.options.display_precision = 1
    assert "POINT (10.1 50.1)" in repr(s1)

    geopandas.options.display_precision = 9
    assert "POINT (10.123456789 50.123456789)" in repr(s1)
def test_repr_all_missing():
    """repr of all-missing geometry must not crash.

    Regression test for https://github.com/geopandas/geopandas/issues/1195.
    """
    # https://github.com/geopandas/geopandas/issues/1195
    s = GeoSeries([None, None, None])
    assert "None" in repr(s)
    df = GeoDataFrame({"a": [1, 2, 3], "geometry": s})
    assert "None" in repr(df)
    assert "geometry" in df._repr_html_()
def test_repr_empty():
    """repr of empty GeoSeries/GeoDataFrame must not crash.

    Regression tests for geopandas issues #1195 and #1184.
    """
    # https://github.com/geopandas/geopandas/issues/1195
    s = GeoSeries([])
    assert repr(s) == "GeoSeries([], dtype: geometry)"
    df = GeoDataFrame({"a": [], "geometry": s})
    assert "Empty GeoDataFrame" in repr(df)
    # https://github.com/geopandas/geopandas/issues/1184
    assert "geometry" in df._repr_html_()
def test_repr_linearring():
    """LinearRing must round-trip through internal conversions as a ring,
    and closed LineStrings must NOT be coerced into rings.

    Regression test for https://github.com/geopandas/geopandas/pull/2689.
    """
    # https://github.com/geopandas/geopandas/pull/2689
    # specifically, checking internal shapely/pygeos/wkt/wkb conversions
    # preserve LinearRing
    s = GeoSeries([LinearRing([(0, 0), (1, 1), (1, -1)])])
    assert "LINEARRING" in str(s.iloc[0])  # shapely scalar repr
    assert "LINEARRING" in str(s)  # GeoSeries repr

    # check something coercible to linearring is not converted
    s2 = GeoSeries(
        [
            LineString([(0, 0), (1, 1), (1, -1)]),
            LineString([(0, 0), (1, 1), (1, -1), (0, 0)]),
        ]
    )
    assert "LINEARRING" not in str(s2)
def test_indexing(s, df):
    """Scalar, list, boolean and slice indexing all preserve geometry types."""
    # accessing scalar from the geometry (column)
    exp = Point(1, 1)
    assert s[1] == exp
    assert s.loc[1] == exp
    assert s.iloc[1] == exp
    assert df.loc[1, "geometry"] == exp
    assert df.iloc[1, 0] == exp

    # multiple values
    exp = GeoSeries([Point(2, 2), Point(0, 0)], index=[2, 0])
    assert_geoseries_equal(s.loc[[2, 0]], exp)
    assert_geoseries_equal(s.iloc[[2, 0]], exp)
    assert_geoseries_equal(s.reindex([2, 0]), exp)
    assert_geoseries_equal(df.loc[[2, 0], "geometry"], exp)
    # TODO here iloc does not return a GeoSeries
    assert_series_equal(
        df.iloc[[2, 0], 0], exp, check_series_type=False, check_names=False
    )

    # boolean indexing
    exp = GeoSeries([Point(0, 0), Point(2, 2)], index=[0, 2])
    mask = np.array([True, False, True])
    assert_geoseries_equal(s[mask], exp)
    assert_geoseries_equal(s.loc[mask], exp)
    assert_geoseries_equal(df[mask]["geometry"], exp)
    assert_geoseries_equal(df.loc[mask, "geometry"], exp)

    # slices
    s.index = [1, 2, 3]
    exp = GeoSeries([Point(1, 1), Point(2, 2)], index=[2, 3])
    assert_series_equal(s[1:], exp)
    assert_series_equal(s.iloc[1:], exp)
    assert_series_equal(s.loc[2:], exp)
def test_reindex(s, df):
    """reindex keeps geometry typing; dropping the geometry column by
    column-reindex degrades the frame to a plain DataFrame."""
    # GeoSeries reindex
    res = s.reindex([1, 2, 3])
    exp = GeoSeries([Point(1, 1), Point(2, 2), None], index=[1, 2, 3])
    assert_geoseries_equal(res, exp)

    # GeoDataFrame reindex index
    res = df.reindex(index=[1, 2, 3])
    assert_geoseries_equal(res.geometry, exp)

    # GeoDataFrame reindex columns
    res = df.reindex(columns=["value1", "geometry"])
    assert isinstance(res, GeoDataFrame)
    assert isinstance(res.geometry, GeoSeries)
    assert_frame_equal(res, df[["value1", "geometry"]])

    res = df.reindex(columns=["value1", "value2"])
    assert type(res) == pd.DataFrame
    assert_frame_equal(res, df[["value1", "value2"]])
def test_take(s, df):
    """take() preserves Geo types on both axes; taking only non-geometry
    columns yields a plain DataFrame."""
    inds = np.array([0, 2])

    # GeoSeries take
    result = s.take(inds)
    expected = s.iloc[[0, 2]]
    assert isinstance(result, GeoSeries)
    assert_geoseries_equal(result, expected)

    # GeoDataFrame take axis 0
    result = df.take(inds, axis=0)
    expected = df.iloc[[0, 2], :]
    assert isinstance(result, GeoDataFrame)
    assert_geodataframe_equal(result, expected)

    # GeoDataFrame take axis 1
    df = df.reindex(columns=["value1", "value2", "geometry"])  # ensure consistent order
    result = df.take(inds, axis=1)
    expected = df[["value1", "geometry"]]
    assert isinstance(result, GeoDataFrame)
    assert_geodataframe_equal(result, expected)

    result = df.take(np.array([0, 1]), axis=1)
    expected = df[["value1", "value2"]]
    assert isinstance(result, pd.DataFrame)
    assert_frame_equal(result, expected)
def test_take_empty(s, df):
    """Empty take/boolean selections keep the index type (DatetimeIndex).

    Regression test for https://github.com/geopandas/geopandas/issues/1190.
    """
    # ensure that index type is preserved in an empty take
    # https://github.com/geopandas/geopandas/issues/1190
    inds = np.array([], dtype="int64")

    # use non-default index
    df.index = pd.date_range("2012-01-01", periods=len(df))

    result = df.take(inds, axis=0)
    assert isinstance(result, GeoDataFrame)
    assert result.shape == (0, 3)
    assert isinstance(result.index, pd.DatetimeIndex)

    # the original bug report was an empty boolean mask
    for result in [df.loc[df["value1"] > 100], df[df["value1"] > 100]]:
        assert isinstance(result, GeoDataFrame)
        assert result.shape == (0, 3)
        assert isinstance(result.index, pd.DatetimeIndex)
def test_assignment(s, df):
    """Item assignment through [], .loc and .iloc updates geometry values."""
    exp = GeoSeries([Point(10, 10), Point(1, 1), Point(2, 2)])

    s2 = s.copy()
    s2[0] = Point(10, 10)
    assert_geoseries_equal(s2, exp)

    s2 = s.copy()
    s2.loc[0] = Point(10, 10)
    assert_geoseries_equal(s2, exp)

    s2 = s.copy()
    s2.iloc[0] = Point(10, 10)
    assert_geoseries_equal(s2, exp)

    df2 = df.copy()
    df2.loc[0, "geometry"] = Point(10, 10)
    assert_geoseries_equal(df2["geometry"], exp)

    df2 = df.copy()
    df2.iloc[0, 0] = Point(10, 10)
    assert_geoseries_equal(df2["geometry"], exp)
def test_assign(df):
    """DataFrame.assign keeps the GeoDataFrame subclass."""
    expected = df.copy()
    expected["new"] = 1
    result = df.assign(new=1)
    assert isinstance(result, GeoDataFrame)
    assert_frame_equal(result, expected)
def test_astype(s, df):
    """astype: numeric cast of geometry raises; str/object casts downgrade
    Geo containers to plain pandas containers."""
    # check geoseries functionality
    with pytest.raises(TypeError):
        s.astype(int)

    assert s.astype(str)[0] == "POINT (0 0)"

    res = s.astype(object)
    if not (
        (Version(pd.__version__) == Version("2.1.0"))
        or (Version(pd.__version__) == Version("2.1.1"))
    ):
        # https://github.com/geopandas/geopandas/issues/2948 - bug in pandas 2.1.0
        assert isinstance(res, pd.Series) and not isinstance(res, GeoSeries)
        assert res.dtype == object

    df = df.rename_geometry("geom_list")

    # check whether returned object is a geodataframe
    res = df.astype({"value1": float})
    assert isinstance(res, GeoDataFrame)

    # check whether returned object is a dataframe
    res = df.astype(str)
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)

    res = df.astype({"geom_list": str})
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)

    res = df.astype(object)
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
    assert res["geom_list"].dtype == object
def test_astype_invalid_geodataframe():
    """astype must not error on a GeoDataFrame without a geometry column.

    Regression test for https://github.com/geopandas/geopandas/issues/1144.
    """
    # https://github.com/geopandas/geopandas/issues/1144
    # a GeoDataFrame without geometry column should not error in astype
    df = GeoDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    res = df.astype(object)
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
    assert res["a"].dtype == object
def test_convert_dtypes(df):
    """convert_dtypes keeps GeoDataFrame type, CRS and a custom geometry
    column name, for geometry-first and geometry-last column orders.

    Regression test for https://github.com/geopandas/geopandas/issues/1870.
    """
    # https://github.com/geopandas/geopandas/issues/1870
    # Test geometry col is first col, first, geom_col_name=geometry
    # (order is important in concat, used internally)
    res1 = df.convert_dtypes()

    expected1 = GeoDataFrame(
        pd.DataFrame(df).convert_dtypes(), crs=df.crs, geometry=df.geometry.name
    )

    # Checking type and metadata are right
    assert_geodataframe_equal(expected1, res1)

    # Test geom last, geom_col_name=geometry
    res2 = df[["value1", "value2", "geometry"]].convert_dtypes()
    assert_geodataframe_equal(expected1[["value1", "value2", "geometry"]], res2)

    # Test again with crs set and custom geom col name
    df2 = df.set_crs(epsg=4326).rename_geometry("points")
    expected2 = GeoDataFrame(
        pd.DataFrame(df2).convert_dtypes(), crs=df2.crs, geometry=df2.geometry.name
    )
    res3 = df2.convert_dtypes()
    assert_geodataframe_equal(expected2, res3)

    # Test geom last, geom_col=geometry
    res4 = df2[["value1", "value2", "points"]].convert_dtypes()
    assert_geodataframe_equal(expected2[["value1", "value2", "points"]], res4)
def test_to_csv(df):
    """Geometries serialize to WKT in CSV output (platform line endings)."""
    exp = (
        "geometry,value1,value2\nPOINT (0 0),0,1\nPOINT (1 1),1,2\nPOINT (2 2),2,1\n"
    ).replace("\n", os.linesep)
    assert df.to_csv(index=False) == exp
@pytest.mark.filterwarnings(
    "ignore:Dropping of nuisance columns in DataFrame reductions"
)
def test_numerical_operations(s, df):
    """Numeric reductions skip the geometry column on frames, raise on a
    bare GeoSeries, and arithmetic on geometries raises; equality
    comparison still works element-wise."""
    # df methods ignore the geometry column
    exp = pd.Series([3, 4], index=["value1", "value2"])
    if not compat.PANDAS_GE_20:
        res = df.sum()
    else:
        # pandas >= 2.0 no longer silently drops nuisance columns
        res = df.sum(numeric_only=True)
    assert_series_equal(res, exp)

    # series methods raise error (not supported for geometry)
    with pytest.raises(TypeError):
        s.sum()

    with pytest.raises(TypeError):
        s.max()

    with pytest.raises((TypeError, ValueError)):
        # TODO: remove ValueError after pandas-dev/pandas#32749
        s.idxmax()

    # numerical ops raise an error
    with pytest.raises(TypeError):
        df + 1

    with pytest.raises(TypeError):
        s + 1

    # boolean comparisons work
    res = df == 100
    exp = pd.DataFrame(False, index=df.index, columns=df.columns)
    assert_frame_equal(res, exp)
def test_where(s):
    """GeoSeries.where replaces unselected rows with missing geometry."""
    masked = s.where(np.array([True, False, True]))
    expected = GeoSeries([Point(0, 0), None, Point(2, 2)])
    assert_series_equal(masked, expected)
def test_select_dtypes(df):
    """select_dtypes(numeric) drops the geometry column."""
    numeric_only = df.select_dtypes(include=[np.number])
    assert_frame_equal(numeric_only, df[["value1", "value2"]])
def test_equals(s, df):
    """pandas .equals works for geometry-holding containers.

    Regression test for https://github.com/geopandas/geopandas/issues/1420.
    """
    # https://github.com/geopandas/geopandas/issues/1420
    s2 = s.copy()
    assert s.equals(s2) is True
    s2.iloc[0] = None
    assert s.equals(s2) is False

    df2 = df.copy()
    assert df.equals(df2) is True
    df2.loc[0, "geometry"] = Point(10, 10)
    assert df.equals(df2) is False
    df2 = df.copy()
    df2.loc[0, "value1"] = 10
    assert df.equals(df2) is False
# Missing values
def test_fillna_scalar(s, df):
    """fillna with a geometry scalar fills missing geometries; np.nan is a
    tolerated no-op; non-geometry fill on missing geometry raises."""
    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
    res = s2.fillna(Point(1, 1))
    assert_geoseries_equal(res, s)

    # allow np.nan although this does not change anything
    # https://github.com/geopandas/geopandas/issues/1149
    res = s2.fillna(np.nan)
    assert_geoseries_equal(res, s2)

    # raise exception if trying to fill missing geometry w/ non-geometry
    df2 = df.copy()
    df2["geometry"] = s2
    res = df2.fillna(Point(1, 1))
    assert_geodataframe_equal(res, df)
    with pytest.raises((NotImplementedError, TypeError)):  # GH2351
        df2.fillna(0)

    # allow non-geometry fill value if there are no missing values
    # https://github.com/geopandas/geopandas/issues/1149
    df3 = df.copy()
    df3.loc[0, "value1"] = np.nan
    res = df3.fillna(0)
    assert_geodataframe_equal(res.astype({"value1": "int64"}), df)
def test_fillna_series(s):
    """fillna with another GeoSeries aligns on index (not position)."""
    # fill na with another GeoSeries
    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])

    # check na filled with the same index
    res = s2.fillna(GeoSeries([Point(1, 1)] * 3))
    assert_geoseries_equal(res, s)

    # check na filled based on index, not position
    index = [3, 2, 1]
    res = s2.fillna(GeoSeries([Point(i, i) for i in index], index=index))
    assert_geoseries_equal(res, s)

    # check na filled but the input length is different
    res = s2.fillna(GeoSeries([Point(1, 1)], index=[1]))
    assert_geoseries_equal(res, s)

    # check na filled but the inputting index is different
    res = s2.fillna(GeoSeries([Point(1, 1)], index=[9]))
    assert_geoseries_equal(res, s2)
def test_fillna_inplace(s):
    """fillna(inplace=True) fills values; on pandas >= 2.1 it reuses the
    same underlying GeometryArray instead of allocating a new one."""
    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
    arr = s2.array
    s2.fillna(Point(1, 1), inplace=True)
    assert_geoseries_equal(s2, s)
    if compat.PANDAS_GE_21:
        # starting from pandas 2.1, there is support to do this actually inplace
        assert s2.array is arr
def test_dropna():
    """dropna removes rows with missing geometry, keeping original labels."""
    series = GeoSeries([Point(0, 0), None, Point(2, 2)])
    expected = series.loc[[0, 2]]
    assert_geoseries_equal(series.dropna(), expected)
@pytest.mark.parametrize("NA", [None, np.nan])
def test_isna(NA):
    """isna/isnull/notna/notnull detect both None and np.nan as missing
    geometry and return plain boolean Series (name/index preserved)."""
    s2 = GeoSeries([Point(0, 0), NA, Point(2, 2)], index=[2, 4, 5], name="tt")
    exp = pd.Series([False, True, False], index=[2, 4, 5], name="tt")
    res = s2.isnull()
    assert type(res) == pd.Series
    assert_series_equal(res, exp)
    res = s2.isna()
    assert_series_equal(res, exp)
    res = s2.notnull()
    assert_series_equal(res, ~exp)
    res = s2.notna()
    assert_series_equal(res, ~exp)
# Any / all
def test_any_all():
    """any()/all() treat empty geometries as falsy, non-empty as truthy."""
    empty = GeometryCollection([])
    s = GeoSeries([empty, Point(1, 1)])
    assert not s.all()
    assert s.any()

    s = GeoSeries([Point(1, 1), Point(1, 1)])
    assert s.all()
    assert s.any()

    s = GeoSeries([empty, empty])
    assert not s.all()
    assert not s.any()
# Groupby / algos
def test_sort_values():
    """Geometries sort deterministically in both directions; sorting an
    empty GeoSeries is a no-op."""
    points = GeoSeries([Point(0, 0), Point(2, 2), Point(0, 2)])

    ascending = points.sort_values()
    assert list(ascending.index) == [0, 2, 1]

    descending = points.sort_values(ascending=False)
    assert list(descending.index) == [1, 2, 0]

    # empty geoseries
    empty = points.iloc[:0]
    assert_geoseries_equal(empty.sort_values(), empty)
def test_sort_values_empty_missing():
    """Sorting order with empty and missing geometries: empties sort before
    non-empties, NAs go last by default (or first with na_position)."""
    s = GeoSeries([Point(0, 0), None, Point(), Point(1, 1)])

    # default: NA sorts last, empty first
    res = s.sort_values()
    assert res.index.tolist() == [2, 0, 3, 1]

    # descending: NA sorts last, empty last
    res = s.sort_values(ascending=False)
    assert res.index.tolist() == [3, 0, 2, 1]

    # NAs first, empty first after NAs
    res = s.sort_values(na_position="first")
    assert res.index.tolist() == [1, 2, 0, 3]

    # NAs first, descending with empty last
    res = s.sort_values(ascending=False, na_position="first")
    assert res.index.tolist() == [1, 3, 0, 2]

    # all missing / empty
    s = GeoSeries([None, None, None])
    res = s.sort_values()
    assert res.index.tolist() == [0, 1, 2]

    s = GeoSeries([Point(), Point(), Point()])
    res = s.sort_values()
    assert res.index.tolist() == [0, 1, 2]

    s = GeoSeries([Point(), None, Point()])
    res = s.sort_values()
    assert res.index.tolist() == [0, 2, 1]
def test_unique():
    """unique() collapses duplicate geometries into a GeometryArray."""
    series = GeoSeries([Point(0, 0), Point(0, 0), Point(2, 2)])
    expected = from_shapely([Point(0, 0), Point(2, 2)])
    # TODO should have specialized GeometryArray assert method
    assert_array_equal(series.unique(), expected)
def pd14_compat_index(index):
    """Wrap *index* in a GeometryArray on pandas >= 1.4 (where value_counts
    returns extension-array indexes); return it unchanged otherwise."""
    return from_shapely(index) if compat.PANDAS_GE_14 else index
def test_value_counts():
    """value_counts over geometries: counts duplicates, ignores CRS,
    handles mixed geometry types and the dropna flag."""
    # each object is considered unique
    s = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)])
    res = s.value_counts()
    if compat.PANDAS_GE_20:
        # pandas 2.0 names the resulting series "count"
        name = "count"
    else:
        name = None
    with compat.ignore_shapely2_warnings():
        exp = pd.Series(
            [2, 1], index=pd14_compat_index([Point(0, 0), Point(1, 1)]), name=name
        )
    assert_series_equal(res, exp)
    # Check crs doesn't make a difference - note it is not kept in output index anyway
    s2 = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)], crs="EPSG:4326")
    res2 = s2.value_counts()
    assert_series_equal(res2, exp)
    if compat.PANDAS_GE_14:
        # TODO should/ can we fix CRS being lost
        assert s2.value_counts().index.array.crs is None

    # check mixed geometry
    s3 = GeoSeries([Point(0, 0), LineString([[1, 1], [2, 2]]), Point(0, 0)])
    res3 = s3.value_counts()
    index = pd14_compat_index([Point(0, 0), LineString([[1, 1], [2, 2]])])
    with compat.ignore_shapely2_warnings():
        exp3 = pd.Series([2, 1], index=index, name=name)
    assert_series_equal(res3, exp3)

    # check None is handled
    s4 = GeoSeries([Point(0, 0), None, Point(0, 0)])
    res4 = s4.value_counts(dropna=True)
    with compat.ignore_shapely2_warnings():
        exp4_dropna = pd.Series([2], index=pd14_compat_index([Point(0, 0)]), name=name)
    assert_series_equal(res4, exp4_dropna)
    with compat.ignore_shapely2_warnings():
        exp4_keepna = pd.Series(
            [2, 1], index=pd14_compat_index([Point(0, 0), None]), name=name
        )
    res4_keepna = s4.value_counts(dropna=False)
    assert_series_equal(res4_keepna, exp4_keepna)
@pytest.mark.xfail(strict=False)
def test_drop_duplicates_series():
    """drop_duplicates on a GeoSeries of identical points leaves one row.

    Marked xfail(strict=False): duplicated() relies on unstable hashing of
    unhashable objects, making this flaky (see pandas #27264 / #27035).
    """
    # duplicated does not yet use EA machinery
    # (https://github.com/pandas-dev/pandas/issues/27264)
    # but relies on unstable hashing of unhashable objects in numpy array
    # giving flaky test (https://github.com/pandas-dev/pandas/issues/27035)
    dups = GeoSeries([Point(0, 0), Point(0, 0)])
    dropped = dups.drop_duplicates()
    assert len(dropped) == 1
@pytest.mark.xfail(strict=False)
def test_drop_duplicates_frame():
    """drop_duplicates on a GeoDataFrame: subset='geometry' collapses equal
    geometries; full-row dedup keeps rows that differ in other columns."""
    # duplicated does not yet use EA machinery, see above
    gdf_len = 3
    dup_gdf = GeoDataFrame(
        {"geometry": [Point(0, 0) for _ in range(gdf_len)], "value1": range(gdf_len)}
    )
    dropped_geometry = dup_gdf.drop_duplicates(subset="geometry")
    assert len(dropped_geometry) == 1
    dropped_all = dup_gdf.drop_duplicates()
    assert len(dropped_all) == gdf_len
def test_groupby(df):
    """groupby: count includes geometry, numeric reductions skip it, and
    apply over the geometry column can yield geometry or plain values."""
    # counts work fine
    res = df.groupby("value2").count()
    exp = pd.DataFrame(
        {"geometry": [2, 1], "value1": [2, 1], "value2": [1, 2]}
    ).set_index("value2")
    assert_frame_equal(res, exp)

    # reductions ignore geometry column
    if not compat.PANDAS_GE_20:
        res = df.groupby("value2").sum()
    else:
        # pandas >= 2.0 requires numeric_only to skip non-numeric columns
        res = df.groupby("value2").sum(numeric_only=True)
    exp = pd.DataFrame({"value1": [2, 1], "value2": [1, 2]}, dtype="int64").set_index(
        "value2"
    )
    assert_frame_equal(res, exp)

    # applying on the geometry column
    res = df.groupby("value2")["geometry"].apply(lambda x: x.unary_union)
    exp = GeoSeries(
        [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)],
        index=pd.Index([1, 2], name="value2"),
        name="geometry",
    )
    assert_series_equal(res, exp)

    # apply on geometry column not resulting in new geometry
    res = df.groupby("value2")["geometry"].apply(lambda x: x.unary_union.area)
    exp = pd.Series([0.0, 0.0], index=pd.Index([1, 2], name="value2"), name="geometry")
    assert_series_equal(res, exp)
def test_groupby_groups(df):
    """groupby.get_group returns a GeoDataFrame with the original rows."""
    g = df.groupby("value2")
    res = g.get_group(1)
    assert isinstance(res, GeoDataFrame)
    exp = df.loc[[0, 2]]
    assert_frame_equal(res, exp)
@pytest.mark.skip_no_sindex
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
def test_groupby_metadata(crs):
    """Group chunks passed to groupby.apply keep GeoDataFrame type and CRS.

    Regression test for https://github.com/geopandas/geopandas/issues/2294.
    """
    # https://github.com/geopandas/geopandas/issues/2294
    df = GeoDataFrame(
        {
            "geometry": [Point(0, 0), Point(1, 1), Point(0, 0)],
            "value1": np.arange(3, dtype="int64"),
            "value2": np.array([1, 2, 1], dtype="int64"),
        },
        crs=crs,
    )

    # dummy test asserting we can access the crs
    def func(group):
        assert isinstance(group, GeoDataFrame)
        assert group.crs == crs

    df.groupby("value2").apply(func)

    # actual test with functionality
    res = df.groupby("value2").apply(
        lambda x: geopandas.sjoin(x, x[["geometry", "value1"]], how="inner")
    )

    if compat.PANDAS_GE_22:
        # merge sort behaviour changed in pandas #54611
        take_indices = [0, 0, 2, 2, 1]
        value_right = [0, 2, 0, 2, 1]
    else:
        take_indices = [0, 2, 0, 2, 1]
        value_right = [0, 0, 2, 2, 1]
    expected = (
        df.take(take_indices)
        .set_index("value2", drop=False, append=True)
        .swaplevel()
        .rename(columns={"value1": "value1_left"})
        .assign(value1_right=value_right)
    )
    assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)
def test_apply(s):
    """Series.apply returns a GeoSeries when the function yields geometry,
    otherwise a plain Series."""
    # function that returns geometry preserves GeoSeries class
    def geom_func(geom):
        assert isinstance(geom, Point)
        return geom

    result = s.apply(geom_func)
    assert isinstance(result, GeoSeries)
    assert_geoseries_equal(result, s)

    # function that returns non-geometry results in Series
    def numeric_func(geom):
        assert isinstance(geom, Point)
        return geom.x

    result = s.apply(numeric_func)
    assert not isinstance(result, GeoSeries)
    assert_series_equal(result, pd.Series([0.0, 1.0, 2.0]))
def test_apply_loc_len1(df):
    """apply on a length-1 .loc subset must not break.

    Regression test for https://github.com/geopandas/geopandas/issues/1078
    (inconsistent Block ndim in pandas).
    """
    # subset of len 1 with loc -> bug in pandas with inconsistent Block ndim
    # resulting in bug in apply
    # https://github.com/geopandas/geopandas/issues/1078
    subset = df.loc[[0], "geometry"]
    result = subset.apply(lambda geom: geom.is_empty)
    expected = subset.is_empty
    np.testing.assert_allclose(result, expected)
def test_apply_convert_dtypes_keyword(s):
    """The deprecated convert_dtype keyword is still accepted: silently on
    pandas < 2.1, with exactly one deprecation warning on >= 2.1."""
    # ensure the convert_dtypes keyword is accepted
    if not compat.PANDAS_GE_21:
        recorder = warnings.catch_warnings(record=True)
    else:
        recorder = pytest.warns()
    with recorder as record:
        res = s.apply(lambda x: x, convert_dtype=True, args=())
    assert_geoseries_equal(res, s)
    if compat.PANDAS_GE_21:
        assert len(record) == 1
        assert "the convert_dtype parameter" in str(record[0].message)
    else:
        assert len(record) == 0
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
def test_apply_no_geometry_result(df, crs):
    """apply whose output has no geometry yields a plain DataFrame, on both
    axes and regardless of CRS."""
    if crs:
        df = df.set_crs(crs)
    result = df.apply(lambda col: col.astype(str), axis=0)
    assert type(result) is pd.DataFrame
    expected = df.astype(str)
    assert_frame_equal(result, expected)

    result = df.apply(lambda col: col.astype(str), axis=1)
    assert type(result) is pd.DataFrame
    assert_frame_equal(result, expected)
def test_apply_preserves_geom_col_name(df):
    """A no-op column-wise apply keeps a custom geometry column name."""
    renamed = df.rename_geometry("geom")
    applied = renamed.apply(lambda col: col, axis=0)
    assert applied.geometry.name == "geom"
def test_df_apply_returning_series(df):
    """Row-wise apply: geometry results become a GeoSeries; scalars, NaN,
    None and non-geometry objects stay plain dtypes.

    Regression tests for geopandas issues #2283, #2480 and #2889.
    """
    # https://github.com/geopandas/geopandas/issues/2283
    result = df.apply(lambda row: row.geometry, axis=1)
    assert_geoseries_equal(result, df.geometry, check_crs=False)

    result = df.apply(lambda row: row.value1, axis=1)
    assert_series_equal(result, df["value1"].rename(None))
    # https://github.com/geopandas/geopandas/issues/2480
    result = df.apply(lambda x: float("NaN"), axis=1)
    assert result.dtype == "float64"
    # assert list of nones is not promoted to GeometryDtype
    result = df.apply(lambda x: None, axis=1)
    assert result.dtype == "object"
    # https://github.com/geopandas/geopandas/issues/2889
    # contrived case such that `from_shapely` receives an array of geodataframes
    res = df.apply(lambda row: df.geometry.to_frame(), axis=1)
    assert res.dtype == "object"
def test_df_apply_geometry_dtypes(df):
    """Column-wise apply hands geometry columns over as GeoSeries and other
    columns as plain Series.

    Regression test for https://github.com/geopandas/geopandas/issues/1852.
    """
    # https://github.com/geopandas/geopandas/issues/1852
    apply_types = []

    def get_dtypes(srs):
        # record the (name, container type) of each column as seen by apply
        apply_types.append((srs.name, type(srs)))

    df["geom2"] = df.geometry
    df.apply(get_dtypes)
    expected = [
        ("geometry", GeoSeries),
        ("value1", pd.Series),
        ("value2", pd.Series),
        ("geom2", GeoSeries),
    ]
    assert apply_types == expected
def test_pivot(df):
    """pivot (which creates a MultiIndex) must keep the GeoDataFrame type.

    Regression test for https://github.com/geopandas/geopandas/issues/2057.
    """
    pivoted = df.pivot(columns="value1")
    expected = GeoDataFrame(pd.DataFrame(df).pivot(columns="value1"))
    assert_geodataframe_equal(pivoted, expected)
def test_preserve_attrs(df):
    """DataFrame.attrs survive indexing operations and methods.

    Regression test for https://github.com/geopandas/geopandas/issues/1654.
    """
    df.attrs["name"] = "my_name"
    attrs = {"name": "my_name"}
    assert df.attrs == attrs

    # preserve attrs in indexing operations
    # (bug fix: the loop previously re-checked the unchanged parent `df`
    # instead of `subset`, making the check a no-op)
    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
        assert subset.attrs == attrs

    # preserve attrs in methods
    df2 = df.reset_index()
    assert df2.attrs == attrs

    # https://github.com/geopandas/geopandas/issues/1875
    df3 = df2.explode(index_parts=True)
    assert df3.attrs == attrs
def test_preserve_flags(df):
    """DataFrame.flags survive indexing and methods, and the
    allows_duplicate_labels=False flag is actually enforced.

    Regression test for https://github.com/geopandas/geopandas/issues/1654.
    """
    df = df.set_flags(allows_duplicate_labels=False)
    assert df.flags.allows_duplicate_labels is False

    # preserve flags in indexing operations
    # (bug fix: the loop previously re-checked the parent `df` instead of
    # `subset`, making the check a no-op)
    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
        assert subset.flags.allows_duplicate_labels is False

    # preserve flags in methods
    df2 = df.reset_index()
    assert df2.flags.allows_duplicate_labels is False

    # it is honored for operations that introduce duplicate labels
    with pytest.raises(ValueError):
        df.reindex([0, 0, 1])
    with pytest.raises(ValueError):
        df[["value1", "value1", "geometry"]]
    with pytest.raises(ValueError):
        pd.concat([df, df])

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
from geopandas.tools._show_versions import (
_get_C_info,
_get_deps_info,
_get_sys_info,
show_versions,
)
def test_get_sys_info():
    """_get_sys_info reports the interpreter, executable path and machine."""
    info = _get_sys_info()
    for key in ("python", "executable", "machine"):
        assert key in info
def test_get_c_info():
    """_get_C_info reports GEOS, GDAL and PROJ versions plus data dirs."""
    info = _get_C_info()
    for key in (
        "GEOS",
        "GEOS lib",
        "GDAL",
        "GDAL data dir",
        "PROJ",
        "PROJ data dir",
    ):
        assert key in info
def test_get_deps_info():
    """_get_deps_info covers the core and optional dependency set."""
    info = _get_deps_info()
    for dep in (
        "geopandas",
        "pandas",
        "fiona",
        "numpy",
        "shapely",
        "rtree",
        "pyproj",
        "matplotlib",
        "mapclassify",
        "geopy",
        "psycopg2",
        "geoalchemy2",
    ):
        assert dep in info
def test_show_versions(capsys):
    """show_versions prints system, C-library and dependency sections."""
    show_versions()
    captured = capsys.readouterr()
    for token in ("python", "GEOS", "geopandas"):
        assert token in captured.out

View File

@@ -0,0 +1,958 @@
from math import sqrt
from shapely.geometry import (
Point,
Polygon,
MultiPolygon,
box,
GeometryCollection,
LineString,
)
from numpy.testing import assert_array_equal
import geopandas
from geopandas import _compat as compat
from geopandas import GeoDataFrame, GeoSeries, read_file, datasets
import pytest
import numpy as np
import pandas as pd
if compat.USE_SHAPELY_20:
import shapely as mod
elif compat.USE_PYGEOS:
import pygeos as mod
@pytest.mark.skip_no_sindex
class TestSeriesSindex:
    """Spatial-index behaviour of GeoSeries/GeoDataFrame: lazy construction,
    emptiness rules, and invalidation on mutation or row selection."""

    def test_has_sindex(self):
        """Test the has_sindex method."""
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(0, 0), (1, 1), (0, 1)])

        d = GeoDataFrame({"geom": [t1, t2]}, geometry="geom")
        assert not d.has_sindex
        d.sindex
        assert d.has_sindex
        # resetting the cached index must be reflected by has_sindex
        d.geometry.values._sindex = None
        assert not d.has_sindex
        d.sindex
        assert d.has_sindex

        s = GeoSeries([t1, t2])
        assert not s.has_sindex
        s.sindex
        assert s.has_sindex
        s.values._sindex = None
        assert not s.has_sindex
        s.sindex
        assert s.has_sindex

    def test_empty_geoseries(self):
        """Tests creating a spatial index from an empty GeoSeries."""
        s = GeoSeries(dtype=object)
        # an empty tree is falsy and has zero length
        assert not s.sindex
        assert len(s.sindex) == 0

    def test_point(self):
        """A single point is indexed and answers bbox intersection queries."""
        s = GeoSeries([Point(0, 0)])
        assert s.sindex.size == 1
        hits = s.sindex.intersection((-1, -1, 1, 1))
        assert len(list(hits)) == 1
        hits = s.sindex.intersection((-2, -2, -1, -1))
        assert len(list(hits)) == 0

    def test_empty_point(self):
        """Tests that a single empty Point results in an empty tree."""
        s = GeoSeries([Point()])
        assert not s.sindex
        assert len(s.sindex) == 0

    def test_polygons(self):
        """All polygon rows end up in the index."""
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        s = GeoSeries([t1, t2, sq])
        assert s.sindex.size == 3

    @pytest.mark.filterwarnings("ignore:The series.append method is deprecated")
    @pytest.mark.skipif(compat.PANDAS_GE_20, reason="append removed in pandas 2.0")
    def test_polygons_append(self):
        """Appending series (pandas < 2.0 only) reindexes all rows."""
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        s = GeoSeries([t1, t2, sq])
        t = GeoSeries([t1, t2, sq], [3, 4, 5])
        s = s.append(t)
        assert len(s) == 6
        assert s.sindex.size == 6

    def test_lazy_build(self):
        """The index is only built on first .sindex access."""
        s = GeoSeries([Point(0, 0)])
        assert s.values._sindex is None
        assert s.sindex.size == 1
        assert s.values._sindex is not None

    def test_rebuild_on_item_change(self):
        """Mutating an element invalidates the cached index."""
        s = GeoSeries([Point(0, 0)])
        original_index = s.sindex
        s.iloc[0] = Point(0, 0)
        assert s.sindex is not original_index

    def test_rebuild_on_slice(self):
        """Row selection rebuilds the index unless all rows are kept in order."""
        s = GeoSeries([Point(0, 0), Point(0, 0)])
        original_index = s.sindex
        # Select a couple of rows
        sliced = s.iloc[:1]
        assert sliced.sindex is not original_index
        # Select all rows
        sliced = s.iloc[:]
        assert sliced.sindex is original_index
        # Select all rows and flip
        sliced = s.iloc[::-1]
        assert sliced.sindex is not original_index
@pytest.mark.skip_no_sindex
class TestFrameSindex:
    """Spatial-index caching and invalidation behaviour on GeoDataFrame."""
    def setup_method(self):
        # Five diagonal points plus two plain (non-geometry) columns.
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "geom": [Point(x, y) for x, y in zip(range(5), range(5))],
        }
        self.df = GeoDataFrame(data, geometry="geom")
    def test_sindex(self):
        """Basic size and bounds-intersection query on the frame's index."""
        self.df.crs = "epsg:4326"
        assert self.df.sindex.size == 5
        hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4)))
        assert len(hits) == 2
        assert hits[0] == 3
    def test_lazy_build(self):
        """The tree is only built on first access to .sindex."""
        assert self.df.geometry.values._sindex is None
        assert self.df.sindex.size == 5
        assert self.df.geometry.values._sindex is not None
    def test_sindex_rebuild_on_set_geometry(self):
        """Replacing the active geometry column invalidates the cached index."""
        # First build the sindex
        assert self.df.sindex is not None
        original_index = self.df.sindex
        self.df.set_geometry(
            [Point(x, y) for x, y in zip(range(5, 10), range(5, 10))], inplace=True
        )
        assert self.df.sindex is not original_index
    def test_rebuild_on_row_slice(self):
        """Row subsetting/reordering rebuilds; a full identity slice reuses the cache."""
        # Select a subset of rows rebuilds
        original_index = self.df.sindex
        sliced = self.df.iloc[:1]
        assert sliced.sindex is not original_index
        # Slicing all does not rebuild
        original_index = self.df.sindex
        sliced = self.df.iloc[:]
        assert sliced.sindex is original_index
        # Re-ordering rebuilds
        sliced = self.df.iloc[::-1]
        assert sliced.sindex is not original_index
    def test_rebuild_on_single_col_selection(self):
        """Selecting a single column should not rebuild the spatial index."""
        # Selecting geometry column preserves the index
        original_index = self.df.sindex
        geometry_col = self.df["geom"]
        assert geometry_col.sindex is original_index
        geometry_col = self.df.geometry
        assert geometry_col.sindex is original_index
    def test_rebuild_on_multiple_col_selection(self):
        """Selecting a subset of columns preserves the index."""
        original_index = self.df.sindex
        # Selecting a subset of columns preserves the index for pandas < 2.0
        # with pandas 2.0, the column is now copied, losing the index (although
        # with Copy-on-Write, this will again be preserved)
        subset1 = self.df[["geom", "A"]]
        if compat.PANDAS_GE_20 and not pd.options.mode.copy_on_write:
            assert subset1.sindex is not original_index
        else:
            assert subset1.sindex is original_index
        subset2 = self.df[["A", "geom"]]
        if compat.PANDAS_GE_20 and not pd.options.mode.copy_on_write:
            assert subset2.sindex is not original_index
        else:
            assert subset2.sindex is original_index
    def test_rebuild_on_update_inplace(self):
        """An in-place row reordering invalidates the cached index."""
        gdf = self.df.copy()
        old_sindex = gdf.sindex
        # sorting in place
        gdf.sort_values("A", ascending=False, inplace=True)
        # spatial index should be invalidated
        assert not gdf.has_sindex
        new_sindex = gdf.sindex
        # and should be different
        assert new_sindex is not old_sindex
        # sorting should still have happened though
        assert gdf.index.tolist() == [4, 3, 2, 1, 0]
    def test_update_inplace_no_rebuild(self):
        """A pure column rename keeps the cached index valid."""
        gdf = self.df.copy()
        old_sindex = gdf.sindex
        gdf.rename(columns={"A": "AA"}, inplace=True)
        # a rename shouldn't invalidate the index
        assert gdf.has_sindex
        # and the "new" should be the same
        new_sindex = gdf.sindex
        assert old_sindex is new_sindex
# Skip to accommodate Shapely geometries being unhashable
@pytest.mark.skip
class TestJoinSindex:
    """Spatial-index lookups against views and merges of the nybb dataset.

    Currently skipped (see mark above); kept as documentation of the intended
    sindex behaviour across DataFrame views and an outer merge.
    """
    def setup_method(self):
        # Load the New York boroughs demo dataset shipped with geopandas.
        nybb_filename = geopandas.datasets.get_path("nybb")
        self.boros = read_file(nybb_filename)
    def test_merge_geo(self):
        """Index hits must be consistent on the full frame, on views, and on a merge."""
        # First check that we get hits from the boros frame.
        tree = self.boros.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        res = [self.boros.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Bronx", "Queens"]
        # Check that we only get the Bronx from this view.
        first = self.boros[self.boros["BoroCode"] < 3]
        tree = first.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        res = [first.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Bronx"]
        # Check that we only get Queens from this view.
        second = self.boros[self.boros["BoroCode"] >= 3]
        tree = second.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        # BUG FIX: a stray trailing comma previously wrapped this list in a
        # 1-tuple, so the equality assert below could never pass.
        res = [second.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Queens"]
        # Get both the Bronx and Queens again.
        merged = first.merge(second, how="outer")
        assert len(merged) == 5
        assert merged.sindex.size == 5
        tree = merged.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        res = [merged.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Bronx", "Queens"]
@pytest.mark.skip_no_sindex
class TestPygeosInterface:
    """Exercises the shapely-2.0 / pygeos style SpatialIndex API
    (``intersection``, ``query`` with scalar and array inputs, ``nearest``)
    against a small frame of five diagonal points plus one box."""
    def setup_method(self):
        # Points (0,0)..(4,4) at positions 0..4, box at position 5.
        data = {
            "geom": [Point(x, y) for x, y in zip(range(5), range(5))]
            + [box(10, 10, 20, 20)]  # include a box geometry
        }
        self.df = GeoDataFrame(data, geometry="geom")
        self.expected_size = len(data["geom"])
    # --------------------------- `intersection` tests -------------------------- #
    @pytest.mark.parametrize(
        "test_geom, expected",
        (
            ((-1, -1, -0.5, -0.5), []),
            ((-0.5, -0.5, 0.5, 0.5), [0]),
            ((0, 0, 1, 1), [0, 1]),
            ((0, 0), [0]),
        ),
    )
    def test_intersection_bounds_tuple(self, test_geom, expected):
        """Tests the `intersection` method with valid inputs."""
        res = list(self.df.sindex.intersection(test_geom))
        assert_array_equal(res, expected)
    @pytest.mark.parametrize("test_geom", ((-1, -1, -0.5), -0.5, None, Point(0, 0)))
    def test_intersection_invalid_bounds_tuple(self, test_geom):
        """Tests the `intersection` method with invalid inputs."""
        if compat.USE_PYGEOS:
            with pytest.raises(TypeError):
                # we raise a useful TypeError
                self.df.sindex.intersection(test_geom)
        else:
            with pytest.raises((TypeError, Exception)):
                # catch a general exception
                # rtree raises an RTreeError which we need to catch
                self.df.sindex.intersection(test_geom)
    # ------------------------------ `query` tests ------------------------------ #
    @pytest.mark.parametrize(
        "predicate, test_geom, expected",
        (
            (None, box(-1, -1, -0.5, -0.5), []),  # bbox does not intersect
            (None, box(-0.5, -0.5, 0.5, 0.5), [0]),  # bbox intersects
            (None, box(0, 0, 1, 1), [0, 1]),  # bbox intersects multiple
            (
                None,
                LineString([(0, 1), (1, 0)]),
                [0, 1],
            ),  # bbox intersects but not geometry
            ("intersects", box(-1, -1, -0.5, -0.5), []),  # bbox does not intersect
            (
                "intersects",
                box(-0.5, -0.5, 0.5, 0.5),
                [0],
            ),  # bbox and geometry intersect
            (
                "intersects",
                box(0, 0, 1, 1),
                [0, 1],
            ),  # bbox and geometry intersect multiple
            (
                "intersects",
                LineString([(0, 1), (1, 0)]),
                [],
            ),  # bbox intersects but not geometry
            ("within", box(0.25, 0.28, 0.75, 0.75), []),  # does not intersect
            ("within", box(0, 0, 10, 10), []),  # intersects but is not within
            ("within", box(11, 11, 12, 12), [5]),  # intersects and is within
            ("within", LineString([(0, 1), (1, 0)]), []),  # intersects but not within
            ("contains", box(0, 0, 1, 1), []),  # intersects but does not contain
            ("contains", box(0, 0, 1.001, 1.001), [1]),  # intersects and contains
            ("contains", box(0.5, 0.5, 1.5, 1.5), [1]),  # intersects and contains
            ("contains", box(-1, -1, 2, 2), [0, 1]),  # intersects and contains multiple
            (
                "contains",
                LineString([(0, 1), (1, 0)]),
                [],
            ),  # intersects but not contains
            ("touches", box(-1, -1, 0, 0), [0]),  # bbox intersects and touches
            (
                "touches",
                box(-0.5, -0.5, 1.5, 1.5),
                [],
            ),  # bbox intersects but geom does not touch
            (
                "contains",
                box(10, 10, 20, 20),
                [5],
            ),  # contains but does not contains_properly
            (
                "covers",
                box(-0.5, -0.5, 1, 1),
                [0, 1],
            ),  # covers (0, 0) and (1, 1)
            (
                "covers",
                box(0.001, 0.001, 0.99, 0.99),
                [],
            ),  # does not cover any
            (
                "covers",
                box(0, 0, 1, 1),
                [0, 1],
            ),  # covers but does not contain
            (
                "contains_properly",
                box(0, 0, 1, 1),
                [],
            ),  # intersects but does not contain
            (
                "contains_properly",
                box(0, 0, 1.001, 1.001),
                [1],
            ),  # intersects 2 and contains 1
            (
                "contains_properly",
                box(0.5, 0.5, 1.001, 1.001),
                [1],
            ),  # intersects 1 and contains 1
            (
                "contains_properly",
                box(0.5, 0.5, 1.5, 1.5),
                [1],
            ),  # intersects and contains
            (
                "contains_properly",
                box(-1, -1, 2, 2),
                [0, 1],
            ),  # intersects and contains multiple
            (
                "contains_properly",
                box(10, 10, 20, 20),
                [],
            ),  # contains but does not contains_properly
        ),
    )
    def test_query(self, predicate, test_geom, expected):
        """Tests the `query` method with valid inputs and valid predicates."""
        res = self.df.sindex.query(test_geom, predicate=predicate)
        assert_array_equal(res, expected)
    def test_query_invalid_geometry(self):
        """Tests the `query` method with invalid geometry."""
        with pytest.raises(TypeError):
            self.df.sindex.query("notavalidgeom")
    @pytest.mark.parametrize(
        "test_geom, expected_value",
        [
            (None, []),
            (GeometryCollection(), []),
            (Point(), []),
            (MultiPolygon(), []),
            (Polygon(), []),
        ],
    )
    def test_query_empty_geometry(self, test_geom, expected_value):
        """Tests the `query` method with empty geometry."""
        res = self.df.sindex.query(test_geom)
        assert_array_equal(res, expected_value)
    def test_query_invalid_predicate(self):
        """Tests the `query` method with invalid predicates."""
        test_geom = box(-1, -1, -0.5, -0.5)
        with pytest.raises(ValueError):
            self.df.sindex.query(test_geom, predicate="test")
    @pytest.mark.parametrize(
        "sort, expected",
        (
            (True, [[0, 0, 0], [0, 1, 2]]),
            # False could be anything, at least we'll know if it changes
            (False, [[0, 0, 0], [0, 1, 2]]),
        ),
    )
    def test_query_sorting(self, sort, expected):
        """Check that results from `query` don't depend on the
        order of geometries.
        """
        # these geometries come from a reported issue:
        # https://github.com/geopandas/geopandas/issues/1337
        # there is no theoretical reason they were chosen
        test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
        tree_polys = GeoSeries(
            [
                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
            ]
        )
        # NOTE(review): this rebinding overwrites the parametrized `expected`
        # value above — confirm the parametrized expectations are intentional.
        expected = [0, 1, 2]
        # pass through GeoSeries to have GeoPandas
        # determine if it should use shapely or pygeos geometry objects
        tree_df = geopandas.GeoDataFrame(geometry=tree_polys)
        test_df = geopandas.GeoDataFrame(geometry=test_polys)
        test_geo = test_df.geometry.values[0]
        res = tree_df.sindex.query(test_geo, sort=sort)
        # asserting the same elements
        assert sorted(res) == sorted(expected)
        # asserting the exact array can fail if sort=False
        try:
            assert_array_equal(res, expected)
        except AssertionError as e:
            if sort is False:
                pytest.xfail(
                    "rtree results are known to be unordered, see "
                    "https://github.com/geopandas/geopandas/issues/1337\n"
                    "Expected:\n {}\n".format(expected)
                    + "Got:\n {}\n".format(res.tolist())
                )
            raise e
    # ------------------------- `query_bulk` tests -------------------------- #
    @pytest.mark.parametrize(
        "predicate, test_geom, expected",
        (
            (None, [(-1, -1, -0.5, -0.5)], [[], []]),
            (None, [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
            (None, [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
            ("intersects", [(-1, -1, -0.5, -0.5)], [[], []]),
            ("intersects", [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
            ("intersects", [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
            # only second geom intersects
            ("intersects", [(-1, -1, -0.5, -0.5), (-0.5, -0.5, 0.5, 0.5)], [[1], [0]]),
            # both geoms intersect
            (
                "intersects",
                [(-1, -1, 1, 1), (-0.5, -0.5, 0.5, 0.5)],
                [[0, 0, 1], [0, 1, 0]],
            ),
            ("within", [(0.25, 0.28, 0.75, 0.75)], [[], []]),  # does not intersect
            ("within", [(0, 0, 10, 10)], [[], []]),  # intersects but is not within
            ("within", [(11, 11, 12, 12)], [[0], [5]]),  # intersects and is within
            (
                "contains",
                [(0, 0, 1, 1)],
                [[], []],
            ),  # intersects and covers, but does not contain
            (
                "contains",
                [(0, 0, 1.001, 1.001)],
                [[0], [1]],
            ),  # intersects 2 and contains 1
            (
                "contains",
                [(0.5, 0.5, 1.001, 1.001)],
                [[0], [1]],
            ),  # intersects 1 and contains 1
            ("contains", [(0.5, 0.5, 1.5, 1.5)], [[0], [1]]),  # intersects and contains
            (
                "contains",
                [(-1, -1, 2, 2)],
                [[0, 0], [0, 1]],
            ),  # intersects and contains multiple
            (
                "contains",
                [(10, 10, 20, 20)],
                [[0], [5]],
            ),  # contains but does not contains_properly
            ("touches", [(-1, -1, 0, 0)], [[0], [0]]),  # bbox intersects and touches
            (
                "touches",
                [(-0.5, -0.5, 1.5, 1.5)],
                [[], []],
            ),  # bbox intersects but geom does not touch
            (
                "covers",
                [(-0.5, -0.5, 1, 1)],
                [[0, 0], [0, 1]],
            ),  # covers (0, 0) and (1, 1)
            (
                "covers",
                [(0.001, 0.001, 0.99, 0.99)],
                [[], []],
            ),  # does not cover any
            (
                "covers",
                [(0, 0, 1, 1)],
                [[0, 0], [0, 1]],
            ),  # covers but does not contain
            (
                "contains_properly",
                [(0, 0, 1, 1)],
                [[], []],
            ),  # intersects but does not contain
            (
                "contains_properly",
                [(0, 0, 1.001, 1.001)],
                [[0], [1]],
            ),  # intersects 2 and contains 1
            (
                "contains_properly",
                [(0.5, 0.5, 1.001, 1.001)],
                [[0], [1]],
            ),  # intersects 1 and contains 1
            (
                "contains_properly",
                [(0.5, 0.5, 1.5, 1.5)],
                [[0], [1]],
            ),  # intersects and contains
            (
                "contains_properly",
                [(-1, -1, 2, 2)],
                [[0, 0], [0, 1]],
            ),  # intersects and contains multiple
            (
                "contains_properly",
                [(10, 10, 20, 20)],
                [[], []],
            ),  # contains but does not contains_properly
        ),
    )
    def test_query_bulk(self, predicate, test_geom, expected):
        """Tests the `query_bulk` method with valid
        inputs and valid predicates.
        """
        # pass through GeoSeries to have GeoPandas
        # determine if it should use shapely or pygeos geometry objects
        test_geom = geopandas.GeoSeries(
            [box(*geom) for geom in test_geom], index=range(len(test_geom))
        )
        res = self.df.sindex.query(test_geom, predicate=predicate)
        assert_array_equal(res, expected)
    @pytest.mark.parametrize(
        "test_geoms, expected_value",
        [
            # single empty geometry
            ([GeometryCollection()], [[], []]),
            # None should be skipped
            ([GeometryCollection(), None], [[], []]),
            ([None], [[], []]),
            ([None, box(-0.5, -0.5, 0.5, 0.5), None], [[1], [0]]),
        ],
    )
    def test_query_bulk_empty_geometry(self, test_geoms, expected_value):
        """Tests the `query_bulk` method with an empty geometry."""
        # pass through GeoSeries to have GeoPandas
        # determine if it should use shapely or pygeos geometry objects
        # note: for this test, test_geoms (note plural) is a list already
        test_geoms = geopandas.GeoSeries(test_geoms, index=range(len(test_geoms)))
        res = self.df.sindex.query(test_geoms)
        assert_array_equal(res, expected_value)
    def test_query_bulk_empty_input_array(self):
        """Tests the `query_bulk` method with an empty input array."""
        test_array = np.array([], dtype=object)
        expected_value = [[], []]
        res = self.df.sindex.query(test_array)
        assert_array_equal(res, expected_value)
    def test_query_bulk_invalid_input_geometry(self):
        """
        Tests the `query_bulk` method with invalid input for the `geometry` parameter.
        """
        test_array = "notanarray"
        with pytest.raises(TypeError):
            self.df.sindex.query(test_array)
    def test_query_bulk_invalid_predicate(self):
        """Tests the `query_bulk` method with invalid predicates."""
        test_geom_bounds = (-1, -1, -0.5, -0.5)
        test_predicate = "test"
        # pass through GeoSeries to have GeoPandas
        # determine if it should use shapely or pygeos geometry objects
        test_geom = geopandas.GeoSeries([box(*test_geom_bounds)], index=["0"])
        with pytest.raises(ValueError):
            self.df.sindex.query(test_geom.geometry, predicate=test_predicate)
    @pytest.mark.parametrize(
        "predicate, test_geom, expected",
        (
            (None, (-1, -1, -0.5, -0.5), [[], []]),
            ("intersects", (-1, -1, -0.5, -0.5), [[], []]),
            ("contains", (-1, -1, 1, 1), [[0], [0]]),
        ),
    )
    def test_query_bulk_input_type(self, predicate, test_geom, expected):
        """Tests that query_bulk can accept a GeoSeries, GeometryArray or
        numpy array.
        """
        # pass through GeoSeries to have GeoPandas
        # determine if it should use shapely or pygeos geometry objects
        test_geom = geopandas.GeoSeries([box(*test_geom)], index=["0"])
        # test GeoSeries
        res = self.df.sindex.query(test_geom, predicate=predicate)
        assert_array_equal(res, expected)
        # test GeometryArray
        res = self.df.sindex.query(test_geom.geometry, predicate=predicate)
        assert_array_equal(res, expected)
        res = self.df.sindex.query(test_geom.geometry.values, predicate=predicate)
        assert_array_equal(res, expected)
        # test numpy array
        res = self.df.sindex.query(
            test_geom.geometry.values.to_numpy(), predicate=predicate
        )
        assert_array_equal(res, expected)
        # NOTE(review): the following call is byte-identical to the previous
        # one — looks like an accidental duplication; confirm whether a
        # different input type (e.g. np.asarray(...)) was intended here.
        res = self.df.sindex.query(
            test_geom.geometry.values.to_numpy(), predicate=predicate
        )
        assert_array_equal(res, expected)
    @pytest.mark.parametrize(
        "sort, expected",
        (
            (True, [[0, 0, 0], [0, 1, 2]]),
            # False could be anything, at least we'll know if it changes
            (False, [[0, 0, 0], [0, 1, 2]]),
        ),
    )
    def test_query_bulk_sorting(self, sort, expected):
        """Check that results from `query_bulk` don't depend
        on the order of geometries.
        """
        # these geometries come from a reported issue:
        # https://github.com/geopandas/geopandas/issues/1337
        # there is no theoretical reason they were chosen
        test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
        tree_polys = GeoSeries(
            [
                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
            ]
        )
        # pass through GeoSeries to have GeoPandas
        # determine if it should use shapely or pygeos geometry objects
        tree_df = geopandas.GeoDataFrame(geometry=tree_polys)
        test_df = geopandas.GeoDataFrame(geometry=test_polys)
        res = tree_df.sindex.query(test_df.geometry, sort=sort)
        # asserting the same elements
        assert sorted(res[0]) == sorted(expected[0])
        assert sorted(res[1]) == sorted(expected[1])
        # asserting the exact array can fail if sort=False
        try:
            assert_array_equal(res, expected)
        except AssertionError as e:
            if sort is False:
                pytest.xfail(
                    "rtree results are known to be unordered, see "
                    "https://github.com/geopandas/geopandas/issues/1337\n"
                    "Expected:\n {}\n".format(expected)
                    + "Got:\n {}\n".format(res.tolist())
                )
            raise e
    # ------------------------- `nearest` tests ------------------------- #
    @pytest.mark.skipif(
        compat.USE_PYGEOS or compat.USE_SHAPELY_20,
        reason=("RTree supports sindex.nearest with different behaviour"),
    )
    def test_rtree_nearest_warns(self):
        """The legacy rtree nearest must emit a deprecation-style warning."""
        df = geopandas.GeoDataFrame({"geometry": []})
        with pytest.warns(
            FutureWarning, match="sindex.nearest using the rtree backend"
        ):
            df.sindex.nearest((0, 0, 1, 1), num_results=2)
    @pytest.mark.skipif(
        compat.USE_SHAPELY_20 or not (compat.USE_PYGEOS and not compat.PYGEOS_GE_010),
        reason=("PyGEOS < 0.10 does not support sindex.nearest"),
    )
    def test_pygeos_error(self):
        """nearest is unavailable on old pygeos and must say so."""
        df = geopandas.GeoDataFrame({"geometry": []})
        with pytest.raises(NotImplementedError, match="requires pygeos >= 0.10"):
            df.sindex.nearest(None)
    @pytest.mark.skipif(
        not (compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_010)),
        reason=("PyGEOS >= 0.10 is required to test sindex.nearest"),
    )
    @pytest.mark.parametrize("return_all", [True, False])
    @pytest.mark.parametrize(
        "geometry,expected",
        [
            ([0.25, 0.25], [[0], [0]]),
            ([0.75, 0.75], [[0], [1]]),
        ],
    )
    def test_nearest_single(self, geometry, expected, return_all):
        """nearest with a single scalar geometry (shapely and native object)."""
        geoms = mod.points(np.arange(10), np.arange(10))
        df = geopandas.GeoDataFrame({"geometry": geoms})
        p = Point(geometry)
        res = df.sindex.nearest(p, return_all=return_all)
        assert_array_equal(res, expected)
        p = mod.points(geometry)
        res = df.sindex.nearest(p, return_all=return_all)
        assert_array_equal(res, expected)
    @pytest.mark.skipif(
        not (compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_010)),
        reason=("PyGEOS >= 0.10 is required to test sindex.nearest"),
    )
    @pytest.mark.parametrize("return_all", [True, False])
    @pytest.mark.parametrize(
        "geometry,expected",
        [
            ([(1, 1), (0, 0)], [[0, 1], [1, 0]]),
            ([(1, 1), (0.25, 1)], [[0, 1], [1, 1]]),
        ],
    )
    def test_nearest_multi(self, geometry, expected, return_all):
        """nearest with array-like inputs: list, native array, GeoSeries, GeometryArray."""
        geoms = mod.points(np.arange(10), np.arange(10))
        df = geopandas.GeoDataFrame({"geometry": geoms})
        ps = [Point(p) for p in geometry]
        res = df.sindex.nearest(ps, return_all=return_all)
        assert_array_equal(res, expected)
        ps = mod.points(geometry)
        res = df.sindex.nearest(ps, return_all=return_all)
        assert_array_equal(res, expected)
        s = geopandas.GeoSeries(ps)
        res = df.sindex.nearest(s, return_all=return_all)
        assert_array_equal(res, expected)
        x, y = zip(*geometry)
        ga = geopandas.points_from_xy(x, y)
        res = df.sindex.nearest(ga, return_all=return_all)
        assert_array_equal(res, expected)
    @pytest.mark.skipif(
        not (compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_010)),
        reason=("PyGEOS >= 0.10 is required to test sindex.nearest"),
    )
    @pytest.mark.parametrize("return_all", [True, False])
    @pytest.mark.parametrize(
        "geometry,expected",
        [
            (None, [[], []]),
            ([None], [[], []]),
        ],
    )
    def test_nearest_none(self, geometry, expected, return_all):
        """nearest with missing (None) inputs yields empty results."""
        geoms = mod.points(np.arange(10), np.arange(10))
        df = geopandas.GeoDataFrame({"geometry": geoms})
        res = df.sindex.nearest(geometry, return_all=return_all)
        assert_array_equal(res, expected)
    @pytest.mark.skipif(
        not (compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_010)),
        reason=("PyGEOS >= 0.10 is required to test sindex.nearest"),
    )
    @pytest.mark.parametrize("return_distance", [True, False])
    @pytest.mark.parametrize(
        "return_all,max_distance,expected",
        [
            (True, None, ([[0, 0, 1], [0, 1, 5]], [sqrt(0.5), sqrt(0.5), sqrt(50)])),
            (False, None, ([[0, 1], [0, 5]], [sqrt(0.5), sqrt(50)])),
            (True, 1, ([[0, 0], [0, 1]], [sqrt(0.5), sqrt(0.5)])),
            (False, 1, ([[0], [0]], [sqrt(0.5)])),
        ],
    )
    def test_nearest_max_distance(
        self, expected, max_distance, return_all, return_distance
    ):
        """nearest honours max_distance and optionally returns distances."""
        geoms = mod.points(np.arange(10), np.arange(10))
        df = geopandas.GeoDataFrame({"geometry": geoms})
        ps = [Point(0.5, 0.5), Point(0, 10)]
        res = df.sindex.nearest(
            ps,
            return_all=return_all,
            max_distance=max_distance,
            return_distance=return_distance,
        )
        if return_distance:
            assert_array_equal(res[0], expected[0])
            assert_array_equal(res[1], expected[1])
        else:
            assert_array_equal(res, expected[0])
    @pytest.mark.skipif(
        not (compat.USE_SHAPELY_20),
        reason=(
            "shapely >= 2.0 is required to test sindex.nearest with parameter exclusive"
        ),
    )
    @pytest.mark.parametrize("return_distance", [True, False])
    @pytest.mark.parametrize(
        "return_all,max_distance,exclusive,expected",
        [
            (False, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
            (False, None, True, ([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3]], 5 * [sqrt(2)])),
            (True, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
            (
                True,
                None,
                True,
                ([[0, 1, 1, 2, 2, 3, 3, 4], [1, 0, 2, 1, 3, 2, 4, 3]], 8 * [sqrt(2)]),
            ),
            (False, 1.1, True, ([[1, 2, 5], [5, 5, 1]], 3 * [1])),
            (True, 1.1, True, ([[1, 2, 5, 5], [5, 5, 1, 2]], 4 * [1])),
        ],
    )
    def test_nearest_exclusive(
        self, expected, max_distance, return_all, return_distance, exclusive
    ):
        """exclusive=True must skip the identical geometry itself."""
        geoms = mod.points(np.arange(5), np.arange(5))
        if max_distance:
            # add a non grid point
            geoms = np.append(geoms, [Point(1, 2)])
        df = geopandas.GeoDataFrame({"geometry": geoms})
        ps = geoms
        res = df.sindex.nearest(
            ps,
            return_all=return_all,
            max_distance=max_distance,
            return_distance=return_distance,
            exclusive=exclusive,
        )
        if return_distance:
            assert_array_equal(res[0], expected[0])
            assert_array_equal(res[1], expected[1])
        else:
            assert_array_equal(res, expected[0])
    @pytest.mark.skipif(
        compat.USE_SHAPELY_20 or not (compat.USE_PYGEOS and not compat.PYGEOS_GE_010),
        reason="sindex.nearest exclusive parameter requires shapely >= 2.0",
    )
    def test_nearest_exclusive_unavailable(self):
        """exclusive must raise NotImplementedError on pre-2.0 backends."""
        from shapely.geometry import Point
        geoms = [Point((x, y)) for (x, y) in zip(np.arange(5), np.arange(5))]
        df = geopandas.GeoDataFrame(geometry=geoms)
        with pytest.raises(NotImplementedError, match="requires shapely >= 2.0"):
            df.sindex.nearest(geoms, exclusive=True)
    # --------------------------- misc tests ---------------------------- #
    def test_empty_tree_geometries(self):
        """Tests building sindex with interleaved empty geometries."""
        geoms = [Point(0, 0), None, Point(), Point(1, 1), Point()]
        df = geopandas.GeoDataFrame(geometry=geoms)
        # index positions are preserved even though empties are skipped
        assert df.sindex.query(Point(1, 1))[0] == 3
    def test_size(self):
        """Tests the `size` property."""
        assert self.df.sindex.size == self.expected_size
    def test_len(self):
        """Tests the `__len__` method of spatial indexes."""
        assert len(self.df.sindex) == self.expected_size
    def test_is_empty(self):
        """Tests the `is_empty` property."""
        # create empty tree
        empty = geopandas.GeoSeries([], dtype=object)
        assert empty.sindex.is_empty
        empty = geopandas.GeoSeries([None])
        assert empty.sindex.is_empty
        empty = geopandas.GeoSeries([Point()])
        assert empty.sindex.is_empty
        # create a non-empty tree
        non_empty = geopandas.GeoSeries([Point(0, 0)])
        assert not non_empty.sindex.is_empty
    @pytest.mark.parametrize(
        "predicate, expected_shape",
        [
            (None, (2, 471)),
            ("intersects", (2, 213)),
            ("within", (2, 213)),
            ("contains", (2, 0)),
            ("overlaps", (2, 0)),
            ("crosses", (2, 0)),
            ("touches", (2, 0)),
        ],
    )
    def test_integration_natural_earth(self, predicate, expected_shape):
        """Tests output sizes for the naturalearth datasets."""
        world = read_file(datasets.get_path("naturalearth_lowres"))
        capitals = read_file(datasets.get_path("naturalearth_cities"))
        # predicate is passed positionally here (second positional parameter)
        res = world.sindex.query(capitals.geometry, predicate)
        assert res.shape == expected_shape
@pytest.mark.skipif(not compat.HAS_RTREE, reason="no rtree installed")
def test_old_spatial_index_deprecated():
    """Building SpatialIndex from a legacy rtree stream warns but still works."""
    triangles = [
        Polygon([(0, 0), (1, 0), (1, 1)]),
        Polygon([(0, 0), (1, 1), (0, 1)]),
    ]
    # Old-style rtree input: an iterable of (id, bounds, obj) tuples.
    entries = ((n, geom.bounds, None) for n, geom in enumerate(triangles))
    with pytest.warns(FutureWarning):
        legacy_index = geopandas.sindex.SpatialIndex(entries)
    assert list(legacy_index.intersection((0, 0, 1, 1))) == [0, 1]

View File

@@ -0,0 +1,183 @@
import warnings
import numpy as np
from shapely.geometry import Point, Polygon
import pandas as pd
from pandas import DataFrame, Series
from geopandas import GeoDataFrame, GeoSeries
from geopandas.array import from_shapely
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
import pytest
# Module-level fixtures shared by the assertion tests below.
# s1 and s2 contain the same two squares; s2's first ring starts at a
# different vertex, so the geometries are equal but not vertex-identical.
s1 = GeoSeries(
    [
        Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
s2 = GeoSeries(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
# s3: the same geometries held in a plain pandas Series (object dtype).
s3 = Series(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
# a: the same geometries as a GeometryArray, wrapped below in a Series.
a = from_shapely(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
s4 = Series(a)
df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1})
df2 = GeoDataFrame({"col1": [1, 2], "geometry": s2})
# NOTE(review): this rebinding shadows the Series-based s4 defined above, so
# the tests only ever see the GeoSeries with crs 4326 — confirm intended.
s4 = s1.copy()
s4.crs = 4326
s5 = s2.copy()
s5.crs = 27700
# s6 differs from s2 in its first vertex (y=3 instead of y=2).
s6 = GeoSeries(
    [
        Polygon([(0, 3), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
# df4/df5 hold multiple geometry columns; they differ only in column order.
df4 = GeoDataFrame(
    {"col1": [1, 2], "geometry": s1.copy(), "geom2": s4.copy(), "geom3": s5.copy()},
    crs=3857,
)
df5 = GeoDataFrame(
    {"col1": [1, 2], "geometry": s1.copy(), "geom3": s5.copy(), "geom2": s4.copy()},
    crs=3857,
)
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_geoseries():
    """assert_geoseries_equal across GeoSeries/Series/crs and precision modes."""
    assert_geoseries_equal(s1, s2)
    assert_geoseries_equal(s1, s3, check_series_type=False, check_dtype=False)
    assert_geoseries_equal(s3, s2, check_series_type=False, check_dtype=False)
    assert_geoseries_equal(s1, s4, check_series_type=False)
    # The almost-equal comparison is coordinate-order sensitive, so the
    # rotated ring in s2 fails against s1 and reports the failing position.
    with pytest.raises(AssertionError) as error:
        assert_geoseries_equal(s1, s2, check_less_precise=True)
    assert "1 out of 2 geometries are not almost equal" in str(error.value)
    assert "not almost equal: [0]" in str(error.value)
    # s6 genuinely differs from s2 in one vertex.
    with pytest.raises(AssertionError) as error:
        assert_geoseries_equal(s2, s6, check_less_precise=False)
    assert "1 out of 2 geometries are not equal" in str(error.value)
    assert "not equal: [0]" in str(error.value)
def test_geodataframe():
    """assert_geodataframe_equal: precision, column order, values and crs."""
    assert_geodataframe_equal(df1, df2)
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2, check_less_precise=True)
    # Column order matters unless check_like=True.
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2[["geometry", "col1"]])
    assert_geodataframe_equal(df1, df2[["geometry", "col1"]], check_like=True)
    df3 = df2.copy()
    df3.loc[0, "col1"] = 10
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df3)
    assert_geodataframe_equal(df5, df4, check_like=True)
    # NOTE(review): this mutates the module-level df5 fixture in place, so any
    # later test reading df5 sees geom2 with crs 3857 — confirm intended.
    df5.geom2.crs = 3857
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df5, df4, check_like=True)
def test_equal_nans():
    """Missing (NaN) geometries must compare equal to themselves."""
    with_missing = GeoSeries([Point(0, 0), np.nan])
    assert_geoseries_equal(with_missing, with_missing.copy())
    assert_geoseries_equal(with_missing, with_missing.copy(), check_less_precise=True)
def test_no_crs():
    """A CRS of None and an empty {} CRS are treated as the same 'no CRS'."""
    left = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs=None)
    right = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs={})
    assert_geodataframe_equal(left, right)
def test_ignore_crs_mismatch():
    """check_crs=False must ignore differing CRS — and do so silently."""
    wgs84 = GeoDataFrame({"col1": [1, 2], "geometry": s1.copy()}, crs="EPSG:4326")
    lambert = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs="EPSG:31370")
    # With the default check_crs=True the mismatch is an assertion failure.
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(wgs84, lambert)
    # With check_crs=False the comparison passes and must not emit any
    # warning about comparing geometries with different crs.
    with warnings.catch_warnings(record=True) as caught:
        assert_geodataframe_equal(wgs84, lambert, check_crs=False)
    assert len(caught) == 0
def test_almost_equal_but_not_equal():
    """check_less_precise tolerates tiny coordinate drift; exact compare does not."""
    at_origin = GeoSeries([Point(0, 0)])
    near_origin = GeoSeries([Point(0.0000001, 0)])
    assert_geoseries_equal(at_origin, near_origin, check_less_precise=True)
    with pytest.raises(AssertionError):
        assert_geoseries_equal(at_origin, near_origin)
def test_geodataframe_no_active_geometry_column():
    """Frames without an active geometry column must still compare equal."""

    def make_frame():
        frame = GeoDataFrame({"value": [1, 2], "geometry": [Point(1, 1), Point(2, 2)]})
        frame["geom2"] = GeoSeries([Point(3, 3), Point(4, 4)])
        return frame

    # Case 1: the active geometry column is explicitly unset (None).
    left = make_frame()
    left._geometry_column_name = None
    right = make_frame()
    right._geometry_column_name = None
    assert_geodataframe_equal(left, right)
    # Case 2: the "geometry" column itself is dropped from the selection.
    left = make_frame()[["value", "geom2"]]
    right = make_frame()[["value", "geom2"]]
    assert_geodataframe_equal(left, right)
    # Case 3: no geometry columns at all.
    left = GeoDataFrame(make_frame()[["value"]])
    right = GeoDataFrame(make_frame()[["value"]])
    assert_geodataframe_equal(left, right)
def test_geodataframe_multiindex():
    """MultiIndex-column frames compare equal, with or without an active geometry."""

    def make_frame():
        base = DataFrame([[Point(0, 0), Point(1, 1)], [Point(2, 2), Point(3, 3)]])
        frame = GeoDataFrame(base.astype("geometry"))
        frame.columns = pd.MultiIndex.from_product([["geometry"], [0, 1]])
        return frame

    left = make_frame()
    right = make_frame()
    assert_geodataframe_equal(left, right)
    # Same comparison with the active geometry column unset on both sides.
    left = make_frame()
    left._geometry_column_name = None
    right = make_frame()
    right._geometry_column_name = None
    assert_geodataframe_equal(left, right)

View File

@@ -0,0 +1,85 @@
from pandas import DataFrame, Series
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
class TestSeries:
    """Every indexing/selection operation on a GeoSeries must return a GeoSeries."""

    def setup_method(self):
        count = self.N = 10
        radius = 0.5
        # Points on the diagonal, plus their buffered (polygonal) counterparts.
        self.pts = GeoSeries([Point(i, i) for i in range(count)])
        self.polys = self.pts.buffer(radius)

    def test_slice(self):
        """Plain and strided slices keep the GeoSeries type."""
        assert type(self.pts[:2]) is GeoSeries
        assert type(self.pts[::2]) is GeoSeries
        assert type(self.polys[:2]) is GeoSeries

    def test_head(self):
        assert type(self.pts.head()) is GeoSeries

    def test_tail(self):
        assert type(self.pts.tail()) is GeoSeries

    def test_sort_index(self):
        assert type(self.pts.sort_index()) is GeoSeries

    def test_loc(self):
        assert type(self.pts.loc[5:]) is GeoSeries

    def test_iloc(self):
        assert type(self.pts.iloc[5:]) is GeoSeries

    def test_fancy(self):
        """Boolean-mask selection keeps the GeoSeries type."""
        mask = (self.pts.index.to_series() % 2).astype(bool)
        assert type(self.pts[mask]) is GeoSeries

    def test_take(self):
        even_positions = list(range(0, self.N, 2))
        assert type(self.pts.take(even_positions)) is GeoSeries

    def test_groupby(self):
        """Each group produced by groupby is itself a GeoSeries."""
        for _, group in self.pts.groupby(lambda x: x % 2):
            assert type(group) is GeoSeries
class TestDataFrame:
    """Column/row selection on a GeoDataFrame must preserve geo-awareness."""

    def setup_method(self):
        # Ten diagonal points with two derived numeric columns.
        records = [
            {"geometry": Point(i, i), "value1": i + i, "value2": i * i}
            for i in range(10)
        ]
        self.df = GeoDataFrame(records)

    def test_geometry(self):
        assert type(self.df.geometry) is GeoSeries
        # still GeoSeries if different name
        named = GeoDataFrame(
            {
                "coords": [Point(i, i) for i in range(5)],
                "nums": range(5),
            },
            geometry="coords",
        )
        assert type(named.geometry) is GeoSeries
        assert type(named["coords"]) is GeoSeries

    def test_nongeometry(self):
        # A plain numeric column comes back as an ordinary Series.
        assert type(self.df["value1"]) is Series

    def test_geometry_multiple(self):
        subset = self.df[["geometry", "value1"]]
        assert type(subset) is GeoDataFrame

    def test_nongeometry_multiple(self):
        # Dropping the geometry column downgrades to a plain DataFrame.
        subset = self.df[["value1", "value2"]]
        assert type(subset) is DataFrame

    def test_slice(self):
        assert type(self.df[:2]) is GeoDataFrame
        assert type(self.df[::2]) is GeoDataFrame

    def test_fancy(self):
        # Boolean-mask selection of every other row.
        mask = (self.df.index.to_series() % 2).astype(bool)
        assert type(self.df[mask]) is GeoDataFrame

View File

@@ -0,0 +1,142 @@
import os.path
from pandas import Series
from geopandas import GeoDataFrame
from geopandas.testing import ( # noqa: F401
assert_geoseries_equal,
geom_almost_equals,
geom_equals,
)
# Absolute path of this test module's directory; used to locate test data.
HERE = os.path.abspath(os.path.dirname(__file__))
# Two directory levels above HERE — presumably the package checkout root;
# TODO(review): confirm against the consumers of PACKAGE_DIR.
PACKAGE_DIR = os.path.dirname(os.path.dirname(HERE))

# mock not used here, but the import from here is used in other modules
try:
    # Python 3: mock ships with the standard library.
    from unittest import mock
except ImportError:
    # Python 2 fallback: the standalone third-party backport.
    import mock  # noqa: F401
def validate_boro_df(df, case_sensitive=False):
    """Check a GeoDataFrame read in from the nybb dataset.

    Asserts the frame type, the expected row count, the presence of the
    attribute columns (optionally ignoring case), and that every non-null
    geometry loaded as a MultiPolygon.
    """
    assert isinstance(df, GeoDataFrame)
    # One row per NYC borough.
    assert len(df) == 5

    expected = ("BoroCode", "BoroName", "Shape_Leng", "Shape_Area")
    if case_sensitive:
        for name in expected:
            assert name in df.columns
    else:
        lowered = [column.lower() for column in df.columns]
        for name in expected:
            assert name.lower() in lowered

    # All loaded geometries must be MultiPolygons (nulls are skipped).
    geom_types = Series(df.geometry.geom_type).dropna()
    assert geom_types.eq("MultiPolygon").all()
def get_srid(df):
    """Return srid from `df.crs`."""
    crs = df.crs
    if crs is None:
        return 0
    # to_epsg() can return None when no EPSG code matches; map that to 0.
    return crs.to_epsg() or 0
def create_spatialite(con, df):
    """
    Return a SpatiaLite connection containing the nybb table.

    Parameters
    ----------
    `con`: ``sqlite3.Connection``
    `df`: ``GeoDataFrame``
    """
    with con:  # run the whole setup in a single transaction
        geom_col = df.geometry.name
        srid = get_srid(df)
        # Create the attribute columns first; the geometry column must be
        # added separately through SpatiaLite's AddGeometryColumn so its
        # metadata is registered.
        con.execute(
            "CREATE TABLE IF NOT EXISTS nybb "
            "( ogc_fid INTEGER PRIMARY KEY"
            ", borocode INTEGER"
            ", boroname TEXT"
            ", shape_leng REAL"
            ", shape_area REAL"
            ")"
        )
        # Geometry type is taken from the first non-null geometry in the
        # frame (upper-cased, e.g. "MULTIPOLYGON" for nybb).
        con.execute(
            "SELECT AddGeometryColumn(?, ?, ?, ?)",
            ("nybb", geom_col, srid, df.geom_type.dropna().iat[0].upper()),
        )
        con.execute("SELECT CreateSpatialIndex(?, ?)", ("nybb", geom_col))
        # ogc_fid is passed as None so SQLite assigns the primary key;
        # geometries travel as WKT (None for missing) with the table's SRID.
        sql_row = "INSERT INTO nybb VALUES(?, ?, ?, ?, ?, GeomFromText(?, ?))"
        con.executemany(
            sql_row,
            (
                (
                    None,
                    row.BoroCode,
                    row.BoroName,
                    row.Shape_Leng,
                    row.Shape_Area,
                    row.geometry.wkt if row.geometry else None,
                    srid,
                )
                for row in df.itertuples(index=False)
            ),
        )
def create_postgis(con, df, srid=None, geom_col="geom"):
    """
    Create a nybb table in the test_geopandas PostGIS database.

    Returns a boolean indicating whether the database table was successfully
    created
    """
    # NOTE(review): despite the docstring, no boolean return is visible in
    # this body — confirm against callers.
    # Try to create the database, skip the db tests if something goes
    # wrong
    # If you'd like these tests to run, create a database called
    # 'test_geopandas' and enable postgis in it:
    # > createdb test_geopandas
    # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
    # With an explicit SRID, constrain the column type and tag each geometry
    # on insert; otherwise fall back to a generic, untyped geometry column.
    if srid is not None:
        geom_schema = "geometry(MULTIPOLYGON, {})".format(srid)
        geom_insert = "ST_SetSRID(ST_GeometryFromText(%s), {})".format(srid)
    else:
        geom_schema = "geometry"
        geom_insert = "ST_GeometryFromText(%s)"
    try:
        cursor = con.cursor()
        # Start clean so repeated test runs do not collide.
        cursor.execute("DROP TABLE IF EXISTS nybb;")
        sql = """CREATE TABLE nybb (
            {geom_col} {geom_schema},
            borocode integer,
            boroname varchar(40),
            shape_leng float,
            shape_area float
        );""".format(
            geom_col=geom_col, geom_schema=geom_schema
        )
        cursor.execute(sql)
        # Insert one row per borough; the geometry travels as WKT text and
        # is parsed server-side by ST_GeometryFromText.
        for i, row in df.iterrows():
            sql = """INSERT INTO nybb VALUES ({}, %s, %s, %s, %s
            );""".format(
                geom_insert
            )
            cursor.execute(
                sql,
                (
                    row["geometry"].wkt,
                    row["BoroCode"],
                    row["BoroName"],
                    row["Shape_Leng"],
                    row["Shape_Area"],
                ),
            )
    finally:
        # Commit even on failure so partial DDL does not hold locks.
        cursor.close()
        con.commit()