del env py

This commit is contained in:
2024-10-11 17:10:34 -07:00
parent 55b630e6c8
commit b010ab0e6d
19334 changed files with 1 additions and 4003544 deletions

View File

@@ -1,9 +0,0 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
]
}

View File

@@ -1,38 +0,0 @@
import subprocess
import sys
def test_no_additional_imports():
    """Importing geopandas must not pull in optional/dev dependencies."""
    # test that 'import geopandas' does not import any of the optional or
    # development dependencies
    blacklist = {
        "pytest",
        "py",
        "ipython",
        # fiona actually gets imported if installed (but error suppressed until used)
        # "fiona",
        # "matplotlib", # matplotlib gets imported by pandas, see below
        "mapclassify",
        "sqlalchemy",
        "psycopg",
        "psycopg2",
        "geopy",
        "geoalchemy2",
        "matplotlib",
    }
    # Run the import in a fresh interpreter so this test process' own
    # imports cannot pollute sys.modules.
    code = """
import sys
import geopandas
blacklist = {0!r}
mods = blacklist & set(m.split('.')[0] for m in sys.modules)
if mods:
    sys.stderr.write('err: geopandas should not import: {{}}'.format(', '.join(mods)))
sys.exit(len(mods))
""".format(
        blacklist
    )
    call = [sys.executable, "-c", code]
    returncode = subprocess.run(call, check=False).returncode
    assert returncode == 0

View File

@@ -1,30 +0,0 @@
from geopandas._compat import import_optional_dependency
import pytest
def test_import_optional_dependency_present():
    """An installed dependency is returned as the real module object."""
    # pandas is not optional, but we know it is present
    pandas = import_optional_dependency("pandas")
    assert pandas is not None
    # module imported normally must be same
    import pandas as pd

    assert pandas == pd


def test_import_optional_dependency_absent():
    """A missing dependency raises ImportError with a helpful message."""
    with pytest.raises(ImportError, match="Missing optional dependency 'foo'"):
        import_optional_dependency("foo")
    # the 'extra' argument replaces the default message
    with pytest.raises(ImportError, match="foo is required"):
        import_optional_dependency("foo", extra="foo is required")


@pytest.mark.parametrize(
    "bad_import", [["foo"], 0, False, True, {}, {"foo"}, {"foo": "bar"}]
)
def test_import_optional_dependency_invalid(bad_import):
    """Non-string module names are rejected with ValueError."""
    with pytest.raises(ValueError, match="Invalid module name"):
        import_optional_dependency(bad_import)

View File

@@ -1,47 +0,0 @@
import geopandas
import pytest
def test_options():
    """geopandas.options exposes exactly the known options."""
    assert "display_precision: " in repr(geopandas.options)
    assert set(dir(geopandas.options)) == {
        "display_precision",
        "use_pygeos",
        "io_engine",
    }
    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option
    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option = 10


def test_options_display_precision():
    """display_precision accepts only non-negative ints (or None)."""
    assert geopandas.options.display_precision is None
    geopandas.options.display_precision = 5
    assert geopandas.options.display_precision == 5
    with pytest.raises(ValueError):
        geopandas.options.display_precision = "abc"
    with pytest.raises(ValueError):
        geopandas.options.display_precision = -1
    # restore the default so other tests are unaffected
    geopandas.options.display_precision = None


def test_options_io_engine():
    """io_engine accepts only known engine names (or None)."""
    assert geopandas.options.io_engine is None
    geopandas.options.io_engine = "pyogrio"
    assert geopandas.options.io_engine == "pyogrio"
    with pytest.raises(ValueError):
        geopandas.options.io_engine = "abc"
    with pytest.raises(ValueError):
        geopandas.options.io_engine = -1
    # restore the default so other tests are unaffected
    geopandas.options.io_engine = None

View File

@@ -1,747 +0,0 @@
import random
import warnings
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point, Polygon
from geopandas import GeoDataFrame, GeoSeries, points_from_xy, read_file
from geopandas.array import GeometryArray, from_shapely, from_wkb, from_wkt
import pytest
from geopandas.testing import assert_geodataframe_equal
pyproj = pytest.importorskip("pyproj")
def _create_df(x, y=None, crs=None):
    """Build a small point GeoDataFrame from coordinate sequences.

    NOTE(review): ``y = y or x`` treats an empty ``y`` as missing; assumed
    intentional for the ranges used by the callers here.
    """
    y = y or x
    x = np.asarray(x)
    y = np.asarray(y)
    return GeoDataFrame(
        {"geometry": points_from_xy(x, y), "value1": x + y, "value2": x * y}, crs=crs
    )


def df_epsg26918():
    """Ten-point test frame in EPSG:26918 (UTM zone 18N)."""
    # EPSG:26918
    # Center coordinates
    # -1683723.64 6689139.23
    return _create_df(
        x=range(-1683723, -1683723 + 10, 1),
        y=range(6689139, 6689139 + 10, 1),
        crs="epsg:26918",
    )
def test_to_crs_transform():
    """A UTM -> lonlat -> UTM round trip reproduces the original frame."""
    df = df_epsg26918()
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_transform__missing_data():
    # https://github.com/geopandas/geopandas/issues/1573
    df = df_epsg26918()
    df.loc[3, "geometry"] = None
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_transform__empty_data():
    df = df_epsg26918().iloc[:0]
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_inplace():
    df = df_epsg26918()
    lonlat = df.to_crs(epsg=4326)
    df.to_crs(epsg=4326, inplace=True)
    assert_geodataframe_equal(df, lonlat, check_less_precise=True)


def test_to_crs_geo_column_name():
    # Test to_crs() with different geometry column name (GH#339)
    df = df_epsg26918()
    df = df.rename(columns={"geometry": "geom"})
    df.set_geometry("geom", inplace=True)
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert lonlat.geometry.name == "geom"
    assert utm.geometry.name == "geom"
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_dimension_z():
    # preserve z dimension
    arr = points_from_xy([1, 2], [2, 3], [3, 4], crs=4326)
    assert arr.has_z.all()
    result = arr.to_crs(epsg=3857)
    assert result.has_z.all()


# pyproj + numpy 1.25 trigger warning for single-element array -> recommendation is
# to ignore the warning for now (https://github.com/pyproj4/pyproj/issues/1307)
@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
def test_to_crs_dimension_mixed():
    s = GeoSeries([Point(1, 2), LineString([(1, 2, 3), (4, 5, 6)])], crs=2056)
    result = s.to_crs(epsg=4326)
    assert not result[0].is_empty
    assert result.has_z.tolist() == [False, True]
    roundtrip = result.to_crs(epsg=2056)
    # TODO replace with assert_geoseries_equal once we expose tolerance keyword
    # assert_geoseries_equal(roundtrip, s, check_less_precise=True)
    for a, b in zip(roundtrip, s):
        np.testing.assert_allclose(a.coords[:], b.coords[:], atol=0.01)
# -----------------------------------------------------------------------------
# Test different supported formats for CRS specification


@pytest.fixture(
    params=[
        4326,
        "epsg:4326",
        pytest.param(
            {"init": "epsg:4326"},
        ),
        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs",
        {"proj": "latlong", "ellps": "WGS84", "datum": "WGS84", "no_defs": True},
    ],
    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg4326(request):
    """Keyword arguments selecting EPSG:4326 in each supported format."""
    if isinstance(request.param, int):
        return {"epsg": request.param}
    return {"crs": request.param}


@pytest.fixture(
    params=[
        26918,
        "epsg:26918",
        pytest.param(
            {"init": "epsg:26918", "no_defs": True},
        ),
        "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ",
        {"proj": "utm", "zone": 18, "datum": "NAD83", "units": "m", "no_defs": True},
    ],
    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg26918(request):
    """Keyword arguments selecting EPSG:26918 in each supported format."""
    if isinstance(request.param, int):
        return {"epsg": request.param}
    return {"crs": request.param}
@pytest.mark.filterwarnings("ignore:'\\+init:DeprecationWarning")
@pytest.mark.filterwarnings("ignore:'\\+init:FutureWarning")
def test_transform2(epsg4326, epsg26918):
    """Round trip works for every supported CRS specification format."""
    # with PROJ >= 7, the transformation using EPSG code vs proj4 string is
    # slightly different due to use of grid files or not -> turn off network
    # to not use grid files at all for this test
    pyproj.network.set_network_enabled(False)
    df = df_epsg26918()
    lonlat = df.to_crs(**epsg4326)
    utm = lonlat.to_crs(**epsg26918)
    # can't check for CRS equality, as the formats differ although representing
    # the same CRS
    assert_geodataframe_equal(df, utm, check_less_precise=True, check_crs=False)


# pyproj + numpy 1.25 trigger warning for single-element array -> recommendation is
# to ignore the warning for now (https://github.com/pyproj4/pyproj/issues/1307)
@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
def test_crs_axis_order__always_xy():
    df = GeoDataFrame(geometry=[Point(-1683723, 6689139)], crs="epsg:26918")
    lonlat = df.to_crs("epsg:4326")
    test_lonlat = GeoDataFrame(
        geometry=[Point(-110.1399901, 55.1350011)], crs="epsg:4326"
    )
    assert_geodataframe_equal(lonlat, test_lonlat, check_less_precise=True)


def test_skip_exact_same():
    """to_crs() with the frame's own CRS is a no-op."""
    df = df_epsg26918()
    utm = df.to_crs(df.crs)
    assert_geodataframe_equal(df, utm, check_less_precise=True)
# Test CRS on GeometryArray level
class TestGeometryArrayCRS:
    """CRS propagation through GeometryArray, GeoSeries and GeoDataFrame."""

    def setup_method(self):
        self.osgb = pyproj.CRS(27700)
        self.wgs = pyproj.CRS(4326)
        self.geoms = [Point(0, 0), Point(1, 1)]
        self.polys = [
            Polygon([(random.random(), random.random()) for i in range(3)])
            for _ in range(10)
        ]
        self.arr = from_shapely(self.polys, crs=27700)

    def test_array(self):
        arr = from_shapely(self.geoms)
        arr.crs = 27700
        assert arr.crs == self.osgb
        arr = from_shapely(self.geoms, crs=27700)
        assert arr.crs == self.osgb
        arr = GeometryArray(arr)
        assert arr.crs == self.osgb
        arr = GeometryArray(arr, crs=4326)
        assert arr.crs == self.wgs

    def test_series(self):
        s = GeoSeries(crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb
        # manually change CRS
        s = s.set_crs(4326, allow_override=True)
        assert s.crs == self.wgs
        assert s.values.crs == self.wgs
        s = GeoSeries(self.geoms, crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(arr)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb
        with pytest.raises(
            ValueError,
            match="CRS mismatch between CRS of the passed geometries and 'crs'",
        ):
            s = GeoSeries(arr, crs=4326)
        # construction failed, so s still carries the previous CRS
        assert s.crs == self.osgb

    def test_dataframe(self):
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame(geometry=arr)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame(geometry=s)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        # different passed CRS than array CRS is now an error
        match_str = "CRS mismatch between CRS of the passed geometries and 'crs'"
        with pytest.raises(ValueError, match=match_str):
            df = GeoDataFrame(geometry=s, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame(geometry=s, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame(df, crs=4326).crs
        # manually change CRS
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame(geometry=s)
        df = df.set_crs(crs="epsg:4326", allow_override=True)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            GeoDataFrame(self.geoms, columns=["geom"], crs=27700)
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            GeoDataFrame(crs=27700)
        df = GeoDataFrame(self.geoms, columns=["geom"])
        df = df.set_geometry("geom", crs=27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.geom.crs == self.osgb
        assert df.geom.values.crs == self.osgb
        df = GeoDataFrame(geometry=self.geoms, crs=27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        # new geometry with set CRS has priority over GDF CRS
        df = GeoDataFrame(geometry=self.geoms, crs=27700)
        df = df.set_geometry(self.geoms, crs=4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame()
        df = df.set_geometry(s)
        assert df._geometry_column_name == "geometry"
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame()
        df = df.set_geometry(arr)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        arr = from_shapely(self.geoms, crs=4326)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr})
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        # geometry column name None on init
        df = GeoDataFrame({"geometry": [0, 1]})
        with pytest.raises(
            ValueError,
            match="Assigning CRS to a GeoDataFrame without a geometry",
        ):
            df.crs = 27700
        # geometry column without geometry
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "Geometry column does not contain geometry", UserWarning
            )
            df = GeoDataFrame({"geometry": [Point(0, 1)]}).assign(geometry=[0])
        with pytest.raises(
            ValueError,
            match="Assigning CRS to a GeoDataFrame without an active geometry",
        ):
            df.crs = 27700
        with pytest.raises(
            AttributeError,
            match="The CRS attribute of a GeoDataFrame without an active",
        ):
            assert df.crs == self.osgb

    def test_dataframe_getitem_without_geometry_column(self):
        df = GeoDataFrame({"col": range(10)}, geometry=self.arr)
        df["geom2"] = df.geometry.centroid
        subset = df[["col", "geom2"]]
        with pytest.raises(
            AttributeError,
            match="The CRS attribute of a GeoDataFrame without an active",
        ):
            assert subset.crs == self.osgb

    def test_dataframe_setitem(self):
        # new geometry CRS has priority over GDF CRS
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = s
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = arr
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        # test to_crs case (GH1960)
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        df["geometry"] = df["geometry"].to_crs(27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        # test changing geometry crs not in the geometry column doesn't change the crs
        arr = from_shapely(self.geoms)
        df = GeoDataFrame(
            {"col1": [1, 2], "geometry": arr, "other_geom": arr}, crs=4326
        )
        df["other_geom"] = from_shapely(self.geoms, crs=27700)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df["geometry"].crs == self.wgs
        assert df["other_geom"].crs == self.osgb

    def test_dataframe_setitem_without_geometry_column(self):
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        # override geometry with non geometry
        with pytest.warns(UserWarning):
            df["geometry"] = 1
        # assigning a list of geometry object doesn't have cached access to 4326
        df["geometry"] = self.geoms
        assert df.crs is None

    @pytest.mark.parametrize(
        "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])]
    )
    def test_scalar(self, scalar):
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = scalar
        df = df.set_crs(4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_crs_with_no_geom_fails(self):
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            df = GeoDataFrame()
            df.crs = 4326

    def test_read_file(self, nybb_filename):
        df = read_file(nybb_filename)
        assert df.crs == pyproj.CRS(2263)
        assert df.geometry.crs == pyproj.CRS(2263)
        assert df.geometry.values.crs == pyproj.CRS(2263)

    def test_multiple_geoms(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.col1.crs == self.wgs
        assert df.col1.values.crs == self.wgs

    def test_multiple_geoms_set_geom(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df = df.set_geometry("col1")
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        assert df["geometry"].crs == self.osgb
        assert df["geometry"].values.crs == self.osgb

    def test_assign_cols(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df["geom2"] = s
        df["geom3"] = s.values
        df["geom4"] = from_shapely(self.geoms)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.geom2.crs == self.wgs
        assert df.geom2.values.crs == self.wgs
        assert df.geom3.crs == self.wgs
        assert df.geom3.values.crs == self.wgs
        assert df.geom4.crs is None
        assert df.geom4.values.crs is None

    def test_copy(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        arr_copy = arr.copy()
        assert arr_copy.crs == arr.crs
        s_copy = s.copy()
        assert s_copy.crs == s.crs
        assert s_copy.values.crs == s.values.crs
        df_copy = df.copy()
        assert df_copy.crs == df.crs
        assert df_copy.geometry.crs == df.geometry.crs
        assert df_copy.geometry.values.crs == df.geometry.values.crs
        assert df_copy.col1.crs == df.col1.crs
        assert df_copy.col1.values.crs == df.col1.values.crs

    def test_rename(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df = df.rename(columns={"geometry": "geom"}).set_geometry("geom")
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        df = df.rename_geometry("geom2")
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        df = df.rename(columns={"col1": "column1"})
        assert df.column1.crs == self.wgs
        assert df.column1.values.crs == self.wgs

    def test_geoseries_to_crs(self):
        s = GeoSeries(self.geoms, crs=27700)
        s = s.to_crs(4326)
        assert s.crs == self.wgs
        assert s.values.crs == self.wgs
        df = GeoDataFrame(geometry=s)
        assert df.crs == self.wgs
        df = df.to_crs(27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        # make sure that only active geometry is transformed
        arr = from_shapely(self.geoms, crs=4326)
        df["col1"] = arr
        df = df.to_crs(3857)
        assert df.col1.crs == self.wgs
        assert df.col1.values.crs == self.wgs

    def test_array_to_crs(self):
        arr = from_shapely(self.geoms, crs=27700)
        arr = arr.to_crs(4326)
        assert arr.crs == self.wgs

    def test_from_shapely(self):
        arr = from_shapely(self.geoms, crs=27700)
        assert arr.crs == self.osgb

    def test_from_wkb(self):
        L_wkb = [p.wkb for p in self.geoms]
        arr = from_wkb(L_wkb, crs=27700)
        assert arr.crs == self.osgb

    def test_from_wkt(self):
        L_wkt = [p.wkt for p in self.geoms]
        arr = from_wkt(L_wkt, crs=27700)
        assert arr.crs == self.osgb

    def test_points_from_xy(self):
        df = pd.DataFrame([{"x": x, "y": x, "z": x} for x in range(10)])
        arr = points_from_xy(df["x"], df["y"], crs=27700)
        assert arr.crs == self.osgb

    # setting CRS in GeoSeries should not set it in passed array without CRS
    def test_original(self):
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        assert arr.crs is None
        assert s.crs == self.osgb

    def test_ops(self):
        arr = self.arr
        bound = arr.boundary
        assert bound.crs == self.osgb
        cent = arr.centroid
        assert cent.crs == self.osgb
        hull = arr.convex_hull
        assert hull.crs == self.osgb
        envelope = arr.envelope
        assert envelope.crs == self.osgb
        exterior = arr.exterior
        assert exterior.crs == self.osgb
        representative_point = arr.representative_point()
        assert representative_point.crs == self.osgb

    def test_binary_ops(self):
        arr = self.arr
        quads = []
        # keep sampling until we have 10 valid quadrilaterals
        while len(quads) < 10:
            geom = Polygon([(random.random(), random.random()) for i in range(4)])
            if geom.is_valid:
                quads.append(geom)
        arr2 = from_shapely(quads, crs=27700)
        difference = arr.difference(arr2)
        assert difference.crs == self.osgb
        intersection = arr.intersection(arr2)
        assert intersection.crs == self.osgb
        symmetric_difference = arr.symmetric_difference(arr2)
        assert symmetric_difference.crs == self.osgb
        union = arr.union(arr2)
        assert union.crs == self.osgb

    def test_other(self):
        arr = self.arr
        buffer = arr.buffer(5)
        assert buffer.crs == self.osgb
        interpolate = arr.exterior.interpolate(0.1)
        assert interpolate.crs == self.osgb
        simplify = arr.simplify(5)
        assert simplify.crs == self.osgb

    @pytest.mark.parametrize(
        "attr, arg",
        [
            ("affine_transform", ([0, 1, 1, 0, 0, 0],)),
            ("translate", ()),
            ("rotate", (10,)),
            ("scale", ()),
            ("skew", ()),
        ],
    )
    def test_affinity_methods(self, attr, arg):
        result = getattr(self.arr, attr)(*arg)
        assert result.crs == self.osgb

    def test_slice(self):
        s = GeoSeries(self.arr, crs=27700)
        assert s.iloc[1:].values.crs == self.osgb
        df = GeoDataFrame({"col1": self.arr}, geometry=s)
        assert df.iloc[1:].geometry.values.crs == self.osgb
        assert df.iloc[1:].col1.values.crs == self.osgb

    def test_concat(self):
        s = GeoSeries(self.arr, crs=27700)
        assert pd.concat([s, s]).values.crs == self.osgb
        df = GeoDataFrame({"col1": from_shapely(self.geoms, crs=4326)}, geometry=s)
        assert pd.concat([df, df]).geometry.values.crs == self.osgb
        assert pd.concat([df, df]).col1.values.crs == self.wgs

    def test_merge(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame({"col1": s}, geometry=arr)
        df2 = GeoDataFrame({"col2": s}, geometry=arr).rename_geometry("geom")
        merged = df.merge(df2, left_index=True, right_index=True)
        assert merged.col1.values.crs == self.wgs
        assert merged.geometry.values.crs == self.osgb
        assert merged.col2.values.crs == self.wgs
        assert merged.geom.values.crs == self.osgb
        assert merged.crs == self.osgb

    # make sure that geometry column from list has CRS (__setitem__)
    def test_setitem_geometry(self):
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
        df["geometry"] = list(df.geometry)
        assert df.geometry.values.crs == self.osgb
        df2 = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
        df2["geometry"] = from_shapely(self.geoms, crs=4326)
        assert df2.geometry.values.crs == self.wgs

    def test_astype(self):
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
        df2 = df.astype({"col1": str})
        assert df2.crs == self.osgb

    def test_apply(self):
        s = GeoSeries(self.arr)
        assert s.crs == 27700
        # apply preserves the CRS if the result is a GeoSeries
        result = s.apply(lambda x: x.centroid)
        assert result.crs == 27700

    def test_apply_geodataframe(self):
        df = GeoDataFrame({"col1": [0, 1]}, geometry=self.geoms, crs=27700)
        assert df.crs == 27700
        # apply preserves the CRS if the result is a GeoDataFrame
        result = df.apply(lambda col: col, axis=0)
        assert result.crs == 27700
        result = df.apply(lambda row: row, axis=1)
        assert result.crs == 27700
class TestSetCRS:
    """set_crs() semantics shared by GeoSeries and GeoDataFrame."""

    @pytest.mark.parametrize(
        "constructor",
        [
            lambda geoms, crs: GeoSeries(geoms, crs=crs),
            lambda geoms, crs: GeoDataFrame(geometry=geoms, crs=crs),
        ],
        ids=["geoseries", "geodataframe"],
    )
    def test_set_crs(self, constructor):
        naive = constructor([Point(0, 0), Point(1, 1)], crs=None)
        assert naive.crs is None
        # by default returns a copy
        result = naive.set_crs(crs="EPSG:4326")
        assert result.crs == "EPSG:4326"
        assert naive.crs is None
        result = naive.set_crs(epsg=4326)
        assert result.crs == "EPSG:4326"
        assert naive.crs is None
        # with inplace=True
        result = naive.set_crs(crs="EPSG:4326", inplace=True)
        assert result is naive
        assert result.crs == naive.crs == "EPSG:4326"
        # raise for non-naive when crs would be overridden
        non_naive = constructor([Point(0, 0), Point(1, 1)], crs="EPSG:4326")
        assert non_naive.crs == "EPSG:4326"
        with pytest.raises(ValueError, match="already has a CRS"):
            non_naive.set_crs("EPSG:3857")
        # allow for equal crs
        result = non_naive.set_crs("EPSG:4326")
        assert result.crs == "EPSG:4326"
        # replace with allow_override=True
        result = non_naive.set_crs("EPSG:3857", allow_override=True)
        assert non_naive.crs == "EPSG:4326"
        assert result.crs == "EPSG:3857"
        result = non_naive.set_crs("EPSG:3857", allow_override=True, inplace=True)
        assert non_naive.crs == "EPSG:3857"
        assert result.crs == "EPSG:3857"
        # set CRS to None
        result = non_naive.set_crs(crs=None, allow_override=True)
        assert result.crs is None
        assert non_naive.crs == "EPSG:3857"

View File

@@ -1,15 +0,0 @@
from geopandas import GeoDataFrame, read_file
from geopandas.datasets import get_path
import pytest
@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb", "foo"]
)
def test_read_paths(test_dataset):
    """geopandas.datasets was removed: get_path() must raise for every name."""
    with pytest.raises(
        AttributeError,
        match=r"The geopandas\.dataset has been deprecated and was removed",
    ):
        assert isinstance(read_file(get_path(test_dataset)), GeoDataFrame)

View File

@@ -1,87 +0,0 @@
from textwrap import dedent
from geopandas._decorator import doc
# The blank lines inside the docstring templates below are significant: the
# companion tests compare the decorated __doc__ against dedent-ed literals
# that contain the same blank lines.
@doc(method="cumsum", operation="sum")
def cumsum(whatever):
    """
    This is the {method} method.

    It computes the cumulative {operation}.
    """


@doc(
    cumsum,
    dedent(
        """
        Examples
        --------
        >>> cumavg([1, 2, 3])
        2
        """
    ),
    method="cumavg",
    operation="average",
)
def cumavg(whatever): ...


@doc(cumsum, method="cummax", operation="maximum")
def cummax(whatever): ...


@doc(cummax, method="cummin", operation="minimum")
def cummin(whatever): ...
def test_docstring_formatting():
    """Placeholders in the template docstring are substituted."""
    docstr = dedent(
        """
        This is the cumsum method.

        It computes the cumulative sum.
        """
    )
    assert cumsum.__doc__ == docstr


def test_docstring_appending():
    """Extra docstring components are appended after the template."""
    docstr = dedent(
        """
        This is the cumavg method.

        It computes the cumulative average.

        Examples
        --------
        >>> cumavg([1, 2, 3])
        2
        """
    )
    assert cumavg.__doc__ == docstr


def test_doc_template_from_func():
    """A function can be used directly as the docstring template."""
    docstr = dedent(
        """
        This is the cummax method.

        It computes the cumulative maximum.
        """
    )
    assert cummax.__doc__ == docstr


def test_inherit_doc_template():
    """The template is inherited transitively through decorated functions."""
    docstr = dedent(
        """
        This is the cummin method.

        It computes the cumulative minimum.
        """
    )
    assert cummin.__doc__ == docstr

View File

@@ -1,372 +0,0 @@
import warnings
import numpy as np
import pandas as pd
import geopandas
from geopandas import GeoDataFrame, read_file
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_15, PANDAS_GE_20, PANDAS_GE_30
import pytest
from geopandas.testing import assert_geodataframe_equal, geom_almost_equals
from pandas.testing import assert_frame_equal
@pytest.fixture
def nybb_polydf(nybb_filename):
    """NYC boroughs with a renamed geometry column and a grouping key."""
    nybb_polydf = read_file(nybb_filename)
    nybb_polydf = nybb_polydf[["geometry", "BoroName", "BoroCode"]]
    nybb_polydf = nybb_polydf.rename(columns={"geometry": "myshapes"})
    nybb_polydf = nybb_polydf.set_geometry("myshapes")
    nybb_polydf["manhattan_bronx"] = 5
    nybb_polydf.loc[3:4, "manhattan_bronx"] = 6
    nybb_polydf["BoroCode"] = nybb_polydf["BoroCode"].astype("int64")
    return nybb_polydf


@pytest.fixture
def merged_shapes(nybb_polydf):
    """Expected two-row frame after dissolving on 'manhattan_bronx'."""
    # Merged geometry
    manhattan_bronx = nybb_polydf.loc[3:4]
    others = nybb_polydf.loc[0:2]
    collapsed = [others.geometry.union_all(), manhattan_bronx.geometry.union_all()]
    merged_shapes = GeoDataFrame(
        {"myshapes": collapsed},
        geometry="myshapes",
        index=pd.Index([5, 6], name="manhattan_bronx"),
        crs=nybb_polydf.crs,
    )
    return merged_shapes


@pytest.fixture
def first(merged_shapes):
    """merged_shapes plus the columns aggfunc='first' would produce."""
    first = merged_shapes.copy()
    first["BoroName"] = ["Staten Island", "Manhattan"]
    first["BoroCode"] = [5, 1]
    return first


@pytest.fixture
def expected_mean(merged_shapes):
    """merged_shapes plus the columns aggfunc='mean' would produce."""
    test_mean = merged_shapes.copy()
    test_mean["BoroCode"] = [4, 1.5]
    return test_mean
def test_geom_dissolve(nybb_polydf, first):
    """dissolve() keeps the custom geometry column name and merges shapes."""
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert test.geometry.name == "myshapes"
    assert geom_almost_equals(test, first)


@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_dissolve_retains_existing_crs(nybb_polydf):
    assert nybb_polydf.crs is not None
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert test.crs is not None


def test_dissolve_retains_nonexisting_crs(nybb_polydf):
    nybb_polydf.geometry.array.crs = None
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert test.crs is None


def test_first_dissolve(nybb_polydf, first):
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert_frame_equal(first, test, check_column_type=False)
def test_mean_dissolve(nybb_polydf, first, expected_mean):
    """aggfunc='mean' works across the pandas version-dependent APIs."""
    if not PANDAS_GE_15:
        test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
        test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
    elif PANDAS_GE_15 and not PANDAS_GE_20:
        with pytest.warns(FutureWarning, match=".*used in dissolve is deprecated.*"):
            test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
            test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
    else:  # pandas 2.0
        test = nybb_polydf.dissolve(
            "manhattan_bronx", aggfunc="mean", numeric_only=True
        )
        # for non pandas "mean", numeric only cannot be applied. Drop columns manually
        test2 = nybb_polydf.drop(columns=["BoroName"]).dissolve(
            "manhattan_bronx", aggfunc="mean"
        )
    assert_frame_equal(expected_mean, test, check_column_type=False)
    assert_frame_equal(expected_mean, test2, check_column_type=False)


@pytest.mark.skipif(not PANDAS_GE_15 or PANDAS_GE_20, reason="warning for pandas 1.5.x")
def test_mean_dissolve_warning_capture(nybb_polydf, first, expected_mean):
    with pytest.warns(
        FutureWarning,
        match=".*used in dissolve is deprecated.*",
    ):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
    # test no warning for aggfunc first which doesn't have numeric only semantics
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="first")


def test_dissolve_emits_other_warnings(nybb_polydf):
    # we only do something special for pandas 1.5.x, but expect this
    # test to be true on any version
    def sum_and_warn(group):
        warnings.warn("foo")  # noqa: B028
        if PANDAS_GE_20:
            return group.sum(numeric_only=False)
        else:
            return group.sum()

    with pytest.warns(UserWarning, match="foo"):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc=sum_and_warn)
def test_multicolumn_dissolve(nybb_polydf, first):
    """Dissolving on multiple columns produces a MultiIndex result."""
    multi = nybb_polydf.copy()
    multi["dup_col"] = multi.manhattan_bronx
    multi_test = multi.dissolve(["manhattan_bronx", "dup_col"], aggfunc="first")
    first_copy = first.copy()
    first_copy["dup_col"] = first_copy.index
    first_copy = first_copy.set_index([first_copy.index, "dup_col"])
    assert_frame_equal(multi_test, first_copy, check_column_type=False)


def test_reset_index(nybb_polydf, first):
    test = nybb_polydf.dissolve("manhattan_bronx", as_index=False)
    comparison = first.reset_index()
    assert_frame_equal(comparison, test, check_column_type=False)


def test_dissolve_none(nybb_polydf):
    """by=None dissolves the whole frame into a single row."""
    test = nybb_polydf.dissolve(by=None)
    expected = GeoDataFrame(
        {
            nybb_polydf.geometry.name: [nybb_polydf.geometry.union_all()],
            "BoroName": ["Staten Island"],
            "BoroCode": [5],
            "manhattan_bronx": [5],
        },
        geometry=nybb_polydf.geometry.name,
        crs=nybb_polydf.crs,
    )
    assert_frame_equal(expected, test, check_column_type=False)


def test_dissolve_none_mean(nybb_polydf):
    test = nybb_polydf.dissolve(aggfunc="mean", numeric_only=True)
    expected = GeoDataFrame(
        {
            nybb_polydf.geometry.name: [nybb_polydf.geometry.union_all()],
            "BoroCode": [3.0],
            "manhattan_bronx": [5.4],
        },
        geometry=nybb_polydf.geometry.name,
        crs=nybb_polydf.crs,
    )
    assert_frame_equal(expected, test, check_column_type=False)
def test_dissolve_level():
    # Dissolving by MultiIndex level — positional, by name, and lists of
    # both — must group on the chosen level values.
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2, 2],
            "b": [3, 4, 4, 4],
            "c": [3, 4, 5, 6],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    ).set_index(["a", "b", "c"])
    # level "a": rows 0-1 (a=1) merge, rows 2-3 (a=2) merge
    expected_a = geopandas.GeoDataFrame(
        {
            "a": [1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index("a")
    # level "b": row 0 (b=3) stays a single point, rows 1-3 (b=4) merge
    expected_b = geopandas.GeoDataFrame(
        {
            "b": [3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2, 3 3)"]
            ),
        }
    ).set_index("b")
    # levels ["a", "b"]: only rows 2-3 share the (2, 4) key
    expected_ab = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2],
            "b": [3, 4, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index(["a", "b"])
    assert_frame_equal(expected_a, gdf.dissolve(level=0))
    assert_frame_equal(expected_a, gdf.dissolve(level="a"))
    assert_frame_equal(expected_b, gdf.dissolve(level=1))
    assert_frame_equal(expected_b, gdf.dissolve(level="b"))
    assert_frame_equal(expected_ab, gdf.dissolve(level=[0, 1]))
    assert_frame_equal(expected_ab, gdf.dissolve(level=["a", "b"]))
def test_dissolve_sort():
    # sort=True (the default) orders groups by key; sort=False keeps the
    # keys in first-appearance order.
    gdf = geopandas.GeoDataFrame(
        {
            "a": [2, 1, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )
    expected_unsorted = geopandas.GeoDataFrame(
        {
            "a": [2, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2)"]
            ),
        }
    ).set_index("a")
    assert_frame_equal(expected_unsorted.sort_index(), gdf.dissolve("a"))
    assert_frame_equal(expected_unsorted, gdf.dissolve("a", sort=False))
def test_dissolve_categorical():
    # Dissolving on a categorical key honours pandas' `observed` semantics.
    gdf = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 1, 1, 2],
            "to_agg": [1, 2, 3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    )
    # when observed=False we get an additional observation
    # that wasn't in the original data
    # NOTE: on pandas 3.0+ the unobserved combination's geometry comes back
    # as an empty geometry instead of a missing value
    none_val = "GEOMETRYCOLLECTION EMPTY" if PANDAS_GE_30 else None
    expected_gdf_observed_false = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 2, 1, 2],
            "geometry": geopandas.array.from_wkt(
                [
                    "MULTIPOINT (0 0, 1 1)",
                    none_val,
                    "POINT (2 2)",
                    "POINT (3 3)",
                ]
            ),
            "to_agg": [1, None, 3, 4],
        }
    ).set_index(["cat", "noncat"])
    # when observed=True we do not get any additional observations
    expected_gdf_observed_true = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "b", "b"]),
            "noncat": [1, 1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
            "to_agg": [1, 3, 4],
        }
    ).set_index(["cat", "noncat"])
    assert_frame_equal(expected_gdf_observed_false, gdf.dissolve(["cat", "noncat"]))
    assert_frame_equal(
        expected_gdf_observed_true, gdf.dissolve(["cat", "noncat"], observed=True)
    )
def test_dissolve_dropna():
    # dropna=True (the default) drops groups whose key is NA; dropna=False
    # keeps them as an explicit NaN group.
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, None],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )
    expected_with_na = geopandas.GeoDataFrame(
        {
            "a": [1.0, np.nan],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)"]
            ),
        }
    ).set_index("a")
    expected_no_na = geopandas.GeoDataFrame(
        {
            "a": [1.0],
            "geometry": geopandas.array.from_wkt(["MULTIPOINT (0 0, 1 1)"]),
        }
    ).set_index("a")
    assert_frame_equal(expected_with_na, gdf.dissolve("a", dropna=False))
    assert_frame_equal(expected_no_na, gdf.dissolve("a"))
def test_dissolve_dropna_warn(nybb_polydf):
    # The "dropna kwarg is not supported" warning must not fire for the
    # default parameters.
    with warnings.catch_warnings(record=True) as captured:
        nybb_polydf.dissolve()
    assert not any(
        "dropna kwarg is not supported" in str(w.message) for w in captured
    )
def test_dissolve_multi_agg(nybb_polydf, merged_shapes):
    # A dict-of-lists aggfunc yields MultiIndex columns and must not warn.
    merged_shapes[("BoroCode", "min")] = [3, 1]
    merged_shapes[("BoroCode", "max")] = [5, 2]
    merged_shapes[("BoroName", "count")] = [3, 2]
    aggspec = {"BoroCode": ["min", "max"], "BoroName": "count"}
    with warnings.catch_warnings(record=True) as captured:
        result = nybb_polydf.dissolve(by="manhattan_bronx", aggfunc=aggspec)
        assert_geodataframe_equal(result, merged_shapes)
        assert len(captured) == 0
def test_coverage_dissolve(nybb_polydf):
    # method="coverage" must merge each group with the coverage union
    # algorithm, matching a manual per-group union_all(method="coverage").
    manhattan_bronx = nybb_polydf.loc[3:4]
    others = nybb_polydf.loc[0:2]
    collapsed = [
        others.geometry.union_all(method="coverage"),
        manhattan_bronx.geometry.union_all(method="coverage"),
    ]
    merged_shapes = GeoDataFrame(
        {"myshapes": collapsed},
        geometry="myshapes",
        index=pd.Index([5, 6], name="manhattan_bronx"),
        crs=nybb_polydf.crs,
    )
    # first-value semantics for the remaining columns
    merged_shapes["BoroName"] = ["Staten Island", "Manhattan"]
    merged_shapes["BoroCode"] = [5, 1]
    test = nybb_polydf.dissolve("manhattan_bronx", method="coverage")
    assert_frame_equal(merged_shapes, test, check_column_type=False)

View File

@@ -1,648 +0,0 @@
"""
This file contains a minimal set of tests for compliance with the extension
array interface test suite (by inheriting the pandas test suite), and should
contain no other tests.
Other tests (eg related to the spatial functionality or integration
with GeoSeries/GeoDataFrame) should be added to test_array.py and others.
The tests in this file are inherited from the BaseExtensionTests, and only
minimal tweaks should be applied to get the tests passing (by overwriting a
parent method).
A set of fixtures are defined to provide data for the tests (the fixtures
expected to be available to pytest by the inherited pandas tests).
"""
import itertools
import operator
import numpy as np
import pandas as pd
from pandas.tests.extension import base as extension_tests
import shapely.geometry
from shapely.geometry import Point
from geopandas._compat import PANDAS_GE_15, PANDAS_GE_21, PANDAS_GE_22
from geopandas.array import GeometryArray, GeometryDtype, from_shapely
import pytest
from pandas.testing import assert_frame_equal, assert_series_equal
# -----------------------------------------------------------------------------
# Compat with extension tests in older pandas versions
# -----------------------------------------------------------------------------
# Skip markers for inherited pandas extension-suite tests that GeometryArray
# cannot (yet) satisfy.
not_yet_implemented = pytest.mark.skip(reason="Not yet implemented")
no_minmax = pytest.mark.skip(reason="Min/max not supported")
# -----------------------------------------------------------------------------
# Required fixtures
# -----------------------------------------------------------------------------
@pytest.fixture
def dtype():
    """A fixture providing the ExtensionDtype to validate (GeometryDtype)."""
    return GeometryDtype()
def make_data():
    """Build a GeometryArray of 100 distinct points (i, i)."""
    points = np.empty(100, dtype=object)
    points[:] = [shapely.geometry.Point(i, i) for i in range(100)]
    return from_shapely(points)
@pytest.fixture
def data():
    """Length-100 array for this type.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    return make_data()
@pytest.fixture
def data_for_twos():
    """Length-100 array in which all the elements are two.

    Not meaningful for geometries, so tests requiring it are expected to skip.
    """
    raise NotImplementedError
@pytest.fixture
def data_missing():
    """Length-2 array with [NA, Valid] (None is the NA value for geometries)."""
    return from_shapely([None, shapely.geometry.Point(1, 1)])
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture giving 'data' and 'data_missing'"""
    return {"data": data, "data_missing": data_missing}[request.param]
@pytest.fixture
def data_repeated(data):
    """
    Generate many datasets.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        A callable that takes a `count` argument and
        returns a generator yielding `count` datasets.
    """

    # Yield the same array object each time; the inherited tests only read it.
    def gen(count):
        for _ in range(count):
            yield data

    return gen
@pytest.fixture
def data_for_sorting():
    """Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C
    """
    return from_shapely([Point(0, 1), Point(1, 1), Point(0, 0)])
@pytest.fixture
def data_missing_for_sorting():
    """Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.
    """
    return from_shapely([Point(1, 2), None, Point(0, 0)])
@pytest.fixture
def na_cmp():
    """Binary operator for comparing NA values.

    Returns a function of two arguments that returns True if both
    arguments are (scalar) NA for GeometryArray, i.e. both are ``None``.
    """
    return lambda x, y: x is None and y is None
@pytest.fixture
def na_value():
    """The scalar missing value for this type. Default 'None'"""
    return None
@pytest.fixture
def data_for_grouping():
    """Data for factorization, grouping, and unique tests.

    Expected to be like [B, B, NA, NA, A, A, B, C]
    Where A < B < C and NA is missing
    """
    geoms = [
        shapely.geometry.Point(1, 1),  # B
        shapely.geometry.Point(1, 1),  # B
        None,  # NA
        None,  # NA
        shapely.geometry.Point(0, 0),  # A
        shapely.geometry.Point(0, 0),  # A
        shapely.geometry.Point(1, 1),  # B
        shapely.geometry.Point(2, 2),  # C
    ]
    return from_shapely(geoms)
@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Whether to box the data in a Series"""
    return request.param
@pytest.fixture(
    params=[
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: pd.Series([1] * len(x)),
        lambda x: x,
    ],
    ids=["scalar", "list", "series", "object"],
)
def groupby_apply_op(request):
    """
    Functions to test groupby.apply().
    """
    return request.param
@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Boolean fixture to support Series and Series.to_frame() comparison testing.
    """
    return request.param
@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Boolean fixture to support arr and Series(arr) comparison testing.
    """
    return request.param
@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Boolean fixture to support comparison testing of ExtensionDtype array
    and numpy array.
    """
    return request.param
@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
    """
    Parametrized fixture giving method parameters 'ffill' and 'bfill' for
    Series.fillna(method=<method>) testing.
    """
    return request.param
@pytest.fixture(params=[True, False])
def as_array(request):
    """
    Boolean fixture to support ExtensionDtype _from_sequence method testing.
    """
    return request.param
@pytest.fixture
def invalid_scalar(data):
    """
    A scalar that *cannot* be held by this ExtensionArray.

    The default should work for most subclasses, but is not guaranteed.
    If the array can hold any item (i.e. object dtype), then use pytest.skip.
    """
    # A bare object() instance is not a geometry and not NA.
    return object.__new__(object)
# Fixtures defined in pandas/conftest.py that are also needed: defining them
# here instead of importing for compatibility
@pytest.fixture(
    params=["sum", "max", "min", "mean", "prod", "std", "var", "median", "kurt", "skew"]
)
def all_numeric_reductions(request):
    """
    Fixture for numeric reduction names
    """
    return request.param
@pytest.fixture(params=["all", "any"])
def all_boolean_reductions(request):
    """
    Fixture for boolean reduction names
    """
    return request.param
# only == and != are support for GeometryArray
# @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
@pytest.fixture(params=["__eq__", "__ne__"])
def all_compare_operators(request):
    """
    Fixture for dunder names for the compare operations supported by
    GeometryArray: only ``__eq__`` and ``__ne__`` (ordering comparisons
    are not defined for geometries, see the note above).
    """
    return request.param
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Simple fixture for testing keys in sorting methods.
    Tests None (no key) and the identity key.
    """
    return request.param
# -----------------------------------------------------------------------------
# Inherited tests
# -----------------------------------------------------------------------------
class TestDtype(extension_tests.BaseDtypeTests):
    """Inherited dtype tests plus GeometryDtype-specific additions."""

    def test_array_type_with_arg(self, data, dtype):
        assert dtype.construct_array_type() is GeometryArray

    def test_registry(self, data, dtype):
        # astype("geometry") on an object Series must round-trip through the
        # registered dtype.
        ser = pd.Series(np.asarray(data), dtype=object)
        converted = ser.astype("geometry")
        assert isinstance(converted.array, GeometryArray)
        assert_series_equal(converted, pd.Series(data))
class TestInterface(extension_tests.BaseInterfaceTests):
    def test_contains(self, data, data_missing):
        # overridden due to the inconsistency between
        # GeometryDtype.na_value = np.nan
        # and None being used as NA in array

        # ensure data without missing values
        data = data[~data.isna()]

        # first elements are non-missing
        assert data[0] in data
        assert data_missing[0] in data_missing

        # None is the missing sentinel; other NA scalars must not match
        assert None in data_missing
        assert None not in data
        assert pd.NaT not in data_missing
class TestConstructors(extension_tests.BaseConstructorsTests):
    # inherited pandas constructor tests pass unmodified
    pass
class TestReshaping(extension_tests.BaseReshapingTests):
    """Inherited reshaping tests; only ``test_unstack`` is overridden."""

    # NOTE: this test is copied from pandas/tests/extension/base/reshaping.py
    # because starting with pandas 3.0 the assert_frame_equal is strict regarding
    # the exact missing value (None vs NaN)
    # Our `result` uses None, but the way the `expected` is created results in
    # NaNs (and specifying to use None as fill value in unstack also does not
    # help)
    # -> the only change compared to the upstream test is marked
    @pytest.mark.parametrize(
        "index",
        [
            # Two levels, uniform.
            pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
            # non-uniform
            pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
            # three levels, non-uniform
            pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
            pd.MultiIndex.from_tuples(
                [
                    ("A", "a", 1),
                    ("A", "b", 0),
                    ("A", "a", 0),
                    ("B", "a", 0),
                    ("B", "c", 1),
                ]
            ),
        ],
    )
    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, data, index, obj):
        data = data[: len(index)]
        if obj == "series":
            ser = pd.Series(data, index=index)
        else:
            ser = pd.DataFrame({"A": data, "B": data}, index=index)

        n = index.nlevels
        levels = list(range(n))
        # [0, 1, 2]
        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
        combinations = itertools.chain.from_iterable(
            itertools.permutations(levels, i) for i in range(1, n)
        )

        for level in combinations:
            result = ser.unstack(level=level)
            assert all(
                isinstance(result[col].array, type(data)) for col in result.columns
            )

            if obj == "series":
                # We should get the same result with to_frame+unstack+droplevel
                df = ser.to_frame()

                alt = df.unstack(level=level).droplevel(0, axis=1)
                assert_frame_equal(result, alt)

            obj_ser = ser.astype(object)

            expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
            if obj == "series":
                assert (expected.dtypes == object).all()

            # <------------ next line is added
            expected[expected.isna()] = None
            # ------------->

            result = result.astype(object)
            assert_frame_equal(result, expected)
class TestGetitem(extension_tests.BaseGetitemTests):
    # inherited pandas indexing tests pass unmodified
    pass
class TestSetitem(extension_tests.BaseSetitemTests):
    # inherited pandas assignment tests pass unmodified
    pass
class TestMissing(extension_tests.BaseMissingTests):
    """Inherited missing-data tests; fillna variants overridden/gated."""

    def test_fillna_series(self, data_missing):
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        # Fill with a scalar
        result = ser.fillna(fill_value)
        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
        assert_series_equal(result, expected)

        # Fill with a series
        filler = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(1, 1),
                    shapely.geometry.Point(2, 2),
                ],
            )
        )
        result = ser.fillna(filler)
        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
        assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        # (filler index does not align with ser, so nothing is filled)
        filler = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(2, 2),
                    shapely.geometry.Point(1, 1),
                ]
            ),
            index=[10, 11],
        )
        result = ser.fillna(filler)
        assert_series_equal(result, ser)

    # More `GeoSeries.fillna` testcases are in
    # `geopandas\tests\test_pandas_methods.py::test_fillna_scalar`
    # and `geopandas\tests\test_pandas_methods.py::test_fillna_series`.

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_limit_pad(self, data_missing):
        super().test_fillna_limit_pad(data_missing)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_limit_backfill(self, data_missing):
        super().test_fillna_limit_backfill(data_missing)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_series_method(self, data_missing, fillna_method):
        super().test_fillna_series_method(data_missing, fillna_method)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_no_op_returns_copy(self, data):
        super().test_fillna_no_op_returns_copy(data)
# pandas 2.2+ exposes per-op reduce tests; older versions need the
# "no reductions" base class (aliased here so TestReduce works either way).
if PANDAS_GE_22:
    from pandas.tests.extension.base import BaseReduceTests
else:
    from pandas.tests.extension.base import BaseNoReduceTests as BaseReduceTests
class TestReduce(BaseReduceTests):
    @pytest.mark.skip("boolean reduce (any/all) tested in test_pandas_methods")
    def test_reduce_series_boolean(self):
        pass
# Arithmetic dunders exercised by TestArithmeticOps; __sub__/__rsub__ are
# excluded because subtraction is implemented as geometric "difference".
_all_arithmetic_operators = [
    "__add__",
    "__radd__",
    # '__sub__', '__rsub__',
    "__mul__",
    "__rmul__",
    "__floordiv__",
    "__rfloordiv__",
    "__truediv__",
    "__rtruediv__",
    "__pow__",
    "__rpow__",
    "__mod__",
    "__rmod__",
]
@pytest.fixture(params=_all_arithmetic_operators)
def all_arithmetic_operators(request):
    """
    Fixture for dunder names for common arithmetic operations

    Adapted to exclude __sub__, as this is implemented as "difference".
    """
    return request.param
# an inherited test from pandas creates a Series from a list of geometries, which
# triggers the warning from Shapely, out of control of GeoPandas, so ignoring here
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestArithmeticOps(extension_tests.BaseArithmeticOpsTests):
    """Inherited arithmetic-op tests; divmod/add-array cases do not apply."""

    @pytest.mark.skip(reason="not applicable")
    def test_divmod_series_array(self, data, data_for_twos):
        pass

    @pytest.mark.skip(reason="not applicable")
    def test_add_series_with_extension_array(self, data):
        pass
# an inherited test from pandas creates a Series from a list of geometries, which
# triggers the warning from Shapely, out of control of GeoPandas, so ignoring here
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestComparisonOps(extension_tests.BaseComparisonOpsTests):
    """Comparison tests; GeometryArray supports only ``==`` and ``!=``."""

    def _compare_other(self, s, data, op_name, other):
        # Resolve e.g. "__eq__" -> operator.eq, then check that Series
        # dispatch matches an elementwise combine().
        op = getattr(operator, op_name.strip("_"))
        assert_series_equal(op(s, other), s.combine(other, op))

    def test_compare_scalar(self, data, all_compare_operators):
        ser = pd.Series(data)
        self._compare_other(ser, data, all_compare_operators, data[0])

    def test_compare_array(self, data, all_compare_operators):
        ser = pd.Series(data)
        broadcast_other = pd.Series([data[0]] * len(data))
        self._compare_other(ser, data, all_compare_operators, broadcast_other)
class TestMethods(extension_tests.BaseMethodsTests):
    """Inherited method tests, with unsupported operations skipped."""

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        pass

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    def test_value_counts_with_normalize(self, data):
        pass

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        super().test_sort_values_frame(data_for_sorting, ascending)

    @pytest.mark.skip(reason="searchsorted not supported")
    def test_searchsorted(self, data_for_sorting, as_series):
        pass

    @not_yet_implemented
    def test_combine_le(self):
        pass

    @pytest.mark.skip(reason="addition not supported")
    def test_combine_add(self):
        pass

    @not_yet_implemented
    def test_fillna_length_mismatch(self, data_missing):
        msg = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg):
            data_missing.fillna(data_missing.take([1]))

    # geometries have no ordering, so argmin/argmax cannot be defined
    @no_minmax
    def test_argmin_argmax(self):
        pass

    @no_minmax
    def test_argmin_argmax_empty_array(self):
        pass

    @no_minmax
    def test_argmin_argmax_all_na(self):
        pass

    @no_minmax
    def test_argreduce_series(self):
        pass

    @no_minmax
    def test_argmax_argmin_no_skipna_notimplemented(self):
        pass
class TestCasting(extension_tests.BaseCastingTests):
    # inherited pandas casting tests pass unmodified
    pass
class TestGroupby(extension_tests.BaseGroupbyTests):
    """Inherited groupby tests (overrides only re-parametrize/delegate)."""

    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super().test_groupby_extension_agg(as_index, data_for_grouping)

    def test_groupby_extension_transform(self, data_for_grouping):
        super().test_groupby_extension_transform(data_for_grouping)

    @pytest.mark.parametrize(
        "op",
        [
            lambda x: 1,
            lambda x: [1] * len(x),
            lambda x: pd.Series([1] * len(x)),
            lambda x: x,
        ],
        ids=["scalar", "list", "series", "object"],
    )
    def test_groupby_extension_apply(self, data_for_grouping, op):
        super().test_groupby_extension_apply(data_for_grouping, op)
class TestPrinting(extension_tests.BasePrintingTests):
    # inherited pandas repr tests pass unmodified
    pass
# CSV/text parsing into GeometryArray is not implemented yet
@not_yet_implemented
class TestParsing(extension_tests.BaseParsingTests):
    pass

View File

@@ -1,170 +0,0 @@
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ
from geopandas.tools import geocode, reverse_geocode
from geopandas.tools.geocoding import _prepare_geocode_result
import pytest
from geopandas.testing import assert_geodataframe_equal
from geopandas.tests.util import assert_geoseries_equal, mock
from pandas.testing import assert_series_equal
geopy = pytest.importorskip("geopy")
class ForwardMock(mock.MagicMock):
    """
    Mock the forward geocoding function.
    Returns the passed in address and (p, p+.5) where p increases
    at each call
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # call counter; drives the fabricated coordinates
        self._n = 0.0

    def __call__(self, *args, **kwargs):
        # args[0] is the queried address; echo it with synthetic coordinates
        self.return_value = args[0], (self._n, self._n + 0.5)
        self._n += 1
        return super().__call__(*args, **kwargs)
class ReverseMock(mock.MagicMock):
    """
    Mock the reverse geocoding function.
    Returns the passed in point and 'address{p}' where p increases
    at each call
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # call counter; drives the fabricated address suffix
        self._n = 0

    def __call__(self, *args, **kwargs):
        # args[0] is the queried point; echo it with a synthetic address
        self.return_value = "address{0}".format(self._n), args[0]
        self._n += 1
        return super().__call__(*args, **kwargs)
@pytest.fixture
def locations():
    """Two street addresses used as forward-geocoding queries."""
    return ["260 Broadway, New York, NY", "77 Massachusetts Ave, Cambridge, MA"]
@pytest.fixture
def points():
    """Two lon/lat points used as reverse-geocoding queries."""
    return [Point(-71.0597732, 42.3584308), Point(-77.0365305, 38.8977332)]
def test_prepare_result():
    # Calls _prepare_result with sample results from the geocoder call
    # loop
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    p1 = Point(-23.4, 56.7)
    d = {"a": ("address0", p0.coords[0]), "b": ("address1", p1.coords[0])}

    df = _prepare_geocode_result(d)
    assert type(df) is GeoDataFrame
    if HAS_PYPROJ:
        assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df

    # geocoder returns (lat, lon); the resulting geometry must be (lon, lat)
    coords = df.loc["a"]["geometry"].coords[0]
    test = p0.coords[0]
    # Output from the df should be lon/lat
    assert coords[0] == pytest.approx(test[1])
    assert coords[1] == pytest.approx(test[0])

    coords = df.loc["b"]["geometry"].coords[0]
    test = p1.coords[0]
    assert coords[0] == pytest.approx(test[1])
    assert coords[1] == pytest.approx(test[0])
def test_prepare_result_none():
    # A (None, None) geocoder result becomes an empty geometry + None address.
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    d = {"a": ("address0", p0.coords[0]), "b": (None, None)}

    df = _prepare_geocode_result(d)
    assert type(df) is GeoDataFrame
    if HAS_PYPROJ:
        assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df

    row = df.loc["b"]
    # TODO we should probably replace this with a missing value instead of point?
    assert len(row["geometry"].coords) == 0
    assert row["geometry"].is_empty
    assert row["address"] is None
@pytest.mark.parametrize("geocode_result", (None, (None, None)))
def test_prepare_geocode_result_when_result_is(geocode_result):
    # Both "no result" shapes become an empty Point with a None address.
    output = _prepare_geocode_result({0: geocode_result})
    expected_output = GeoDataFrame(
        {"geometry": [Point()], "address": [None]},
        crs="EPSG:4326",
    )
    assert_geodataframe_equal(output, expected_output)
def test_bad_provider_forward():
    # An unknown provider name must raise geopy's GeocoderNotFound.
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(["cambridge, ma"], "badprovider")
def test_bad_provider_reverse():
    # An unknown provider name must raise geopy's GeocoderNotFound.
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        reverse_geocode([Point(0, 0)], "badprovider")
def test_forward(locations, points):
    # Forward geocoding accepts a provider name or a geocoder class; the
    # mocked geocoder returns (lat=n, lon=n+0.5) per call (see ForwardMock).
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.geocode", ForwardMock()) as m:
            g = geocode(locations, provider=provider, timeout=2)
            assert len(locations) == m.call_count

        n = len(locations)
        assert isinstance(g, GeoDataFrame)
        # geometries are built lon/lat, hence Point(n + 0.5, n)
        expected = GeoSeries(
            [Point(float(x) + 0.5, float(x)) for x in range(n)], crs="EPSG:4326"
        )
        assert_geoseries_equal(expected, g["geometry"])
        assert_series_equal(g["address"], pd.Series(locations, name="address"))
def test_reverse(locations, points):
    # Reverse geocoding echoes the queried points and fabricates addresses
    # "address0", "address1", ... (see ReverseMock).
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.reverse", ReverseMock()) as m:
            g = reverse_geocode(points, provider=provider, timeout=2)
            assert len(points) == m.call_count

        assert isinstance(g, GeoDataFrame)
        expected = GeoSeries(points, crs="EPSG:4326")
        assert_geoseries_equal(expected, g["geometry"])
        address = pd.Series(
            ["address" + str(x) for x in range(len(points))], name="address"
        )
        assert_series_equal(g["address"], address)

View File

@@ -1,747 +0,0 @@
import json
import os
import random
import shutil
import tempfile
import warnings
import numpy as np
import pandas as pd
from shapely.geometry import (
GeometryCollection,
LineString,
MultiLineString,
MultiPoint,
MultiPolygon,
Point,
Polygon,
)
from shapely.geometry.base import BaseGeometry
import geopandas._compat as compat
from geopandas import GeoDataFrame, GeoSeries, clip, read_file
from geopandas.array import GeometryArray, GeometryDtype
import pytest
from geopandas.testing import assert_geoseries_equal, geom_almost_equals
from geopandas.tests.util import geom_equals
from numpy.testing import assert_array_equal
from pandas.testing import assert_index_equal, assert_series_equal
class TestSeries:
    def setup_method(self):
        # Per-test scratch directory (removed in teardown_method).
        self.tempdir = tempfile.mkdtemp()
        # Two triangles and the unit square they tile.
        self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g1 = GeoSeries([self.t1, self.sq])
        self.g2 = GeoSeries([self.sq, self.t1])
        self.g3 = GeoSeries([self.t1, self.t2], crs="epsg:4326")
        self.g4 = GeoSeries([self.t2, self.t1])
        # Series with empty / None entries for missing-value behavior.
        self.na = GeoSeries([self.t1, self.t2, Polygon()])
        self.na_none = GeoSeries([self.t1, self.t2, None])
        # Partially overlapping string indices ("B" is shared) for alignment.
        self.a1 = self.g1.copy()
        self.a1.index = ["A", "B"]
        self.a2 = self.g2.copy()
        self.a2.index = ["B", "C"]
        # Real-world lon/lat landmarks and their EPSG:3857 counterparts.
        self.esb = Point(-73.9847, 40.7484)
        self.sol = Point(-74.0446, 40.6893)
        self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326")
        self.l1 = LineString([(0, 0), (0, 1), (1, 1)])
        self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g5 = GeoSeries([self.l1, self.l2])
        self.esb3857 = Point(-8235939.130493107, 4975301.253789809)
        self.sol3857 = Point(-8242607.167991625, 4966620.938285081)
        self.landmarks3857 = GeoSeries([self.esb3857, self.sol3857], crs="epsg:3857")
    def teardown_method(self):
        # Remove the scratch directory created in setup_method.
        shutil.rmtree(self.tempdir)
def test_copy(self):
gc = self.g3.copy()
assert type(gc) is GeoSeries
assert self.g3.name == gc.name
assert self.g3.crs == gc.crs
def test_in(self):
assert self.t1 in self.g1
assert self.sq in self.g1
assert self.t1 in self.a1
assert self.t2 in self.g3
assert self.sq not in self.g3
assert 5 not in self.g3
    def test_align(self):
        # Aligning two GeoSeries with partially overlapping indices yields
        # GeoSeries over the union index, with None for missing labels.
        a1, a2 = self.a1.align(self.a2)
        assert isinstance(a1, GeoSeries)
        assert isinstance(a2, GeoSeries)
        assert a2["A"] is None
        assert a1["B"].equals(a2["B"])
        assert a1["C"] is None
    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
    def test_align_crs(self):
        # Each side keeps its own CRS through align(); a None CRS stays None.
        a1 = self.a1.set_crs("epsg:4326")
        a2 = self.a2.set_crs("epsg:31370")

        res1, res2 = a1.align(a2)
        assert res1.crs == "epsg:4326"
        assert res2.crs == "epsg:31370"

        res1, res2 = a1.align(a2.set_crs(None, allow_override=True))
        assert res1.crs == "epsg:4326"
        assert res2.crs is None
def test_align_mixed(self):
a1 = self.a1
s2 = pd.Series([1, 2], index=["B", "C"])
res1, res2 = a1.align(s2)
exp2 = pd.Series([np.nan, 1, 2], index=["A", "B", "C"])
assert_series_equal(res2, exp2)
    def test_warning_if_not_aligned(self):
        # GH-816
        # Test that warning is issued when operating on non-aligned series

        # _series_op (boolean predicate path)
        with pytest.warns(UserWarning, match="The indices .+ not equal"):
            self.a1.contains(self.a2)

        # _geo_op (geometry-returning path)
        with pytest.warns(UserWarning, match="The indices .+ not equal"):
            self.a1.union(self.a2)
    def test_no_warning_if_aligned(self):
        # GH-816
        # Test that warning is not issued when operating on aligned series
        a1, a2 = self.a1.align(self.a2)

        with warnings.catch_warnings(record=True) as record:
            a1.contains(a2)  # _series_op, explicitly aligned
            self.g1.intersects(self.g2)  # _series_op, implicitly aligned
            a2.union(a1)  # _geo_op, explicitly aligned
            self.g2.intersection(self.g1)  # _geo_op, implicitly aligned

        # only UserWarnings count; other categories may legitimately occur
        user_warnings = [w for w in record if w.category is UserWarning]
        assert not user_warnings, user_warnings[0].message
def test_geom_equals(self):
assert np.all(self.g1.geom_equals(self.g1))
assert_array_equal(self.g1.geom_equals(self.sq), [False, True])
def test_geom_equals_align(self):
a = self.a1.geom_equals(self.a2, align=True)
exp = pd.Series([False, True, False], index=["A", "B", "C"])
assert_series_equal(a, exp)
a = self.a1.geom_equals(self.a2, align=False)
exp = pd.Series([False, False], index=["A", "B"])
assert_series_equal(a, exp)
    @pytest.mark.filterwarnings(r"ignore:The 'geom_almost_equals\(\)':FutureWarning")
    def test_geom_almost_equals(self):
        # TODO: test decimal parameter
        assert np.all(self.g1.geom_almost_equals(self.g1))
        assert_array_equal(self.g1.geom_almost_equals(self.sq), [False, True])
        # silence the expected non-aligned-indices warning for the align cases
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                "The indices of the left and right GeoSeries' are not equal",
                UserWarning,
            )
            assert_array_equal(
                self.a1.geom_almost_equals(self.a2, align=True),
                [False, True, False],
            )

        assert_array_equal(
            self.a1.geom_almost_equals(self.a2, align=False), [False, False]
        )
    def test_geom_equals_exact(self):
        # TODO: test tolerance parameter
        assert np.all(self.g1.geom_equals_exact(self.g1, 0.001))
        assert_array_equal(self.g1.geom_equals_exact(self.sq, 0.001), [False, True])
        # silence the expected non-aligned-indices warning for the align cases
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                "The indices of the left and right GeoSeries' are not equal",
                UserWarning,
            )
            assert_array_equal(
                self.a1.geom_equals_exact(self.a2, 0.001, align=True),
                [False, True, False],
            )

        assert_array_equal(
            self.a1.geom_equals_exact(self.a2, 0.001, align=False), [False, False]
        )
def test_equal_comp_op(self):
s = GeoSeries([Point(x, x) for x in range(3)])
res = s == Point(1, 1)
exp = pd.Series([False, True, False])
assert_series_equal(res, exp)
    def test_to_file(self):
        """Test to_file and from_file"""
        tempfilename = os.path.join(self.tempdir, "test.shp")
        self.g3.to_file(tempfilename)
        # Read layer back in?
        s = GeoSeries.from_file(tempfilename)
        # geometries must round-trip through the shapefile
        assert all(self.g3.geom_equals(s))
        # TODO: compare crs
def test_to_json(self):
"""
Test whether GeoSeries.to_json works and returns an actual json file.
"""
json_str = self.g3.to_json()
data = json.loads(json_str)
assert "id" in data["features"][0].keys()
assert "bbox" in data["features"][0].keys()
# TODO : verify the output is a valid GeoJSON.
def test_to_json_drop_id(self):
"""
Test whether GeoSeries.to_json works when drop_id is True.
"""
json_str = self.g3.to_json(drop_id=True)
data = json.loads(json_str)
assert "id" not in data["features"][0].keys()
def test_to_json_no_bbox(self):
"""
Test whether GeoSeries.to_json works when show_bbox is False.
"""
json_str = self.g3.to_json(show_bbox=False)
data = json.loads(json_str)
assert "bbox" not in data["features"][0].keys()
def test_to_json_no_bbox_drop_id(self):
"""
Test whether GeoSeries.to_json works when show_bbox is False
and drop_id is True.
"""
json_str = self.g3.to_json(show_bbox=False, drop_id=True)
data = json.loads(json_str)
assert "id" not in data["features"][0].keys()
assert "bbox" not in data["features"][0].keys()
    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="Requires pyproj")
    def test_to_json_wgs84(self):
        """
        Test whether the wgs84 conversion works as intended.
        """
        text = self.landmarks3857.to_json(to_wgs84=True)
        data = json.loads(text)
        assert data["type"] == "FeatureCollection"
        assert "id" in data["features"][0].keys()
        # reprojected coordinates must match the original lon/lat points
        coord1 = data["features"][0]["geometry"]["coordinates"]
        coord2 = data["features"][1]["geometry"]["coordinates"]
        np.testing.assert_allclose(coord1, self.esb.coords[0])
        np.testing.assert_allclose(coord2, self.sol.coords[0])
def test_to_json_wgs84_false(self):
    """
    Ensure no conversion to wgs84: the default keeps the native CRS.
    """
    text = self.landmarks3857.to_json()
    data = json.loads(text)
    coord1 = data["features"][0]["geometry"]["coordinates"]
    coord2 = data["features"][1]["geometry"]["coordinates"]
    # hard-coded EPSG:3857 (Web Mercator) coordinates of the two landmarks
    assert coord1 == [-8235939.130493107, 4975301.253789809]
    assert coord2 == [-8242607.167991625, 4966620.938285081]
def test_representative_point(self):
    """Every geometry must contain its own representative point."""
    for series in (self.g1, self.g2, self.g3, self.g4):
        assert np.all(series.contains(series.representative_point()))
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
def test_transform(self):
    """Round-trip to_crs preserves coordinates; invalid inputs raise ValueError."""
    utm18n = self.landmarks.to_crs(epsg=26918)
    lonlat = utm18n.to_crs(epsg=4326)
    assert geom_almost_equals(self.landmarks, lonlat)
    with pytest.raises(ValueError):
        # self.g1 has no CRS set, so reprojecting it must fail
        self.g1.to_crs(epsg=4326)
    with pytest.raises(ValueError):
        # neither crs nor epsg supplied
        self.landmarks.to_crs(crs=None, epsg=None)
def test_estimate_utm_crs__geographic(self):
    """estimate_utm_crs on geographic data picks the matching UTM zone."""
    pyproj = pytest.importorskip("pyproj")
    assert self.landmarks.estimate_utm_crs() == pyproj.CRS("EPSG:32618")
    # a different datum name selects the corresponding UTM CRS
    assert self.landmarks.estimate_utm_crs("NAD83") == pyproj.CRS("EPSG:26918")
def test_estimate_utm_crs__projected(self):
    """estimate_utm_crs also works when the source data is already projected."""
    pyproj = pytest.importorskip("pyproj")
    assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == pyproj.CRS(
        "EPSG:32618"
    )
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
def test_estimate_utm_crs__out_of_bounds(self):
    """Geometries at the pole fall outside every UTM zone and must raise."""
    with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
        GeoSeries(
            [Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326"
        ).estimate_utm_crs()
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
def test_estimate_utm_crs__missing_crs(self):
    """estimate_utm_crs requires a source CRS to be set."""
    with pytest.raises(RuntimeError, match="crs must be set"):
        GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])]).estimate_utm_crs()
def test_fillna(self):
    """fillna() with no argument replaces missing values with empty geometries."""
    # default is to fill with empty geometry
    na = self.na_none.fillna()
    assert isinstance(na[2], BaseGeometry)
    assert na[2].is_empty
    # the non-missing entries are left untouched
    assert geom_equals(self.na_none[:2], na[:2])
    # XXX: method works inconsistently for different pandas versions
    # self.na_none.fillna(method='backfill')
def test_coord_slice(self):
    """Test CoordinateSlicer (`.cx`) bounding-box selection."""
    # need some better test cases
    # an unbounded slice selects everything
    assert geom_equals(self.g3, self.g3.cx[:, :])
    assert geom_equals(self.g3[[True, False]], self.g3.cx[0.9:, :0.1])
    assert geom_equals(self.g3[[False, True]], self.g3.cx[0:0.1, 0.9:1.0])
def test_coord_slice_with_zero(self):
    """`.cx` must treat a 0 bound as a real bound, not as falsy."""
    # Test that CoordinateSlice correctly handles zero slice (#GH477).
    gs = GeoSeries([Point(x, x) for x in range(-3, 4)])
    assert geom_equals(gs.cx[:0, :0], gs.loc[:3])
    assert geom_equals(gs.cx[:, :0], gs.loc[:3])
    assert geom_equals(gs.cx[:0, :], gs.loc[:3])
    assert geom_equals(gs.cx[0:, 0:], gs.loc[3:])
    assert geom_equals(gs.cx[0:, :], gs.loc[3:])
    assert geom_equals(gs.cx[:, 0:], gs.loc[3:])
def test_geoseries_geointerface(self):
    """__geo_interface__ exposes a FeatureCollection with one feature per row."""
    geo = self.g1.__geo_interface__
    assert geo["type"] == "FeatureCollection"
    assert len(geo["features"]) == self.g1.shape[0]
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
def test_proj4strings(self):
    """to_crs accepts proj4 strings and dicts, and both forms are equivalent."""
    # As string
    reprojected = self.g3.to_crs("+proj=utm +zone=30")
    reprojected_back = reprojected.to_crs(epsg=4326)
    assert geom_almost_equals(self.g3, reprojected_back)
    # As dict
    reprojected = self.g3.to_crs({"proj": "utm", "zone": "30"})
    reprojected_back = reprojected.to_crs(epsg=4326)
    assert geom_almost_equals(self.g3, reprojected_back)
    # Set to equivalent string, convert, compare to original
    copy = self.g3.copy().set_crs("epsg:4326", allow_override=True)
    reprojected = copy.to_crs({"proj": "utm", "zone": "30"})
    reprojected_back = reprojected.to_crs(epsg=4326)
    assert geom_almost_equals(self.g3, reprojected_back)
    # Conversions by different format
    reprojected_string = self.g3.to_crs("+proj=utm +zone=30")
    reprojected_dict = self.g3.to_crs({"proj": "utm", "zone": "30"})
    assert geom_almost_equals(reprojected_string, reprojected_dict)
def test_from_wkb(self):
    """GeoSeries.from_wkb on raw WKB bytes reproduces the original geometries."""
    assert_geoseries_equal(self.g1, GeoSeries.from_wkb([self.t1.wkb, self.sq.wkb]))
def test_from_wkb_on_invalid(self):
    """from_wkb honors on_invalid='raise'/'warn'/'ignore' for unparsable WKB."""
    # Single point LineString hex WKB: invalid
    invalid_wkb_hex = "01020000000100000000000000000008400000000000000840"
    message = "point array must contain 0 or >1 elements"
    with pytest.raises(Exception, match=message):
        GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="raise")
    with pytest.warns(Warning, match=message):
        res = GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="warn")
        assert res[0] is None
    with warnings.catch_warnings():
        # turn warnings into errors to prove "ignore" stays silent
        warnings.simplefilter("error")
        res = GeoSeries.from_wkb([invalid_wkb_hex], on_invalid="ignore")
        assert res[0] is None
def test_from_wkb_series(self):
    """from_wkb on a pandas Series preserves the Series' index."""
    wkb_series = pd.Series([self.t1.wkb, self.sq.wkb], index=[1, 2])
    expected = self.g1.copy()
    expected.index = pd.Index([1, 2])
    assert_geoseries_equal(expected, GeoSeries.from_wkb(wkb_series))
def test_from_wkb_series_with_index(self):
    """An explicit `index=` reindexes the result, dropping non-matching rows."""
    index = [0]
    s = pd.Series([self.t1.wkb, self.sq.wkb], index=[0, 2])
    expected = self.g1.reindex(index)
    assert_geoseries_equal(expected, GeoSeries.from_wkb(s, index=index))
def test_from_wkt(self):
    """GeoSeries.from_wkt on WKT strings reproduces the original geometries."""
    assert_geoseries_equal(self.g1, GeoSeries.from_wkt([self.t1.wkt, self.sq.wkt]))
def test_from_wkt_on_invalid(self):
    """from_wkt honors on_invalid='raise'/'warn'/'ignore' for unparsable WKT."""
    # Single point LineString WKT: invalid
    invalid_wkt = "LINESTRING(0 0)"
    message = "point array must contain 0 or >1 elements"
    with pytest.raises(Exception, match=message):
        GeoSeries.from_wkt([invalid_wkt], on_invalid="raise")
    with pytest.warns(Warning, match=message):
        res = GeoSeries.from_wkt([invalid_wkt], on_invalid="warn")
        assert res[0] is None
    with warnings.catch_warnings():
        # turn warnings into errors to prove "ignore" stays silent
        warnings.simplefilter("error")
        res = GeoSeries.from_wkt([invalid_wkt], on_invalid="ignore")
        assert res[0] is None
def test_from_wkt_series(self):
    """from_wkt on a pandas Series preserves the Series' index."""
    wkt_series = pd.Series([self.t1.wkt, self.sq.wkt], index=[1, 2])
    expected = self.g1.copy()
    expected.index = pd.Index([1, 2])
    assert_geoseries_equal(expected, GeoSeries.from_wkt(wkt_series))
def test_from_wkt_series_with_index(self):
    """An explicit `index=` reindexes the result, dropping non-matching rows."""
    index = [0]
    s = pd.Series([self.t1.wkt, self.sq.wkt], index=[0, 2])
    expected = self.g1.reindex(index)
    assert_geoseries_equal(expected, GeoSeries.from_wkt(s, index=index))
def test_to_wkb(self):
    """to_wkb emits raw bytes by default and hex strings with hex=True."""
    assert_series_equal(pd.Series([self.t1.wkb, self.sq.wkb]), self.g1.to_wkb())
    assert_series_equal(
        pd.Series([self.t1.wkb_hex, self.sq.wkb_hex]), self.g1.to_wkb(hex=True)
    )
def test_to_wkt(self):
    """to_wkt serializes each geometry to its WKT string."""
    assert_series_equal(pd.Series([self.t1.wkt, self.sq.wkt]), self.g1.to_wkt())
def test_clip(self, naturalearth_lowres, naturalearth_cities):
    """GeoSeries.clip matches the top-level geopandas.clip function."""
    cities = read_file(naturalearth_cities)
    world = read_file(naturalearth_lowres)
    mask = world[world["continent"] == "South America"]
    assert_geoseries_equal(cities.geometry.clip(mask), clip(cities.geometry, mask))
def test_clip_sorting(self, naturalearth_cities, naturalearth_lowres):
    """
    Test sorting of GeoSeries when clipping.

    With sort=False the result keeps spatial-join order; with sort=True the
    original index order is restored.
    """
    cities = read_file(naturalearth_cities)
    world = read_file(naturalearth_lowres)
    south_america = world[world["continent"] == "South America"]
    unsorted_clipped_cities = clip(cities, south_america, sort=False)
    sorted_clipped_cities = clip(cities, south_america, sort=True)
    # expected index values for the sorted result (depends on fixture data)
    expected_sorted_index = pd.Index(
        [55, 59, 62, 88, 101, 114, 122, 169, 181, 189, 210, 230, 236, 238, 239]
    )
    assert not (
        sorted(unsorted_clipped_cities.index) == unsorted_clipped_cities.index
    ).all()
    assert (
        sorted(sorted_clipped_cities.index) == sorted_clipped_cities.index
    ).all()
    assert_index_equal(expected_sorted_index, sorted_clipped_cities.index)
def test_from_xy_points(self):
    """from_xy reconstructs the series from its own x/y columns, index and crs."""
    x = self.landmarks.x.values
    y = self.landmarks.y.values
    index = self.landmarks.index.tolist()
    crs = self.landmarks.crs
    assert_geoseries_equal(
        self.landmarks, GeoSeries.from_xy(x, y, index=index, crs=crs)
    )
    # passing pandas Series lets from_xy pick the index up implicitly
    assert_geoseries_equal(
        self.landmarks,
        GeoSeries.from_xy(self.landmarks.x, self.landmarks.y, crs=crs),
    )
def test_from_xy_points_w_z(self):
    """from_xy with a z argument builds 3D points and keeps the shared index."""
    index_values = [5, 6, 7]
    x = pd.Series([0, -1, 2], index=index_values)
    y = pd.Series([8, 3, 1], index=index_values)
    z = pd.Series([5, -6, 7], index=index_values)
    expected = GeoSeries(
        [Point(0, 8, 5), Point(-1, 3, -6), Point(2, 1, 7)], index=index_values
    )
    assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z))
def test_from_xy_points_unequal_index(self):
    """When x and y have different indices, `index=` or a default range wins."""
    x = self.landmarks.x
    y = self.landmarks.y
    # deliberately give y a conflicting index
    y.index = -np.arange(len(y))
    crs = self.landmarks.crs
    # explicit index= takes precedence over the mismatched Series indices
    assert_geoseries_equal(
        self.landmarks, GeoSeries.from_xy(x, y, index=x.index, crs=crs)
    )
    # without index=, the result falls back to a default RangeIndex
    unindexed_landmarks = self.landmarks.copy()
    unindexed_landmarks.reset_index(inplace=True, drop=True)
    assert_geoseries_equal(
        unindexed_landmarks,
        GeoSeries.from_xy(x, y, crs=crs),
    )
def test_from_xy_points_indexless(self):
    """from_xy accepts plain numpy arrays (no index) including a z coordinate."""
    x = np.array([0.0, 3.0])
    y = np.array([2.0, 5.0])
    z = np.array([-1.0, 4.0])
    expected = GeoSeries([Point(0, 2, -1), Point(3, 5, 4)])
    assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z))
@pytest.mark.skipif(compat.HAS_PYPROJ, reason="pyproj installed")
def test_set_crs_pyproj_error(self):
    """Without pyproj installed, set_crs must fail with a clear ImportError."""
    with pytest.raises(
        ImportError, match="The 'pyproj' package is required for set_crs"
    ):
        self.g1.set_crs(3857)
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_missing_values():
    """None/NaN normalize to None and are distinct from empty geometries."""
    s = GeoSeries([Point(1, 1), None, np.nan, GeometryCollection(), Polygon()])
    # construction -> missing values get normalized to None
    assert s[1] is None
    assert s[2] is None
    assert s[3].is_empty
    assert s[4].is_empty
    # isna / is_empty
    assert s.isna().tolist() == [False, True, True, False, False]
    assert s.is_empty.tolist() == [False, False, False, True, True]
    assert s.notna().tolist() == [True, False, False, True, True]
    # fillna defaults to fill with empty geometry -> no missing values anymore
    assert not s.fillna().isna().any()
    # dropna drops the missing values
    assert not s.dropna().isna().any()
    assert len(s.dropna()) == 3
def test_isna_empty_geoseries():
    """isna() on an empty GeoSeries must keep the bool dtype."""
    empty = GeoSeries([])
    assert_series_equal(empty.isna(), pd.Series([], dtype="bool"))
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
def test_geoseries_crs():
    """set_crs accepts non-EPSG authority codes (here IGNF) and preserves them."""
    gs = GeoSeries().set_crs("IGNF:ETRS89UTM28")
    assert gs.crs.to_authority() == ("IGNF", "ETRS89UTM28")
@pytest.mark.skipif(not compat.HAS_PYPROJ, reason="Requires pyproj")
def test_geoseries_override_existing_crs_warning():
    """Assigning .crs over an existing CRS emits a DeprecationWarning."""
    gs = GeoSeries(crs="epsg:4326")
    with pytest.warns(
        DeprecationWarning,
        match="Overriding the CRS of a GeoSeries that already has CRS",
    ):
        gs.crs = "epsg:2100"
# -----------------------------------------------------------------------------
# # Constructor tests
# -----------------------------------------------------------------------------
def check_geoseries(s):
    """Assert *s* behaves as a GeoSeries at every level (series, dtype, array)."""
    checks = (
        (s, GeoSeries),
        (s.geometry, GeoSeries),
        (s.dtype, GeometryDtype),
        (s.values, GeometryArray),
    )
    for obj, expected_type in checks:
        assert isinstance(obj, expected_type)
class TestConstructor:
    """Tests for the GeoSeries constructor and frame/explode conversions."""

    def test_constructor(self):
        """A list of geometries yields a fully geometry-typed GeoSeries."""
        s = GeoSeries([Point(x, x) for x in range(3)])
        check_geoseries(s)

    def test_single_geom_constructor(self):
        """A single scalar geometry is broadcast over the (optional) index."""
        p = Point(1, 2)
        line = LineString([(2, 3), (4, 5), (5, 6)])
        poly = Polygon(
            [(0, 0), (1, 0), (1, 1), (0, 1)], [[(0.1, 0.1), (0.9, 0.1), (0.9, 0.9)]]
        )
        mp = MultiPoint([(1, 2), (3, 4), (5, 6)])
        mline = MultiLineString([[(1, 2), (3, 4), (5, 6)], [(7, 8), (9, 10)]])
        poly2 = Polygon(
            [(0, 0), (0, -1), (-1, -1), (-1, 0)],
            [[(-0.1, -0.1), (-0.1, -0.5), (-0.5, -0.5), (-0.5, -0.1)]],
        )
        mpoly = MultiPolygon([poly, poly2])
        geoms = [p, line, poly, mp, mline, mpoly]
        index = ["a", "b", "c", "d"]
        for g in geoms:
            gs = GeoSeries(g)
            assert len(gs) == 1
            # accessing elements no longer give identical objects
            assert gs.iloc[0].equals(g)
            gs = GeoSeries(g, index=index)
            assert len(gs) == len(index)
            for x in gs:
                assert x.equals(g)

    def test_non_geometry_raises(self):
        """Non-geometry data must be rejected with a TypeError."""
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries([True, False, True])
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries(["a", "b", "c"])
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries([[1, 2], [3, 4]])

    def test_empty(self):
        """Empty-list and no-argument construction both give a GeoSeries."""
        s = GeoSeries([])
        check_geoseries(s)
        s = GeoSeries()
        check_geoseries(s)

    def test_data_is_none(self):
        """Index-only construction (data=None) still yields geometry dtype."""
        s = GeoSeries(index=range(3))
        check_geoseries(s)

    def test_empty_array(self):
        """Empty input arrays fall back to geometry dtype only when plausible."""
        # with empty data that have an explicit dtype, we use the fallback or
        # not depending on the dtype
        # dtypes that can never hold geometry-like data
        for arr in [
            np.array([], dtype="bool"),
            np.array([], dtype="int64"),
            np.array([], dtype="float32"),
            # this gets converted to object dtype by pandas
            # np.array([], dtype="str"),
        ]:
            with pytest.raises(
                TypeError, match="Non geometry data passed to GeoSeries"
            ):
                GeoSeries(arr)
        # dtypes that can potentially hold geometry-like data (object) or
        # can come from empty data (float64)
        for arr in [
            np.array([], dtype="object"),
            np.array([], dtype="float64"),
            np.array([], dtype="str"),
        ]:
            with warnings.catch_warnings(record=True) as record:
                s = GeoSeries(arr)
            assert not record
            assert isinstance(s, GeoSeries)

    def test_from_series(self):
        """Constructing from a pandas Series keeps values, name and index."""
        shapes = [
            Polygon([(random.random(), random.random()) for _ in range(3)])
            for _ in range(10)
        ]
        s = pd.Series(shapes, index=list("abcdefghij"), name="foo")
        g = GeoSeries(s)
        check_geoseries(g)
        assert [a.equals(b) for a, b in zip(s, g)]
        assert s.name == g.name
        assert s.index is g.index

    @pytest.mark.skipif(not compat.HAS_PYPROJ, reason="pyproj not available")
    def test_from_series_no_set_crs_on_construction(self):
        """Wrapping a Series[geometry] must not mutate the source's CRS."""
        # https://github.com/geopandas/geopandas/issues/2492
        # also when passing Series[geometry], ensure we don't change crs of
        # original data
        gs = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
        s = pd.Series(gs)
        result = GeoSeries(s, crs=4326)
        assert s.values.crs is None
        assert gs.crs is None
        assert result.crs == "EPSG:4326"

    def test_copy(self):
        """Constructor copies by default (under CoW); copy=False shares data."""
        # default is to copy with CoW / pandas 3+
        arr = np.array([Point(x, x) for x in range(3)], dtype=object)
        result = GeoSeries(arr)
        # modifying result doesn't change original array
        result.loc[0] = Point(10, 10)
        if compat.PANDAS_GE_30 or getattr(pd.options.mode, "copy_on_write", False):
            assert arr[0] == Point(0, 0)
        else:
            assert arr[0] == Point(10, 10)
        # avoid copy with copy=False
        arr = np.array([Point(x, x) for x in range(3)], dtype=object)
        result = GeoSeries(arr, copy=False)
        assert result.array._data.flags.writeable
        # now modifying result also updates original array
        result.loc[0] = Point(10, 10)
        assert arr[0] == Point(10, 10)

    # GH 1216
    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
    def test_reset_index(self, name, crs):
        """reset_index after explode yields a GeoDataFrame with name/crs kept."""
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])],
            name=name,
            crs=crs,
        )
        s = s.explode(index_parts=True)
        df = s.reset_index()
        assert type(df) == GeoDataFrame
        # name None -> 0, otherwise name preserved
        assert df.geometry.name == (name if name is not None else 0)
        assert df.crs == s.crs

    @pytest.mark.parametrize("name", [None, "geometry", "Points"])
    @pytest.mark.parametrize("crs", [None, "epsg:4326"])
    def test_to_frame(self, name, crs):
        """to_frame yields a GeoDataFrame; an explicit name overrides s.name."""
        s = GeoSeries([Point(0, 0), Point(1, 1)], name=name, crs=crs)
        df = s.to_frame()
        assert type(df) == GeoDataFrame
        # name None -> 0, otherwise name preserved
        expected_name = name if name is not None else 0
        assert df.geometry.name == expected_name
        assert df._geometry_column_name == expected_name
        assert df.crs == s.crs
        # if name is provided to to_frame, it should override
        df2 = s.to_frame(name="geom")
        # BUG FIX: previously re-checked `df` here instead of `df2`
        assert type(df2) == GeoDataFrame
        assert df2.geometry.name == "geom"
        assert df2.crs == s.crs

    def test_explode_without_multiindex(self):
        """explode(index_parts=False) repeats the original index labels."""
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
        )
        s = s.explode(index_parts=False)
        expected_index = pd.Index([0, 0, 1, 1, 1])
        assert_index_equal(s.index, expected_index)

    def test_explode_ignore_index(self):
        """explode(ignore_index=True) yields a fresh RangeIndex."""
        s = GeoSeries(
            [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
        )
        s = s.explode(ignore_index=True)
        expected_index = pd.Index(range(len(s)))
        assert_index_equal(s.index, expected_index)
        # index_parts is ignored if ignore_index=True
        s = s.explode(index_parts=True, ignore_index=True)
        assert_index_equal(s.index, expected_index)

View File

@@ -1,230 +0,0 @@
import warnings
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_21
import pytest
from geopandas.testing import assert_geodataframe_equal
from pandas.testing import assert_index_equal
class TestMerging:
    """Tests that pd.merge / pd.concat preserve GeoDataFrame metadata."""

    def setup_method(self):
        self.gseries = GeoSeries([Point(i, i) for i in range(3)])
        self.series = pd.Series([1, 2, 3])
        self.gdf = GeoDataFrame({"geometry": self.gseries, "values": range(3)})
        self.df = pd.DataFrame({"col1": [1, 2, 3], "col2": [0.1, 0.2, 0.3]})

    def _check_metadata(self, gdf, geometry_column_name="geometry", crs=None):
        # helper: assert the active geometry column name and CRS survived
        assert gdf._geometry_column_name == geometry_column_name
        assert gdf.crs == crs

    def test_merge(self):
        """merge keeps GeoDataFrame type, geometry column name and CRS."""
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
        # check result is a GeoDataFrame
        assert isinstance(res, GeoDataFrame)
        # check geometry property gives GeoSeries
        assert isinstance(res.geometry, GeoSeries)
        # check metadata
        self._check_metadata(res)
        # test that crs and other geometry name are preserved
        self.gdf.crs = "epsg:4326"
        self.gdf = self.gdf.rename(columns={"geometry": "points"}).set_geometry(
            "points"
        )
        res = self.gdf.merge(self.df, left_on="values", right_on="col1")
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res, "points", self.gdf.crs)

    def test_concat_axis0(self):
        """Row-wise concat preserves GeoDataFrame / GeoSeries types."""
        # frame
        res = pd.concat([self.gdf, self.gdf])
        assert res.shape == (6, 2)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)
        exp = GeoDataFrame(pd.concat([pd.DataFrame(self.gdf), pd.DataFrame(self.gdf)]))
        assert_geodataframe_equal(exp, res)
        # series
        res = pd.concat([self.gdf.geometry, self.gdf.geometry])
        assert res.shape == (6,)
        assert isinstance(res, GeoSeries)
        assert isinstance(res.geometry, GeoSeries)

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_crs(self):
        """Row-wise concat resolves CRS: warn on partial, raise on conflict."""
        # CRS not set for both GeoDataFrame
        res = pd.concat([self.gdf, self.gdf])
        self._check_metadata(res)
        # CRS set for both GeoDataFrame, same CRS
        res1 = pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")])
        self._check_metadata(res1, crs="epsg:4326")
        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrame
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res2 = pd.concat([self.gdf, self.gdf.set_crs("epsg:4326")])
            self._check_metadata(res2, crs="epsg:4326")
        # CRS set for both GeoDataFrame, different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat([self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")])
        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # same CRS
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            res3 = pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4326")]
            )
            self._check_metadata(res3, crs="epsg:4326")
        # CRS not set for one GeoDataFrame, but set for the other GeoDataFrames,
        # different CRS
        with pytest.raises(
            ValueError, match=r"Cannot determine common CRS for concatenation inputs.*"
        ):
            pd.concat(
                [self.gdf, self.gdf.set_crs("epsg:4326"), self.gdf.set_crs("epsg:4327")]
            )

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
    def test_concat_axis0_unaligned_cols(self):
        """Concat with unaligned geometry columns must not warn spuriously."""
        # https://github.com/geopandas/geopandas/issues/2679
        gdf = self.gdf.set_crs("epsg:4326").assign(
            geom=self.gdf.geometry.set_crs("epsg:4327")
        )
        both_geom_cols = gdf[["geom", "geometry"]]
        single_geom_col = gdf[["geometry"]]
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, single_geom_col])
        # Check order of mismatch doesn't matter
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([single_geom_col, both_geom_cols])
        # Side effect of this fix, explicitly provided all none geoseries
        # will not be warned for (ideally this would still warn)
        explicit_all_none_case = gdf[["geometry"]].assign(
            geom=GeoSeries([None for _ in range(len(gdf))])
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            pd.concat([both_geom_cols, explicit_all_none_case])
        # Check concat with partially None col is not affected by the special casing
        # for all None no CRS handling
        with pytest.warns(
            UserWarning, match=r"CRS not set for some of the concatenation inputs.*"
        ):
            partial_none_case = self.gdf[["geometry"]]
            partial_none_case.iloc[0] = None
            pd.concat([single_geom_col, partial_none_case])

    def test_concat_axis0_crs_wkt_mismatch(self):
        """Equal CRS with different WKT bytes must still concat successfully."""
        pyproj = pytest.importorskip("pyproj")
        # https://github.com/geopandas/geopandas/issues/326#issuecomment-1727958475
        wkt_template = """GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],PRIMEM["Greenwich",0,
    ANGLEUNIT["degree",0.0174532925199433]],CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,ORDER[1],
        ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,ORDER[2],
        ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[SCOPE["Horizontal component of 3D system."],
        AREA["World.{}"],BBOX[-90,-180,90,180]],ID["EPSG",4326]]"""
        wkt_v1 = wkt_template.format("")
        wkt_v2 = wkt_template.format(" ")  # add additional whitespace
        crs1 = pyproj.CRS.from_wkt(wkt_v1)
        crs2 = pyproj.CRS.from_wkt(wkt_v2)
        # pyproj crs __hash__ based on WKT strings means these are distinct in a
        # set are but equal by equality
        assert len({crs1, crs2}) == 2
        assert crs1 == crs2
        expected = pd.concat([self.gdf, self.gdf]).set_crs(crs1)
        res = pd.concat([self.gdf.set_crs(crs1), self.gdf.set_crs(crs2)])
        assert_geodataframe_equal(expected, res)

    def test_concat_axis1(self):
        """Column-wise concat keeps GeoDataFrame type and metadata."""
        res = pd.concat([self.gdf, self.df], axis=1)
        assert res.shape == (3, 4)
        assert isinstance(res, GeoDataFrame)
        assert isinstance(res.geometry, GeoSeries)
        self._check_metadata(res)

    def test_concat_axis1_multiple_geodataframes(self):
        """Duplicate active-geometry column names must fail gracefully."""
        # https://github.com/geopandas/geopandas/issues/1230
        # Expect that concat should fail gracefully if duplicate column names belonging
        # to geometry columns are introduced.
        if PANDAS_GE_21:
            # _constructor_from_mgr changes mean we now get the concat specific error
            # message in this case too
            expected_err = (
                "Concat operation has resulted in multiple columns using the geometry "
                "column name 'geometry'."
            )
        else:
            expected_err = (
                "GeoDataFrame does not support multiple columns using the geometry"
                " column name 'geometry'"
            )
        with pytest.raises(ValueError, match=expected_err):
            pd.concat([self.gdf, self.gdf], axis=1)
        # Check case is handled if custom geometry column name is used
        df2 = self.gdf.rename_geometry("geom")
        expected_err2 = (
            "Concat operation has resulted in multiple columns using the geometry "
            "column name 'geom'."
        )
        with pytest.raises(ValueError, match=expected_err2):
            pd.concat([df2, df2], axis=1)
        if HAS_PYPROJ:
            # Check that two geometry columns is fine, if they have different names
            res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1)
            # check metadata comes from first df
            self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326")

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_concat_axis1_geoseries(self):
        """Concat of bare GeoSeries gives a GeoDataFrame with no active geometry."""
        gseries2 = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")
        result = pd.concat([gseries2, self.gseries], axis=1)
        # Note this is not consistent with concat([gdf, gdf], axis=1) where the
        # left metadata is set on the result. This is deliberate for now.
        assert type(result) is GeoDataFrame
        assert result._geometry_column_name is None
        assert_index_equal(pd.Index([0, 1]), result.columns)
        gseries2.name = "foo"
        result2 = pd.concat([gseries2, self.gseries], axis=1)
        assert type(result2) is GeoDataFrame
        # BUG FIX: previously re-asserted on `result` instead of `result2`
        assert result2._geometry_column_name is None
        assert_index_equal(pd.Index(["foo", 0]), result2.columns)

View File

@@ -1,411 +0,0 @@
import numpy as np
import pandas as pd
from shapely.geometry import Point
import geopandas
from geopandas import GeoDataFrame, GeoSeries
import pytest
from geopandas.testing import assert_geodataframe_equal
# skip this whole module when pyproj is unavailable
pyproj = pytest.importorskip("pyproj")

# two distinct CRS so tests can tell geometry columns apart
crs_osgb = pyproj.CRS(27700)  # OSGB / British National Grid
crs_wgs = pyproj.CRS(4326)  # WGS 84 lon/lat

# number of rows in the `df` fixture
N = 10
@pytest.fixture(params=["geometry", "point"])
def df(request):
    """GeoDataFrame fixture, parametrized on the active geometry column name.

    Columns: value1, value2 (plain), the active geometry column, and
    'geometry2' — a non-active geometry column with a different CRS.
    """
    geo_name = request.param
    df = GeoDataFrame(
        [
            {
                "value1": x + y,
                "value2": x * y,
                geo_name: Point(x, y),  # rename this col in tests
            }
            for x, y in zip(range(N), range(N))
        ],
        crs=crs_wgs,
        geometry=geo_name,
    )
    # want geometry2 to be a GeoSeries not Series, test behaviour of non geom col
    df["geometry2"] = df[geo_name].set_crs(crs_osgb, allow_override=True)
    return df
@pytest.fixture
def df2():
    """For constructor_sliced tests: three geometry columns plus value columns."""
    return GeoDataFrame(
        {
            "geometry": GeoSeries([Point(x, x) for x in range(3)]),
            "geometry2": GeoSeries([Point(x, x) for x in range(3)]),
            "geometry3": GeoSeries([Point(x, x) for x in range(3)]),
            "value": [1, 2, 1],
            "value_nan": np.nan,
        }
    )
def _check_metadata_gdf(gdf, geo_name="geometry", crs=crs_wgs):
    """Assert the GeoDataFrame's active geometry name and CRS match."""
    assert gdf._geometry_column_name == geo_name
    assert gdf.geometry.name == geo_name
    assert gdf.crs == crs
def _check_metadata_gs(gs, name="geometry", crs=crs_wgs):
    """Assert the GeoSeries' name and CRS match."""
    assert gs.name == name
    assert gs.crs == crs
def assert_object(result, expected_type, geo_name="geometry", crs=crs_wgs):
    """
    Helper method to make tests easier to read. Checks result is of the expected
    type. If result is a GeoDataFrame or GeoSeries, checks geo_name
    and crs match. If geo_name is None, then we expect a GeoDataFrame
    where the geometry column is invalid/ isn't set. This is never desirable,
    but is a reality of this first stage of implementation.
    """
    # exact type check on purpose: subclass relationships would hide regressions
    assert type(result) is expected_type

    if expected_type == GeoDataFrame:
        assert geo_name is not None
        _check_metadata_gdf(result, geo_name=geo_name, crs=crs)
    elif expected_type == GeoSeries:
        _check_metadata_gs(result, name=geo_name, crs=crs)
def assert_obj_no_active_geo_col(result, expected_type, geo_colname=None):
    """
    Helper method to make tests easier to read. Checks result is of the expected
    type. Asserts that accessing result.geometry.name raises, corresponding to
    _geometry_column_name being in an invalid state
    (either None, or a column no longer present)
    This amounts to testing the assertion raised (geometry column is unset, vs
    old geometry column is missing)
    We assert that _geometry_column_name = int_geo_colname
    """
    if expected_type == GeoDataFrame:
        if geo_colname is None:
            assert result._geometry_column_name is None
        else:
            assert geo_colname == result._geometry_column_name

        # the expected error message depends on whether the geometry column
        # was never set (None) or was set but is no longer present
        if result._geometry_column_name is None:
            msg = (
                "You are calling a geospatial method on the GeoDataFrame, "
                "but the active"
            )
        else:
            msg = (
                "You are calling a geospatial method on the GeoDataFrame, but "
                r"the active geometry column \("
                rf"'{result._geometry_column_name}'\) is not present"
            )
        with pytest.raises(AttributeError, match=msg):
            result.geometry.name  # be explicit that geometry is invalid here
    else:
        # only the GeoDataFrame case is supported by this helper
        raise NotImplementedError()
def test_getitem(df):
    """Column selection via [] downcasts to the right (Geo)DataFrame/Series type."""
    geo_name = df.geometry.name
    assert_object(df[["value1", "value2"]], pd.DataFrame)
    assert_object(df[[geo_name, "geometry2"]], GeoDataFrame, geo_name)
    assert_object(df[[geo_name]], GeoDataFrame, geo_name)
    # selections that drop the active geometry column keep GeoDataFrame type
    # but leave the active geometry in an invalid state
    assert_obj_no_active_geo_col(df[["geometry2", "value1"]], GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(df[["geometry2"]], GeoDataFrame, geo_name)
    assert_object(df[["value1"]], pd.DataFrame)

    # Series
    assert_object(df[geo_name], GeoSeries, geo_name)
    assert_object(df["geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"], pd.Series)
def test_loc(df):
    """Column selection via .loc mirrors the []-indexing type behaviour."""
    geo_name = df.geometry.name
    assert_object(df.loc[:, ["value1", "value2"]], pd.DataFrame)
    assert_object(df.loc[:, [geo_name, "geometry2"]], GeoDataFrame, geo_name)
    assert_object(df.loc[:, [geo_name]], GeoDataFrame, geo_name)
    # dropping the active geometry column leaves an invalid geometry state
    assert_obj_no_active_geo_col(
        df.loc[:, ["geometry2", "value1"]], GeoDataFrame, geo_name
    )
    assert_obj_no_active_geo_col(df.loc[:, ["geometry2"]], GeoDataFrame, geo_name)
    assert_object(df.loc[:, ["value1"]], pd.DataFrame)

    # Series
    assert_object(df.loc[:, geo_name], GeoSeries, geo_name)
    assert_object(df.loc[:, "geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.loc[:, "value1"], pd.Series)
@pytest.mark.parametrize(
    "geom_name",
    [
        "geometry",
        pytest.param(
            "geom",
            marks=pytest.mark.xfail(
                reason="pre-regression behaviour only works for geometry col geometry"
            ),
        ),
    ],
)
def test_loc_add_row(geom_name, nybb_filename):
    """Appending a row via .loc keeps geometry dtype (CRS is known to regress)."""
    # https://github.com/geopandas/geopandas/issues/3119
    nybb = geopandas.read_file(nybb_filename)[["BoroCode", "geometry"]]
    if geom_name != "geometry":
        nybb = nybb.rename_geometry(geom_name)
    # crs_orig = nybb.crs
    # add a new row
    nybb.loc[5] = [6, nybb.geometry.iloc[0]]
    assert nybb.geometry.dtype == "geometry"
    assert nybb.crs is None  # TODO this should be crs_orig, regressed in #2373
def test_iloc(df):
    """Positional column selection via .iloc mirrors the label-based behaviour."""
    geo_name = df.geometry.name
    # columns 0-1 are value1/value2, 2 is the active geometry, 3 is geometry2
    assert_object(df.iloc[:, 0:2], pd.DataFrame)
    assert_object(df.iloc[:, 2:4], GeoDataFrame, geo_name)
    assert_object(df.iloc[:, [2]], GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(df.iloc[:, [3, 0]], GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(df.iloc[:, [3]], GeoDataFrame, geo_name)
    assert_object(df.iloc[:, [0]], pd.DataFrame)

    # Series
    assert_object(df.iloc[:, 2], GeoSeries, geo_name)
    assert_object(df.iloc[:, 3], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.iloc[:, 0], pd.Series)
def test_squeeze(df):
    """Squeezing a single-geometry-column frame yields a GeoSeries with its CRS."""
    geo_name = df.geometry.name
    squeezed_active = df[[geo_name]].squeeze()
    assert_object(squeezed_active, GeoSeries, geo_name)
    squeezed_other = df[["geometry2"]].squeeze()
    assert_object(squeezed_other, GeoSeries, "geometry2", crs=crs_osgb)
def test_to_frame(df):
    """to_frame of a geometry column gives a GeoDataFrame; plain columns don't."""
    geo_name = df.geometry.name
    active = df[geo_name].to_frame()
    assert_object(active, GeoDataFrame, geo_name, crs=df[geo_name].crs)
    other = df["geometry2"].to_frame()
    assert_object(other, GeoDataFrame, "geometry2", crs=crs_osgb)
    plain = df["value1"].to_frame()
    assert_object(plain, pd.DataFrame)
def test_reindex(df):
    """reindex over columns/rows keeps or downgrades the frame type correctly."""
    geo_name = df.geometry.name
    assert_object(df.reindex(columns=["value1", "value2"]), pd.DataFrame)
    assert_object(df.reindex(columns=[geo_name, "geometry2"]), GeoDataFrame, geo_name)
    assert_object(df.reindex(columns=[geo_name]), GeoDataFrame, geo_name)
    # an unknown column is added as all-NaN but the geometry survives
    assert_object(df.reindex(columns=["new_col", geo_name]), GeoDataFrame, geo_name)
    # dropping the active geometry leaves an invalid geometry state
    assert_obj_no_active_geo_col(
        df.reindex(columns=["geometry2", "value1"]), GeoDataFrame, geo_name
    )
    assert_obj_no_active_geo_col(
        df.reindex(columns=["geometry2"]), GeoDataFrame, geo_name
    )
    assert_object(df.reindex(columns=["value1"]), pd.DataFrame)

    # reindexing the rows always preserves the GeoDataFrame
    assert_object(df.reindex(index=[0, 1, 20]), GeoDataFrame, geo_name)

    # reindexing both rows and columns
    assert_object(
        df.reindex(index=[0, 1, 20], columns=[geo_name]), GeoDataFrame, geo_name
    )
    assert_object(df.reindex(index=[0, 1, 20], columns=["value1"]), pd.DataFrame)
def test_drop(df):
    """drop(columns=...): the result type is determined by which geometry
    columns remain after the drop."""
    geo_name = df.geometry.name
    # every geometry column dropped -> downgraded to a plain DataFrame
    assert_object(df.drop(columns=[geo_name, "geometry2"]), pd.DataFrame)
    # active geometry column survives -> still a GeoDataFrame with it active
    assert_object(df.drop(columns=["value1", "value2"]), GeoDataFrame, geo_name)
    dropped_cols = ["value1", "value2", "geometry2"]
    assert_object(df.drop(columns=dropped_cols), GeoDataFrame, geo_name)
    # only the secondary geometry column left -> no active geometry
    assert_obj_no_active_geo_col(
        df.drop(columns=[geo_name, "value2"]), GeoDataFrame, geo_name
    )
    assert_obj_no_active_geo_col(
        df.drop(columns=["value1", "value2", geo_name]), GeoDataFrame, geo_name
    )
    assert_object(df.drop(columns=["geometry2", "value2", geo_name]), pd.DataFrame)
def test_apply(df):
    """apply with an identity function should round-trip to the same
    geo-aware (or plain) container types as the equivalent getitem."""
    geo_name = df.geometry.name
    def identity(x):
        return x
    # axis = 0
    assert_object(df[["value1", "value2"]].apply(identity), pd.DataFrame)
    assert_object(df[[geo_name, "geometry2"]].apply(identity), GeoDataFrame, geo_name)
    assert_object(df[[geo_name]].apply(identity), GeoDataFrame, geo_name)
    # secondary geometry column only -> GeoDataFrame without active geometry
    res = df[["geometry2", "value1"]].apply(identity)
    assert_obj_no_active_geo_col(res, GeoDataFrame, geo_name)
    assert_obj_no_active_geo_col(
        df[["geometry2"]].apply(identity), GeoDataFrame, geo_name
    )
    assert_object(df[["value1"]].apply(identity), pd.DataFrame)
    # axis = 0, Series
    assert_object(df[geo_name].apply(identity), GeoSeries, geo_name)
    assert_object(df["geometry2"].apply(identity), GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"].apply(identity), pd.Series)
    # axis = 0, Series, no longer geometry
    assert_object(df[geo_name].apply(lambda x: str(x)), pd.Series)
    assert_object(df["geometry2"].apply(lambda x: str(x)), pd.Series)
    # axis = 1
    assert_object(df[["value1", "value2"]].apply(identity, axis=1), pd.DataFrame)
    assert_object(
        df[[geo_name, "geometry2"]].apply(identity, axis=1), GeoDataFrame, geo_name
    )
    assert_object(df[[geo_name]].apply(identity, axis=1), GeoDataFrame, geo_name)
    # TODO below should be a GeoDataFrame to be consistent with new getitem logic
    # leave as follow up as quite complicated
    # FrameColumnApply.series_generator returns object dtypes Series, so will have
    # patch result of apply
    assert_object(df[["geometry2", "value1"]].apply(identity, axis=1), pd.DataFrame)
    assert_object(df[["value1"]].apply(identity, axis=1), pd.DataFrame)
def test_apply_axis1_secondary_geo_cols(df):
    """Row-wise apply over only a secondary geometry column keeps a
    GeoDataFrame without an active geometry column."""
    geo_name = df.geometry.name

    def passthrough(row):
        return row

    result = df[["geometry2"]].apply(passthrough, axis=1)
    assert_obj_no_active_geo_col(result, GeoDataFrame, geo_name)
def test_expanddim_in_apply():
    """apply returning a pd.Series per element expands to a plain DataFrame."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    points = GeoSeries.from_xy([0, 1], [0, 1])
    coords = points.apply(lambda pt: pd.Series([pt.x, pt.y]))
    assert_object(coords, pd.DataFrame)
def test_expandim_in_groupby_aggregate_multiple_funcs():
    """Aggregating a grouped GeoSeries with multiple functions goes through
    _constructor_expanddim; a mixed geometry/scalar result should be a
    GeoDataFrame without an active geometry column, an all-scalar result a
    plain DataFrame."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    # There are two calls to _constructor_expanddim here
    # SeriesGroupBy._aggregate_multiple_funcs() and
    # SeriesGroupBy._wrap_series_output() len(output) > 1
    s = GeoSeries.from_xy([0, 1, 2], [0, 1, 3])
    def union(s):
        # geometry-valued aggregation
        return s.union_all()
    def total_area(s):
        # scalar-valued aggregation
        return s.area.sum()
    grouped = s.groupby([0, 1, 0])
    agg = grouped.agg([total_area, union])
    assert_obj_no_active_geo_col(agg, GeoDataFrame, geo_colname=None)
    # order of the aggregation functions should not matter
    result = grouped.agg([union, total_area])
    assert_obj_no_active_geo_col(result, GeoDataFrame, geo_colname=None)
    # no geometry-valued column in the output -> plain DataFrame
    assert_object(grouped.agg([total_area, total_area]), pd.DataFrame)
    assert_object(grouped.agg([total_area]), pd.DataFrame)
def test_expanddim_in_unstack():
    """unstack of a MultiIndexed GeoSeries expands to a GeoDataFrame with no
    active geometry column (the column labels come from the index level)."""
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    s = GeoSeries.from_xy(
        [0, 1, 2],
        [0, 1, 3],
        index=pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "a")]),
    )
    unstack = s.unstack()
    expected_geo_name = None
    assert_obj_no_active_geo_col(unstack, GeoDataFrame, geo_colname=expected_geo_name)
    # https://github.com/geopandas/geopandas/issues/2486
    # even a series named "geometry" must not end up as the active column
    s.name = "geometry"
    unstack = s.unstack()
    assert_obj_no_active_geo_col(unstack, GeoDataFrame, expected_geo_name)
# indexing / constructor_sliced tests
# Column combinations used to parametrize the row-slice tests below: each
# geometry column alone, both together, and mixes with plain / all-NaN columns.
test_case_column_sets = [
    ["geometry"],
    ["geometry2"],
    ["geometry", "geometry2"],
    # non active geo col case
    ["geometry", "value"],
    ["geometry", "value_nan"],
    ["geometry2", "value"],
    ["geometry2", "value_nan"],
]
@pytest.mark.parametrize(
    "column_set",
    test_case_column_sets,
    ids=[", ".join(i) for i in test_case_column_sets],
)
def test_constructor_sliced_row_slices(df2, column_set):
    """Row slices (``.loc[label]``) must be plain ``pd.Series`` — never
    GeoSeries — even when the selected columns hold geometries.

    https://github.com/geopandas/geopandas/issues/2282
    """
    df_subset = df2[column_set]
    assert isinstance(df_subset, GeoDataFrame)
    res = df_subset.loc[0]
    # row slices shouldn't be GeoSeries, even if they have a geometry col;
    # use identity (`is`) instead of `==` for the exact-class check (E721) —
    # isinstance would not do here since GeoSeries subclasses pd.Series
    assert type(res) is pd.Series
    if "geometry" in column_set:
        # scalar attribute access must yield the geometry object, not a Series
        assert not isinstance(res.geometry, pd.Series)
        assert res.geometry == Point(0, 0)
def test_constructor_sliced_column_slices(df2):
    """iloc column slices of geometry dtype must come back as GeoSeries,
    while iloc row slices stay plain pd.Series.

    Note: loc doesn't use _constructor_sliced so it's not tested here.
    """
    geo_idx = df2.columns.get_loc("geometry")
    sub = df2.head(1)
    # column slices should be GeoSeries if of geometry type;
    # `is` rather than `==` for exact-class comparison (E721)
    assert type(sub.iloc[:, geo_idx]) is GeoSeries
    assert type(sub.iloc[[0], geo_idx]) is GeoSeries
    sub = df2.head(2)
    assert type(sub.iloc[:, geo_idx]) is GeoSeries
    assert type(sub.iloc[[0, 1], geo_idx]) is GeoSeries
    # check iloc row slices are pd.Series instead
    assert type(df2.iloc[0, :]) is pd.Series
def test_constructor_sliced_in_pandas_methods(df2):
    """_constructor_sliced is used in many pandas reductions; a sample of
    non-geometry results must come back as plain pd.Series."""
    # `is` rather than `==` for exact-class comparison (E721)
    assert type(df2.count()) is pd.Series
    # drop the secondary geometry columns as not hashable
    hashable_test_df = df2.drop(columns=["geometry2", "geometry3"])
    assert type(hashable_test_df.duplicated()) is pd.Series
    assert type(df2.quantile(numeric_only=True)) is pd.Series
    assert type(df2.memory_usage()) is pd.Series
def test_merge_preserve_geodataframe():
    """Merging two GeoDataFrames keeps the GeoDataFrame type even though the
    clashing geometry columns get suffixed and nothing stays active."""
    # https://github.com/geopandas/geopandas/issues/2932
    geoms = GeoSeries.from_xy([1], [1])
    frame = GeoDataFrame({"geo": geoms})
    merged = frame.merge(frame, left_index=True, right_index=True)
    assert_obj_no_active_geo_col(merged, GeoDataFrame, geo_colname=None)
    expected = GeoDataFrame({"geo_x": geoms, "geo_y": geoms})
    assert_geodataframe_equal(expected, merged)

View File

@@ -1,891 +0,0 @@
import os
import numpy as np
import pandas as pd
from shapely import make_valid
from shapely.geometry import GeometryCollection, LineString, Point, Polygon, box
import geopandas
from geopandas import GeoDataFrame, GeoSeries, overlay, read_file
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
# fiona's DriverError is raised when an expected-result file is missing;
# provide a stand-in class so this module also imports without fiona installed
try:
    from fiona.errors import DriverError
except ImportError:
    class DriverError(Exception):
        pass
# directory holding the saved expected overlay results
DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data", "overlay")
@pytest.fixture
def dfs(request):
    """Two one-attribute GeoDataFrames of two partially overlapping unit-2
    squares each (the small example from the overlay docs)."""
    left_squares = GeoSeries(
        [
            Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
            Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
        ]
    )
    right_squares = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    left = GeoDataFrame({"col1": [1, 2], "geometry": left_squares})
    right = GeoDataFrame({"col2": [1, 2], "geometry": right_squares})
    return left, right
@pytest.fixture(params=["default-index", "int-index", "string-index"])
def dfs_index(request, dfs):
    """Variants of the ``dfs`` frames with different (mis)aligned indexes;
    overlay should be insensitive to the input index."""
    df1, df2 = dfs
    if request.param == "int-index":
        df1.index = [1, 2]
        df2.index = [0, 2]
    if request.param == "string-index":
        # NOTE(review): only df1 gets a string index here, presumably
        # deliberate so the two frames' index types differ — TODO confirm
        df1.index = ["row1", "row2"]
    return df1, df2
@pytest.fixture(
    params=["union", "intersection", "difference", "symmetric_difference", "identity"]
)
def how(request):
    """Each supported overlay set-operation name."""
    return request.param
@pytest.fixture(params=[True, False])
def keep_geom_type(request):
    """Both settings of overlay's ``keep_geom_type`` flag."""
    return request.param
def test_overlay(dfs_index, how):
    """
    Basic overlay test with small dummy example dataframes (from docs).
    Results obtained using QGIS 2.16 (Vector -> Geoprocessing Tools ->
    Intersection / Union / ...), saved to GeoJSON
    """
    df1, df2 = dfs_index
    result = overlay(df1, df2, how=how)
    # construction of result
    def _read(name):
        # load the saved expected result and normalize it: the saved files
        # carry a CRS and int32 columns that the computed result does not
        expected = read_file(
            os.path.join(DATA, "polys", "df1_df2-{0}.geojson".format(name))
        )
        expected.geometry.array.crs = None
        for col in expected.columns[expected.dtypes == "int32"]:
            expected[col] = expected[col].astype("int64")
        return expected
    if how == "identity":
        # identity == intersection plus the non-overlapping parts of df1
        expected_intersection = _read("intersection")
        expected_difference = _read("difference")
        expected = pd.concat(
            [expected_intersection, expected_difference], ignore_index=True, sort=False
        )
        # concat introduces missing values, so col1 becomes float
        expected["col1"] = expected["col1"].astype(float)
    else:
        expected = _read(how)
    # TODO needed adaptations to result
    if how == "union":
        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
    elif how == "difference":
        result = result.reset_index(drop=True)
    assert_geodataframe_equal(result, expected, check_column_type=False)
    # for difference also reversed
    if how == "difference":
        result = overlay(df2, df1, how=how)
        result = result.reset_index(drop=True)
        expected = _read("difference-inverse")
        assert_geodataframe_equal(result, expected, check_column_type=False)
@pytest.mark.filterwarnings("ignore:GeoSeries crs mismatch:UserWarning")
def test_overlay_nybb(how, nybb_filename):
    """Overlay of the NYC boroughs dataset against a saved set of circles,
    compared to expected results produced with QGIS (with several documented
    adaptations to work around known defects of the saved QGIS output)."""
    polydf = read_file(nybb_filename)
    # The circles have been constructed and saved at the time the expected
    # results were created (exact output of buffer algorithm can slightly
    # change over time -> use saved ones)
    # # construct circles dataframe
    # N = 10
    # b = [int(x) for x in polydf.total_bounds]
    # polydf2 = GeoDataFrame(
    #     [
    #         {"geometry": Point(x, y).buffer(10000), "value1": x + y, "value2": x - y}
    #         for x, y in zip(
    #             range(b[0], b[2], int((b[2] - b[0]) / N)),
    #             range(b[1], b[3], int((b[3] - b[1]) / N)),
    #         )
    #     ],
    #     crs=polydf.crs,
    # )
    polydf2 = read_file(os.path.join(DATA, "nybb_qgis", "polydf2.shp"))
    result = overlay(polydf, polydf2, how=how)
    cols = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area", "value1", "value2"]
    if how == "difference":
        # difference keeps only the left frame's columns
        cols = cols[:-2]
    # expected result
    if how == "identity":
        # read union one, further down below we take the appropriate subset
        expected = read_file(os.path.join(DATA, "nybb_qgis", "qgis-union.shp"))
    else:
        expected = read_file(
            os.path.join(DATA, "nybb_qgis", "qgis-{0}.shp".format(how))
        )
    # The result of QGIS for 'union' contains incorrect geometries:
    # 24 is a full original circle overlapping with unioned geometries, and
    # 27 is a completely duplicated row)
    if how == "union":
        expected = expected.drop([24, 27])
        expected.reset_index(inplace=True, drop=True)
    # Eliminate observations without geometries (issue from QGIS)
    expected = expected[expected.is_valid]
    expected.reset_index(inplace=True, drop=True)
    if how == "identity":
        # identity == the union rows that originate from the left frame
        expected = expected[expected.BoroCode.notnull()].copy()
    # Order GeoDataFrames
    expected = expected.sort_values(cols).reset_index(drop=True)
    # TODO needed adaptations to result
    result = result.sort_values(cols).reset_index(drop=True)
    if how in ("union", "identity"):
        # concat < 0.23 sorts, so changes the order of the columns
        # but at least we ensure 'geometry' is the last column
        assert result.columns[-1] == "geometry"
        assert len(result.columns) == len(expected.columns)
        result = result.reindex(columns=expected.columns)
    # the ordering of the spatial index results causes slight deviations
    # in the resultant geometries for multipolygons
    # for more details on the discussion, see:
    # https://github.com/geopandas/geopandas/pull/1338
    # https://github.com/geopandas/geopandas/issues/1337
    # Temporary workaround below:
    # simplify multipolygon geometry comparison
    # since the order of the constituent polygons depends on
    # the ordering of spatial indexing results, we cannot
    # compare symmetric_difference results directly when the
    # resultant geometry is a multipolygon
    # first, check that all bounds and areas are approx equal
    # this is a very rough check for multipolygon equality
    kwargs = {}
    pd.testing.assert_series_equal(
        result.geometry.area, expected.geometry.area, **kwargs
    )
    pd.testing.assert_frame_equal(
        result.geometry.bounds, expected.geometry.bounds, **kwargs
    )
    # There are two cases where the multipolygon have a different number
    # of sub-geometries -> not solved by normalize (and thus drop for now)
    if how == "symmetric_difference":
        expected.loc[9, "geometry"] = None
        result.loc[9, "geometry"] = None
    if how == "union":
        expected.loc[24, "geometry"] = None
        result.loc[24, "geometry"] = None
    # missing values get read as None in read_file for a string column, but
    # are introduced as NaN by overlay
    expected["BoroName"] = expected["BoroName"].fillna(np.nan)
    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_crs=False,
        check_column_type=False,
        check_less_precise=True,
    )
def test_overlay_overlap(how):
    """
    Overlay test with overlapping geometries in both dataframes.
    Test files are created with::
        import geopandas
        from geopandas import GeoSeries, GeoDataFrame
        from shapely.geometry import Point, Polygon, LineString
        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)
        df1 = GeoDataFrame({'geometry': s1, 'col1':[1,2]})
        df2 = GeoDataFrame({'geometry': s2, 'col2':[1, 2]})
        ax = df1.plot(alpha=0.5)
        df2.plot(alpha=0.5, ax=ax, color='C1')
        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
                    driver='GeoJSON')
        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
                    driver='GeoJSON')
    and then overlay results are obtained from using QGIS 2.16
    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
    saved to GeoJSON.
    """
    if how == "identity":
        # No saved expected-result file exists for 'identity'. Skip before
        # computing the overlay (the original ran the full overlay first and
        # then discarded it via the unusual `raise pytest.skip()` form —
        # pytest.skip() raises by itself, no `raise` needed).
        pytest.skip("no expected result file saved for how='identity'")
    df1 = read_file(os.path.join(DATA, "overlap", "df1_overlap.geojson"))
    df2 = read_file(os.path.join(DATA, "overlap", "df2_overlap.geojson"))
    result = overlay(df1, df2, how=how)
    expected = read_file(
        os.path.join(DATA, "overlap", "df1_df2_overlap-{0}.geojson".format(how))
    )
    if how == "union":
        # the QGIS result has the last row duplicated, so removing this
        expected = expected.iloc[:-1]
    # TODO needed adaptations to result
    result = result.reset_index(drop=True)
    if how == "union":
        result = result.sort_values(["col1", "col2"]).reset_index(drop=True)
    assert_geodataframe_equal(
        result,
        expected,
        normalize=True,
        check_column_type=False,
        check_less_precise=True,
    )
@pytest.mark.parametrize("other_geometry", [False, True])
def test_geometry_not_named_geometry(dfs, how, other_geometry):
    """Overlay must honor the active geometry column even when it is not
    named 'geometry' (and even when a non-active 'geometry' column exists)."""
    # Issue #306
    # Add points and flip names
    df1, df2 = dfs
    df3 = df1.copy()
    df3 = df3.rename(columns={"geometry": "polygons"})
    df3 = df3.set_geometry("polygons")
    if other_geometry:
        # add a second, non-active column that *is* named "geometry"
        df3["geometry"] = df1.centroid.geometry
    assert df3.geometry.name == "polygons"
    res1 = overlay(df1, df2, how=how)
    res2 = overlay(df3, df2, how=how)
    assert df3.geometry.name == "polygons"
    if how == "difference":
        # in case of 'difference', column names of left frame are preserved
        assert res2.geometry.name == "polygons"
        if other_geometry:
            # the non-active "geometry" column must pass through untouched
            assert "geometry" in res2.columns
            assert_geoseries_equal(
                res2["geometry"], df3["geometry"], check_series_type=False
            )
            res2 = res2.drop(["geometry"], axis=1)
        # rename back so the result can be compared against res1
        res2 = res2.rename(columns={"polygons": "geometry"})
        res2 = res2.set_geometry("geometry")
    # TODO if existing column is overwritten -> geometry not last column
    if other_geometry and how == "intersection":
        res2 = res2.reindex(columns=res1.columns)
    assert_geodataframe_equal(res1, res2)
    # same exercise with the *right* frame's geometry column renamed
    df4 = df2.copy()
    df4 = df4.rename(columns={"geometry": "geom"})
    df4 = df4.set_geometry("geom")
    if other_geometry:
        df4["geometry"] = df2.centroid.geometry
    assert df4.geometry.name == "geom"
    res1 = overlay(df1, df2, how=how)
    res2 = overlay(df1, df4, how=how)
    assert_geodataframe_equal(res1, res2)
def test_bad_how(dfs):
    """An unknown ``how`` keyword must raise ValueError."""
    left, right = dfs
    with pytest.raises(ValueError):
        overlay(left, right, how="spandex")
def test_duplicate_column_name(dfs, how):
    """Clashing non-geometry column names get _1/_2 suffixes in the result."""
    if how == "difference":
        pytest.skip("Difference uses columns from one df only.")
    left, right = dfs
    right_renamed = right.rename(columns={"col2": "col1"})
    res = overlay(left, right_renamed, how=how)
    assert "col1_1" in res.columns
    assert "col1_2" in res.columns
def test_geoseries_warning(dfs):
    """Overlaying against a bare GeoSeries is unsupported (Issue #305)."""
    left, right = dfs
    with pytest.raises(NotImplementedError):
        overlay(left, right.geometry, how="union")
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_preserve_crs(dfs, how):
    """The result CRS mirrors the (shared) CRS of the inputs."""
    left, right = dfs
    # no CRS in -> no CRS out
    assert overlay(left, right, how=how).crs is None
    shared_crs = "epsg:4326"
    left.crs = shared_crs
    right.crs = shared_crs
    assert overlay(left, right, how=how).crs == shared_crs
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_crs_mismatch(dfs, how):
    """Differing input CRS must trigger a UserWarning."""
    left, right = dfs
    left.crs = 4326
    right.crs = 3857
    with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
        overlay(left, right, how=how)
def test_empty_intersection(dfs):
    """Intersecting fully disjoint frames yields an empty GeoDataFrame with
    both attribute columns present."""
    df1, df2 = dfs
    # polygons entirely in the negative quadrant: no overlap with df1
    polys3 = GeoSeries(
        [
            Polygon([(-1, -1), (-3, -1), (-3, -3), (-1, -3)]),
            Polygon([(-3, -3), (-5, -3), (-5, -5), (-3, -5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": polys3, "col3": [1, 2]})
    expected = GeoDataFrame([], columns=["col1", "col3", "geometry"])
    # default how is 'intersection'
    result = overlay(df1, df3)
    assert_geodataframe_equal(result, expected, check_dtype=False)
def test_correct_index(dfs):
    """The overlay result must have a clean 0..n-1 index."""
    # GH883 - case where the index was not properly reset
    df1, df2 = dfs
    polys3 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": polys3, "col3": [1, 2, 3]})
    # only polygons 1 and 3 of df3 intersect df2's geometries
    i1 = Polygon([(1, 1), (1, 3), (3, 3), (3, 1), (1, 1)])
    i2 = Polygon([(3, 3), (3, 5), (5, 5), (5, 3), (3, 3)])
    expected = GeoDataFrame(
        [[1, 1, i1], [3, 2, i2]], columns=["col3", "col2", "geometry"]
    )
    result = overlay(df3, df2, keep_geom_type=True)
    assert_geodataframe_equal(result, expected)
def test_warn_on_keep_geom_type(dfs):
    """overlay with keep_geom_type=None must emit a UserWarning advising the
    user to set the parameter explicitly."""
    # only the second fixture frame is used (df1 was an unused local before)
    _, df2 = dfs
    polys3 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df3 = GeoDataFrame({"geometry": polys3})
    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
        overlay(df2, df3, keep_geom_type=None)
@pytest.mark.parametrize(
    "geom_types", ["polys", "poly_line", "poly_point", "line_poly", "point_poly"]
)
def test_overlay_strict(how, keep_geom_type, geom_types):
    """
    Test of mixed geometry types on input and output. Expected results initially
    generated using following snippet.
    polys1 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df1 = gpd.GeoDataFrame({'col1': [1, 2], 'geometry': polys1})
    polys2 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df2 = gpd.GeoDataFrame({'geometry': polys2, 'col2': [1, 2, 3]})
    lines1 = gpd.GeoSeries([LineString([(2, 0), (2, 4), (6, 4)]),
                            LineString([(0, 3), (6, 3)])])
    df3 = gpd.GeoDataFrame({'col3': [1, 2], 'geometry': lines1})
    points1 = gpd.GeoSeries([Point((2, 2)),
                             Point((3, 3))])
    df4 = gpd.GeoDataFrame({'col4': [1, 2], 'geometry': points1})
    params=["union", "intersection", "difference", "symmetric_difference",
            "identity"]
    stricts = [True, False]
    for p in params:
        for s in stricts:
            exp = gpd.overlay(df1, df2, how=p, keep_geom_type=s)
            if not exp.empty:
                exp.to_file('polys_{p}_{s}.geojson'.format(p=p, s=s),
                            driver='GeoJSON')
    for p in params:
        for s in stricts:
            exp = gpd.overlay(df1, df3, how=p, keep_geom_type=s)
            if not exp.empty:
                exp.to_file('poly_line_{p}_{s}.geojson'.format(p=p, s=s),
                            driver='GeoJSON')
    for p in params:
        for s in stricts:
            exp = gpd.overlay(df1, df4, how=p, keep_geom_type=s)
            if not exp.empty:
                exp.to_file('poly_point_{p}_{s}.geojson'.format(p=p, s=s),
                            driver='GeoJSON')
    """
    # build the same frames as in the snippet above
    polys1 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})
    polys2 = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    df2 = GeoDataFrame({"geometry": polys2, "col2": [1, 2, 3]})
    lines1 = GeoSeries(
        [LineString([(2, 0), (2, 4), (6, 4)]), LineString([(0, 3), (6, 3)])]
    )
    df3 = GeoDataFrame({"col3": [1, 2], "geometry": lines1})
    points1 = GeoSeries([Point((2, 2)), Point((3, 3))])
    df4 = GeoDataFrame({"col4": [1, 2], "geometry": points1})
    # pick the input combination for this parametrization
    if geom_types == "polys":
        result = overlay(df1, df2, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_line":
        result = overlay(df1, df3, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_point":
        result = overlay(df1, df4, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "line_poly":
        result = overlay(df3, df1, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "point_poly":
        result = overlay(df4, df1, how=how, keep_geom_type=keep_geom_type)
    # an expected-result file only exists for combinations with non-empty
    # output; a missing file (driver error) implies the result must be empty
    try:
        expected = read_file(
            os.path.join(
                DATA,
                "strict",
                "{t}_{h}_{s}.geojson".format(t=geom_types, h=how, s=keep_geom_type),
            )
        )
        # the order depends on the spatial index used
        # so we sort the resultant dataframes to get a consistent order
        # independently of the spatial index implementation
        assert all(expected.columns == result.columns), "Column name mismatch"
        cols = list(set(result.columns) - {"geometry"})
        expected = expected.sort_values(cols, axis=0).reset_index(drop=True)
        result = result.sort_values(cols, axis=0).reset_index(drop=True)
        # some columns are all-NaN in the result, but get read as object dtype
        # column of None values in read_file
        for col in ["col1", "col3", "col4"]:
            if col in expected.columns and expected[col].isna().all():
                expected[col] = expected[col].astype("float64")
        assert_geodataframe_equal(
            result,
            expected,
            normalize=True,
            check_column_type=False,
            check_less_precise=True,
            check_crs=False,
            check_dtype=False,
        )
    except DriverError:  # fiona >= 1.8
        assert result.empty
    except OSError:  # fiona < 1.8
        assert result.empty
    except RuntimeError:  # pyogrio.DataSourceError
        assert result.empty
def test_mixed_geom_error():
    """keep_geom_type=True is not implemented for mixed-geometry-type input."""
    poly_geoms = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    poly_df = GeoDataFrame({"col1": [1, 2], "geometry": poly_geoms})
    mixed_geoms = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    mixed_df = GeoDataFrame({"col1": [1, 2], "geometry": mixed_geoms})
    with pytest.raises(NotImplementedError):
        overlay(poly_df, mixed_df, keep_geom_type=True)
def test_keep_geom_type_error():
    """A GeometryCollection input makes keep_geom_type=True raise TypeError."""
    collection = GeoSeries(
        GeometryCollection(
            [
                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                LineString([(3, 3), (5, 3), (5, 5), (3, 5)]),
            ]
        )
    )
    collection_df = GeoDataFrame({"col1": [2], "geometry": collection})
    poly_geoms = GeoSeries(
        [
            Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
            Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
        ]
    )
    poly_df = GeoDataFrame({"col1": [1, 2], "geometry": poly_geoms})
    with pytest.raises(TypeError):
        overlay(collection_df, poly_df, keep_geom_type=True)
def test_keep_geom_type_geometry_collection():
    """When the intersection produces a GeometryCollection, keep_geom_type
    controls whether it is reduced to the input geometry type."""
    # GH 1581
    df1 = read_file(os.path.join(DATA, "geom_type", "df1.geojson"))
    df2 = read_file(os.path.join(DATA, "geom_type", "df2.geojson"))
    # default (None) warns and behaves like keep_geom_type=True
    with pytest.warns(UserWarning, match="`keep_geom_type=True` in overlay"):
        intersection = overlay(df1, df2, keep_geom_type=None)
        assert len(intersection) == 1
        assert (intersection.geom_type == "Polygon").all()
    intersection = overlay(df1, df2, keep_geom_type=True)
    assert len(intersection) == 1
    assert (intersection.geom_type == "Polygon").all()
    # keep_geom_type=False leaves the raw GeometryCollection
    intersection = overlay(df1, df2, keep_geom_type=False)
    assert len(intersection) == 1
    assert (intersection.geom_type == "GeometryCollection").all()
def test_keep_geom_type_geometry_collection2():
    """Lower-dimensional intersection pieces (points, lines) are dropped with
    keep_geom_type=True and kept with keep_geom_type=False."""
    polys1 = [
        box(0, 0, 1, 1),
        box(1, 1, 3, 3).union(box(1, 3, 5, 5)),
    ]
    polys2 = [
        box(0, 0, 1, 1),
        box(3, 1, 4, 2).union(box(4, 1, 5, 4)),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})
    # keep_geom_type=True: only polygonal pieces survive
    result1 = overlay(df1, df2, keep_geom_type=True)
    expected1 = GeoDataFrame(
        {
            "left": [0, 1],
            "right": [0, 1],
            "geometry": [box(0, 0, 1, 1), box(4, 3, 5, 4)],
        }
    )
    assert_geodataframe_equal(result1, expected1)
    # keep_geom_type=False: the touching point and line pieces remain too
    result1 = overlay(df1, df2, keep_geom_type=False)
    expected1 = GeoDataFrame(
        {
            "left": [0, 1, 1],
            "right": [0, 0, 1],
            "geometry": [
                box(0, 0, 1, 1),
                Point(1, 1),
                GeometryCollection([box(4, 3, 5, 4), LineString([(3, 1), (3, 2)])]),
            ],
        }
    )
    assert_geodataframe_equal(result1, expected1)
def test_keep_geom_type_geomcoll_different_types():
    """A GeometryCollection result that contains *no* piece of the input type
    is dropped entirely under keep_geom_type=True."""
    polys1 = [box(0, 1, 1, 3), box(10, 10, 12, 12)]
    polys2 = [
        # concave polygon whose overlap with the first box is only lines/points
        Polygon([(1, 0), (3, 0), (3, 3), (1, 3), (1, 2), (2, 2), (2, 1), (1, 1)]),
        box(11, 11, 13, 13),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})
    result1 = overlay(df1, df2, keep_geom_type=True)
    expected1 = GeoDataFrame(
        {
            "left": [1],
            "right": [1],
            "geometry": [box(11, 11, 12, 12)],
        }
    )
    assert_geodataframe_equal(result1, expected1)
    # with keep_geom_type=False the line/point collection is preserved
    result2 = overlay(df1, df2, keep_geom_type=False)
    expected2 = GeoDataFrame(
        {
            "left": [0, 1],
            "right": [0, 1],
            "geometry": [
                GeometryCollection([LineString([(1, 2), (1, 3)]), Point(1, 1)]),
                box(11, 11, 12, 12),
            ],
        }
    )
    assert_geodataframe_equal(result2, expected2)
def test_keep_geom_type_geometry_collection_difference():
    """keep_geom_type must also filter degenerate pieces in 'difference'."""
    # GH 2163
    polys1 = [
        box(0, 0, 1, 1),
        box(1, 1, 2, 2),
    ]
    # the tiny sliver in the second geometry may be converted to a
    # linestring during the overlay process due to floating point errors
    # on some platforms
    polys2 = [
        box(0, 0, 1, 1),
        box(1, 1, 2, 3).union(box(2, 2, 3, 2.00000000000000001)),
    ]
    df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1})
    df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2})
    result1 = overlay(df2, df1, keep_geom_type=True, how="difference")
    expected1 = GeoDataFrame(
        {
            "right": [1],
            "geometry": [box(1, 2, 2, 3)],
        },
    )
    assert_geodataframe_equal(result1, expected1)
@pytest.mark.parametrize("should_make_valid", [True, False])
def test_overlap_make_valid(should_make_valid):
    """overlay(make_valid=True) repairs invalid inputs; make_valid=False
    raises on them instead."""
    # self-intersecting "bowtie" polygon is invalid by construction
    bowtie = Polygon([(1, 1), (9, 9), (9, 1), (1, 9), (1, 1)])
    assert not bowtie.is_valid
    fixed_bowtie = make_valid(bowtie)
    assert fixed_bowtie.is_valid
    df1 = GeoDataFrame({"col1": ["region"], "geometry": GeoSeries([box(0, 0, 10, 10)])})
    df_bowtie = GeoDataFrame(
        {"col1": ["invalid", "valid"], "geometry": GeoSeries([bowtie, fixed_bowtie])}
    )
    if should_make_valid:
        # both rows should intersect as the repaired bowtie
        df_overlay_bowtie = overlay(df1, df_bowtie, make_valid=should_make_valid)
        assert df_overlay_bowtie.at[0, "geometry"].equals(fixed_bowtie)
        assert df_overlay_bowtie.at[1, "geometry"].equals(fixed_bowtie)
    else:
        with pytest.raises(ValueError, match="1 invalid input geometries"):
            overlay(df1, df_bowtie, make_valid=should_make_valid)
def test_empty_overlay_return_non_duplicated_columns(nybb_filename):
    """An empty overlay result must still carry the suffixed (_1/_2) column
    names rather than duplicated raw names."""
    nybb = geopandas.read_file(nybb_filename)
    # translate far away so there is no overlap at all
    nybb2 = nybb.copy()
    nybb2.geometry = nybb2.translate(20000000)
    result = geopandas.overlay(nybb, nybb2)
    expected = GeoDataFrame(
        columns=[
            "BoroCode_1",
            "BoroName_1",
            "Shape_Leng_1",
            "Shape_Area_1",
            "BoroCode_2",
            "BoroName_2",
            "Shape_Leng_2",
            "Shape_Area_2",
            "geometry",
        ],
        crs=nybb.crs,
    )
    assert_geodataframe_equal(result, expected, check_dtype=False)
def test_non_overlapping(how):
    """Overlay of two disjoint single-polygon frames: each `how` has a
    specific expected frame (empty for intersection, both rows for union and
    symmetric_difference, left-only for identity/difference)."""
    p1 = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
    p2 = Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])
    df1 = GeoDataFrame({"col1": [1], "geometry": [p1]})
    df2 = GeoDataFrame({"col2": [2], "geometry": [p2]})
    result = overlay(df1, df2, how=how)
    if how == "intersection":
        # empty-result index dtype differs between pandas versions
        if PANDAS_GE_20:
            index = None
        else:
            index = pd.Index([], dtype="object")
        expected = GeoDataFrame(
            {
                "col1": np.array([], dtype="int64"),
                "col2": np.array([], dtype="int64"),
                "geometry": [],
            },
            index=index,
        )
    elif how == "union":
        # missing values from the non-matching side turn the columns to float
        expected = GeoDataFrame(
            {
                "col1": [1, np.nan],
                "col2": [np.nan, 2],
                "geometry": [p1, p2],
            }
        )
    elif how == "identity":
        expected = GeoDataFrame(
            {
                "col1": [1.0],
                "col2": [np.nan],
                "geometry": [p1],
            }
        )
    elif how == "symmetric_difference":
        expected = GeoDataFrame(
            {
                "col1": [1, np.nan],
                "col2": [np.nan, 2],
                "geometry": [p1, p2],
            }
        )
    elif how == "difference":
        expected = GeoDataFrame(
            {
                "col1": [1],
                "geometry": [p1],
            }
        )
    assert_geodataframe_equal(result, expected)
def test_no_intersection():
    """Overlapping bounding boxes but disjoint geometries: the intersection
    must be empty (exercises the spatial-index pre-filter)."""
    circles = GeoSeries([Point(i, i).buffer(0.1) for i in range(3)])
    left = GeoDataFrame({"foo": ["a", "b", "c"]}, geometry=circles)
    right = GeoDataFrame({"bar": ["1", "3", "5"]}, geometry=circles.translate(1))
    expected = GeoDataFrame(columns=["foo", "bar", "geometry"])
    result = overlay(left, right, how="intersection")
    assert_geodataframe_equal(result, expected, check_index_type=False)
class TestOverlayWikiExample:
    """Overlay checked against the hand-constructed example of two
    overlapping axis-aligned squares (layer A upper-left, layer B
    lower-right, overlapping in box(4, 2, 6, 4))."""
    def setup_method(self):
        # the two input layers and the expected result of every operation
        self.layer_a = GeoDataFrame(geometry=[box(0, 2, 6, 6)])
        self.layer_b = GeoDataFrame(geometry=[box(4, 0, 10, 4)])
        self.intersection = GeoDataFrame(geometry=[box(4, 2, 6, 4)])
        self.union = GeoDataFrame(
            geometry=[
                box(4, 2, 6, 4),
                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
            ]
        )
        self.a_difference_b = GeoDataFrame(
            geometry=[Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)])]
        )
        self.b_difference_a = GeoDataFrame(
            geometry=[
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)])
            ]
        )
        self.symmetric_difference = GeoDataFrame(
            geometry=[
                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
            ]
        )
        self.a_identity_b = GeoDataFrame(
            geometry=[
                box(4, 2, 6, 4),
                Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)]),
            ]
        )
        self.b_identity_a = GeoDataFrame(
            geometry=[
                box(4, 2, 6, 4),
                Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)]),
            ]
        )
    def test_intersection(self):
        df_result = overlay(self.layer_a, self.layer_b, how="intersection")
        assert df_result.geom_equals(self.intersection).all()
    def test_union(self):
        df_result = overlay(self.layer_a, self.layer_b, how="union")
        assert_geodataframe_equal(df_result, self.union)
    def test_a_difference_b(self):
        df_result = overlay(self.layer_a, self.layer_b, how="difference")
        assert_geodataframe_equal(df_result, self.a_difference_b)
    def test_b_difference_a(self):
        # difference is not symmetric, so test both orders
        df_result = overlay(self.layer_b, self.layer_a, how="difference")
        assert_geodataframe_equal(df_result, self.b_difference_a)
    def test_symmetric_difference(self):
        df_result = overlay(self.layer_a, self.layer_b, how="symmetric_difference")
        assert_geodataframe_equal(df_result, self.symmetric_difference)
    def test_a_identity_b(self):
        df_result = overlay(self.layer_a, self.layer_b, how="identity")
        assert_geodataframe_equal(df_result, self.a_identity_b)
    def test_b_identity_a(self):
        # identity is not symmetric either
        df_result = overlay(self.layer_b, self.layer_a, how="identity")
        assert_geodataframe_equal(df_result, self.b_identity_a)

View File

@@ -1,890 +0,0 @@
import os
import warnings
from packaging.version import Version
import numpy as np
import pandas as pd
import shapely
from shapely.geometry import GeometryCollection, LinearRing, LineString, Point
import geopandas
import geopandas._compat as compat
from geopandas import GeoDataFrame, GeoSeries
from geopandas.array import from_shapely
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from numpy.testing import assert_array_equal
from pandas.testing import assert_frame_equal, assert_series_equal
@pytest.fixture
def s():
    """Three points on the diagonal: (0, 0), (1, 1), (2, 2)."""
    return GeoSeries([Point(coord, coord) for coord in range(3)])
@pytest.fixture
def df():
    """GeoDataFrame of three diagonal points plus two int64 value columns."""
    diagonal_points = [Point(i, i) for i in range(3)]
    return GeoDataFrame(
        {
            "geometry": diagonal_points,
            "value1": np.arange(3, dtype="int64"),
            "value2": np.array([1, 2, 1], dtype="int64"),
        }
    )
def test_repr(s, df):
    """Point WKT appears in the GeoSeries, GeoDataFrame and HTML reprs."""
    for representation in (repr(s), repr(df), df._repr_html_()):
        assert "POINT" in representation
@pytest.mark.skipif(shapely.geos_version < (3, 9, 0), reason="requires GEOS>=3.9")
def test_repr_boxed_display_precision():
    """repr precision: 5 decimals for geographic-scale coordinates, 3 for
    projected-scale ones, and the global ``display_precision`` option
    overrides both.

    The option is process-global state, so it is restored afterwards — the
    original version left it set to 9 and leaked into subsequent tests.
    """
    # geographic coordinates
    p1 = Point(10.123456789, 50.123456789)
    p2 = Point(4.123456789, 20.123456789)
    s1 = GeoSeries([p1, p2, None])
    assert "POINT (10.12346 50.12346)" in repr(s1)
    # geographic coordinates 4326
    s3 = GeoSeries([p1, p2], crs=4326)
    assert "POINT (10.12346 50.12346)" in repr(s3)
    # projected coordinates
    p1 = Point(3000.123456789, 3000.123456789)
    p2 = Point(4000.123456789, 4000.123456789)
    s2 = GeoSeries([p1, p2, None])
    assert "POINT (3000.123 3000.123)" in repr(s2)
    # projected geographic coordinate
    s4 = GeoSeries([p1, p2], crs=3857)
    assert "POINT (3000.123 3000.123)" in repr(s4)
    original_precision = geopandas.options.display_precision
    try:
        geopandas.options.display_precision = 1
        assert "POINT (10.1 50.1)" in repr(s1)
        geopandas.options.display_precision = 9
        assert "POINT (10.123456789 50.123456789)" in repr(s1)
    finally:
        # restore the global option so later tests see the previous value
        geopandas.options.display_precision = original_precision
def test_repr_all_missing():
    """GeoSeries/GeoDataFrame of only missing geometries still repr cleanly.

    https://github.com/geopandas/geopandas/issues/1195
    """
    missing = GeoSeries([None, None, None])
    assert "None" in repr(missing)
    frame = GeoDataFrame({"a": [1, 2, 3], "geometry": missing})
    assert "None" in repr(frame)
    assert "geometry" in frame._repr_html_()
def test_repr_empty():
    """Empty GeoSeries/GeoDataFrame repr without error.

    https://github.com/geopandas/geopandas/issues/1195
    https://github.com/geopandas/geopandas/issues/1184
    """
    empty = GeoSeries([])
    assert repr(empty) == "GeoSeries([], dtype: geometry)"
    frame = GeoDataFrame({"a": [], "geometry": empty})
    assert "Empty GeoDataFrame" in repr(frame)
    assert "geometry" in frame._repr_html_()
def test_repr_linearring():
    """LinearRing survives the internal shapely/wkt/wkb conversions in repr.

    https://github.com/geopandas/geopandas/pull/2689
    """
    ring_series = GeoSeries([LinearRing([(0, 0), (1, 1), (1, -1)])])
    assert "LINEARRING" in str(ring_series.iloc[0])  # shapely scalar repr
    assert "LINEARRING" in str(ring_series)  # GeoSeries repr
    # lines that merely look coercible to a ring must stay LineStrings
    open_line = LineString([(0, 0), (1, 1), (1, -1)])
    closed_line = LineString([(0, 0), (1, 1), (1, -1), (0, 0)])
    assert "LINEARRING" not in str(GeoSeries([open_line, closed_line]))
def test_indexing(s, df):
    """Scalar, list, boolean and slice indexing all keep geometry types intact."""
    # accessing scalar from the geometry (column)
    exp = Point(1, 1)
    assert s[1] == exp
    assert s.loc[1] == exp
    assert s.iloc[1] == exp
    assert df.loc[1, "geometry"] == exp
    assert df.iloc[1, 0] == exp
    # multiple values
    exp = GeoSeries([Point(2, 2), Point(0, 0)], index=[2, 0])
    assert_geoseries_equal(s.loc[[2, 0]], exp)
    assert_geoseries_equal(s.iloc[[2, 0]], exp)
    assert_geoseries_equal(s.reindex([2, 0]), exp)
    assert_geoseries_equal(df.loc[[2, 0], "geometry"], exp)
    # TODO here iloc does not return a GeoSeries
    assert_series_equal(
        df.iloc[[2, 0], 0], exp, check_series_type=False, check_names=False
    )
    # boolean indexing
    exp = GeoSeries([Point(0, 0), Point(2, 2)], index=[0, 2])
    mask = np.array([True, False, True])
    assert_geoseries_equal(s[mask], exp)
    assert_geoseries_equal(s.loc[mask], exp)
    assert_geoseries_equal(df[mask]["geometry"], exp)
    assert_geoseries_equal(df.loc[mask, "geometry"], exp)
    # slices (mutates the fixture's index locally to exercise label slicing)
    s.index = [1, 2, 3]
    exp = GeoSeries([Point(1, 1), Point(2, 2)], index=[2, 3])
    assert_series_equal(s[1:], exp)
    assert_series_equal(s.iloc[1:], exp)
    assert_series_equal(s.loc[2:], exp)
def test_reindex(s, df):
    """reindex preserves the geo classes, downgrading to a plain DataFrame
    only when the geometry column is not among the selected columns."""
    # GeoSeries reindex
    res = s.reindex([1, 2, 3])
    exp = GeoSeries([Point(1, 1), Point(2, 2), None], index=[1, 2, 3])
    assert_geoseries_equal(res, exp)
    # GeoDataFrame reindex index
    res = df.reindex(index=[1, 2, 3])
    assert_geoseries_equal(res.geometry, exp)
    # GeoDataFrame reindex columns
    res = df.reindex(columns=["value1", "geometry"])
    assert isinstance(res, GeoDataFrame)
    assert isinstance(res.geometry, GeoSeries)
    assert_frame_equal(res, df[["value1", "geometry"]])
    res = df.reindex(columns=["value1", "value2"])
    # `is` instead of `==` for the type comparison (E721); isinstance would
    # wrongly accept the GeoDataFrame subclass, but the point here is an
    # exact downgrade to pd.DataFrame.
    assert type(res) is pd.DataFrame
    assert_frame_equal(res, df[["value1", "value2"]])
def test_take(s, df):
    """take preserves GeoSeries/GeoDataFrame; dropping the geometry column
    via axis=1 downgrades to a plain DataFrame."""
    inds = np.array([0, 2])
    # GeoSeries take
    result = s.take(inds)
    expected = s.iloc[[0, 2]]
    assert isinstance(result, GeoSeries)
    assert_geoseries_equal(result, expected)
    # GeoDataFrame take axis 0
    result = df.take(inds, axis=0)
    expected = df.iloc[[0, 2], :]
    assert isinstance(result, GeoDataFrame)
    assert_geodataframe_equal(result, expected)
    # GeoDataFrame take axis 1
    df = df.reindex(columns=["value1", "value2", "geometry"])  # ensure consistent order
    result = df.take(inds, axis=1)
    expected = df[["value1", "geometry"]]
    assert isinstance(result, GeoDataFrame)
    assert_geodataframe_equal(result, expected)
    # taking only the non-geometry columns gives a plain DataFrame
    result = df.take(np.array([0, 1]), axis=1)
    expected = df[["value1", "value2"]]
    assert isinstance(result, pd.DataFrame)
    assert_frame_equal(result, expected)
def test_take_empty(s, df):
    """take with an empty indexer preserves the index type.

    https://github.com/geopandas/geopandas/issues/1190
    """
    # ensure that index type is preserved in an empty take
    inds = np.array([], dtype="int64")
    # use non-default index
    df.index = pd.date_range("2012-01-01", periods=len(df))
    result = df.take(inds, axis=0)
    assert isinstance(result, GeoDataFrame)
    assert result.shape == (0, 3)
    assert isinstance(result.index, pd.DatetimeIndex)
    # the original bug report was an empty boolean mask
    for result in [df.loc[df["value1"] > 100], df[df["value1"] > 100]]:
        assert isinstance(result, GeoDataFrame)
        assert result.shape == (0, 3)
        assert isinstance(result.index, pd.DatetimeIndex)
def test_assignment(s, df):
    """Setting a geometry scalar via [], .loc and .iloc keeps the geo classes."""
    exp = GeoSeries([Point(10, 10), Point(1, 1), Point(2, 2)])
    modified = s.copy()
    modified[0] = Point(10, 10)
    assert_geoseries_equal(modified, exp)
    modified = s.copy()
    modified.loc[0] = Point(10, 10)
    assert_geoseries_equal(modified, exp)
    modified = s.copy()
    modified.iloc[0] = Point(10, 10)
    assert_geoseries_equal(modified, exp)
    frame = df.copy()
    frame.loc[0, "geometry"] = Point(10, 10)
    assert_geoseries_equal(frame["geometry"], exp)
    frame = df.copy()
    frame.iloc[0, 0] = Point(10, 10)
    assert_geoseries_equal(frame["geometry"], exp)
def test_assign(df):
    """DataFrame.assign returns a GeoDataFrame with the new column added."""
    result = df.assign(new=1)
    expected = df.copy()
    expected["new"] = 1
    assert isinstance(result, GeoDataFrame)
    assert_frame_equal(result, expected)
def test_astype(s, df):
    """astype keeps or downgrades the geo classes depending on whether the
    geometry column is cast away."""
    # check geoseries functionality
    with pytest.raises(TypeError):
        s.astype(int)
    assert s.astype(str)[0] == "POINT (0 0)"
    res = s.astype(object)
    if not (
        (Version(pd.__version__) == Version("2.1.0"))
        or (Version(pd.__version__) == Version("2.1.1"))
    ):
        # https://github.com/geopandas/geopandas/issues/2948 - bug in pandas 2.1.0
        assert isinstance(res, pd.Series) and not isinstance(res, GeoSeries)
        assert res.dtype == object
    df = df.rename_geometry("geom_list")
    # check whether returned object is a geodataframe
    res = df.astype({"value1": float})
    assert isinstance(res, GeoDataFrame)
    # check whether returned object is a dataframe (geometry cast to str)
    res = df.astype(str)
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
    res = df.astype({"geom_list": str})
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
    res = df.astype(object)
    assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame)
    assert res["geom_list"].dtype == object
def test_astype_invalid_geodataframe():
    """astype works on a GeoDataFrame that has no geometry column at all.

    https://github.com/geopandas/geopandas/issues/1144
    """
    frame = GeoDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    converted = frame.astype(object)
    assert isinstance(converted, pd.DataFrame)
    assert not isinstance(converted, GeoDataFrame)
    assert converted["a"].dtype == object
def test_convert_dtypes(df):
    """convert_dtypes keeps GeoDataFrame class, CRS and geometry column name.

    https://github.com/geopandas/geopandas/issues/1870
    """
    # Test geometry col is first col, first, geom_col_name=geometry
    # (order is important in concat, used internally)
    res1 = df.convert_dtypes()
    expected1 = GeoDataFrame(
        pd.DataFrame(df).convert_dtypes(), crs=df.crs, geometry=df.geometry.name
    )
    # Checking type and metadata are right
    assert_geodataframe_equal(expected1, res1)
    # Test geom last, geom_col_name=geometry
    res2 = df[["value1", "value2", "geometry"]].convert_dtypes()
    assert_geodataframe_equal(expected1[["value1", "value2", "geometry"]], res2)
    if compat.HAS_PYPROJ:
        # Test again with crs set and custom geom col name
        df2 = df.set_crs(epsg=4326).rename_geometry("points")
        expected2 = GeoDataFrame(
            pd.DataFrame(df2).convert_dtypes(), crs=df2.crs, geometry=df2.geometry.name
        )
        res3 = df2.convert_dtypes()
        assert_geodataframe_equal(expected2, res3)
        # Test geom last, geom_col=geometry
        res4 = df2[["value1", "value2", "points"]].convert_dtypes()
        assert_geodataframe_equal(expected2[["value1", "value2", "points"]], res4)
def test_to_csv(df):
    """to_csv writes geometries as WKT, using the platform line separator."""
    rows = [
        "geometry,value1,value2",
        "POINT (0 0),0,1",
        "POINT (1 1),1,2",
        "POINT (2 2),2,1",
        "",  # trailing newline
    ]
    assert df.to_csv(index=False) == os.linesep.join(rows)
@pytest.mark.filterwarnings(
    "ignore:Dropping of nuisance columns in DataFrame reductions"
)
def test_numerical_operations(s, df):
    """Frame reductions skip the geometry column; arithmetic on geometries
    raises, while boolean comparisons still work."""
    # df methods ignore the geometry column
    exp = pd.Series([3, 4], index=["value1", "value2"])
    if not compat.PANDAS_GE_20:
        res = df.sum()
    else:
        res = df.sum(numeric_only=True)
    assert_series_equal(res, exp)
    # series methods raise error (not supported for geometry)
    with pytest.raises(TypeError):
        s.sum()
    with pytest.raises(TypeError):
        s.max()
    with pytest.raises((TypeError, ValueError)):
        # TODO: remove ValueError after pandas-dev/pandas#32749
        s.idxmax()
    # numerical ops raise an error
    with pytest.raises(TypeError):
        df + 1
    with pytest.raises(TypeError):
        s + 1
    # boolean comparisons work
    res = df == 100
    exp = pd.DataFrame(False, index=df.index, columns=df.columns)
    assert_frame_equal(res, exp)
def test_where(s):
    """where masks unselected entries with missing geometry, keeping GeoSeries."""
    masked = s.where(np.array([True, False, True]))
    expected = GeoSeries([Point(0, 0), None, Point(2, 2)])
    assert_series_equal(masked, expected)
def test_select_dtypes(df):
    """Selecting numeric dtypes drops the geometry column."""
    numeric = df.select_dtypes(include=[np.number])
    assert_frame_equal(numeric, df[["value1", "value2"]])
def test_equals(s, df):
    """equals detects differences in geometries as well as plain values.

    https://github.com/geopandas/geopandas/issues/1420
    """
    same = s.copy()
    assert s.equals(same) is True
    same.iloc[0] = None
    assert s.equals(same) is False
    frame = df.copy()
    assert df.equals(frame) is True
    frame.loc[0, "geometry"] = Point(10, 10)
    assert df.equals(frame) is False
    frame = df.copy()
    frame.loc[0, "value1"] = 10
    assert df.equals(frame) is False
# Missing values
def test_fillna_scalar(s, df):
    """fillna with a geometry scalar fills missing geometries; a non-geometry
    fill value is only allowed when no geometry is actually missing."""
    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
    res = s2.fillna(Point(1, 1))
    assert_geoseries_equal(res, s)
    # allow np.nan although this does not change anything
    # https://github.com/geopandas/geopandas/issues/1149
    res = s2.fillna(np.nan)
    assert_geoseries_equal(res, s2)
    # raise exception if trying to fill missing geometry w/ non-geometry
    df2 = df.copy()
    df2["geometry"] = s2
    res = df2.fillna(Point(1, 1))
    assert_geodataframe_equal(res, df)
    with pytest.raises((NotImplementedError, TypeError)):  # GH2351
        df2.fillna(0)
    # allow non-geometry fill value if there are no missing values
    # https://github.com/geopandas/geopandas/issues/1149
    df3 = df.copy()
    df3.loc[0, "value1"] = np.nan
    res = df3.fillna(0)
    assert_geodataframe_equal(res.astype({"value1": "int64"}), df)
def test_fillna_series(s):
    """fillna with another GeoSeries aligns on index labels, not position."""
    # fill na with another GeoSeries
    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
    # check na filled with the same index
    res = s2.fillna(GeoSeries([Point(1, 1)] * 3))
    assert_geoseries_equal(res, s)
    # check na filled based on index, not position
    index = [3, 2, 1]
    res = s2.fillna(GeoSeries([Point(i, i) for i in index], index=index))
    assert_geoseries_equal(res, s)
    # check na filled but the input length is different
    res = s2.fillna(GeoSeries([Point(1, 1)], index=[1]))
    assert_geoseries_equal(res, s)
    # check na NOT filled when the filler's index does not overlap
    res = s2.fillna(GeoSeries([Point(1, 1)], index=[9]))
    assert_geoseries_equal(res, s2)
def test_fillna_inplace(s):
    """fillna(inplace=True) fills missing geometries in place."""
    s2 = GeoSeries([Point(0, 0), None, Point(2, 2)])
    arr = s2.array
    s2.fillna(Point(1, 1), inplace=True)
    assert_geoseries_equal(s2, s)
    if compat.PANDAS_GE_21:
        # starting from pandas 2.1, there is support to do this actually inplace
        assert s2.array is arr
def test_dropna():
    """dropna removes missing geometries while keeping original labels."""
    series = GeoSeries([Point(0, 0), None, Point(2, 2)])
    assert_geoseries_equal(series.dropna(), series.loc[[0, 2]])
@pytest.mark.parametrize("NA", [None, np.nan])
def test_isna(NA):
    """isna/notna (and their *null aliases) flag missing geometries as plain
    boolean Series, preserving index and name."""
    series = GeoSeries([Point(0, 0), NA, Point(2, 2)], index=[2, 4, 5], name="tt")
    expected = pd.Series([False, True, False], index=[2, 4, 5], name="tt")
    result = series.isnull()
    assert type(result) is pd.Series
    assert_series_equal(result, expected)
    assert_series_equal(series.isna(), expected)
    assert_series_equal(series.notnull(), ~expected)
    assert_series_equal(series.notna(), ~expected)
# Any / all
def test_any_all():
    """any/all treat empty geometries as falsy and real geometries as truthy."""
    empty = GeometryCollection([])
    point = Point(1, 1)
    cases = [
        ([empty, point], False, True),
        ([point, point], True, True),
        ([empty, empty], False, False),
    ]
    for geoms, expect_all, expect_any in cases:
        series = GeoSeries(geoms)
        assert bool(series.all()) == expect_all
        assert bool(series.any()) == expect_any
# Groupby / algos
def test_sort_values():
    """sort_values orders geometries deterministically in both directions."""
    series = GeoSeries([Point(0, 0), Point(2, 2), Point(0, 2)])
    assert series.sort_values().index.tolist() == [0, 2, 1]
    assert series.sort_values(ascending=False).index.tolist() == [1, 2, 0]
    # an empty geoseries round-trips unchanged
    empty = series.iloc[:0]
    assert_geoseries_equal(empty.sort_values(), empty)
def test_sort_values_empty_missing():
    """Sort order with empty and missing geometries: empty sorts before
    non-empty; NA placement follows ``na_position``."""
    s = GeoSeries([Point(0, 0), None, Point(), Point(1, 1)])
    # default: NA sorts last, empty first
    res = s.sort_values()
    assert res.index.tolist() == [2, 0, 3, 1]
    # descending: NA sorts last, empty last
    res = s.sort_values(ascending=False)
    assert res.index.tolist() == [3, 0, 2, 1]
    # NAs first, empty first after NAs
    res = s.sort_values(na_position="first")
    assert res.index.tolist() == [1, 2, 0, 3]
    # NAs first, descending with empty last
    res = s.sort_values(ascending=False, na_position="first")
    assert res.index.tolist() == [1, 3, 0, 2]
    # all missing / empty
    s = GeoSeries([None, None, None])
    res = s.sort_values()
    assert res.index.tolist() == [0, 1, 2]
    s = GeoSeries([Point(), Point(), Point()])
    res = s.sort_values()
    assert res.index.tolist() == [0, 1, 2]
    s = GeoSeries([Point(), None, Point()])
    res = s.sort_values()
    assert res.index.tolist() == [0, 2, 1]
def test_unique():
    """unique collapses duplicate geometries, preserving first-seen order."""
    series = GeoSeries([Point(0, 0), Point(0, 0), Point(2, 2)])
    expected = from_shapely([Point(0, 0), Point(2, 2)])
    # TODO should have specialized GeometryArray assert method
    assert_array_equal(series.unique(), expected)
def pd14_compat_index(index):
    """Wrap *index* in a GeometryArray on pandas >= 1.4, else pass through."""
    return from_shapely(index) if compat.PANDAS_GE_14 else index
def test_value_counts():
    """value_counts groups equal geometries together; the CRS is not kept on
    the resulting index."""
    # each object is considered unique
    s = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)])
    res = s.value_counts()
    if compat.PANDAS_GE_20:
        name = "count"
    else:
        name = None
    exp = pd.Series(
        [2, 1], index=pd14_compat_index([Point(0, 0), Point(1, 1)]), name=name
    )
    assert_series_equal(res, exp)
    # Check crs doesn't make a difference - note it is not kept in output index anyway
    s2 = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)], crs="EPSG:4326")
    res2 = s2.value_counts()
    assert_series_equal(res2, exp)
    if compat.PANDAS_GE_14:
        # TODO should/ can we fix CRS being lost
        assert s2.value_counts().index.array.crs is None
    # check mixed geometry
    s3 = GeoSeries([Point(0, 0), LineString([[1, 1], [2, 2]]), Point(0, 0)])
    res3 = s3.value_counts()
    index = pd14_compat_index([Point(0, 0), LineString([[1, 1], [2, 2]])])
    exp3 = pd.Series([2, 1], index=index, name=name)
    assert_series_equal(res3, exp3)
    # check None is handled
    s4 = GeoSeries([Point(0, 0), None, Point(0, 0)])
    res4 = s4.value_counts(dropna=True)
    exp4_dropna = pd.Series([2], index=pd14_compat_index([Point(0, 0)]), name=name)
    assert_series_equal(res4, exp4_dropna)
    exp4_keepna = pd.Series(
        [2, 1], index=pd14_compat_index([Point(0, 0), None]), name=name
    )
    res4_keepna = s4.value_counts(dropna=False)
    assert_series_equal(res4_keepna, exp4_keepna)
@pytest.mark.xfail(strict=False)
def test_drop_duplicates_series():
    """drop_duplicates collapses identical geometries.

    xfail(strict=False): duplicated does not yet use EA machinery
    (pandas-dev/pandas#27264) and relies on unstable hashing of unhashable
    objects in a numpy array, giving a flaky test (pandas-dev/pandas#27035).
    """
    duplicates = GeoSeries([Point(0, 0), Point(0, 0)])
    assert len(duplicates.drop_duplicates()) == 1
@pytest.mark.xfail(strict=False)
def test_drop_duplicates_frame():
    """drop_duplicates(subset='geometry') deduplicates by geometry only."""
    # duplicated does not yet use EA machinery, see above
    n_rows = 3
    frame = GeoDataFrame(
        {
            "geometry": [Point(0, 0) for _ in range(n_rows)],
            "value1": range(n_rows),
        }
    )
    assert len(frame.drop_duplicates(subset="geometry")) == 1
    # full-row dedup keeps everything because value1 differs per row
    assert len(frame.drop_duplicates()) == n_rows
def test_groupby(df):
    """groupby counts include geometry, reductions skip it, and applying on
    the geometry column returns a GeoSeries only for geometry results."""
    # counts work fine
    res = df.groupby("value2").count()
    exp = pd.DataFrame(
        {"geometry": [2, 1], "value1": [2, 1], "value2": [1, 2]}
    ).set_index("value2")
    assert_frame_equal(res, exp)
    # reductions ignore geometry column
    if not compat.PANDAS_GE_20:
        res = df.groupby("value2").sum()
    else:
        res = df.groupby("value2").sum(numeric_only=True)
    exp = pd.DataFrame({"value1": [2, 1], "value2": [1, 2]}, dtype="int64").set_index(
        "value2"
    )
    assert_frame_equal(res, exp)
    # applying on the geometry column
    res = df.groupby("value2")["geometry"].apply(lambda x: x.union_all())
    exp = GeoSeries(
        [shapely.geometry.MultiPoint([(0, 0), (2, 2)]), Point(1, 1)],
        index=pd.Index([1, 2], name="value2"),
        name="geometry",
    )
    assert_series_equal(res, exp)
    # apply on geometry column not resulting in new geometry
    res = df.groupby("value2")["geometry"].apply(lambda x: x.union_all().area)
    exp = pd.Series([0.0, 0.0], index=pd.Index([1, 2], name="value2"), name="geometry")
    assert_series_equal(res, exp)
def test_groupby_groups(df):
    """get_group returns a GeoDataFrame containing the original rows."""
    grouped = df.groupby("value2")
    group = grouped.get_group(1)
    assert isinstance(group, GeoDataFrame)
    assert_frame_equal(group, df.loc[[0, 2]])
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
@pytest.mark.parametrize("geometry_name", ["geometry", "geom"])
def test_groupby_metadata(crs, geometry_name):
    """Groups passed to apply keep GeoDataFrame class, CRS and geometry name.

    https://github.com/geopandas/geopandas/issues/2294
    """
    if crs and not compat.HAS_PYPROJ:
        pytest.skip("requires pyproj")
    df = GeoDataFrame(
        {
            geometry_name: [Point(0, 0), Point(1, 1), Point(0, 0)],
            "value1": np.arange(3, dtype="int64"),
            "value2": np.array([1, 2, 1], dtype="int64"),
        },
        crs=crs,
        geometry=geometry_name,
    )
    kwargs = {}
    if compat.PANDAS_GE_22:
        # pandas is deprecating that the group key is present as column in the
        # dataframe passed to `func`. To suppress this warning, it introduced
        # a new include_groups keyword
        kwargs = dict(include_groups=False)
    # dummy test asserting we can access the crs
    def func(group):
        assert isinstance(group, GeoDataFrame)
        assert group.crs == crs
    df.groupby("value2").apply(func, **kwargs)
    # selecting the non-group columns -> no need to pass the keyword
    if (
        compat.PANDAS_GE_22
        or (compat.PANDAS_GE_20 and geometry_name == "geometry")
        or not compat.PANDAS_GE_20
    ):
        df.groupby("value2")[[geometry_name, "value1"]].apply(func)
    else:
        # https://github.com/geopandas/geopandas/pull/2966#issuecomment-1878816712
        # with pandas 2.0 and 2.1 with geom col != geometry this is failing
        with pytest.raises(AttributeError):
            df.groupby("value2")[[geometry_name, "value1"]].apply(func)
    # actual test with functionality
    res = df.groupby("value2").apply(
        lambda x: geopandas.sjoin(x, x[[geometry_name, "value1"]], how="inner"),
        **kwargs,
    )
    expected = (
        df.take([0, 0, 2, 2, 1])
        .set_index("value2", drop=compat.PANDAS_GE_22, append=True)
        .swaplevel()
        .rename(columns={"value1": "value1_left"})
        .assign(value1_right=[0, 2, 0, 2, 1])
    )
    assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)
def test_apply(s):
    """apply preserves GeoSeries only when the function returns geometries."""
    # function that returns geometry preserves GeoSeries class
    def geom_func(geom):
        assert isinstance(geom, Point)
        return geom
    result = s.apply(geom_func)
    assert isinstance(result, GeoSeries)
    assert_geoseries_equal(result, s)
    # function that returns non-geometry results in Series
    def numeric_func(geom):
        assert isinstance(geom, Point)
        return geom.x
    result = s.apply(numeric_func)
    assert not isinstance(result, GeoSeries)
    assert_series_equal(result, pd.Series([0.0, 1.0, 2.0]))
def test_apply_loc_len1(df):
    """apply works on a length-1 loc selection.

    Guards against a pandas bug with inconsistent Block ndim,
    https://github.com/geopandas/geopandas/issues/1078
    """
    single_row = df.loc[[0], "geometry"]
    applied = single_row.apply(lambda geom: geom.is_empty)
    np.testing.assert_allclose(applied, single_row.is_empty)
@pytest.mark.skipif(compat.PANDAS_GE_30, reason="convert_dtype is removed in pandas 3")
def test_apply_convert_dtypes_keyword(s):
    """The convert_dtype keyword is still accepted (with a deprecation
    warning on pandas >= 2.1, silently before that)."""
    # ensure the convert_dtypes keyword is accepted
    if not compat.PANDAS_GE_21:
        recorder = warnings.catch_warnings(record=True)
    else:
        recorder = pytest.warns()
    with recorder as record:
        res = s.apply(lambda x: x, convert_dtype=True, args=())
    assert_geoseries_equal(res, s)
    if compat.PANDAS_GE_21:
        assert len(record) == 1
        assert "the convert_dtype parameter" in str(record[0].message)
    else:
        assert len(record) == 0
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
def test_apply_no_geometry_result(df, crs):
    """apply whose result has no geometry downgrades to a plain DataFrame."""
    if crs:
        if not compat.HAS_PYPROJ:
            pytest.skip("requires pyproj")
        df = df.set_crs(crs)
    result = df.apply(lambda col: col.astype(str), axis=0)
    assert type(result) is pd.DataFrame
    expected = df.astype(str)
    assert_frame_equal(result, expected)
    result = df.apply(lambda col: col.astype(str), axis=1)
    assert type(result) is pd.DataFrame
    assert_frame_equal(result, expected)
def test_apply_preserves_geom_col_name(df):
    """Column-wise apply keeps a custom geometry column name."""
    renamed = df.rename_geometry("geom")
    applied = renamed.apply(lambda col: col, axis=0)
    assert applied.geometry.name == "geom"
def test_df_apply_returning_series(df):
    """Row-wise apply returning scalars produces correctly-typed Series."""
    # https://github.com/geopandas/geopandas/issues/2283
    result = df.apply(lambda row: row.geometry, axis=1)
    assert_geoseries_equal(result, df.geometry, check_crs=False)
    result = df.apply(lambda row: row.value1, axis=1)
    assert_series_equal(result, df["value1"].rename(None))
    # https://github.com/geopandas/geopandas/issues/2480
    result = df.apply(lambda x: float("NaN"), axis=1)
    assert result.dtype == "float64"
    # assert list of nones is not promoted to GeometryDtype
    result = df.apply(lambda x: None, axis=1)
    assert result.dtype == "object"
    # https://github.com/geopandas/geopandas/issues/2889
    # contrived case such that `from_shapely` receives an array of geodataframes
    res = df.apply(lambda row: df.geometry.to_frame(), axis=1)
    assert res.dtype == "object"
def test_df_apply_geometry_dtypes(df):
    """Column-wise apply passes GeoSeries for geometry columns and plain
    Series for the others.

    https://github.com/geopandas/geopandas/issues/1852
    """
    apply_types = []
    def get_dtypes(srs):
        apply_types.append((srs.name, type(srs)))
    df["geom2"] = df.geometry
    df.apply(get_dtypes)
    expected = [
        ("geometry", GeoSeries),
        ("value1", pd.Series),
        ("value2", pd.Series),
        ("geom2", GeoSeries),
    ]
    assert apply_types == expected
def test_pivot(df):
    """pivot (which builds a column MultiIndex) works on a GeoDataFrame.

    https://github.com/geopandas/geopandas/issues/2057
    """
    pivoted = df.pivot(columns="value1")
    expected = GeoDataFrame(pd.DataFrame(df).pivot(columns="value1"))
    assert_geodataframe_equal(pivoted, expected)
def test_preserve_attrs(df):
    """DataFrame.attrs survive indexing operations and methods.

    https://github.com/geopandas/geopandas/issues/1654
    """
    df.attrs["name"] = "my_name"
    attrs = {"name": "my_name"}
    assert df.attrs == attrs
    # preserve attrs in indexing operations
    # BUG FIX: the loop previously asserted on ``df`` instead of ``subset``,
    # so the derived objects were never actually checked.
    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
        assert subset.attrs == attrs
    # preserve attrs in methods
    df2 = df.reset_index()
    assert df2.attrs == attrs
    # https://github.com/geopandas/geopandas/issues/1875
    df3 = df2.explode(index_parts=True)
    assert df3.attrs == attrs
def test_preserve_flags(df):
    """allows_duplicate_labels=False survives indexing/methods and is
    enforced by operations that would introduce duplicate labels.

    https://github.com/geopandas/geopandas/issues/1654
    """
    df = df.set_flags(allows_duplicate_labels=False)
    assert df.flags.allows_duplicate_labels is False
    # preserve flags in indexing operations
    # BUG FIX: the loop previously asserted on ``df`` instead of ``subset``,
    # so the derived objects were never actually checked.
    for subset in [df[:2], df[df["value1"] > 2], df[["value2", "geometry"]]]:
        assert subset.flags.allows_duplicate_labels is False
    # preserve flags in methods
    df2 = df.reset_index()
    assert df2.flags.allows_duplicate_labels is False
    # it is honored for operations that introduce duplicate labels
    with pytest.raises(ValueError):
        df.reindex([0, 0, 1])
    with pytest.raises(ValueError):
        df[["value1", "value1", "geometry"]]
    with pytest.raises(ValueError):
        pd.concat([df, df])
def test_ufunc():
    """A shapely ufunc applied to a GeoSeries wraps back into a GeoSeries."""
    # this is calling a shapely ufunc, but we currently rely on pandas' implementation
    # of `__array_ufunc__` to wrap the result back into a GeoSeries
    ser = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
    result = shapely.buffer(ser, 2)
    assert isinstance(result, GeoSeries)
    # ensure the result is still writeable
    # (https://github.com/geopandas/geopandas/issues/3178)
    assert result.array._data.flags.writeable
    result.loc[0] = Point(10, 10)
    assert result.iloc[0] == Point(10, 10)

View File

@@ -1,51 +0,0 @@
from geopandas.tools._show_versions import (
_get_C_info,
_get_deps_info,
_get_sys_info,
show_versions,
)
def test_get_sys_info():
    """_get_sys_info reports interpreter and platform details."""
    info = _get_sys_info()
    for key in ("python", "executable", "machine"):
        assert key in info
def test_get_c_info():
    """_get_C_info reports GEOS/GDAL/PROJ versions and data directories."""
    info = _get_C_info()
    expected_keys = (
        "GEOS",
        "GEOS lib",
        "GDAL",
        "GDAL data dir",
        "PROJ",
        "PROJ data dir",
    )
    for key in expected_keys:
        assert key in info
def test_get_deps_info():
    """_get_deps_info covers core and optional dependencies."""
    info = _get_deps_info()
    expected_deps = (
        "geopandas",
        "pandas",
        "fiona",
        "numpy",
        "shapely",
        "pyproj",
        "matplotlib",
        "mapclassify",
        "geopy",
        "psycopg",
        "psycopg2",
        "geoalchemy2",
    )
    for dep in expected_deps:
        assert dep in info
def test_show_versions(capsys):
    """show_versions prints system, C-library and dependency info to stdout."""
    show_versions()
    captured = capsys.readouterr()
    for fragment in ("python", "GEOS", "geopandas"):
        assert fragment in captured.out

View File

@@ -1,959 +0,0 @@
from math import sqrt
import numpy as np
import shapely
from shapely.geometry import (
GeometryCollection,
LineString,
MultiPolygon,
Point,
Polygon,
box,
)
import geopandas
from geopandas import GeoDataFrame, GeoSeries, read_file
from geopandas import _compat as compat
import pytest
from numpy.testing import assert_array_equal
class TestSeriesSindex:
    """Lazy construction and invalidation of the GeoSeries spatial index."""
    def test_has_sindex(self):
        """Test the has_sindex method."""
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        d = GeoDataFrame({"geom": [t1, t2]}, geometry="geom")
        assert not d.has_sindex
        d.sindex
        assert d.has_sindex
        # clearing the cached index resets the flag until the next access
        d.geometry.values._sindex = None
        assert not d.has_sindex
        d.sindex
        assert d.has_sindex
        s = GeoSeries([t1, t2])
        assert not s.has_sindex
        s.sindex
        assert s.has_sindex
        s.values._sindex = None
        assert not s.has_sindex
        s.sindex
        assert s.has_sindex
    def test_empty_geoseries(self):
        """Tests creating a spatial index from an empty GeoSeries."""
        s = GeoSeries(dtype=object)
        # an empty tree is falsy and has zero length
        assert not s.sindex
        assert len(s.sindex) == 0
    def test_point(self):
        """A single point is indexed and hit only by overlapping bounds."""
        s = GeoSeries([Point(0, 0)])
        assert s.sindex.size == 1
        hits = s.sindex.intersection((-1, -1, 1, 1))
        assert len(list(hits)) == 1
        hits = s.sindex.intersection((-2, -2, -1, -1))
        assert len(list(hits)) == 0
    def test_empty_point(self):
        """Tests that a single empty Point results in an empty tree."""
        s = GeoSeries([Point()])
        assert not s.sindex
        assert len(s.sindex) == 0
    def test_polygons(self):
        """All polygons end up in the tree."""
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        s = GeoSeries([t1, t2, sq])
        assert s.sindex.size == 3
    @pytest.mark.filterwarnings("ignore:The series.append method is deprecated")
    @pytest.mark.skipif(compat.PANDAS_GE_20, reason="append removed in pandas 2.0")
    def test_polygons_append(self):
        """Appending another series yields an index covering all rows."""
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        s = GeoSeries([t1, t2, sq])
        t = GeoSeries([t1, t2, sq], [3, 4, 5])
        s = s.append(t)
        assert len(s) == 6
        assert s.sindex.size == 6
    def test_lazy_build(self):
        """The index is only built on first access."""
        s = GeoSeries([Point(0, 0)])
        assert s.values._sindex is None
        assert s.sindex.size == 1
        assert s.values._sindex is not None
    def test_rebuild_on_item_change(self):
        """Mutating an element invalidates the cached index."""
        s = GeoSeries([Point(0, 0)])
        original_index = s.sindex
        s.iloc[0] = Point(0, 0)
        assert s.sindex is not original_index
    def test_rebuild_on_slice(self):
        """Slicing rebuilds the index unless all rows are kept in order."""
        s = GeoSeries([Point(0, 0), Point(0, 0)])
        original_index = s.sindex
        # Select a couple of rows
        sliced = s.iloc[:1]
        assert sliced.sindex is not original_index
        # Select all rows
        sliced = s.iloc[:]
        assert sliced.sindex is original_index
        # Select all rows and flip
        sliced = s.iloc[::-1]
        assert sliced.sindex is not original_index
class TestFrameSindex:
    """Spatial index caching and invalidation behavior on GeoDataFrame."""
    def setup_method(self):
        # five diagonal points plus two plain value columns
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "geom": [Point(x, y) for x, y in zip(range(5), range(5))],
        }
        self.df = GeoDataFrame(data, geometry="geom")
    def test_sindex(self):
        """Bounds query returns the expected row positions."""
        self.df.crs = "epsg:4326"
        assert self.df.sindex.size == 5
        hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4)))
        assert len(hits) == 2
        assert hits[0] == 3
    def test_lazy_build(self):
        """The index is only constructed on first access."""
        assert self.df.geometry.values._sindex is None
        assert self.df.sindex.size == 5
        assert self.df.geometry.values._sindex is not None
    def test_sindex_rebuild_on_set_geometry(self):
        """Replacing the geometry column invalidates the cached index."""
        # First build the sindex
        assert self.df.sindex is not None
        original_index = self.df.sindex
        self.df.set_geometry(
            [Point(x, y) for x, y in zip(range(5, 10), range(5, 10))], inplace=True
        )
        assert self.df.sindex is not original_index
    def test_rebuild_on_row_slice(self):
        """Row slicing rebuilds the index unless all rows are kept in order."""
        # Select a subset of rows rebuilds
        original_index = self.df.sindex
        sliced = self.df.iloc[:1]
        assert sliced.sindex is not original_index
        # Slicing all does not rebuild
        original_index = self.df.sindex
        sliced = self.df.iloc[:]
        assert sliced.sindex is original_index
        # Re-ordering rebuilds
        sliced = self.df.iloc[::-1]
        assert sliced.sindex is not original_index
    def test_rebuild_on_single_col_selection(self):
        """Selecting a single column should not rebuild the spatial index."""
        # Selecting geometry column preserves the index
        original_index = self.df.sindex
        geometry_col = self.df["geom"]
        assert geometry_col.sindex is original_index
        geometry_col = self.df.geometry
        assert geometry_col.sindex is original_index
    def test_rebuild_on_multiple_col_selection(self):
        """Selecting a subset of columns preserves the index."""
        original_index = self.df.sindex
        # Selecting a subset of columns preserves the index for pandas < 2.0
        # with pandas 2.0, the column is now copied, losing the index. But
        # with pandas >= 3.0 and Copy-on-Write this is preserved again
        subset1 = self.df[["geom", "A"]]
        if compat.PANDAS_GE_20 and not compat.PANDAS_GE_30:
            assert subset1.sindex is not original_index
        else:
            assert subset1.sindex is original_index
        subset2 = self.df[["A", "geom"]]
        if compat.PANDAS_GE_20 and not compat.PANDAS_GE_30:
            assert subset2.sindex is not original_index
        else:
            assert subset2.sindex is original_index
    def test_rebuild_on_update_inplace(self):
        """In-place row reordering invalidates the cached index."""
        gdf = self.df.copy()
        old_sindex = gdf.sindex
        # sorting in place
        gdf.sort_values("A", ascending=False, inplace=True)
        # spatial index should be invalidated
        assert not gdf.has_sindex
        new_sindex = gdf.sindex
        # and should be different
        assert new_sindex is not old_sindex
        # sorting should still have happened though
        assert gdf.index.tolist() == [4, 3, 2, 1, 0]
    def test_update_inplace_no_rebuild(self):
        """In-place changes not touching geometry keep the cached index."""
        gdf = self.df.copy()
        old_sindex = gdf.sindex
        gdf.rename(columns={"A": "AA"}, inplace=True)
        # a rename shouldn't invalidate the index
        assert gdf.has_sindex
        # and the "new" should be the same
        new_sindex = gdf.sindex
        assert old_sindex is new_sindex
# Skip to accommodate Shapely geometries being unhashable # TODO unskip?
@pytest.mark.skip
@pytest.mark.usefixtures("_setup_class_nybb_filename")
class TestJoinSindex:
    """Spatial-index hits on filtered views and merged frames of the NYBB data."""
    def setup_method(self):
        # self.nybb_filename is provided by the _setup_class_nybb_filename fixture
        self.boros = read_file(self.nybb_filename)
    def test_merge_geo(self):
        """Index hits follow the frame they were built from, incl. after merge."""
        # First check that we get hits from the boros frame.
        tree = self.boros.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        res = [self.boros.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Bronx", "Queens"]
        # Check that we only get the Bronx from this view.
        first = self.boros[self.boros["BoroCode"] < 3]
        tree = first.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        res = [first.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Bronx"]
        # Check that we only get Queens from this view.
        second = self.boros[self.boros["BoroCode"] >= 3]
        tree = second.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        # BUG FIX: a stray trailing comma previously wrapped this list in a
        # one-element tuple, so the equality assert below could never pass
        # (hidden because the class is skipped).
        res = [second.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Queens"]
        # Get both the Bronx and Queens again.
        merged = first.merge(second, how="outer")
        assert len(merged) == 5
        assert merged.sindex.size == 5
        tree = merged.sindex
        hits = tree.intersection((1012821.80, 229228.26))
        res = [merged.iloc[hit]["BoroName"] for hit in hits]
        assert res == ["Bronx", "Queens"]
class TestShapelyInterface:
def setup_method(self):
data = {
"geom": [Point(x, y) for x, y in zip(range(5), range(5))]
+ [box(10, 10, 20, 20)] # include a box geometry
}
self.df = GeoDataFrame(data, geometry="geom")
self.expected_size = len(data["geom"])
# --------------------------- `intersection` tests -------------------------- #
@pytest.mark.parametrize(
"test_geom, expected",
(
((-1, -1, -0.5, -0.5), []),
((-0.5, -0.5, 0.5, 0.5), [0]),
((0, 0, 1, 1), [0, 1]),
((0, 0), [0]),
),
)
def test_intersection_bounds_tuple(self, test_geom, expected):
"""Tests the `intersection` method with valid inputs."""
res = list(self.df.sindex.intersection(test_geom))
assert_array_equal(res, expected)
    @pytest.mark.parametrize("test_geom", ((-1, -1, -0.5), -0.5, None, Point(0, 0)))
    def test_intersection_invalid_bounds_tuple(self, test_geom):
        """Tests the `intersection` method with invalid inputs.

        A 3-tuple, a scalar, None and a shapely geometry must all be rejected
        with a TypeError.
        """
        with pytest.raises(TypeError):
            # we raise a useful TypeError
            self.df.sindex.intersection(test_geom)
# ------------------------------ `query` tests ------------------------------ #
@pytest.mark.parametrize(
"predicate, test_geom, expected",
(
(None, box(-1, -1, -0.5, -0.5), []), # bbox does not intersect
(None, box(-0.5, -0.5, 0.5, 0.5), [0]), # bbox intersects
(None, box(0, 0, 1, 1), [0, 1]), # bbox intersects multiple
(
None,
LineString([(0, 1), (1, 0)]),
[0, 1],
), # bbox intersects but not geometry
("intersects", box(-1, -1, -0.5, -0.5), []), # bbox does not intersect
(
"intersects",
box(-0.5, -0.5, 0.5, 0.5),
[0],
), # bbox and geometry intersect
(
"intersects",
box(0, 0, 1, 1),
[0, 1],
), # bbox and geometry intersect multiple
(
"intersects",
LineString([(0, 1), (1, 0)]),
[],
), # bbox intersects but not geometry
("within", box(0.25, 0.28, 0.75, 0.75), []), # does not intersect
("within", box(0, 0, 10, 10), []), # intersects but is not within
("within", box(11, 11, 12, 12), [5]), # intersects and is within
("within", LineString([(0, 1), (1, 0)]), []), # intersects but not within
("contains", box(0, 0, 1, 1), []), # intersects but does not contain
("contains", box(0, 0, 1.001, 1.001), [1]), # intersects and contains
("contains", box(0.5, 0.5, 1.5, 1.5), [1]), # intersects and contains
("contains", box(-1, -1, 2, 2), [0, 1]), # intersects and contains multiple
(
"contains",
LineString([(0, 1), (1, 0)]),
[],
), # intersects but not contains
("touches", box(-1, -1, 0, 0), [0]), # bbox intersects and touches
(
"touches",
box(-0.5, -0.5, 1.5, 1.5),
[],
), # bbox intersects but geom does not touch
(
"contains",
box(10, 10, 20, 20),
[5],
), # contains but does not contains_properly
(
"covers",
box(-0.5, -0.5, 1, 1),
[0, 1],
), # covers (0, 0) and (1, 1)
(
"covers",
box(0.001, 0.001, 0.99, 0.99),
[],
), # does not cover any
(
"covers",
box(0, 0, 1, 1),
[0, 1],
), # covers but does not contain
(
"contains_properly",
box(0, 0, 1, 1),
[],
), # intersects but does not contain
(
"contains_properly",
box(0, 0, 1.001, 1.001),
[1],
), # intersects 2 and contains 1
(
"contains_properly",
box(0.5, 0.5, 1.001, 1.001),
[1],
), # intersects 1 and contains 1
(
"contains_properly",
box(0.5, 0.5, 1.5, 1.5),
[1],
), # intersects and contains
(
"contains_properly",
box(-1, -1, 2, 2),
[0, 1],
), # intersects and contains multiple
(
"contains_properly",
box(10, 10, 20, 20),
[],
), # contains but does not contains_properly
),
)
def test_query(self, predicate, test_geom, expected):
"""Tests the `query` method with valid inputs and valid predicates."""
res = self.df.sindex.query(test_geom, predicate=predicate)
assert_array_equal(res, expected)
    def test_query_invalid_geometry(self):
        """Tests that `query` rejects a non-geometry input (a plain string)."""
        with pytest.raises(TypeError):
            self.df.sindex.query("notavalidgeom")
@pytest.mark.skipif(not compat.GEOS_GE_310, reason="Requires GEOS 3.10")
@pytest.mark.parametrize(
"distance, test_geom, expected",
(
# bounds don't intersect and not within distance=0
(
0,
box(9.0, 9.0, 9.9, 9.9),
[],
),
# bounds don't intersect but is within distance=1
(
1,
box(9.0, 9.0, 9.9, 9.9),
[5],
),
# within 1-D absolute distance in both axes, but not euclidean distance
(
0.5,
Point(0.5, 0.5),
[],
),
# same as before but within euclidean distance
(
sqrt(2 * 0.5**2) + 1e-9,
Point(0.5, 0.5),
[0, 1],
),
# less than euclidean distance between points, multi-object
(
sqrt(2) - 1e-9,
[
Polygon([(0, 0), (1, 0), (1, 1)]),
Polygon([(1, 1), (2, 1), (2, 2)]),
], # multi-object test
[[0, 0, 1, 1], [0, 1, 1, 2]],
),
# more than euclidean distance between points, multi-object
(
sqrt(2) + 1e-9,
[
Polygon([(0, 0), (1, 0), (1, 1)]),
Polygon([(1, 1), (2, 1), (2, 2)]),
],
[[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]],
),
# distance is array-like, broadcastable to geometry
(
[2, 10],
[Point(0.5, 0.5), Point(1, 1)],
[[0, 0, 1, 1, 1, 1, 1], [0, 1, 0, 1, 2, 3, 4]],
),
),
)
def test_query_dwithin(self, distance, test_geom, expected):
"""Tests the `query` method with predicates that require keyword arguments."""
res = self.df.sindex.query(test_geom, predicate="dwithin", distance=distance)
assert_array_equal(res, expected)
    @pytest.mark.skipif(not compat.GEOS_GE_310, reason="Requires GEOS 3.10")
    def test_dwithin_no_distance(self):
        """`query` with predicate='dwithin' must raise when `distance`
        is omitted."""
        with pytest.raises(
            ValueError, match="'distance' parameter is required for 'dwithin' predicate"
        ):
            self.df.sindex.query(Point(0, 0), predicate="dwithin")
@pytest.mark.parametrize(
"predicate",
[
None,
"contains",
"contains_properly",
"covered_by",
"covers",
"crosses",
"intersects",
"overlaps",
"touches",
"within",
],
)
def test_query_distance_invalid(self, predicate):
"""Tests the `query` method with keyword arguments that are
invalid for certain predicates."""
msg = "'distance' parameter is only supported in combination with 'dwithin'"
with pytest.raises(ValueError, match=msg):
self.df.sindex.query(Point(0, 0), predicate=predicate, distance=0)
    @pytest.mark.skipif(
        compat.GEOS_GE_310, reason="Test for 'dwithin'-incompatible versions of GEOS"
    )
    def test_dwithin_requirements(self):
        """Tests that a ValueError is raised when trying to use dwithin with
        incompatible (pre-3.10) versions of GEOS.
        """
        with pytest.raises(
            ValueError, match="predicate = 'dwithin' requires GEOS >= 3.10.0"
        ):
            self.df.sindex.query(Point(0, 0), predicate="dwithin", distance=0)
@pytest.mark.parametrize(
"test_geom, expected_value",
[
(None, []),
(GeometryCollection(), []),
(Point(), []),
(MultiPolygon(), []),
(Polygon(), []),
],
)
def test_query_empty_geometry(self, test_geom, expected_value):
"""Tests the `query` method with empty geometry."""
res = self.df.sindex.query(test_geom)
assert_array_equal(res, expected_value)
    def test_query_invalid_predicate(self):
        """Tests that `query` rejects an unknown predicate name."""
        test_geom = box(-1, -1, -0.5, -0.5)
        with pytest.raises(ValueError):
            self.df.sindex.query(test_geom, predicate="test")
    @pytest.mark.parametrize(
        "sort, expected",
        (
            (True, [[0, 0, 0], [0, 1, 2]]),
            # False could be anything, at least we'll know if it changes
            (False, [[0, 0, 0], [0, 1, 2]]),
        ),
    )
    def test_query_sorting(self, sort, expected):
        """Check that results from `query` don't depend on the
        order of geometries.
        """
        # these geometries come from a reported issue:
        # https://github.com/geopandas/geopandas/issues/1337
        # there is no theoretical reason they were chosen
        test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
        tree_polys = GeoSeries(
            [
                Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
            ]
        )
        # NOTE(review): the parametrized ``expected`` (a bulk-style 2-row
        # result) is overwritten here — a single-geometry query returns a
        # flat 1-D index array. The parametrize entry is effectively unused;
        # consider removing it to avoid confusion.
        expected = [0, 1, 2]
        test_geo = test_polys.values[0]
        res = tree_polys.sindex.query(test_geo, sort=sort)
        # asserting the same elements
        assert sorted(res) == sorted(expected)
        # asserting the exact array can fail if sort=False
        try:
            assert_array_equal(res, expected)
        except AssertionError as e:
            if sort is False:
                pytest.xfail(
                    "rtree results are known to be unordered, see "
                    "https://github.com/geopandas/geopandas/issues/1337\n"
                    "Expected:\n {}\n".format(expected)
                    + "Got:\n {}\n".format(res.tolist())
                )
            raise e
# ------------------------- `query_bulk` tests -------------------------- #
@pytest.mark.parametrize(
"predicate, test_geom, expected",
(
(None, [(-1, -1, -0.5, -0.5)], [[], []]),
(None, [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
(None, [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
("intersects", [(-1, -1, -0.5, -0.5)], [[], []]),
("intersects", [(-0.5, -0.5, 0.5, 0.5)], [[0], [0]]),
("intersects", [(0, 0, 1, 1)], [[0, 0], [0, 1]]),
# only second geom intersects
("intersects", [(-1, -1, -0.5, -0.5), (-0.5, -0.5, 0.5, 0.5)], [[1], [0]]),
# both geoms intersect
(
"intersects",
[(-1, -1, 1, 1), (-0.5, -0.5, 0.5, 0.5)],
[[0, 0, 1], [0, 1, 0]],
),
("within", [(0.25, 0.28, 0.75, 0.75)], [[], []]), # does not intersect
("within", [(0, 0, 10, 10)], [[], []]), # intersects but is not within
("within", [(11, 11, 12, 12)], [[0], [5]]), # intersects and is within
(
"contains",
[(0, 0, 1, 1)],
[[], []],
), # intersects and covers, but does not contain
(
"contains",
[(0, 0, 1.001, 1.001)],
[[0], [1]],
), # intersects 2 and contains 1
(
"contains",
[(0.5, 0.5, 1.001, 1.001)],
[[0], [1]],
), # intersects 1 and contains 1
("contains", [(0.5, 0.5, 1.5, 1.5)], [[0], [1]]), # intersects and contains
(
"contains",
[(-1, -1, 2, 2)],
[[0, 0], [0, 1]],
), # intersects and contains multiple
(
"contains",
[(10, 10, 20, 20)],
[[0], [5]],
), # contains but does not contains_properly
("touches", [(-1, -1, 0, 0)], [[0], [0]]), # bbox intersects and touches
(
"touches",
[(-0.5, -0.5, 1.5, 1.5)],
[[], []],
), # bbox intersects but geom does not touch
(
"covers",
[(-0.5, -0.5, 1, 1)],
[[0, 0], [0, 1]],
), # covers (0, 0) and (1, 1)
(
"covers",
[(0.001, 0.001, 0.99, 0.99)],
[[], []],
), # does not cover any
(
"covers",
[(0, 0, 1, 1)],
[[0, 0], [0, 1]],
), # covers but does not contain
(
"contains_properly",
[(0, 0, 1, 1)],
[[], []],
), # intersects but does not contain
(
"contains_properly",
[(0, 0, 1.001, 1.001)],
[[0], [1]],
), # intersects 2 and contains 1
(
"contains_properly",
[(0.5, 0.5, 1.001, 1.001)],
[[0], [1]],
), # intersects 1 and contains 1
(
"contains_properly",
[(0.5, 0.5, 1.5, 1.5)],
[[0], [1]],
), # intersects and contains
(
"contains_properly",
[(-1, -1, 2, 2)],
[[0, 0], [0, 1]],
), # intersects and contains multiple
(
"contains_properly",
[(10, 10, 20, 20)],
[[], []],
), # contains but does not contains_properly
),
)
def test_query_bulk(self, predicate, test_geom, expected):
"""Tests the `query` method with valid
inputs and valid predicates.
"""
res = self.df.sindex.query(
[box(*geom) for geom in test_geom], predicate=predicate
)
assert_array_equal(res, expected)
@pytest.mark.parametrize(
"test_geoms, expected_value",
[
# single empty geometry
([GeometryCollection()], [[], []]),
# None should be skipped
([GeometryCollection(), None], [[], []]),
([None], [[], []]),
([None, box(-0.5, -0.5, 0.5, 0.5), None], [[1], [0]]),
],
)
def test_query_bulk_empty_geometry(self, test_geoms, expected_value):
"""Tests the `query` method with an empty geometries."""
res = self.df.sindex.query(test_geoms)
assert_array_equal(res, expected_value)
    def test_query_bulk_empty_input_array(self):
        """Tests the `query` method with an empty input array."""
        test_array = np.array([], dtype=object)
        # Result is still a 2-row (input index, tree index) structure.
        expected_value = [[], []]
        res = self.df.sindex.query(test_array)
        assert_array_equal(res, expected_value)
    def test_query_bulk_invalid_input_geometry(self):
        """
        Tests that `query` rejects a non-array, non-geometry input for the
        `geometry` parameter with a TypeError.
        """
        test_array = "notanarray"
        with pytest.raises(TypeError):
            self.df.sindex.query(test_array)
    def test_query_bulk_invalid_predicate(self):
        """Tests that a bulk `query` rejects an unknown predicate name."""
        test_geom_bounds = (-1, -1, -0.5, -0.5)
        test_predicate = "test"
        with pytest.raises(ValueError):
            self.df.sindex.query([box(*test_geom_bounds)], predicate=test_predicate)
@pytest.mark.parametrize(
"predicate, test_geom, expected",
(
(None, (-1, -1, -0.5, -0.5), [[], []]),
("intersects", (-1, -1, -0.5, -0.5), [[], []]),
("contains", (-1, -1, 1, 1), [[0], [0]]),
),
)
def test_query_bulk_input_type(self, predicate, test_geom, expected):
"""Tests that query can accept a GeoSeries, GeometryArray or
numpy array.
"""
# pass through GeoSeries to test input type
test_geom = geopandas.GeoSeries([box(*test_geom)], index=["0"])
# test GeoSeries
res = self.df.sindex.query(test_geom, predicate=predicate)
assert_array_equal(res, expected)
# test GeometryArray
res = self.df.sindex.query(test_geom.geometry, predicate=predicate)
assert_array_equal(res, expected)
res = self.df.sindex.query(test_geom.geometry.values, predicate=predicate)
assert_array_equal(res, expected)
# test numpy array
res = self.df.sindex.query(
test_geom.geometry.values.to_numpy(), predicate=predicate
)
assert_array_equal(res, expected)
res = self.df.sindex.query(
test_geom.geometry.values.to_numpy(), predicate=predicate
)
assert_array_equal(res, expected)
@pytest.mark.parametrize(
"sort, expected",
(
(True, [[0, 0, 0], [0, 1, 2]]),
# False could be anything, at least we'll know if it changes
(False, [[0, 0, 0], [0, 1, 2]]),
),
)
def test_query_bulk_sorting(self, sort, expected):
"""Check that results from `query` don't depend
on the order of geometries.
"""
# these geometries come from a reported issue:
# https://github.com/geopandas/geopandas/issues/1337
# there is no theoretical reason they were chosen
test_polys = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])])
tree_polys = GeoSeries(
[
Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
]
)
res = tree_polys.sindex.query(test_polys, sort=sort)
# asserting the same elements
assert sorted(res[0]) == sorted(expected[0])
assert sorted(res[1]) == sorted(expected[1])
# asserting the exact array can fail if sort=False
try:
assert_array_equal(res, expected)
except AssertionError as e:
if sort is False:
pytest.xfail(
"rtree results are known to be unordered, see "
"https://github.com/geopandas/geopandas/issues/1337\n"
"Expected:\n {}\n".format(expected)
+ "Got:\n {}\n".format(res.tolist())
)
raise e
# ------------------------- `nearest` tests ------------------------- #
@pytest.mark.parametrize("return_all", [True, False])
@pytest.mark.parametrize(
"geometry,expected",
[
([0.25, 0.25], [[0], [0]]),
([0.75, 0.75], [[0], [1]]),
],
)
def test_nearest_single(self, geometry, expected, return_all):
geoms = shapely.points(np.arange(10), np.arange(10))
df = geopandas.GeoDataFrame({"geometry": geoms})
p = Point(geometry)
res = df.sindex.nearest(p, return_all=return_all)
assert_array_equal(res, expected)
p = shapely.points(geometry)
res = df.sindex.nearest(p, return_all=return_all)
assert_array_equal(res, expected)
@pytest.mark.parametrize("return_all", [True, False])
@pytest.mark.parametrize(
"geometry,expected",
[
([(1, 1), (0, 0)], [[0, 1], [1, 0]]),
([(1, 1), (0.25, 1)], [[0, 1], [1, 1]]),
],
)
def test_nearest_multi(self, geometry, expected, return_all):
geoms = shapely.points(np.arange(10), np.arange(10))
df = geopandas.GeoDataFrame({"geometry": geoms})
ps = [Point(p) for p in geometry]
res = df.sindex.nearest(ps, return_all=return_all)
assert_array_equal(res, expected)
ps = shapely.points(geometry)
res = df.sindex.nearest(ps, return_all=return_all)
assert_array_equal(res, expected)
s = geopandas.GeoSeries(ps)
res = df.sindex.nearest(s, return_all=return_all)
assert_array_equal(res, expected)
x, y = zip(*geometry)
ga = geopandas.points_from_xy(x, y)
res = df.sindex.nearest(ga, return_all=return_all)
assert_array_equal(res, expected)
@pytest.mark.parametrize("return_all", [True, False])
@pytest.mark.parametrize(
"geometry,expected",
[
(None, [[], []]),
([None], [[], []]),
],
)
def test_nearest_none(self, geometry, expected, return_all):
geoms = shapely.points(np.arange(10), np.arange(10))
df = geopandas.GeoDataFrame({"geometry": geoms})
res = df.sindex.nearest(geometry, return_all=return_all)
assert_array_equal(res, expected)
@pytest.mark.parametrize("return_distance", [True, False])
@pytest.mark.parametrize(
"return_all,max_distance,expected",
[
(True, None, ([[0, 0, 1], [0, 1, 5]], [sqrt(0.5), sqrt(0.5), sqrt(50)])),
(False, None, ([[0, 1], [0, 5]], [sqrt(0.5), sqrt(50)])),
(True, 1, ([[0, 0], [0, 1]], [sqrt(0.5), sqrt(0.5)])),
(False, 1, ([[0], [0]], [sqrt(0.5)])),
],
)
def test_nearest_max_distance(
self, expected, max_distance, return_all, return_distance
):
geoms = shapely.points(np.arange(10), np.arange(10))
df = geopandas.GeoDataFrame({"geometry": geoms})
ps = [Point(0.5, 0.5), Point(0, 10)]
res = df.sindex.nearest(
ps,
return_all=return_all,
max_distance=max_distance,
return_distance=return_distance,
)
if return_distance:
assert_array_equal(res[0], expected[0])
assert_array_equal(res[1], expected[1])
else:
assert_array_equal(res, expected[0])
@pytest.mark.parametrize("return_distance", [True, False])
@pytest.mark.parametrize(
"return_all,max_distance,exclusive,expected",
[
(False, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
(False, None, True, ([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3]], 5 * [sqrt(2)])),
(True, None, False, ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], 5 * [0])),
(
True,
None,
True,
([[0, 1, 1, 2, 2, 3, 3, 4], [1, 0, 2, 1, 3, 2, 4, 3]], 8 * [sqrt(2)]),
),
(False, 1.1, True, ([[1, 2, 5], [5, 5, 1]], 3 * [1])),
(True, 1.1, True, ([[1, 2, 5, 5], [5, 5, 1, 2]], 4 * [1])),
],
)
def test_nearest_exclusive(
self, expected, max_distance, return_all, return_distance, exclusive
):
geoms = shapely.points(np.arange(5), np.arange(5))
if max_distance:
# add a non grid point
geoms = np.append(geoms, [Point(1, 2)])
df = geopandas.GeoDataFrame({"geometry": geoms})
ps = geoms
res = df.sindex.nearest(
ps,
return_all=return_all,
max_distance=max_distance,
return_distance=return_distance,
exclusive=exclusive,
)
if return_distance:
assert_array_equal(res[0], expected[0])
assert_array_equal(res[1], expected[1])
else:
assert_array_equal(res, expected[0])
    # --------------------------- misc tests ---------------------------- #
    def test_empty_tree_geometries(self):
        """Tests building sindex with interleaved empty geometries.

        The match for Point(1, 1) must keep its original positional index (3)
        even though None/empty entries precede it.
        """
        geoms = [Point(0, 0), None, Point(), Point(1, 1), Point()]
        df = geopandas.GeoDataFrame(geometry=geoms)
        assert df.sindex.query(Point(1, 1))[0] == 3
    def test_size(self):
        """Tests the `size` property against the number of input geometries."""
        assert self.df.sindex.size == self.expected_size
    def test_len(self):
        """Tests the `__len__` method of spatial indexes."""
        assert len(self.df.sindex) == self.expected_size
    def test_is_empty(self):
        """Tests the `is_empty` property.

        An empty series, an all-None series and an all-empty-point series
        each yield an empty index; a single real point does not.
        """
        # create empty tree
        empty = geopandas.GeoSeries([], dtype=object)
        assert empty.sindex.is_empty
        empty = geopandas.GeoSeries([None])
        assert empty.sindex.is_empty
        empty = geopandas.GeoSeries([Point()])
        assert empty.sindex.is_empty
        # create a non-empty tree
        non_empty = geopandas.GeoSeries([Point(0, 0)])
        assert not non_empty.sindex.is_empty
    @pytest.mark.parametrize(
        "predicate, expected_shape",
        [
            (None, (2, 471)),
            ("intersects", (2, 213)),
            ("within", (2, 213)),
            ("contains", (2, 0)),
            ("overlaps", (2, 0)),
            ("crosses", (2, 0)),
            ("touches", (2, 0)),
        ],
    )
    def test_integration_natural_earth(
        self, predicate, expected_shape, naturalearth_lowres, naturalearth_cities
    ):
        """Tests output sizes for the naturalearth datasets.

        Only the (2, n) shape of the result is asserted, not the matched
        indices. ``predicate`` is passed positionally as the second argument
        of `query`.
        """
        world = read_file(naturalearth_lowres)
        capitals = read_file(naturalearth_cities)
        res = world.sindex.query(capitals.geometry, predicate)
        assert res.shape == expected_shape

View File

@@ -1,186 +0,0 @@
import warnings
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from shapely.geometry import Point, Polygon
from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ
from geopandas.array import from_shapely
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
# Module-level fixtures shared by the assert_geo*_equal tests below.
# s1 and s2 contain the same two squares; s2's first ring starts at a
# different vertex, so the series are geometrically equal but not
# vertex-by-vertex identical.
s1 = GeoSeries(
    [
        Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
s2 = GeoSeries(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
# s3: the same geometries held in a *plain* pandas Series
s3 = Series(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
# a: the same geometries as a GeometryArray
a = from_shapely(
    [
        Polygon([(0, 2), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
s4 = Series(a)
df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1})
df2 = GeoDataFrame({"col1": [1, 2], "geometry": s2})
# NOTE(review): this immediately overwrites ``s4 = Series(a)`` above, leaving
# that assignment (and ``a``) effectively unused — looks like a leftover;
# confirm before removing.
s4 = s1.copy()
s4.array.crs = 4326
s5 = s2.copy()
s5.array.crs = 27700
# s6: first polygon differs from s2's (one vertex moved) — used to trigger
# "not equal" failures in test_geoseries.
s6 = GeoSeries(
    [
        Polygon([(0, 3), (0, 0), (2, 0), (2, 2)]),
        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)]),
    ]
)
# df4/df5: identical content with the geom2/geom3 columns in swapped order —
# used for the check_like (column-order-insensitive) comparisons.
df4 = GeoDataFrame(
    {"col1": [1, 2], "geometry": s1.copy(), "geom2": s4.copy(), "geom3": s5.copy()},
    crs=3857,
)
df5 = GeoDataFrame(
    {"col1": [1, 2], "geometry": s1.copy(), "geom3": s5.copy(), "geom2": s4.copy()},
    crs=3857,
)
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_geoseries():
    """GeoSeries equality: type/dtype relaxations and exact-vs-almost modes."""
    assert_geoseries_equal(s1, s2)
    assert_geoseries_equal(s1, s3, check_series_type=False, check_dtype=False)
    assert_geoseries_equal(s3, s2, check_series_type=False, check_dtype=False)
    assert_geoseries_equal(s1, s4, check_series_type=False)
    # s1 and s2 hold the same rings written from different start vertices, so
    # the coordinate-wise "almost equal" comparison must fail on element 0.
    with pytest.raises(AssertionError) as error:
        assert_geoseries_equal(s1, s2, check_less_precise=True)
    assert "1 out of 2 geometries are not almost equal" in str(error.value)
    assert "not almost equal: [0]" in str(error.value)
    # s6's first polygon genuinely differs from s2's.
    with pytest.raises(AssertionError) as error:
        assert_geoseries_equal(s2, s6, check_less_precise=False)
    assert "1 out of 2 geometries are not equal" in str(error.value)
    assert "not equal: [0]" in str(error.value)
def test_geodataframe():
    """GeoDataFrame equality: precision, column order, values, per-column CRS."""
    assert_geodataframe_equal(df1, df2)
    # coordinate-wise comparison fails for equal-but-reordered rings
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2, check_less_precise=True)
    # column order matters unless check_like=True
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2[["geometry", "col1"]])
    assert_geodataframe_equal(df1, df2[["geometry", "col1"]], check_like=True)
    # a differing attribute value is detected
    df3 = df2.copy()
    df3.loc[0, "col1"] = 10
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df3)
    # df4/df5 differ only in column order
    assert_geodataframe_equal(df5, df4, check_like=True)
    if HAS_PYPROJ:
        # changing one geometry column's CRS must break equality
        df5["geom2"] = df5.geom2.set_crs(3857, allow_override=True)
        with pytest.raises(AssertionError):
            assert_geodataframe_equal(df5, df4, check_like=True)
def test_equal_nans():
    """Series containing missing (NaN) geometries compare equal to copies."""
    s = GeoSeries([Point(0, 0), np.nan])
    assert_geoseries_equal(s, s.copy())
    assert_geoseries_equal(s, s.copy(), check_less_precise=True)
def test_no_crs():
    """crs=None and crs={} are treated as the same "no CRS" state."""
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs=None)
    df2 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs={})
    assert_geodataframe_equal(df1, df2)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not available")
def test_ignore_crs_mismatch():
    """check_crs=False must silence CRS mismatches without emitting warnings."""
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": s1.copy()}, crs="EPSG:4326")
    df2 = GeoDataFrame({"col1": [1, 2], "geometry": s1}, crs="EPSG:31370")
    with pytest.raises(AssertionError):
        assert_geodataframe_equal(df1, df2)
    # assert that with `check_crs=False` the assert passes, and also does not
    # generate any warning from comparing both geometries with different crs
    with warnings.catch_warnings(record=True) as record:
        assert_geodataframe_equal(df1, df2, check_crs=False)
    assert len(record) == 0
def test_almost_equal_but_not_equal():
    """A point offset by 1e-7 is "almost" equal but not exactly equal."""
    s_origin = GeoSeries([Point(0, 0)])
    s_almost_origin = GeoSeries([Point(0.0000001, 0)])
    assert_geoseries_equal(s_origin, s_almost_origin, check_less_precise=True)
    with pytest.raises(AssertionError):
        assert_geoseries_equal(s_origin, s_almost_origin)
def test_geodataframe_no_active_geometry_column():
    """Equality checks must work when no active geometry column is set."""

    def create_dataframe():
        # frame with two geometry columns; "geometry" is the active one
        gdf = GeoDataFrame({"value": [1, 2], "geometry": [Point(1, 1), Point(2, 2)]})
        gdf["geom2"] = GeoSeries([Point(3, 3), Point(4, 4)])
        return gdf

    # no active geometry column (None)
    df1 = create_dataframe()
    df1._geometry_column_name = None
    df2 = create_dataframe()
    df2._geometry_column_name = None
    assert_geodataframe_equal(df1, df2)
    # active geometry column ("geometry") not present
    df1 = create_dataframe()[["value", "geom2"]]
    df2 = create_dataframe()[["value", "geom2"]]
    assert_geodataframe_equal(df1, df2)
    # no geometry columns at all
    df1 = GeoDataFrame(create_dataframe()[["value"]])
    df2 = GeoDataFrame(create_dataframe()[["value"]])
    assert_geodataframe_equal(df1, df2)
def test_geodataframe_multiindex():
    """Equality checks must work with MultiIndex columns."""

    def create_dataframe():
        gdf = DataFrame([[Point(0, 0), Point(1, 1)], [Point(2, 2), Point(3, 3)]])
        gdf = GeoDataFrame(gdf.astype("geometry"))
        # two-level column index: ("geometry", 0) and ("geometry", 1)
        gdf.columns = pd.MultiIndex.from_product([["geometry"], [0, 1]])
        return gdf

    df1 = create_dataframe()
    df2 = create_dataframe()
    assert_geodataframe_equal(df1, df2)
    # same comparison with no active geometry column set
    df1 = create_dataframe()
    df1._geometry_column_name = None
    df2 = create_dataframe()
    df2._geometry_column_name = None
    assert_geodataframe_equal(df1, df2)

View File

@@ -1,85 +0,0 @@
from pandas import DataFrame, Series
from shapely.geometry import Point
from geopandas import GeoDataFrame, GeoSeries
class TestSeries:
    """Check that GeoSeries subsetting/selection operations preserve the
    GeoSeries type rather than degrading to a plain pandas Series."""

    def setup_method(self):
        N = self.N = 10
        r = 0.5
        # N points along the diagonal, and their r-buffers (polygons)
        self.pts = GeoSeries([Point(x, y) for x, y in zip(range(N), range(N))])
        self.polys = self.pts.buffer(r)

    def test_slice(self):
        assert type(self.pts[:2]) is GeoSeries
        assert type(self.pts[::2]) is GeoSeries
        assert type(self.polys[:2]) is GeoSeries

    def test_head(self):
        assert type(self.pts.head()) is GeoSeries

    def test_tail(self):
        assert type(self.pts.tail()) is GeoSeries

    def test_sort_index(self):
        assert type(self.pts.sort_index()) is GeoSeries

    def test_loc(self):
        assert type(self.pts.loc[5:]) is GeoSeries

    def test_iloc(self):
        assert type(self.pts.iloc[5:]) is GeoSeries

    def test_fancy(self):
        # boolean mask selecting every other row
        idx = (self.pts.index.to_series() % 2).astype(bool)
        assert type(self.pts[idx]) is GeoSeries

    def test_take(self):
        assert type(self.pts.take(list(range(0, self.N, 2)))) is GeoSeries

    def test_groupby(self):
        # every group yielded by groupby should itself be a GeoSeries
        for f, s in self.pts.groupby(lambda x: x % 2):
            assert type(s) is GeoSeries
class TestDataFrame:
    """Check that GeoDataFrame column selection and slicing return the
    expected pandas/geopandas types."""

    def setup_method(self):
        N = 10
        self.df = GeoDataFrame(
            [
                {"geometry": Point(x, y), "value1": x + y, "value2": x * y}
                for x, y in zip(range(N), range(N))
            ]
        )

    def test_geometry(self):
        assert type(self.df.geometry) is GeoSeries
        # still GeoSeries if different name
        df2 = GeoDataFrame(
            {
                "coords": [Point(x, y) for x, y in zip(range(5), range(5))],
                "nums": range(5),
            },
            geometry="coords",
        )
        assert type(df2.geometry) is GeoSeries
        assert type(df2["coords"]) is GeoSeries

    def test_nongeometry(self):
        # a non-geometry column is a plain pandas Series
        assert type(self.df["value1"]) is Series

    def test_geometry_multiple(self):
        # selecting geometry plus other columns keeps the GeoDataFrame type
        assert type(self.df[["geometry", "value1"]]) is GeoDataFrame

    def test_nongeometry_multiple(self):
        # selecting only non-geometry columns yields a plain DataFrame
        assert type(self.df[["value1", "value2"]]) is DataFrame

    def test_slice(self):
        assert type(self.df[:2]) is GeoDataFrame
        assert type(self.df[::2]) is GeoDataFrame

    def test_fancy(self):
        # boolean mask selecting every other row
        idx = (self.df.index.to_series() % 2).astype(bool)
        assert type(self.df[idx]) is GeoDataFrame

View File

@@ -1,151 +0,0 @@
import os.path
from pandas import Series
from geopandas import GeoDataFrame
from geopandas.testing import ( # noqa: F401
assert_geoseries_equal,
geom_almost_equals,
geom_equals,
)
HERE = os.path.abspath(os.path.dirname(__file__))
PACKAGE_DIR = os.path.dirname(os.path.dirname(HERE))
_TEST_DATA_DIR = os.path.join(PACKAGE_DIR, "geopandas", "tests", "data")
_NYBB = "zip://" + os.path.join(_TEST_DATA_DIR, "nybb_16a.zip")
_NATURALEARTH_CITIES = os.path.join(
_TEST_DATA_DIR, "naturalearth_cities", "naturalearth_cities.shp"
)
_NATURALEARTH_LOWRES = os.path.join(
_TEST_DATA_DIR, "naturalearth_lowres", "naturalearth_lowres.shp"
)
# mock not used here, but the import from here is used in other modules
try:
from unittest import mock
except ImportError:
import mock # noqa: F401
def validate_boro_df(df, case_sensitive=False):
    """Tests a GeoDataFrame that has been read in from the nybb dataset.

    Parameters
    ----------
    df : GeoDataFrame
        Frame expected to hold the five NYC boroughs as MultiPolygons.
    case_sensitive : bool, default False
        When False, column names are compared case-insensitively.
    """
    assert isinstance(df, GeoDataFrame)
    # Make sure all the columns are there and the geometries
    # were properly loaded as MultiPolygons
    assert len(df) == 5
    columns = ("BoroCode", "BoroName", "Shape_Leng", "Shape_Area")
    if case_sensitive:
        for col in columns:
            assert col in df.columns
    else:
        for col in columns:
            assert col.lower() in (dfcol.lower() for dfcol in df.columns)
    # missing geometries are tolerated; all present ones must be MultiPolygon
    assert Series(df.geometry.geom_type).dropna().eq("MultiPolygon").all()
def get_srid(df):
    """Return srid from `df.crs`.

    Falls back to 0 when the frame has no CRS, or the CRS carries no
    EPSG code.
    """
    crs = df.crs
    if crs is None:
        return 0
    return crs.to_epsg() or 0
def create_spatialite(con, df):
    """
    Populate `con` (a SpatiaLite-enabled connection) with the nybb table.

    Parameters
    ----------
    `con`: ``sqlite3.Connection``
    `df`: ``GeoDataFrame``
    """
    # Using the connection as a context manager runs the whole load in one
    # transaction (committed on success, rolled back on error).
    with con:
        geom_col = df.geometry.name
        srid = get_srid(df)
        con.execute(
            "CREATE TABLE IF NOT EXISTS nybb "
            "( ogc_fid INTEGER PRIMARY KEY"
            ", borocode INTEGER"
            ", boroname TEXT"
            ", shape_leng REAL"
            ", shape_area REAL"
            ")"
        )
        # Register the geometry column and build a spatial index through
        # the SpatiaLite SQL functions.
        con.execute(
            "SELECT AddGeometryColumn(?, ?, ?, ?)",
            ("nybb", geom_col, srid, df.geom_type.dropna().iat[0].upper()),
        )
        con.execute("SELECT CreateSpatialIndex(?, ?)", ("nybb", geom_col))
        sql_row = "INSERT INTO nybb VALUES(?, ?, ?, ?, ?, GeomFromText(?, ?))"
        con.executemany(
            sql_row,
            (
                (
                    None,  # ogc_fid: let SQLite assign the primary key
                    row.BoroCode,
                    row.BoroName,
                    row.Shape_Leng,
                    row.Shape_Area,
                    row.geometry.wkt if row.geometry else None,
                    srid,
                )
                for row in df.itertuples(index=False)
            ),
        )
def create_postgis(con, df, srid=None, geom_col="geom"):
    """
    Create a nybb table in the test_geopandas PostGIS database.

    Drops any pre-existing ``nybb`` table, re-creates it and inserts one row
    per row of ``df`` (geometries are transferred as WKT). The connection is
    committed in all cases. (The previous docstring claimed a boolean return
    value; the function returns None.)

    Parameters
    ----------
    con : DB-API 2.0 connection to the ``test_geopandas`` database.
    df : GeoDataFrame
        Must provide ``geometry``, ``BoroCode``, ``BoroName``, ``Shape_Leng``
        and ``Shape_Area`` columns.
    srid : int, optional
        When given, the geometry column is declared with this SRID and
        geometries are inserted via ``ST_SetSRID``.
    geom_col : str, default "geom"
        Name of the geometry column to create.
    """
    # Try to create the database, skip the db tests if something goes
    # wrong
    # If you'd like these tests to run, create a database called
    # 'test_geopandas' and enable postgis in it:
    # > createdb test_geopandas
    # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
    if srid is not None:
        geom_schema = "geometry(MULTIPOLYGON, {})".format(srid)
        geom_insert = "ST_SetSRID(ST_GeometryFromText(%s), {})".format(srid)
    else:
        geom_schema = "geometry"
        geom_insert = "ST_GeometryFromText(%s)"
    # BUG FIX: acquire the cursor before entering the try block. Previously,
    # if con.cursor() raised inside the try, the finally clause failed with
    # an unrelated NameError on ``cursor.close()``.
    cursor = con.cursor()
    try:
        cursor.execute("DROP TABLE IF EXISTS nybb;")
        sql = """CREATE TABLE nybb (
            {geom_col} {geom_schema},
            borocode integer,
            boroname varchar(40),
            shape_leng float,
            shape_area float
        );""".format(
            geom_col=geom_col, geom_schema=geom_schema
        )
        cursor.execute(sql)
        for i, row in df.iterrows():
            # Values are passed as bind parameters; only the geometry
            # expression (built above from trusted inputs) is interpolated.
            sql = """INSERT INTO nybb VALUES ({}, %s, %s, %s, %s
            );""".format(
                geom_insert
            )
            cursor.execute(
                sql,
                (
                    row["geometry"].wkt,
                    row["BoroCode"],
                    row["BoroName"],
                    row["Shape_Leng"],
                    row["Shape_Area"],
                ),
            )
    finally:
        cursor.close()
        con.commit()