library packages
29
.venv/lib/python3.12/site-packages/geopandas/__init__.py
Normal file
@@ -0,0 +1,29 @@
from geopandas._config import options

from geopandas.geoseries import GeoSeries
from geopandas.geodataframe import GeoDataFrame
from geopandas.array import points_from_xy

from geopandas.io.file import _read_file as read_file
from geopandas.io.file import _list_layers as list_layers
from geopandas.io.arrow import _read_parquet as read_parquet
from geopandas.io.arrow import _read_feather as read_feather
from geopandas.io.sql import _read_postgis as read_postgis
from geopandas.tools import sjoin, sjoin_nearest
from geopandas.tools import overlay
from geopandas.tools._show_versions import show_versions
from geopandas.tools import clip


import geopandas.datasets


# make the interactive namespace easier to use
# for `from geopandas import *` demos.
import geopandas as gpd
import pandas as pd
import numpy as np

from . import _version

__version__ = _version.get_versions()["version"]
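A short sketch of the public API re-exported above (coordinates and CRS are illustrative):

import geopandas

gdf = geopandas.GeoDataFrame(
    {"city": ["A", "B"]},
    geometry=geopandas.points_from_xy([0.0, 1.0], [0.0, 1.0]),
    crs="EPSG:4326",
)
print(gdf.crs)             # EPSG:4326
geopandas.show_versions()  # prints an environment/dependency report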
14 binary files not shown.
92
.venv/lib/python3.12/site-packages/geopandas/_compat.py
Normal file
@@ -0,0 +1,92 @@
import importlib

from packaging.version import Version

import pandas as pd

import shapely
import shapely.geos

# -----------------------------------------------------------------------------
# pandas compat
# -----------------------------------------------------------------------------

PANDAS_GE_14 = Version(pd.__version__) >= Version("1.4.0rc0")
PANDAS_GE_15 = Version(pd.__version__) >= Version("1.5.0")
PANDAS_GE_20 = Version(pd.__version__) >= Version("2.0.0")
PANDAS_GE_202 = Version(pd.__version__) >= Version("2.0.2")
PANDAS_GE_21 = Version(pd.__version__) >= Version("2.1.0")
PANDAS_GE_22 = Version(pd.__version__) >= Version("2.2.0")
PANDAS_GE_30 = Version(pd.__version__) >= Version("3.0.0.dev0")


# -----------------------------------------------------------------------------
# Shapely / GEOS compat
# -----------------------------------------------------------------------------

SHAPELY_GE_204 = Version(shapely.__version__) >= Version("2.0.4")

GEOS_GE_390 = shapely.geos.geos_version >= (3, 9, 0)
GEOS_GE_310 = shapely.geos.geos_version >= (3, 10, 0)


def import_optional_dependency(name: str, extra: str = ""):
    """
    Import an optional dependency.

    Adapted from pandas.compat._optional::import_optional_dependency

    Raises a formatted ImportError if the module is not present.

    Parameters
    ----------
    name : str
        The module name.
    extra : str
        Additional text to include in the ImportError message.

    Returns
    -------
    module
    """
    msg = (
        "Missing optional dependency '{name}'. {extra} "
        "Use pip or conda to install {name}.".format(name=name, extra=extra)
    )

    if not isinstance(name, str):
        raise ValueError(
            "Invalid module name: '{name}'; must be a string".format(name=name)
        )

    try:
        module = importlib.import_module(name)

    except ImportError:
        raise ImportError(msg) from None

    return module


# -----------------------------------------------------------------------------
# pyproj compat
# -----------------------------------------------------------------------------
try:
    import pyproj  # noqa: F401

    HAS_PYPROJ = True

except ImportError as err:
    HAS_PYPROJ = False
    pyproj_import_error = str(err)


def requires_pyproj(func):
    def wrapper(*args, **kwargs):
        if not HAS_PYPROJ:
            raise ImportError(
                f"The 'pyproj' package is required for {func.__name__} to work. "
                "Install it and initialize the object with a CRS before using it."
                f"\nImporting pyproj resulted in: {pyproj_import_error}"
            )
        return func(*args, **kwargs)

    return wrapper
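A minimal sketch of both helpers, assuming matplotlib as the optional dependency and a hypothetical decorated function:

mpl = import_optional_dependency(
    "matplotlib", extra="matplotlib is required for plotting."
)


@requires_pyproj
def reproject(gdf):
    # the body only runs when pyproj imported successfully at module load
    return gdf.to_crs("EPSG:3857")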
133
.venv/lib/python3.12/site-packages/geopandas/_config.py
Normal file
@@ -0,0 +1,133 @@
"""
Lightweight options machinery.

Based on https://github.com/topper-123/optioneer, but simplified (don't deal
with nested options, deprecated options, ...), just the attribute-style
dict-like holding the options and giving a nice repr.
"""

import textwrap
import warnings
from collections import namedtuple

Option = namedtuple("Option", "key default_value doc validator callback")


class Options(object):
    """Provide attribute-style access to configuration dict."""

    def __init__(self, options):
        super().__setattr__("_options", options)
        # populate with default values
        config = {}
        for key, option in options.items():
            config[key] = option.default_value

        super().__setattr__("_config", config)

    def __setattr__(self, key, value):
        # you can't set new keys
        if key in self._config:
            option = self._options[key]
            if option.validator:
                option.validator(value)
            self._config[key] = value
            if option.callback:
                option.callback(key, value)
        else:
            msg = "You can only set the value of existing options"
            raise AttributeError(msg)

    def __getattr__(self, key):
        try:
            return self._config[key]
        except KeyError:
            raise AttributeError("No such option")

    def __dir__(self):
        return list(self._config.keys())

    def __repr__(self):
        cls = self.__class__.__name__
        description = ""
        for key, option in self._options.items():
            descr = "{key}: {cur!r} [default: {default!r}]\n".format(
                key=key, cur=self._config[key], default=option.default_value
            )
            description += descr

            if option.doc:
                doc_text = "\n".join(textwrap.wrap(option.doc, width=70))
            else:
                doc_text = "No description available."
            doc_text = textwrap.indent(doc_text, prefix="    ")
            description += doc_text + "\n"
        space = "\n  "
        description = description.replace("\n", space)
        return "{}({}{})".format(cls, space, description)


def _validate_display_precision(value):
    if value is not None:
        if not isinstance(value, int) or not (0 <= value <= 16):
            raise ValueError("Invalid value, needs to be an integer [0-16]")


display_precision = Option(
    key="display_precision",
    default_value=None,
    doc=(
        "The precision (maximum number of decimals) of the coordinates in "
        "the WKT representation in the Series/DataFrame display. "
        "By default (None), it tries to infer and use 3 decimals for projected "
        "coordinates and 5 decimals for geographic coordinates."
    ),
    validator=_validate_display_precision,
    callback=None,
)


def _warn_use_pygeos_deprecated(_value):
    warnings.warn(
        "pygeos support was removed in 1.0. "
        "geopandas.use_pygeos is a no-op and will be removed in geopandas 1.1.",
        stacklevel=3,
    )


def _validate_io_engine(value):
    if value is not None:
        if value not in ("pyogrio", "fiona"):
            raise ValueError(f"Expected 'pyogrio' or 'fiona', got '{value}'")


io_engine = Option(
    key="io_engine",
    default_value=None,
    doc=(
        "The default engine for ``read_file`` and ``to_file``. "
        "Options are 'pyogrio' and 'fiona'."
    ),
    validator=_validate_io_engine,
    callback=None,
)

# TODO: deprecate this
use_pygeos = Option(
    key="use_pygeos",
    default_value=False,
    doc=(
        "Deprecated option previously used to enable PyGEOS. "
        "It will be removed in GeoPandas 1.1."
    ),
    validator=_warn_use_pygeos_deprecated,
    callback=None,
)

options = Options(
    {
        "display_precision": display_precision,
        "use_pygeos": use_pygeos,
        "io_engine": io_engine,
    }
)
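Behavior sketch of the resulting `geopandas.options` object (the option keys are the three registered above):

import geopandas

geopandas.options.display_precision = 2   # validated: int in [0, 16]
geopandas.options.io_engine = "pyogrio"   # validated: 'pyogrio' or 'fiona'
try:
    geopandas.options.unknown = True      # new keys are rejected
except AttributeError as err:
    print(err)  # You can only set the value of existing options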
52
.venv/lib/python3.12/site-packages/geopandas/_decorator.py
Normal file
@@ -0,0 +1,52 @@
from textwrap import dedent
from typing import Callable, Union

# doc decorator function ported with modifications from Pandas
# https://github.com/pandas-dev/pandas/blob/master/pandas/util/_decorators.py


def doc(*docstrings: Union[str, Callable], **params) -> Callable:
    """
    A decorator that takes docstring templates, concatenates them, and
    performs string substitution on the result.

    This decorator adds a variable "_docstring_components" to the wrapped
    callable to keep track of the original docstring templates for potential
    future use. If a component should be considered a template, it is saved
    as a string. Otherwise, it is saved as a callable, and its __doc__ is
    later dedented to obtain the docstring.

    Parameters
    ----------
    *docstrings : str or callable
        The string / docstring / docstring template to be appended in order
        after default docstring under callable.
    **params
        The strings which would be used to format docstring template.
    """

    def decorator(decorated: Callable) -> Callable:
        # collecting docstring and docstring templates
        docstring_components: list[Union[str, Callable]] = []
        if decorated.__doc__:
            docstring_components.append(dedent(decorated.__doc__))

        for docstring in docstrings:
            if hasattr(docstring, "_docstring_components"):
                docstring_components.extend(docstring._docstring_components)
            elif isinstance(docstring, str) or docstring.__doc__:
                docstring_components.append(docstring)

        # formatting templates and concatenating docstring
        decorated.__doc__ = "".join(
            (
                component.format(**params)
                if isinstance(component, str)
                else dedent(component.__doc__ or "")
            )
            for component in docstring_components
        )

        decorated._docstring_components = docstring_components
        return decorated

    return decorator
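A brief sketch of the decorator in use (the decorated function and template are illustrative, not geopandas API):

@doc(klass="GeoDataFrame")
def describe(obj):
    """Summarize a {klass} instance."""

print(describe.__doc__)  # "Summarize a GeoDataFrame instance."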
21
.venv/lib/python3.12/site-packages/geopandas/_version.py
Normal file
@@ -0,0 +1,21 @@
# This file was generated by 'versioneer.py' (0.29) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.

import json

version_json = '''
{
 "date": "2024-07-02T14:23:16+0200",
 "dirty": false,
 "error": null,
 "full-revisionid": "747d66ee6fcf00b819c08f11ecded53736c4652b",
 "version": "1.0.1"
}
'''  # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
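What this yields at runtime:

from geopandas import _version

info = _version.get_versions()
info["version"]  # '1.0.1'
info["dirty"]    # False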
1760
.venv/lib/python3.12/site-packages/geopandas/array.py
Normal file
File diff suppressed because it is too large
6202
.venv/lib/python3.12/site-packages/geopandas/base.py
Normal file
File diff suppressed because it is too large
47
.venv/lib/python3.12/site-packages/geopandas/conftest.py
Normal file
@@ -0,0 +1,47 @@
import os.path

import geopandas

import pytest
from geopandas.tests.util import _NATURALEARTH_CITIES, _NATURALEARTH_LOWRES, _NYBB


@pytest.fixture(autouse=True)
def add_geopandas(doctest_namespace):
    doctest_namespace["geopandas"] = geopandas


# Datasets used in our tests


@pytest.fixture(scope="session")
def naturalearth_lowres() -> str:
    # skip if data missing, unless on github actions
    if os.path.isfile(_NATURALEARTH_LOWRES) or os.getenv("GITHUB_ACTIONS"):
        return _NATURALEARTH_LOWRES
    else:
        pytest.skip("Naturalearth lowres dataset not found")


@pytest.fixture(scope="session")
def naturalearth_cities() -> str:
    # skip if data missing, unless on github actions
    if os.path.isfile(_NATURALEARTH_CITIES) or os.getenv("GITHUB_ACTIONS"):
        return _NATURALEARTH_CITIES
    else:
        pytest.skip("Naturalearth cities dataset not found")


@pytest.fixture(scope="session")
def nybb_filename() -> str:
    # skip if data missing, unless on github actions
    if os.path.isfile(_NYBB[len("zip://") :]) or os.getenv("GITHUB_ACTIONS"):
        return _NYBB
    else:
        pytest.skip("NYBB dataset not found")


@pytest.fixture(scope="class")
def _setup_class_nybb_filename(nybb_filename, request):
    """Attach nybb_filename class attribute for unittest style setup_method"""
    request.cls.nybb_filename = nybb_filename
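Sketch of a test module consuming these session fixtures (the test name is hypothetical):

import geopandas


def test_lowres_has_geometry(naturalearth_lowres):
    # pytest injects the dataset path, or skips when the file is absent
    df = geopandas.read_file(naturalearth_lowres)
    assert df.geometry.notna().any()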
25
.venv/lib/python3.12/site-packages/geopandas/datasets/__init__.py
Normal file
@@ -0,0 +1,25 @@
__all__ = []
available = []  # previously part of __all__
_prev_available = ["naturalearth_cities", "naturalearth_lowres", "nybb"]


def get_path(dataset):
    ne_message = "https://www.naturalearthdata.com/downloads/110m-cultural-vectors/."
    nybb_message = (
        "the geodatasets package.\n\nfrom geodatasets import get_path\n"
        "path_to_file = get_path('nybb')\n"
    )
    error_msg = (
        "The geopandas.dataset has been deprecated and was removed in GeoPandas "
        f"1.0. You can get the original '{dataset}' data from "
        f"{ne_message if 'natural' in dataset else nybb_message}"
    )
    if dataset in _prev_available:
        raise AttributeError(error_msg)
    else:
        error_msg = (
            "The geopandas.dataset has been deprecated and "
            "was removed in GeoPandas 1.0. New sample datasets are now available "
            "in the geodatasets package (https://geodatasets.readthedocs.io/en/latest/)"
        )
        raise AttributeError(error_msg)
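Runtime behavior sketch: every lookup raises, steering users to geodatasets / Natural Earth:

import geopandas.datasets

try:
    geopandas.datasets.get_path("nybb")
except AttributeError as err:
    print(err)  # points at the geodatasets package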
Binary file not shown.
1038
.venv/lib/python3.12/site-packages/geopandas/explore.py
Normal file
File diff suppressed because it is too large
2690
.venv/lib/python3.12/site-packages/geopandas/geodataframe.py
Normal file
File diff suppressed because it is too large
1520
.venv/lib/python3.12/site-packages/geopandas/geoseries.py
Normal file
File diff suppressed because it is too large
7 binary files not shown.
614
.venv/lib/python3.12/site-packages/geopandas/io/_geoarrow.py
Normal file
@@ -0,0 +1,614 @@
import json
from packaging.version import Version
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd
import pyarrow as pa
from numpy.typing import NDArray

import shapely
from shapely import GeometryType

from geopandas import GeoDataFrame
from geopandas._compat import SHAPELY_GE_204
from geopandas.array import from_shapely, from_wkb

GEOARROW_ENCODINGS = [
    "point",
    "linestring",
    "polygon",
    "multipoint",
    "multilinestring",
    "multipolygon",
]


## GeoPandas -> GeoArrow


class ArrowTable:
    """
    Wrapper class for Arrow data.

    This class implements the `Arrow PyCapsule Protocol`_ (i.e. having an
    ``__arrow_c_stream__`` method). This object can then be consumed by
    your Arrow implementation of choice that supports this protocol.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    Example
    -------
    >>> import pyarrow as pa
    >>> pa.table(gdf.to_arrow())  # doctest: +SKIP

    """

    def __init__(self, pa_table):
        self._pa_table = pa_table

    def __arrow_c_stream__(self, requested_schema=None):
        return self._pa_table.__arrow_c_stream__(requested_schema=requested_schema)


class GeoArrowArray:
    """
    Wrapper class for a geometry array as Arrow data.

    This class implements the `Arrow PyCapsule Protocol`_ (i.e. having an
    ``__arrow_c_array/stream__`` method). This object can then be consumed by
    your Arrow implementation of choice that supports this protocol.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    Example
    -------
    >>> import pyarrow as pa
    >>> pa.array(ser.to_arrow())  # doctest: +SKIP

    """

    def __init__(self, pa_field, pa_array):
        self._pa_array = pa_array
        self._pa_field = pa_field

    def __arrow_c_array__(self, requested_schema=None):
        if requested_schema is not None:
            raise NotImplementedError(
                "Requested schema is not supported for geometry arrays"
            )
        return (
            self._pa_field.__arrow_c_schema__(),
            self._pa_array.__arrow_c_array__()[1],
        )


def geopandas_to_arrow(
    df,
    index=None,
    geometry_encoding="WKB",
    interleaved=True,
    include_z=None,
):
    """
    Convert GeoDataFrame to a pyarrow.Table.

    Parameters
    ----------
    df : GeoDataFrame
        The GeoDataFrame to convert.
    index : bool, default None
        If ``True``, always include the dataframe's index(es) as columns
        in the file output.
        If ``False``, the index(es) will not be written to the file.
        If ``None``, the index(es) will be included as columns in the file
        output except `RangeIndex` which is stored as metadata only.
    geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
        The GeoArrow encoding to use for the data conversion.
    interleaved : bool, default True
        Only relevant for 'geoarrow' encoding. If True, the geometries'
        coordinates are interleaved in a single fixed size list array.
        If False, the coordinates are stored as separate arrays in a
        struct type.
    include_z : bool, default None
        Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
        of the individual geometries is preserved).
        If False, return 2D geometries. If True, include the third dimension
        in the output (if a geometry has no third dimension, the z-coordinates
        will be NaN). By default, will infer the dimensionality from the
        input geometries. Note that this inference can be unreliable with
        empty geometries (for a guaranteed result, it is recommended to
        specify the keyword).

    """
    mask = df.dtypes == "geometry"
    geometry_columns = df.columns[mask]
    geometry_indices = np.asarray(mask).nonzero()[0]

    df_attr = pd.DataFrame(df.copy(deep=False))

    # replace geometry columns with dummy values -> will get converted to
    # Arrow null column (not holding any memory), so we can afterwards
    # fill the resulting table with the correct geometry fields
    for col in geometry_columns:
        df_attr[col] = None

    table = pa.Table.from_pandas(df_attr, preserve_index=index)

    geometry_encoding_dict = {}

    if geometry_encoding.lower() == "geoarrow":
        if Version(pa.__version__) < Version("10.0.0"):
            raise ValueError("Converting to 'geoarrow' requires pyarrow >= 10.0.")

        # Encode all geometry columns to GeoArrow
        for i, col in zip(geometry_indices, geometry_columns):
            field, geom_arr = construct_geometry_array(
                np.array(df[col].array),
                include_z=include_z,
                field_name=col,
                crs=df[col].crs,
                interleaved=interleaved,
            )
            table = table.set_column(i, field, geom_arr)
            geometry_encoding_dict[col] = (
                field.metadata[b"ARROW:extension:name"]
                .decode()
                .removeprefix("geoarrow.")
            )

    elif geometry_encoding.lower() == "wkb":
        # Encode all geometry columns to WKB
        for i, col in zip(geometry_indices, geometry_columns):
            field, wkb_arr = construct_wkb_array(
                np.asarray(df[col].array), field_name=col, crs=df[col].crs
            )
            table = table.set_column(i, field, wkb_arr)
            geometry_encoding_dict[col] = "WKB"

    else:
        raise ValueError(
            f"Expected geometry encoding 'WKB' or 'geoarrow', got {geometry_encoding}"
        )
    return table, geometry_encoding_dict
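A hedged sketch of calling the converter above (assumes pyarrow >= 10 for the 'geoarrow' path; the frame is illustrative):

import geopandas
import shapely

gdf = geopandas.GeoDataFrame(
    {"name": ["a", "b"]},
    geometry=[shapely.Point(0, 0), shapely.Point(1, 1)],
    crs="EPSG:4326",
)
table, encodings = geopandas_to_arrow(gdf, geometry_encoding="geoarrow")
print(encodings)  # {'geometry': 'point'}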
def construct_wkb_array(
    shapely_arr: NDArray[np.object_],
    *,
    field_name: str = "geometry",
    crs: Optional[str] = None,
) -> Tuple[pa.Field, pa.Array]:
    if shapely.geos_version > (3, 10, 0):
        kwargs = {"flavor": "iso"}
    else:
        if shapely.has_z(shapely_arr).any():
            raise ValueError("Cannot write 3D geometries with GEOS<3.10")
        kwargs = {}

    wkb_arr = shapely.to_wkb(shapely_arr, **kwargs)
    extension_metadata = {"ARROW:extension:name": "geoarrow.wkb"}
    if crs is not None:
        extension_metadata["ARROW:extension:metadata"] = json.dumps(
            {"crs": crs.to_json()}
        )
    else:
        # In theory this should not be needed, but otherwise pyarrow < 17
        # crashes on receiving such data through C Data Interface
        # https://github.com/apache/arrow/issues/41741
        extension_metadata["ARROW:extension:metadata"] = "{}"

    field = pa.field(
        field_name, type=pa.binary(), nullable=True, metadata=extension_metadata
    )
    parr = pa.array(np.asarray(wkb_arr), pa.binary())
    return field, parr


def _convert_inner_coords(coords, interleaved, dims, mask=None):
    if interleaved:
        coords_field = pa.field(dims, pa.float64(), nullable=False)
        typ = pa.list_(coords_field, len(dims))
        if mask is None:
            # mask keyword only added in pyarrow 15.0.0
            parr = pa.FixedSizeListArray.from_arrays(coords.ravel(), type=typ)
        else:
            parr = pa.FixedSizeListArray.from_arrays(
                coords.ravel(), type=typ, mask=mask
            )
    else:
        if dims == "xy":
            fields = [
                pa.field("x", pa.float64(), nullable=False),
                pa.field("y", pa.float64(), nullable=False),
            ]
            parr = pa.StructArray.from_arrays(
                [coords[:, 0].copy(), coords[:, 1].copy()], fields=fields, mask=mask
            )
        else:
            fields = [
                pa.field("x", pa.float64(), nullable=False),
                pa.field("y", pa.float64(), nullable=False),
                pa.field("z", pa.float64(), nullable=False),
            ]
            parr = pa.StructArray.from_arrays(
                [coords[:, 0].copy(), coords[:, 1].copy(), coords[:, 2].copy()],
                fields=fields,
                mask=mask,
            )
    return parr


def _linestring_type(point_type):
    return pa.list_(pa.field("vertices", point_type, nullable=False))


def _polygon_type(point_type):
    return pa.list_(
        pa.field(
            "rings",
            pa.list_(pa.field("vertices", point_type, nullable=False)),
            nullable=False,
        )
    )


def _multipoint_type(point_type):
    return pa.list_(pa.field("points", point_type, nullable=False))


def _multilinestring_type(point_type):
    return pa.list_(
        pa.field("linestrings", _linestring_type(point_type), nullable=False)
    )


def _multipolygon_type(point_type):
    return pa.list_(pa.field("polygons", _polygon_type(point_type), nullable=False))


def construct_geometry_array(
    shapely_arr: NDArray[np.object_],
    include_z: Optional[bool] = None,
    *,
    field_name: str = "geometry",
    crs: Optional[str] = None,
    interleaved: bool = True,
) -> Tuple[pa.Field, pa.Array]:
    # NOTE: this implementation returns a (field, array) pair so that it can set the
    # extension metadata on the field without instantiating extension types into the
    # global pyarrow registry
    geom_type, coords, offsets = shapely.to_ragged_array(
        shapely_arr, include_z=include_z
    )

    mask = shapely.is_missing(shapely_arr)
    if mask.any():
        if (
            geom_type == GeometryType.POINT
            and interleaved
            and Version(pa.__version__) < Version("15.0.0")
        ):
            raise ValueError(
                "Converting point geometries with missing values is not supported "
                "for interleaved coordinates with pyarrow < 15.0.0. Please "
                "upgrade to a newer version of pyarrow."
            )
        mask = pa.array(mask, type=pa.bool_())

        if geom_type == GeometryType.POINT and not SHAPELY_GE_204:
            # bug in shapely < 2.0.4, see https://github.com/shapely/shapely/pull/2034
            # this workaround only works if there are no empty points
            indices = np.nonzero(mask)[0]
            indices = indices - np.arange(len(indices))
            coords = np.insert(coords, indices, np.nan, axis=0)

    else:
        mask = None

    if coords.shape[-1] == 2:
        dims = "xy"
    elif coords.shape[-1] == 3:
        dims = "xyz"
    else:
        raise ValueError(f"Unexpected coords dimensions: {coords.shape}")

    extension_metadata: Dict[str, str] = {}
    if crs is not None:
        extension_metadata["ARROW:extension:metadata"] = json.dumps(
            {"crs": crs.to_json()}
        )
    else:
        # In theory this should not be needed, but otherwise pyarrow < 17
        # crashes on receiving such data through C Data Interface
        # https://github.com/apache/arrow/issues/41741
        extension_metadata["ARROW:extension:metadata"] = "{}"

    if geom_type == GeometryType.POINT:
        parr = _convert_inner_coords(coords, interleaved, dims, mask=mask)
        extension_metadata["ARROW:extension:name"] = "geoarrow.point"
        field = pa.field(
            field_name,
            parr.type,
            nullable=True,
            metadata=extension_metadata,
        )
        return field, parr

    elif geom_type == GeometryType.LINESTRING:
        assert len(offsets) == 1, "Expected one offsets array"
        (geom_offsets,) = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        parr = pa.ListArray.from_arrays(
            pa.array(geom_offsets), _parr, _linestring_type(_parr.type), mask=mask
        )
        extension_metadata["ARROW:extension:name"] = "geoarrow.linestring"
        field = pa.field(
            field_name,
            parr.type,
            nullable=True,
            metadata=extension_metadata,
        )
        return field, parr

    elif geom_type == GeometryType.POLYGON:
        assert len(offsets) == 2, "Expected two offsets arrays"
        ring_offsets, geom_offsets = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        _parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1, mask=mask)
        parr = parr.cast(_polygon_type(_parr.type))
        extension_metadata["ARROW:extension:name"] = "geoarrow.polygon"
        field = pa.field(
            field_name,
            parr.type,
            nullable=True,
            metadata=extension_metadata,
        )
        return field, parr

    elif geom_type == GeometryType.MULTIPOINT:
        assert len(offsets) == 1, "Expected one offsets array"
        (geom_offsets,) = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        parr = pa.ListArray.from_arrays(
            pa.array(geom_offsets), _parr, type=_multipoint_type(_parr.type), mask=mask
        )
        extension_metadata["ARROW:extension:name"] = "geoarrow.multipoint"
        field = pa.field(
            field_name,
            parr.type,
            nullable=True,
            metadata=extension_metadata,
        )
        return field, parr

    elif geom_type == GeometryType.MULTILINESTRING:
        assert len(offsets) == 2, "Expected two offsets arrays"
        ring_offsets, geom_offsets = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        _parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1, mask=mask)
        parr = parr.cast(_multilinestring_type(_parr.type))
        extension_metadata["ARROW:extension:name"] = "geoarrow.multilinestring"
        field = pa.field(
            field_name,
            parr.type,
            nullable=True,
            metadata=extension_metadata,
        )
        return field, parr

    elif geom_type == GeometryType.MULTIPOLYGON:
        assert len(offsets) == 3, "Expected three offsets arrays"
        ring_offsets, polygon_offsets, geom_offsets = offsets
        _parr = _convert_inner_coords(coords, interleaved, dims)
        _parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
        _parr2 = pa.ListArray.from_arrays(pa.array(polygon_offsets), _parr1)
        parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr2, mask=mask)
        parr = parr.cast(_multipolygon_type(_parr.type))
        extension_metadata["ARROW:extension:name"] = "geoarrow.multipolygon"
        field = pa.field(
            field_name,
            parr.type,
            nullable=True,
            metadata=extension_metadata,
        )
        return field, parr

    else:
        raise ValueError(f"Unsupported type for geoarrow: {geom_type}")
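Sketch of the point path above (assumes shapely >= 2.0 and pyarrow >= 10; the printed type repr is approximate):

import numpy as np
import shapely

field, parr = construct_geometry_array(
    np.array([shapely.Point(0, 0), shapely.Point(1, 1)], dtype=object)
)
field.metadata[b"ARROW:extension:name"]  # b'geoarrow.point'
parr.type                                # fixed_size_list<xy: double not null>[2]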
## GeoArrow -> GeoPandas


def _get_arrow_geometry_field(field):
    if (meta := field.metadata) is not None:
        if (ext_name := meta.get(b"ARROW:extension:name", None)) is not None:
            if ext_name.startswith(b"geoarrow."):
                if (
                    ext_meta := meta.get(b"ARROW:extension:metadata", None)
                ) is not None:
                    ext_meta = json.loads(ext_meta.decode())
                return ext_name.decode(), ext_meta

    if isinstance(field.type, pa.ExtensionType):
        ext_name = field.type.extension_name
        if ext_name.startswith("geoarrow."):
            ext_meta_ser = field.type.__arrow_ext_serialize__()
            if ext_meta_ser:
                ext_meta = json.loads(ext_meta_ser.decode())
            else:
                ext_meta = None
            return ext_name, ext_meta

    return None


def arrow_to_geopandas(table, geometry=None):
    """
    Convert Arrow table object to a GeoDataFrame based on GeoArrow extension types.

    Parameters
    ----------
    table : pyarrow.Table
        The Arrow table to convert.
    geometry : str, default None
        The name of the geometry column to set as the active geometry
        column. If None, the first geometry column found will be used.

    Returns
    -------
    GeoDataFrame

    """
    if not isinstance(table, pa.Table):
        table = pa.table(table)

    geom_fields = []

    for i, field in enumerate(table.schema):
        geom = _get_arrow_geometry_field(field)
        if geom is not None:
            geom_fields.append((i, field.name, *geom))

    if len(geom_fields) == 0:
        raise ValueError("No geometry column found in the Arrow table.")

    table_attr = table.drop([f[1] for f in geom_fields])
    df = table_attr.to_pandas()

    for i, col, ext_name, ext_meta in geom_fields:
        crs = None
        if ext_meta is not None and "crs" in ext_meta:
            crs = ext_meta["crs"]

        if ext_name == "geoarrow.wkb":
            geom_arr = from_wkb(np.array(table[col]), crs=crs)
        elif ext_name.split(".")[1] in GEOARROW_ENCODINGS:
            geom_arr = from_shapely(
                construct_shapely_array(table[col].combine_chunks(), ext_name), crs=crs
            )
        else:
            raise TypeError(f"Unknown GeoArrow extension type: {ext_name}")

        df.insert(i, col, geom_arr)

    return GeoDataFrame(df, geometry=geometry or geom_fields[0][1])


def arrow_to_geometry_array(arr):
    """
    Convert Arrow array object (representing a single GeoArrow array) to a
    geopandas GeometryArray.

    Specifically for GeoSeries.from_arrow.
    """
    if Version(pa.__version__) < Version("14.0.0"):
        raise ValueError("Importing from Arrow requires pyarrow >= 14.0.")

    schema_capsule, array_capsule = arr.__arrow_c_array__()
    field = pa.Field._import_from_c_capsule(schema_capsule)
    pa_arr = pa.Array._import_from_c_capsule(field.__arrow_c_schema__(), array_capsule)

    geom_info = _get_arrow_geometry_field(field)
    if geom_info is None:
        raise ValueError("No GeoArrow geometry field found.")
    ext_name, ext_meta = geom_info

    crs = None
    if ext_meta is not None and "crs" in ext_meta:
        crs = ext_meta["crs"]

    if ext_name == "geoarrow.wkb":
        geom_arr = from_wkb(np.array(pa_arr), crs=crs)
    elif ext_name.split(".")[1] in GEOARROW_ENCODINGS:
        geom_arr = from_shapely(construct_shapely_array(pa_arr, ext_name), crs=crs)
    else:
        raise ValueError(f"Unknown GeoArrow extension type: {ext_name}")

    return geom_arr


def _get_inner_coords(arr):
    if pa.types.is_struct(arr.type):
        if arr.type.num_fields == 2:
            coords = np.column_stack(
                [np.asarray(arr.field("x")), np.asarray(arr.field("y"))]
            )
        else:
            coords = np.column_stack(
                [
                    np.asarray(arr.field("x")),
                    np.asarray(arr.field("y")),
                    np.asarray(arr.field("z")),
                ]
            )
        return coords
    else:
        # fixed size list
        return np.asarray(arr.values).reshape(len(arr), -1)


def construct_shapely_array(arr: pa.Array, extension_name: str):
    """
    Construct a NumPy array of shapely geometries from a pyarrow.Array
    with GeoArrow extension type.
    """
    if isinstance(arr, pa.ExtensionArray):
        arr = arr.storage

    if extension_name == "geoarrow.point":
        coords = _get_inner_coords(arr)
        result = shapely.from_ragged_array(GeometryType.POINT, coords, None)

    elif extension_name == "geoarrow.linestring":
        coords = _get_inner_coords(arr.values)
        offsets1 = np.asarray(arr.offsets)
        offsets = (offsets1,)
        result = shapely.from_ragged_array(GeometryType.LINESTRING, coords, offsets)

    elif extension_name == "geoarrow.polygon":
        coords = _get_inner_coords(arr.values.values)
        offsets2 = np.asarray(arr.offsets)
        offsets1 = np.asarray(arr.values.offsets)
        offsets = (offsets1, offsets2)
        result = shapely.from_ragged_array(GeometryType.POLYGON, coords, offsets)

    elif extension_name == "geoarrow.multipoint":
        coords = _get_inner_coords(arr.values)
        offsets1 = np.asarray(arr.offsets)
        offsets = (offsets1,)
        result = shapely.from_ragged_array(GeometryType.MULTIPOINT, coords, offsets)

    elif extension_name == "geoarrow.multilinestring":
        coords = _get_inner_coords(arr.values.values)
        offsets2 = np.asarray(arr.offsets)
        offsets1 = np.asarray(arr.values.offsets)
        offsets = (offsets1, offsets2)
        result = shapely.from_ragged_array(
            GeometryType.MULTILINESTRING, coords, offsets
        )

    elif extension_name == "geoarrow.multipolygon":
        coords = _get_inner_coords(arr.values.values.values)
        offsets3 = np.asarray(arr.offsets)
        offsets2 = np.asarray(arr.values.offsets)
        offsets1 = np.asarray(arr.values.values.offsets)
        offsets = (offsets1, offsets2, offsets3)
        result = shapely.from_ragged_array(GeometryType.MULTIPOLYGON, coords, offsets)

    else:
        raise ValueError(extension_name)

    # apply validity mask
    if arr.null_count:
        mask = np.asarray(arr.is_null())
        result = np.where(mask, None, result)

    return result
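Round-trip sketch combining the two directions defined in this module (reusing the two-point `gdf` from the earlier sketch; requires pyarrow >= 10):

table, _ = geopandas_to_arrow(gdf, geometry_encoding="geoarrow")
back = arrow_to_geopandas(table)
# column order and CRS should survive the round trip
assert list(back.columns) == ["name", "geometry"]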
@@ -0,0 +1,72 @@
from packaging.version import Version

import pyarrow

_ERROR_MSG = """\
Disallowed deserialization of 'arrow.py_extension_type':
storage_type = {storage_type}
serialized = {serialized}
pickle disassembly:\n{pickle_disassembly}

Reading of untrusted Parquet or Feather files with a PyExtensionType column
allows arbitrary code execution.
If you trust this file, you can enable reading the extension type by one of:

- upgrading to pyarrow >= 14.0.1 and calling `pa.PyExtensionType.set_auto_load(True)`
- installing pyarrow-hotfix (`pip install pyarrow-hotfix`) and disabling it by running
  `import pyarrow_hotfix; pyarrow_hotfix.uninstall()`

We strongly recommend updating your Parquet/Feather files to use extension types
derived from `pyarrow.ExtensionType` instead, and registering this type explicitly.
See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types
for more details.
"""


def patch_pyarrow():
    # starting from pyarrow 14.0.1, it has its own mechanism
    if Version(pyarrow.__version__) >= Version("14.0.1"):
        return

    # if the user has pyarrow_hotfix (https://github.com/pitrou/pyarrow-hotfix)
    # installed, use this instead (which also ensures it works if they had
    # called `pyarrow_hotfix.uninstall()`)
    try:
        import pyarrow_hotfix  # noqa: F401
    except ImportError:
        pass
    else:
        return

    # if the hotfix is already installed and enabled
    if getattr(pyarrow, "_hotfix_installed", False):
        return

    class ForbiddenExtensionType(pyarrow.ExtensionType):
        def __arrow_ext_serialize__(self):
            return b""

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            import io
            import pickletools

            out = io.StringIO()
            pickletools.dis(serialized, out)
            raise RuntimeError(
                _ERROR_MSG.format(
                    storage_type=storage_type,
                    serialized=serialized,
                    pickle_disassembly=out.getvalue(),
                )
            )

    pyarrow.unregister_extension_type("arrow.py_extension_type")
    pyarrow.register_extension_type(
        ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type")
    )

    pyarrow._hotfix_installed = True


patch_pyarrow()
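The module patches on import (last line above); a small sketch of the guard's idempotence, assuming pyarrow < 14.0.1 and no pyarrow-hotfix installed:

patch_pyarrow()  # safe to call again: the early returns and the
                 # pyarrow._hotfix_installed flag make it a no-op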
913
.venv/lib/python3.12/site-packages/geopandas/io/arrow.py
Normal file
@@ -0,0 +1,913 @@
import json
import warnings
from packaging.version import Version

import numpy as np
from pandas import DataFrame, Series

import shapely

import geopandas
from geopandas import GeoDataFrame
from geopandas._compat import import_optional_dependency
from geopandas.array import from_shapely, from_wkb

from .file import _expand_user

METADATA_VERSION = "1.0.0"
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
GEOARROW_ENCODINGS = [
    "point",
    "linestring",
    "polygon",
    "multipoint",
    "multilinestring",
    "multipolygon",
]
SUPPORTED_ENCODINGS = ["WKB"] + GEOARROW_ENCODINGS

# reference: https://github.com/opengeospatial/geoparquet

# Metadata structure:
# {
#     "geo": {
#         "columns": {
#             "<name>": {
#                 "encoding": "WKB",
#                 "geometry_types": <list of str: REQUIRED>,
#                 "crs": "<PROJJSON or None: OPTIONAL>",
#                 "orientation": "<'counterclockwise' or None: OPTIONAL>",
#                 "edges": "planar",
#                 "bbox": <list of [xmin, ymin, xmax, ymax]: OPTIONAL>,
#                 "epoch": <float: OPTIONAL>
#             }
#         },
#         "primary_column": "<str: REQUIRED>",
#         "version": "<METADATA_VERSION>",
#
#         # Additional GeoPandas specific metadata (not in metadata spec)
#         "creator": {
#             "library": "geopandas",
#             "version": "<geopandas.__version__>"
#         }
#     }
# }


def _is_fsspec_url(url):
    return (
        isinstance(url, str)
        and "://" in url
        and not url.startswith(("http://", "https://"))
    )


def _remove_id_from_member_of_ensembles(json_dict):
    """
    Older PROJ versions will not recognize IDs of datum ensemble members that
    were added in more recent PROJ database versions.

    Cf https://github.com/opengeospatial/geoparquet/discussions/110
    and https://github.com/OSGeo/PROJ/pull/3221

    Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872
    """
    for key, value in json_dict.items():
        if isinstance(value, dict):
            _remove_id_from_member_of_ensembles(value)
        elif key == "members" and isinstance(value, list):
            for member in value:
                member.pop("id", None)


# type ids 0 to 7 (LinearRing, type id 2, is reported as LineString since
# GeoParquet has no separate name for it)
_geometry_type_names = [
    "Point",
    "LineString",
    "LineString",
    "Polygon",
    "MultiPoint",
    "MultiLineString",
    "MultiPolygon",
    "GeometryCollection",
]
_geometry_type_names += [geom_type + " Z" for geom_type in _geometry_type_names]


def _get_geometry_types(series):
    """
    Get unique geometry types from a GeoSeries.
    """
    arr_geometry_types = shapely.get_type_id(series.array._data)
    # ensure to include "... Z" for 3D geometries
    has_z = shapely.has_z(series.array._data)
    arr_geometry_types[has_z] += 8

    geometry_types = Series(arr_geometry_types).unique().tolist()
    # drop missing values (shapely.get_type_id returns -1 for those)
    if -1 in geometry_types:
        geometry_types.remove(-1)

    return sorted([_geometry_type_names[idx] for idx in geometry_types])
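Sketch of the helper above (assumes shapely >= 2; note the alphabetical sort and the " Z" suffix for 3D geometries):

import geopandas
from shapely.geometry import LineString, Point

s = geopandas.GeoSeries([Point(0, 0), Point(0, 0, 1), LineString([(0, 0), (1, 1)])])
_get_geometry_types(s)  # ['LineString', 'Point', 'Point Z']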
def _create_metadata(
    df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
):
    """Create and encode geo metadata dict.

    Parameters
    ----------
    df : GeoDataFrame
    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', '1.1.0', None}
        GeoParquet specification version; if not provided will default to
        latest supported version.
    write_covering_bbox : bool, default False
        Writes the bounding box column for each row entry with column
        name 'bbox'. Writing a bbox column can be computationally
        expensive, hence its default setting is False.

    Returns
    -------
    dict
    """
    if schema_version is None:
        if geometry_encoding and any(
            encoding != "WKB" for encoding in geometry_encoding.values()
        ):
            schema_version = "1.1.0"
        else:
            schema_version = METADATA_VERSION

    if schema_version not in SUPPORTED_VERSIONS:
        raise ValueError(
            f"schema_version must be one of: {', '.join(SUPPORTED_VERSIONS)}"
        )

    # Construct metadata for each geometry
    column_metadata = {}
    for col in df.columns[df.dtypes == "geometry"]:
        series = df[col]

        geometry_types = _get_geometry_types(series)
        if schema_version[0] == "0":
            geometry_types_name = "geometry_type"
            if len(geometry_types) == 1:
                geometry_types = geometry_types[0]
        else:
            geometry_types_name = "geometry_types"

        crs = None
        if series.crs:
            if schema_version == "0.1.0":
                crs = series.crs.to_wkt()
            else:  # version >= 0.4.0
                crs = series.crs.to_json_dict()
                _remove_id_from_member_of_ensembles(crs)

        column_metadata[col] = {
            "encoding": geometry_encoding[col],
            "crs": crs,
            geometry_types_name: geometry_types,
        }

        bbox = series.total_bounds.tolist()
        if np.isfinite(bbox).all():
            # don't add bbox with NaNs for empty / all-NA geometry column
            column_metadata[col]["bbox"] = bbox

        if write_covering_bbox:
            column_metadata[col]["covering"] = {
                "bbox": {
                    "xmin": ["bbox", "xmin"],
                    "ymin": ["bbox", "ymin"],
                    "xmax": ["bbox", "xmax"],
                    "ymax": ["bbox", "ymax"],
                },
            }

    return {
        "primary_column": df._geometry_column_name,
        "columns": column_metadata,
        "version": schema_version,
        "creator": {"library": "geopandas", "version": geopandas.__version__},
    }
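Sketch of the dict this returns for a single WKB-encoded point column (reusing the two-point `gdf` from the sketches above; values abbreviated, and the crs entry is a PROJJSON dict in practice):

meta = _create_metadata(gdf, geometry_encoding={"geometry": "WKB"})
# {
#     "primary_column": "geometry",
#     "columns": {
#         "geometry": {
#             "encoding": "WKB",
#             "crs": {...},  # PROJJSON via series.crs.to_json_dict()
#             "geometry_types": ["Point"],
#             "bbox": [0.0, 0.0, 1.0, 1.0],
#         }
#     },
#     "version": "1.0.0",
#     "creator": {"library": "geopandas", "version": "1.0.1"},
# }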
def _encode_metadata(metadata):
    """Encode metadata dict to UTF-8 JSON string.

    Parameters
    ----------
    metadata : dict

    Returns
    -------
    UTF-8 encoded JSON string
    """
    return json.dumps(metadata).encode("utf-8")


def _decode_metadata(metadata_str):
    """Decode a UTF-8 encoded JSON string to dict.

    Parameters
    ----------
    metadata_str : string (UTF-8 encoded)

    Returns
    -------
    dict
    """
    if metadata_str is None:
        return None

    return json.loads(metadata_str.decode("utf-8"))


def _validate_dataframe(df):
    """Validate that the GeoDataFrame conforms to requirements for writing
    to Parquet format.

    Raises `ValueError` if the GeoDataFrame is not valid.

    copied from `pandas.io.parquet`

    Parameters
    ----------
    df : GeoDataFrame
    """
    if not isinstance(df, DataFrame):
        raise ValueError("Writing to Parquet/Feather only supports IO with DataFrames")

    # must have valid column names (strings only)
    if df.columns.inferred_type not in {"string", "unicode", "empty"}:
        raise ValueError("Writing to Parquet/Feather requires string column names")

    # index level names must be strings
    valid_names = all(
        isinstance(name, str) for name in df.index.names if name is not None
    )
    if not valid_names:
        raise ValueError("Index level names must be strings")


def _validate_geo_metadata(metadata):
    """Validate geo metadata.
    Must not be empty, and must contain the structure specified above.

    Raises ValueError if metadata is not valid.

    Parameters
    ----------
    metadata : dict
    """
    if not metadata:
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    # version was schema_version in 0.1.0
    version = metadata.get("version", metadata.get("schema_version"))
    if not version:
        raise ValueError(
            "'geo' metadata in Parquet/Feather file is missing required key: "
            "'version'"
        )

    required_keys = ("primary_column", "columns")
    for key in required_keys:
        if metadata.get(key, None) is None:
            raise ValueError(
                "'geo' metadata in Parquet/Feather file is missing required key: "
                "'{key}'".format(key=key)
            )

    if not isinstance(metadata["columns"], dict):
        raise ValueError("'columns' in 'geo' metadata must be a dict")

    # Validate that geometry columns have required metadata and values
    # leaving out "geometry_type" for compatibility with 0.1
    required_col_keys = ("encoding",)
    for col, column_metadata in metadata["columns"].items():
        for key in required_col_keys:
            if key not in column_metadata:
                raise ValueError(
                    "'geo' metadata in Parquet/Feather file is missing required key "
                    "'{key}' for column '{col}'".format(key=key, col=col)
                )

        if column_metadata["encoding"] not in SUPPORTED_ENCODINGS:
            raise ValueError(
                "Only WKB geometry encoding or one of the native encodings "
                f"({GEOARROW_ENCODINGS!r}) are supported, "
                f"got: {column_metadata['encoding']}"
            )

        if column_metadata.get("edges", "planar") == "spherical":
            warnings.warn(
                f"The geo metadata indicate that column '{col}' has spherical edges, "
                "but because GeoPandas currently does not support spherical "
                "geometry, it ignores this metadata and will interpret the edges of "
                "the geometries as planar.",
                UserWarning,
                stacklevel=4,
            )

        if "covering" in column_metadata:
            covering = column_metadata["covering"]
            if "bbox" in covering:
                bbox = covering["bbox"]
                for var in ["xmin", "ymin", "xmax", "ymax"]:
                    if var not in bbox.keys():
                        raise ValueError("Metadata for bbox column is malformed.")


def _geopandas_to_arrow(
    df,
    index=None,
    geometry_encoding="WKB",
    schema_version=None,
    write_covering_bbox=None,
):
    """
    Helper function with main, shared logic for to_parquet/to_feather.
    """
    from pyarrow import StructArray

    from geopandas.io._geoarrow import geopandas_to_arrow

    _validate_dataframe(df)

    if schema_version is not None:
        if geometry_encoding != "WKB" and schema_version != "1.1.0":
            raise ValueError(
                "'geoarrow' encoding is only supported with schema version >= 1.1.0"
            )

    table, geometry_encoding_dict = geopandas_to_arrow(
        df, geometry_encoding=geometry_encoding, index=index, interleaved=False
    )
    geo_metadata = _create_metadata(
        df,
        schema_version=schema_version,
        geometry_encoding=geometry_encoding_dict,
        write_covering_bbox=write_covering_bbox,
    )

    if write_covering_bbox:
        if "bbox" in df.columns:
            raise ValueError(
                "An existing column 'bbox' already exists in the dataframe. "
                "Please rename to write covering bbox."
            )
        bounds = df.bounds
        bbox_array = StructArray.from_arrays(
            [bounds["minx"], bounds["miny"], bounds["maxx"], bounds["maxy"]],
            names=["xmin", "ymin", "xmax", "ymax"],
        )
        table = table.append_column("bbox", bbox_array)

    # Store geopandas specific file-level metadata
    # This must be done AFTER creating the table or it is not persisted
    metadata = table.schema.metadata
    metadata.update({b"geo": _encode_metadata(geo_metadata)})

    return table.replace_schema_metadata(metadata)


def _to_parquet(
    df,
    path,
    index=None,
    compression="snappy",
    geometry_encoding="WKB",
    schema_version=None,
    write_covering_bbox=False,
    **kwargs,
):
    """
    Write a GeoDataFrame to the Parquet format.

    Any geometry columns present are serialized to WKB format in the file.

    Requires 'pyarrow'.

    This is tracking version 1.0.0 of the GeoParquet specification at:
    https://github.com/opengeospatial/geoparquet. Writing older versions is
    supported using the `schema_version` keyword.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    index : bool, default None
        If ``True``, always include the dataframe's index(es) as columns
        in the file output.
        If ``False``, the index(es) will not be written to the file.
        If ``None``, the index(es) will be included as columns in the file
        output except `RangeIndex` which is stored as metadata only.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
        The encoding to use for the geometry columns. Defaults to "WKB"
        for maximum interoperability. Specify "geoarrow" to use one of the
        native GeoArrow-based single-geometry type encodings.
    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
        GeoParquet specification version; if not provided will default to
        latest supported version.
    write_covering_bbox : bool, default False
        Writes the bounding box column for each row entry with column
        name 'bbox'. Writing a bbox column can be computationally
        expensive, hence its default setting is False.
    **kwargs
        Additional keyword arguments passed to pyarrow.parquet.write_table().
    """
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )

    path = _expand_user(path)
    table = _geopandas_to_arrow(
        df,
        index=index,
        geometry_encoding=geometry_encoding,
        schema_version=schema_version,
        write_covering_bbox=write_covering_bbox,
    )
    parquet.write_table(table, path, compression=compression, **kwargs)


def _to_feather(df, path, index=None, compression=None, schema_version=None, **kwargs):
    """
    Write a GeoDataFrame to the Feather format.

    Any geometry columns present are serialized to WKB format in the file.

    Requires 'pyarrow' >= 0.17.

    This is tracking version 1.0.0 of the GeoParquet specification for
    the metadata at: https://github.com/opengeospatial/geoparquet. Writing
    older versions is supported using the `schema_version` keyword.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    index : bool, default None
        If ``True``, always include the dataframe's index(es) as columns
        in the file output.
        If ``False``, the index(es) will not be written to the file.
        If ``None``, the index(es) will be included as columns in the file
        output except `RangeIndex` which is stored as metadata only.
    compression : {'zstd', 'lz4', 'uncompressed'}, optional
        Name of the compression to use. Use ``"uncompressed"`` for no
        compression. By default uses LZ4 if available, otherwise uncompressed.
    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
        GeoParquet specification version for the metadata; if not provided
        will default to latest supported version.
    kwargs
        Additional keyword arguments passed to pyarrow.feather.write_feather().
    """
    feather = import_optional_dependency(
        "pyarrow.feather", extra="pyarrow is required for Feather support."
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    if Version(pyarrow.__version__) < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    path = _expand_user(path)
    table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
    feather.write_feather(table, path, compression=compression, **kwargs)
|
||||
def _arrow_to_geopandas(table, geo_metadata=None):
    """
    Helper function with main, shared logic for read_parquet/read_feather.
    """
    if geo_metadata is None:
        # Note: this path of not passing metadata is also used by dask-geopandas
        geo_metadata = _validate_and_decode_metadata(table.schema.metadata)

    # Find all geometry columns that were read from the file. May
    # be a subset if 'columns' parameter is used.
    geometry_columns = [
        col for col in geo_metadata["columns"] if col in table.column_names
    ]
    result_column_names = list(table.slice(0, 0).to_pandas().columns)
    geometry_columns.sort(key=result_column_names.index)

    if not len(geometry_columns):
        raise ValueError(
            """No geometry columns are included in the columns read from
            the Parquet/Feather file. To read this file without geometry columns,
            use pandas.read_parquet/read_feather() instead."""
        )

    geometry = geo_metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if len(geometry_columns) and geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet/Feather "
                "file. The first column read was promoted to the primary geometry.",
                stacklevel=3,
            )

    table_attr = table.drop(geometry_columns)
    df = table_attr.to_pandas()

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        col_metadata = geo_metadata["columns"][col]
        if "crs" in col_metadata:
            crs = col_metadata["crs"]
            if isinstance(crs, dict):
                _remove_id_from_member_of_ensembles(crs)
        else:
            # per the GeoParquet spec, missing CRS is to be interpreted as
            # OGC:CRS84
            crs = "OGC:CRS84"

        if col_metadata["encoding"] == "WKB":
            geom_arr = from_wkb(np.array(table[col]), crs=crs)
        else:
            from geopandas.io._geoarrow import construct_shapely_array

            geom_arr = from_shapely(
                construct_shapely_array(
                    table[col].combine_chunks(), "geoarrow." + col_metadata["encoding"]
                ),
                crs=crs,
            )

        df.insert(result_column_names.index(col), col, geom_arr)

    return GeoDataFrame(df, geometry=geometry)


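# A minimal sketch of the WKB round-trip this helper reverses, using only
# public APIs (values hypothetical):
#
# >>> import shapely, geopandas
# >>> wkb = shapely.to_wkb(shapely.Point(1, 2))
# >>> geopandas.GeoSeries.from_wkb([wkb], crs="OGC:CRS84")  # doctest: +SKIP

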
def _get_filesystem_path(path, filesystem=None, storage_options=None):
    """
    Get the filesystem and path for a given filesystem and path.

    If the filesystem is not None then it's just returned as is.
    """
    import pyarrow

    if (
        isinstance(path, str)
        and storage_options is None
        and filesystem is None
        and Version(pyarrow.__version__) >= Version("5.0.0")
    ):
        # Use the native pyarrow filesystem if possible.
        try:
            from pyarrow.fs import FileSystem

            filesystem, path = FileSystem.from_uri(path)
        except Exception:
            # fallback to use get_handle / fsspec for filesystems
            # that pyarrow doesn't support
            pass

    if _is_fsspec_url(path) and filesystem is None:
        fsspec = import_optional_dependency(
            "fsspec", extra="fsspec is required for 'storage_options'."
        )
        filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))

    if filesystem is None and storage_options:
        raise ValueError(
            "Cannot provide 'storage_options' with non-fsspec path '{}'".format(path)
        )

    return filesystem, path


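# Sketch of the pyarrow-native resolution attempted above (URI hypothetical):
#
# >>> from pyarrow.fs import FileSystem
# >>> fs, path = FileSystem.from_uri("s3://bucket/data.parquet")  # doctest: +SKIP

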
def _ensure_arrow_fs(filesystem):
    """
    Simplified version of pyarrow.fs._ensure_filesystem. This is only needed
    below because `pyarrow.parquet.read_metadata` does not yet accept a
    filesystem keyword (https://issues.apache.org/jira/browse/ARROW-16719)
    """
    from pyarrow import fs

    if isinstance(filesystem, fs.FileSystem):
        return filesystem

    # handle fsspec-compatible filesystems
    try:
        import fsspec
    except ImportError:
        pass
    else:
        if isinstance(filesystem, fsspec.AbstractFileSystem):
            return fs.PyFileSystem(fs.FSSpecHandler(filesystem))

    return filesystem


def _validate_and_decode_metadata(metadata):
    if metadata is None or b"geo" not in metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead."""
        )

    # check for malformed metadata
    try:
        decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    _validate_geo_metadata(decoded_geo_metadata)
    return decoded_geo_metadata


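# For reference, a decoded "geo" metadata dict has roughly this shape
# (abridged; keys follow the GeoParquet spec, values here are hypothetical):
#
# {
#     "version": "1.0.0",
#     "primary_column": "geometry",
#     "columns": {"geometry": {"encoding": "WKB", "geometry_types": ["Point"]}},
# }

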
def _read_parquet_schema_and_metadata(path, filesystem):
    """
    Opening the Parquet file/dataset a first time to get the schema and metadata.

    TODO: we should look into how we can reuse opened dataset for reading the
    actual data, to avoid discovering the dataset twice (problem right now is
    that the ParquetDataset interface doesn't allow passing the filters on read)
    """
    import pyarrow
    from pyarrow import parquet

    kwargs = {}
    if Version(pyarrow.__version__) < Version("15.0.0"):
        kwargs = dict(use_legacy_dataset=False)

    try:
        schema = parquet.ParquetDataset(path, filesystem=filesystem, **kwargs).schema
    except Exception:
        schema = parquet.read_schema(path, filesystem=filesystem)

    metadata = schema.metadata

    # read metadata separately to get the raw Parquet FileMetaData metadata
    # (pyarrow doesn't properly expose those in schema.metadata for files
    # created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
    if metadata is None or b"geo" not in metadata:
        try:
            metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
        except Exception:
            pass

    return schema, metadata


def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
    """
    Load a Parquet object from the file path, returning a GeoDataFrame.

    You can read a subset of columns in the file using the ``columns`` parameter.
    However, the structure of the returned GeoDataFrame will depend on which
    columns you read:

    * if no geometry columns are read, this will raise a ``ValueError`` - you
      should use the pandas `read_parquet` method instead.
    * if the primary geometry column saved to this file is not included in
      columns, the first available geometry column will be set as the geometry
      column of the returned GeoDataFrame.

    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
    specification at: https://github.com/opengeospatial/geoparquet

    If 'crs' key is not present in the GeoParquet metadata associated with the
    Parquet object, it will default to "OGC:CRS84" according to the specification.

    Requires 'pyarrow'.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        If not None, only these columns will be read from the file. If
        the primary geometry column is not included, the first secondary
        geometry read from the file will be set as the geometry column
        of the returned GeoDataFrame. If no geometry columns are present,
        a ``ValueError`` will be raised.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g. host,
        port, username, password, etc. For HTTP(S) URLs the key-value pairs are
        forwarded to urllib as header options. For other URLs (e.g. starting with
        "s3://", and "gcs://") the key-value pairs are forwarded to fsspec. Please
        see fsspec and urllib for more details.

        When no storage options are provided and a filesystem is implemented by
        both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
        filesystem is preferred. Provide the instantiated fsspec filesystem using
        the ``filesystem`` keyword if you wish to use its implementation.
    bbox : tuple, optional
        Bounding box to be used to filter selection from geoparquet data. This
        is only usable if the data was saved with the bbox covering metadata.
        Input is of the tuple format (xmin, ymin, xmax, ymax).
    **kwargs
        Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> df = geopandas.read_parquet("data.parquet")  # doctest: +SKIP

    Specifying columns to read:

    >>> df = geopandas.read_parquet(
    ...     "data.parquet",
    ...     columns=["geometry", "pop_est"]
    ... )  # doctest: +SKIP
    """

    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    import geopandas.io._pyarrow_hotfix  # noqa: F401

    # TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
    # adds filesystem as a keyword and match that.
    filesystem = kwargs.pop("filesystem", None)
    filesystem, path = _get_filesystem_path(
        path, filesystem=filesystem, storage_options=storage_options
    )
    path = _expand_user(path)
    schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)

    geo_metadata = _validate_and_decode_metadata(metadata)

    bbox_filter = (
        _get_parquet_bbox_filter(geo_metadata, bbox) if bbox is not None else None
    )

    if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)

    # by default, bbox column is not read in, so must specify which
    # columns are read in if it exists.
    if not columns and if_bbox_column_exists:
        columns = _get_non_bbox_columns(schema, geo_metadata)

    # if both bbox and filters kwargs are used, must splice together.
    if "filters" in kwargs:
        filters_kwarg = kwargs.pop("filters")
        filters = _splice_bbox_and_filters(filters_kwarg, bbox_filter)
    else:
        filters = bbox_filter

    kwargs["use_pandas_metadata"] = True

    table = parquet.read_table(
        path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
    )

    return _arrow_to_geopandas(table, geo_metadata)


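# Sketch of a bbox-filtered read (file name and bounds hypothetical; the file
# must carry a bbox covering column or use the 'point' encoding):
#
# >>> df = geopandas.read_parquet(
# ...     "data.parquet", bbox=(0, 0, 10, 20)
# ... )  # doctest: +SKIP

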
def _read_feather(path, columns=None, **kwargs):
    """
    Load a Feather object from the file path, returning a GeoDataFrame.

    You can read a subset of columns in the file using the ``columns`` parameter.
    However, the structure of the returned GeoDataFrame will depend on which
    columns you read:

    * if no geometry columns are read, this will raise a ``ValueError`` - you
      should use the pandas `read_feather` method instead.
    * if the primary geometry column saved to this file is not included in
      columns, the first available geometry column will be set as the geometry
      column of the returned GeoDataFrame.

    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
    specification at: https://github.com/opengeospatial/geoparquet

    If 'crs' key is not present in the Feather metadata associated with the
    Parquet object, it will default to "OGC:CRS84" according to the specification.

    Requires 'pyarrow' >= 0.17.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        If not None, only these columns will be read from the file. If
        the primary geometry column is not included, the first secondary
        geometry read from the file will be set as the geometry column
        of the returned GeoDataFrame. If no geometry columns are present,
        a ``ValueError`` will be raised.
    **kwargs
        Any additional kwargs passed to pyarrow.feather.read_table().

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> df = geopandas.read_feather("data.feather")  # doctest: +SKIP

    Specifying columns to read:

    >>> df = geopandas.read_feather(
    ...     "data.feather",
    ...     columns=["geometry", "pop_est"]
    ... )  # doctest: +SKIP
    """

    feather = import_optional_dependency(
        "pyarrow.feather", extra="pyarrow is required for Feather support."
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    import geopandas.io._pyarrow_hotfix  # noqa: F401

    if Version(pyarrow.__version__) < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    path = _expand_user(path)

    table = feather.read_table(path, columns=columns, **kwargs)
    return _arrow_to_geopandas(table)


def _get_parquet_bbox_filter(geo_metadata, bbox):
    primary_column = geo_metadata["primary_column"]

    if _check_if_covering_in_geo_metadata(geo_metadata):
        bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
        return _convert_bbox_to_parquet_filter(bbox, bbox_column_name)

    elif geo_metadata["columns"][primary_column]["encoding"] == "point":
        import pyarrow.compute as pc

        return (
            (pc.field((primary_column, "x")) >= bbox[0])
            & (pc.field((primary_column, "x")) <= bbox[2])
            & (pc.field((primary_column, "y")) >= bbox[1])
            & (pc.field((primary_column, "y")) <= bbox[3])
        )

    else:
        raise ValueError(
            "Specifying 'bbox' not supported for this Parquet file (it should either "
            "have a bbox covering column or use 'point' encoding)."
        )


def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
    import pyarrow.compute as pc

    return ~(
        (pc.field((bbox_column_name, "xmin")) > bbox[2])
        | (pc.field((bbox_column_name, "ymin")) > bbox[3])
        | (pc.field((bbox_column_name, "xmax")) < bbox[0])
        | (pc.field((bbox_column_name, "ymax")) < bbox[1])
    )


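# The filter above is the De Morgan negation of "the boxes do not overlap":
# two boxes intersect iff none of the four separating conditions holds.
# A plain-Python equivalent for intuition (tuples are (xmin, ymin, xmax, ymax)):
#
# def bboxes_intersect(a, b):
#     return not (b[0] > a[2] or b[1] > a[3] or b[2] < a[0] or b[3] < a[1])

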
def _check_if_covering_in_geo_metadata(geo_metadata):
    primary_column = geo_metadata["primary_column"]
    return "covering" in geo_metadata["columns"][primary_column].keys()


def _get_bbox_encoding_column_name(geo_metadata):
    primary_column = geo_metadata["primary_column"]
    return geo_metadata["columns"][primary_column]["covering"]["bbox"]["xmin"][0]


def _get_non_bbox_columns(schema, geo_metadata):
    bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
    columns = schema.names
    if bbox_column_name in columns:
        columns.remove(bbox_column_name)
    return columns


def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    if bbox_filter is None:
        return kwarg_filters

    filters_expression = parquet.filters_to_expression(kwarg_filters)
    return bbox_filter & filters_expression
851
.venv/lib/python3.12/site-packages/geopandas/io/file.py
Normal file
@@ -0,0 +1,851 @@
from __future__ import annotations

import os
import urllib.request
import warnings
from io import IOBase
from packaging.version import Version
from pathlib import Path

# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative

import numpy as np
import pandas as pd
from pandas.api.types import is_integer_dtype

import shapely
from shapely.geometry import mapping
from shapely.geometry.base import BaseGeometry

from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
from geopandas.io.util import vsi_path

_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")
# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
_VALID_URLS.discard("file")

fiona = None
fiona_env = None
fiona_import_error = None
FIONA_GE_19 = False


def _import_fiona():
    global fiona
    global fiona_env
    global fiona_import_error
    global FIONA_GE_19

    if fiona is None:
        try:
            import fiona

            # only try to import fiona.Env if the main fiona import succeeded
            # (otherwise you can get confusing "AttributeError: module 'fiona'
            # has no attribute '_loading'" / partially initialized module errors)
            try:
                from fiona import Env as fiona_env
            except ImportError:
                try:
                    from fiona import drivers as fiona_env
                except ImportError:
                    fiona_env = None

            FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
                "1.9.0"
            )

        except ImportError as err:
            fiona = False
            fiona_import_error = str(err)


pyogrio = None
pyogrio_import_error = None


def _import_pyogrio():
    global pyogrio
    global pyogrio_import_error

    if pyogrio is None:
        try:
            import pyogrio

        except ImportError as err:
            pyogrio = False
            pyogrio_import_error = str(err)


def _check_fiona(func):
    if not fiona:
        raise ImportError(
            f"the {func} requires the 'fiona' package, but it is not installed or does "
            f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
        )


def _check_pyogrio(func):
    if not pyogrio:
        raise ImportError(
            f"the {func} requires the 'pyogrio' package, but it is not installed "
            "or does not import correctly."
            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
        )


def _check_metadata_supported(metadata: str | None, engine: str, driver: str) -> None:
    if metadata is None:
        return
    if driver != "GPKG":
        raise NotImplementedError(
            "The 'metadata' keyword is only supported for the GPKG driver."
        )

    if engine == "fiona" and not FIONA_GE_19:
        raise NotImplementedError(
            "The 'metadata' keyword is only supported for Fiona >= 1.9."
        )


def _check_engine(engine, func):
    # if not specified through keyword or option, then default to "pyogrio" if
    # installed, otherwise try fiona
    if engine is None:
        import geopandas

        engine = geopandas.options.io_engine

    if engine is None:
        _import_pyogrio()
        if pyogrio:
            engine = "pyogrio"
        else:
            _import_fiona()
            if fiona:
                engine = "fiona"

    if engine == "pyogrio":
        _import_pyogrio()
        _check_pyogrio(func)
    elif engine == "fiona":
        _import_fiona()
        _check_fiona(func)
    elif engine is None:
        raise ImportError(
            f"The {func} requires the 'pyogrio' or 'fiona' package, "
            "but neither is installed or imports correctly."
            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
            f"\nImporting fiona resulted in: {fiona_import_error}"
        )

    return engine


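# Sketch of setting the engine globally instead of per call, via the option
# mentioned in the docstrings below:
#
# >>> import geopandas
# >>> geopandas.options.io_engine = "pyogrio"  # doctest: +SKIP

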
_EXTENSION_TO_DRIVER = {
    ".bna": "BNA",
    ".dxf": "DXF",
    ".csv": "CSV",
    ".shp": "ESRI Shapefile",
    ".dbf": "ESRI Shapefile",
    ".json": "GeoJSON",
    ".geojson": "GeoJSON",
    ".geojsonl": "GeoJSONSeq",
    ".geojsons": "GeoJSONSeq",
    ".gpkg": "GPKG",
    ".gml": "GML",
    ".xml": "GML",
    ".gpx": "GPX",
    ".gtm": "GPSTrackMaker",
    ".gtz": "GPSTrackMaker",
    ".tab": "MapInfo File",
    ".mif": "MapInfo File",
    ".mid": "MapInfo File",
    ".dgn": "DGN",
    ".fgb": "FlatGeobuf",
}


def _expand_user(path):
    """Expand paths that use ~."""
    if isinstance(path, str):
        path = os.path.expanduser(path)
    elif isinstance(path, Path):
        path = path.expanduser()
    return path


def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        return parse_url(url).scheme in _VALID_URLS
    except Exception:
        return False


def _read_file(
    filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
):
    """
    Returns a GeoDataFrame from a file or URL.

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely
        geometry. With engine="fiona", CRS mis-matches are resolved if given a GeoSeries
        or GeoDataFrame. With engine="pyogrio", bbox must be in the same CRS as the
        dataset. Tuple is (minx, miny, maxx, maxy) to match the bounds property of
        shapely geometry objects. Cannot be used with mask.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter for features that intersect with the given dict-like geojson
        geometry, GeoSeries, GeoDataFrame or shapely geometry.
        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        Cannot be used with bbox. If multiple geometries are passed, this will
        first union all geometries, which may be computationally expensive.
    columns : list, optional
        List of column names to import from the data source. Column names
        must exactly match the names in the data source. To avoid reading
        any columns (besides the geometry column), pass an empty list-like.
        By default reads all columns.
    rows : int or slice, default None
        Load in specific rows by passing an integer (first `n` rows) or a
        slice() object.
    engine : str, "pyogrio" or "fiona"
        The underlying library that is used to read the file. Currently, the
        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
        installed, otherwise tries "fiona". Engine can also be set globally
        with the ``geopandas.options.io_engine`` option.
    **kwargs :
        Keyword args to be passed to the engine, and can be used to read
        multi-layer data, data stored within archives (zip files), etc.
        In case of the "pyogrio" engine, the keyword arguments are passed to
        `pyogrio.read_dataframe`. In case of the "fiona" engine, the keyword
        arguments are passed to `fiona.open`. For more information on possible
        keywords, type: ``import pyogrio; help(pyogrio.read_dataframe)``.


    Examples
    --------
    >>> df = geopandas.read_file("nybb.shp")  # doctest: +SKIP

    Specifying layer of GPKG:

    >>> df = geopandas.read_file("file.gpkg", layer='cities')  # doctest: +SKIP

    Reading only first 10 rows:

    >>> df = geopandas.read_file("nybb.shp", rows=10)  # doctest: +SKIP

    Reading only geometries intersecting ``mask``:

    >>> df = geopandas.read_file("nybb.shp", mask=polygon)  # doctest: +SKIP

    Reading only geometries intersecting ``bbox``:

    >>> df = geopandas.read_file("nybb.shp", bbox=(0, 0, 10, 20))  # doctest: +SKIP

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
        If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.

    When specifying a URL, geopandas will check if the server supports reading
    partial data and in that case pass the URL as is to the underlying engine,
    which will then use the network file system handler of GDAL to read from
    the URL. Otherwise geopandas will download the data from the URL and pass
    all data in-memory to the underlying engine.
    If you need more control over how the URL is read, you can specify the
    GDAL virtual filesystem manually (e.g. ``/vsicurl/https://...``). See the
    GDAL documentation on filesystems for more details
    (https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access).

    """
    engine = _check_engine(engine, "'read_file' function")

    filename = _expand_user(filename)

    from_bytes = False
    if _is_url(filename):
        # if it is a url that supports random access -> pass through to
        # pyogrio/fiona as is (to support downloading only part of the file)
        # otherwise still download manually because pyogrio/fiona don't support
        # all types of urls (https://github.com/geopandas/geopandas/issues/2908)
        with urllib.request.urlopen(filename) as response:
            if response.headers.get("Accept-Ranges") != "bytes":
                filename = response.read()
                from_bytes = True

    if engine == "pyogrio":
        return _read_file_pyogrio(
            filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
        )

    elif engine == "fiona":
        if pd.api.types.is_file_like(filename):
            data = filename.read()
            path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data
            from_bytes = True
        else:
            path_or_bytes = filename

        return _read_file_fiona(
            path_or_bytes,
            from_bytes,
            bbox=bbox,
            mask=mask,
            columns=columns,
            rows=rows,
            **kwargs,
        )

    else:
        raise ValueError(f"unknown engine '{engine}'")


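# Sketch of the manual GDAL virtual-filesystem override mentioned in the
# Notes above (URL hypothetical):
#
# >>> df = geopandas.read_file(
# ...     "/vsicurl/https://example.com/data.gpkg"
# ... )  # doctest: +SKIP

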
def _read_file_fiona(
    path_or_bytes,
    from_bytes,
    bbox=None,
    mask=None,
    columns=None,
    rows=None,
    where=None,
    **kwargs,
):
    if where is not None and not FIONA_GE_19:
        raise NotImplementedError("where requires fiona 1.9+")

    if columns is not None:
        if "include_fields" in kwargs:
            raise ValueError(
                "Cannot specify both 'include_fields' and 'columns' keywords"
            )
        if not FIONA_GE_19:
            raise NotImplementedError("'columns' keyword requires fiona 1.9+")
        kwargs["include_fields"] = columns
    elif "include_fields" in kwargs:
        # alias to columns, as this variable is used below to specify column order
        # in the dataframe creation
        columns = kwargs["include_fields"]

    if not from_bytes:
        # Opening a file via URL or file-like-object above automatically detects a
        # zipped file. In order to match that behavior, attempt to add a zip scheme
        # if missing.
        path_or_bytes = vsi_path(str(path_or_bytes))

    if from_bytes:
        reader = fiona.BytesCollection
    else:
        reader = fiona.open

    with fiona_env():
        with reader(path_or_bytes, **kwargs) as features:
            crs = features.crs_wkt
            # attempt to get EPSG code
            try:
                # fiona 1.9+
                epsg = features.crs.to_epsg(confidence_threshold=100)
                if epsg is not None:
                    crs = epsg
            except AttributeError:
                # fiona <= 1.8
                try:
                    crs = features.crs["init"]
                except (TypeError, KeyError):
                    pass

            # handle loading the bounding box
            if bbox is not None:
                if isinstance(bbox, (GeoDataFrame, GeoSeries)):
                    bbox = tuple(bbox.to_crs(crs).total_bounds)
                elif isinstance(bbox, BaseGeometry):
                    bbox = bbox.bounds
                assert len(bbox) == 4
            # handle loading the mask
            elif isinstance(mask, (GeoDataFrame, GeoSeries)):
                mask = mapping(mask.to_crs(crs).union_all())
            elif isinstance(mask, BaseGeometry):
                mask = mapping(mask)

            filters = {}
            if bbox is not None:
                filters["bbox"] = bbox
            if mask is not None:
                filters["mask"] = mask
            if where is not None:
                filters["where"] = where

            # setup the data loading filter
            if rows is not None:
                if isinstance(rows, int):
                    rows = slice(rows)
                elif not isinstance(rows, slice):
                    raise TypeError("'rows' must be an integer or a slice.")
                f_filt = features.filter(rows.start, rows.stop, rows.step, **filters)
            elif filters:
                f_filt = features.filter(**filters)
            else:
                f_filt = features
            # get list of columns
            columns = columns or list(features.schema["properties"])
            datetime_fields = [
                k for (k, v) in features.schema["properties"].items() if v == "datetime"
            ]
            if (
                kwargs.get("ignore_geometry", False)
                or features.schema["geometry"] == "None"
            ):
                df = pd.DataFrame(
                    [record["properties"] for record in f_filt], columns=columns
                )
            else:
                df = GeoDataFrame.from_features(
                    f_filt, crs=crs, columns=columns + ["geometry"]
                )
            for k in datetime_fields:
                as_dt = None
                # plain try catch for when pandas will raise in the future
                # TODO we can tighten the exception type in future when it does
                try:
                    with warnings.catch_warnings():
                        # pandas 2.x does not yet enforce this behaviour but raises a
                        # warning -> we want to suppress this warning for our users,
                        # and do this by turning it into an error so we take the
                        # `except` code path to try again with utc=True
                        warnings.filterwarnings(
                            "error",
                            "In a future version of pandas, parsing datetimes with "
                            "mixed time zones will raise an error",
                            FutureWarning,
                        )
                        as_dt = pd.to_datetime(df[k])
                except Exception:
                    pass
                if as_dt is None or as_dt.dtype == "object":
                    # if to_datetime failed, try again for mixed timezone offsets
                    # This can still fail if there are invalid datetimes
                    try:
                        as_dt = pd.to_datetime(df[k], utc=True)
                    except Exception:
                        pass
                # if to_datetime succeeded, round datetimes as
                # fiona only supports up to ms precision (any microseconds are
                # floating point rounding error)
                if as_dt is not None and not (as_dt.dtype == "object"):
                    if PANDAS_GE_20:
                        df[k] = as_dt.dt.as_unit("ms")
                    else:
                        df[k] = as_dt.dt.round(freq="ms")
            return df


def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs):
    import pyogrio

    if rows is not None:
        if isinstance(rows, int):
            kwargs["max_features"] = rows
        elif isinstance(rows, slice):
            if rows.start is not None:
                if rows.start < 0:
                    raise ValueError(
                        "Negative slice start not supported with the 'pyogrio' engine."
                    )
                kwargs["skip_features"] = rows.start
            if rows.stop is not None:
                kwargs["max_features"] = rows.stop - (rows.start or 0)
            if rows.step is not None:
                raise ValueError("slice with step is not supported")
        else:
            raise TypeError("'rows' must be an integer or a slice.")

    if bbox is not None and mask is not None:
        # match error message from Fiona
        raise ValueError("mask and bbox can not be set together")

    if bbox is not None:
        if isinstance(bbox, (GeoDataFrame, GeoSeries)):
            crs = pyogrio.read_info(path_or_bytes).get("crs")
            if isinstance(path_or_bytes, IOBase):
                path_or_bytes.seek(0)

            bbox = tuple(bbox.to_crs(crs).total_bounds)
        elif isinstance(bbox, BaseGeometry):
            bbox = bbox.bounds
        if len(bbox) != 4:
            raise ValueError("'bbox' should be a length-4 tuple.")

    if mask is not None:
        # NOTE: mask cannot be used at same time as bbox keyword
        if isinstance(mask, (GeoDataFrame, GeoSeries)):
            crs = pyogrio.read_info(path_or_bytes).get("crs")
            if isinstance(path_or_bytes, IOBase):
                path_or_bytes.seek(0)

            mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
        elif isinstance(mask, BaseGeometry):
            mask = shapely.unary_union(mask)
        elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
            # convert GeoJSON to shapely geometry
            mask = shapely.geometry.shape(mask)

        kwargs["mask"] = mask

    if kwargs.pop("ignore_geometry", False):
        kwargs["read_geometry"] = False

    # translate `ignore_fields`/`include_fields` keyword for back compat with fiona
    if "ignore_fields" in kwargs and "include_fields" in kwargs:
        raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
    elif "ignore_fields" in kwargs:
        if kwargs.get("columns", None) is not None:
            raise ValueError(
                "Cannot specify both 'columns' and 'ignore_fields' keywords"
            )
        warnings.warn(
            "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
            "will be removed in a future release. You can use the 'columns' keyword "
            "instead to select which columns to read.",
            DeprecationWarning,
            stacklevel=3,
        )
        ignore_fields = kwargs.pop("ignore_fields")
        fields = pyogrio.read_info(path_or_bytes)["fields"]
        include_fields = [col for col in fields if col not in ignore_fields]
        kwargs["columns"] = include_fields
    elif "include_fields" in kwargs:
        # translate `include_fields` keyword for back compat with fiona engine
        if kwargs.get("columns", None) is not None:
            raise ValueError(
                "Cannot specify both 'columns' and 'include_fields' keywords"
            )
        warnings.warn(
            "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
            "will be removed in a future release. You can use the 'columns' keyword "
            "instead to select which columns to read.",
            DeprecationWarning,
            stacklevel=3,
        )
        kwargs["columns"] = kwargs.pop("include_fields")

    return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)


def _detect_driver(path):
    """
    Attempt to auto-detect driver based on the extension
    """
    try:
        # in case the path is a file handle
        path = path.name
    except AttributeError:
        pass
    try:
        return _EXTENSION_TO_DRIVER[Path(path).suffix.lower()]
    except KeyError:
        # Assume it is a shapefile folder for now. In the future,
        # will likely raise an exception when the expected
        # folder writing behavior is more clearly defined.
        return "ESRI Shapefile"


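# Sketch of the extension-based detection (paths hypothetical):
#
# >>> _detect_driver("cities.gpkg")  # doctest: +SKIP
# 'GPKG'
# >>> _detect_driver("cities")  # no extension -> folder fallback  # doctest: +SKIP
# 'ESRI Shapefile'

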
def _to_file(
    df,
    filename,
    driver=None,
    schema=None,
    index=None,
    mode="w",
    crs=None,
    engine=None,
    metadata=None,
    **kwargs,
):
    """
    Write this GeoDataFrame to an OGR data source

    A dictionary of supported OGR providers is available via:

    >>> import pyogrio
    >>> pyogrio.list_drivers()  # doctest: +SKIP

    Parameters
    ----------
    df : GeoDataFrame to be written
    filename : string
        File path or file handle to write to. The path may specify a
        GDAL VSI scheme.
    driver : string, default None
        The OGR format driver used to write the vector file.
        If not specified, it attempts to infer it from the file extension.
        If no extension is specified, it saves ESRI Shapefile to a folder.
    schema : dict, default None
        If specified, the schema dictionary is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the schema based on each column's dtype.
        Not supported for the "pyogrio" engine.
    index : bool, default None
        If True, write index into one or more columns (for MultiIndex).
        Default None writes the index into one or more columns only if
        the index is named, is a MultiIndex, or has a non-integer data
        type. If False, no index is written.

        .. versionadded:: 0.7
            Previously the index was not written.
    mode : string, default 'w'
        The write mode, 'w' to overwrite the existing file and 'a' to append;
        when using the pyogrio engine, you can also pass ``append=True``.
        Not all drivers support appending. For the fiona engine, the drivers
        that support appending are listed in fiona.supported_drivers or
        https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py.
        For the pyogrio engine, you should be able to use any driver that
        is available in your installation of GDAL that supports append
        capability; see the specific driver entry at
        https://gdal.org/drivers/vector/index.html for more information.
    crs : pyproj.CRS, default None
        If specified, the CRS is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the crs based on crs df attribute.
        The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
    engine : str, "pyogrio" or "fiona"
        The underlying library that is used to write the file. Currently, the
        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
        installed, otherwise tries "fiona". Engine can also be set globally
        with the ``geopandas.options.io_engine`` option.
    metadata : dict[str, str], default None
        Optional metadata to be stored in the file. Keys and values must be
        strings. Only supported for the "GPKG" driver
        (requires Fiona >= 1.9 or pyogrio >= 0.6).
    **kwargs :
        Keyword args to be passed to the engine, and can be used to write
        to multi-layer data, store data within archives (zip files), etc.
        In case of the "fiona" engine, the keyword arguments are passed to
        `fiona.open`. For more information on possible keywords, type:
        ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
        the keyword arguments are passed to `pyogrio.write_dataframe`.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
    """
    engine = _check_engine(engine, "'to_file' method")

    filename = _expand_user(filename)

    if index is None:
        # Determine if index attribute(s) should be saved to file
        # (only if they are named or are non-integer)
        index = list(df.index.names) != [None] or not is_integer_dtype(df.index.dtype)
    if index:
        df = df.reset_index(drop=False)

    if driver is None:
        driver = _detect_driver(filename)

    if driver == "ESRI Shapefile" and any(len(c) > 10 for c in df.columns.tolist()):
        warnings.warn(
            "Column names longer than 10 characters will be truncated when saved to "
            "ESRI Shapefile.",
            stacklevel=3,
        )

    if (df.dtypes == "geometry").sum() > 1:
        raise ValueError(
            "GeoDataFrame contains multiple geometry columns but GeoDataFrame.to_file "
            "supports only a single geometry column. Use a GeoDataFrame.to_parquet or "
            "GeoDataFrame.to_feather, drop additional geometry columns or convert them "
            "to a supported format like a well-known text (WKT) using "
            "`GeoSeries.to_wkt()`.",
        )
    _check_metadata_supported(metadata, engine, driver)

    if mode not in ("w", "a"):
        raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")

    if engine == "pyogrio":
        _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
    elif engine == "fiona":
        _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
    else:
        raise ValueError(f"unknown engine '{engine}'")


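# Sketch of the append mode described above (file and layer names hypothetical;
# the driver must support appending):
#
# >>> gdf.to_file("data.gpkg", layer="roads", mode="a")  # doctest: +SKIP

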
def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
    if not HAS_PYPROJ and crs:
        raise ImportError(
            "The 'pyproj' package is required to write a file with a CRS, but it is not"
            " installed or does not import correctly."
        )

    if schema is None:
        schema = infer_schema(df)

    if crs:
        from pyproj import CRS

        crs = CRS.from_user_input(crs)
    else:
        crs = df.crs

    with fiona_env():
        crs_wkt = None
        try:
            gdal_version = Version(
                fiona.env.get_gdal_release_name().strip("e")
            )  # GH3147
        except (AttributeError, ValueError):
            gdal_version = Version("2.0.0")  # just assume it is not the latest
        if gdal_version >= Version("3.0.0") and crs:
            crs_wkt = crs.to_wkt()
        elif crs:
            crs_wkt = crs.to_wkt("WKT1_GDAL")
        with fiona.open(
            filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
        ) as colxn:
            if metadata is not None:
                colxn.update_tags(metadata)
            colxn.writerecords(df.iterfeatures())


def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
    import pyogrio

    if schema is not None:
        raise ValueError(
            "The 'schema' argument is not supported with the 'pyogrio' engine."
        )

    if mode == "a":
        kwargs["append"] = True

    if crs is not None:
        raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")

    # for the fiona engine, this check is done in gdf.iterfeatures()
    if not df.columns.is_unique:
        raise ValueError("GeoDataFrame cannot contain duplicated column names.")

    pyogrio.write_dataframe(df, filename, driver=driver, metadata=metadata, **kwargs)


def infer_schema(df):
    from collections import OrderedDict

    # TODO: test pandas string type and boolean type once released
    types = {
        "Int32": "int32",
        "int32": "int32",
        "Int64": "int",
        "string": "str",
        "boolean": "bool",
    }

    def convert_type(column, in_type):
        if in_type == object:
            return "str"
        if in_type.name.startswith("datetime64"):
            # numpy datetime type regardless of frequency
            return "datetime"
        if str(in_type) in types:
            out_type = types[str(in_type)]
        else:
            out_type = type(np.zeros(1, in_type).item()).__name__
        if out_type == "long":
            out_type = "int"
        return out_type

    properties = OrderedDict(
        [
            (col, convert_type(col, _type))
            for col, _type in zip(df.columns, df.dtypes)
            if col != df._geometry_column_name
        ]
    )

    if df.empty:
        warnings.warn(
            "You are attempting to write an empty DataFrame to file. "
            "For some drivers, this operation may fail.",
            UserWarning,
            stacklevel=3,
        )

    # Since https://github.com/Toblerity/Fiona/issues/446 resolution,
    # Fiona allows a list of geometry types
    geom_types = _geometry_types(df)

    schema = {"geometry": geom_types, "properties": properties}

    return schema


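# For reference, infer_schema on a GeoDataFrame with one string column and
# Point geometries yields roughly (hypothetical example):
#
# {"geometry": "Point", "properties": OrderedDict([("name", "str")])}

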
def _geometry_types(df):
    """
    Determine the geometry types in the GeoDataFrame for the schema.
    """
    geom_types_2D = df[~df.geometry.has_z].geometry.geom_type.unique()
    geom_types_2D = [gtype for gtype in geom_types_2D if gtype is not None]
    geom_types_3D = df[df.geometry.has_z].geometry.geom_type.unique()
    geom_types_3D = ["3D " + gtype for gtype in geom_types_3D if gtype is not None]
    geom_types = geom_types_3D + geom_types_2D

    if len(geom_types) == 0:
        # Default geometry type supported by Fiona
        # (Since https://github.com/Toblerity/Fiona/issues/446 resolution)
        return "Unknown"

    if len(geom_types) == 1:
        geom_types = geom_types[0]

    return geom_types


def _list_layers(filename) -> pd.DataFrame:
    """List layers available in a file.

    Provides an overview of layers available in a file or URL together with their
    geometry types. When supported by the data source, this includes both spatial and
    non-spatial layers. Non-spatial layers are indicated by the ``"geometry_type"``
    column being ``None``. GeoPandas will not read such layers but they can be read into
    a pd.DataFrame using :func:`pyogrio.read_dataframe`.

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)

    Returns
    -------
    pandas.DataFrame
        A DataFrame with columns "name" and "geometry_type" and one row per layer.
    """
    _import_pyogrio()
    _check_pyogrio("list_layers")

    import pyogrio

    return pd.DataFrame(
        pyogrio.list_layers(filename), columns=["name", "geometry_type"]
    )
473
.venv/lib/python3.12/site-packages/geopandas/io/sql.py
Normal file
@@ -0,0 +1,473 @@
import warnings
from contextlib import contextmanager
from functools import lru_cache

import pandas as pd

import shapely
import shapely.wkb

from geopandas import GeoDataFrame


@contextmanager
def _get_conn(conn_or_engine):
    """
    Yield a connection within a transaction context.

    Engine.begin() returns a Connection with an implicit Transaction while
    Connection.begin() returns the Transaction. This helper will always return a
    Connection with an implicit (possibly nested) Transaction.

    Parameters
    ----------
    conn_or_engine : Connection or Engine
        A sqlalchemy Connection or Engine instance

    Returns
    -------
    Connection
    """
    from sqlalchemy.engine.base import Connection, Engine

    if isinstance(conn_or_engine, Connection):
        if not conn_or_engine.in_transaction():
            with conn_or_engine.begin():
                yield conn_or_engine
        else:
            yield conn_or_engine
    elif isinstance(conn_or_engine, Engine):
        with conn_or_engine.begin() as conn:
            yield conn
    else:
        raise ValueError(f"Unknown Connectable: {conn_or_engine}")


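# Sketch of how the context manager is used (connection URL hypothetical):
#
# >>> from sqlalchemy import create_engine
# >>> engine = create_engine("postgresql://user:pass@host:5432/db")
# >>> with _get_conn(engine) as conn:  # doctest: +SKIP
# ...     result = conn.execute(...)

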
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
    """
    Transforms a pandas DataFrame into a GeoDataFrame.

    The column 'geom_col' must be a geometry column in WKB representation.
    To be used to convert df based on pd.read_sql to gdf.

    Parameters
    ----------
    df : DataFrame
        pandas DataFrame with geometry column in WKB representation.
    geom_col : string, default 'geom'
        column name to convert to shapely geometries
    crs : pyproj.CRS, optional
        CRS to use for the returned GeoDataFrame. The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
        If not set, tries to determine CRS from the SRID associated with the
        first geometry in the database, and assigns that to all geometries.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the database to query.

    Returns
    -------
    GeoDataFrame
    """

    if geom_col not in df:
        raise ValueError("Query missing geometry column '{}'".format(geom_col))

    if df.columns.to_list().count(geom_col) > 1:
        raise ValueError(
            f"Duplicate geometry column '{geom_col}' detected in SQL query output. "
            "Only one geometry column is allowed."
        )

    geoms = df[geom_col].dropna()

    if not geoms.empty:
        # load from Python 3 binary
        load_geom_bytes = shapely.wkb.loads

        def load_geom_text(x):
            """Load from binary encoded as text."""
            return shapely.wkb.loads(str(x), hex=True)

        if isinstance(geoms.iat[0], bytes):
            load_geom = load_geom_bytes
        else:
            load_geom = load_geom_text

        df[geom_col] = geoms = geoms.apply(load_geom)
        if crs is None:
            srid = shapely.get_srid(geoms.iat[0])
            # if no defined SRID in geodatabase, returns SRID of 0
            if srid != 0:
                try:
                    spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
                except pd.errors.DatabaseError:
                    warning_msg = (
                        f"Could not find the spatial reference system table "
                        f"(spatial_ref_sys) in PostGIS. "
                        f"Trying epsg:{srid} as a fallback."
                    )
                    warnings.warn(warning_msg, UserWarning, stacklevel=3)
                    crs = "epsg:{}".format(srid)
                else:
                    if not spatial_ref_sys_df.empty:
                        auth_name = spatial_ref_sys_df["auth_name"].item()
                        crs = f"{auth_name}:{srid}"
                    else:
                        warning_msg = (
                            f"Could not find srid {srid} in the "
                            f"spatial_ref_sys table. "
                            f"Trying epsg:{srid} as a fallback."
                        )
                        warnings.warn(warning_msg, UserWarning, stacklevel=3)
                        crs = "epsg:{}".format(srid)

    return GeoDataFrame(df, crs=crs, geometry=geom_col)


def _read_postgis(
    sql,
    con,
    geom_col="geom",
    crs=None,
    index_col=None,
    coerce_float=True,
    parse_dates=None,
    params=None,
    chunksize=None,
):
    """
    Returns a GeoDataFrame corresponding to the result of the query
    string, which must contain a geometry column in WKB representation.

    It is also possible to use :meth:`~GeoDataFrame.read_file` to read from a database.
    Especially for file geodatabases like GeoPackage or SpatiaLite this can be easier.

    Parameters
    ----------
    sql : string
        SQL query to execute in selecting entries from database, or name
        of the table to read from the database.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the database to query.
    geom_col : string, default 'geom'
        column name to convert to shapely geometries
    crs : dict or str, optional
        CRS to use for the returned GeoDataFrame; if not set, tries to
        determine CRS from the SRID associated with the first geometry in
        the database, and assigns that to all geometries.
    chunksize : int, default None
        If specified, return an iterator where chunksize is the number of rows to
        include in each chunk.

    See the documentation for pandas.read_sql for further explanation
    of the following parameters:
    index_col, coerce_float, parse_dates, params, chunksize

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    PostGIS

    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydatabase"
    >>> con = create_engine(db_connection_url)  # doctest: +SKIP
    >>> sql = "SELECT geom, highway FROM roads"
    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP

    SpatiaLite

    >>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP
    """

    if chunksize is None:
        # read all in one chunk and return a single GeoDataFrame
        df = pd.read_sql(
            sql,
            con,
            index_col=index_col,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            params=params,
            chunksize=chunksize,
        )
        return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)

    else:
        # read data in chunks and return a generator
        df_generator = pd.read_sql(
            sql,
            con,
            index_col=index_col,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            params=params,
            chunksize=chunksize,
        )
        return (
            _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
        )


def _get_geometry_type(gdf):
    """
    Get basic geometry type of a GeoDataFrame. See more info from:
    https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType

    Following rules apply:
    - if geometries all share the same geometry-type,
      geometries are inserted with the given GeometryType with following types:
        - Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
          GeometryCollection.
        - LinearRing geometries will be converted into LineString -objects.
    - in all other cases, geometries will be inserted with type GEOMETRY:
        - a mix of Polygons and MultiPolygons in GeoSeries
        - a mix of Points and LineStrings in GeoSeries
        - geometry is of type GeometryCollection,
          such as GeometryCollection([Point, LineStrings])
    - if any of the geometries has Z-coordinate, all records will
      be written with 3D.
    """
    geom_types = list(gdf.geometry.geom_type.unique())
    has_curve = False

    for gt in geom_types:
        if gt is None:
            continue
        elif "LinearRing" in gt:
            has_curve = True

    if len(geom_types) == 1:
        if has_curve:
            target_geom_type = "LINESTRING"
        else:
            if geom_types[0] is None:
                raise ValueError("No valid geometries in the data.")
            else:
                target_geom_type = geom_types[0].upper()
    else:
        target_geom_type = "GEOMETRY"

    # Check for 3D-coordinates
    if any(gdf.geometry.has_z):
        target_geom_type += "Z"

    return target_geom_type, has_curve


def _get_srid_from_crs(gdf):
    """
    Get EPSG code from CRS if available. If not, return 0.
    """
    # Use geoalchemy2 default for srid
    # Note: undefined srid in PostGIS is 0
    srid = None
    warning_msg = (
        "Could not parse CRS from the GeoDataFrame. "
        "Inserting data without defined CRS."
    )
    if gdf.crs is not None:
        try:
            for confidence in (100, 70, 25):
                srid = gdf.crs.to_epsg(min_confidence=confidence)
                if srid is not None:
                    break
                auth_srid = gdf.crs.to_authority(
                    auth_name="ESRI", min_confidence=confidence
                )
                if auth_srid is not None:
                    srid = int(auth_srid[1])
                    break
        except Exception:
            warnings.warn(warning_msg, UserWarning, stacklevel=2)

    if srid is None:
        srid = 0
        warnings.warn(warning_msg, UserWarning, stacklevel=2)

    return srid

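# Illustrative behaviour sketch (added comment, not upstream text): the loop
# above retries the EPSG lookup with decreasing confidence before falling back
# to an ESRI authority code, e.g.:
#
#     from shapely.geometry import Point
#     gdf = geopandas.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
#     _get_srid_from_crs(gdf)  # -> 4326
#     # Without a CRS, the geoalchemy2/PostGIS "undefined" SRID is used:
#     _get_srid_from_crs(geopandas.GeoDataFrame(geometry=[Point(0, 0)]))  # -> 0, warns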
def _convert_linearring_to_linestring(gdf, geom_name):
    from shapely.geometry import LineString

    # TODO: Use the shapely function once it's implemented:
    # https://github.com/shapely/shapely/issues/1617

    mask = gdf.geom_type == "LinearRing"
    gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
        lambda geom: LineString(geom)
    )
    return gdf

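# Illustrative example (added comment, not upstream text): the ring is
# re-wrapped with identical coordinates, since PostGIS has no LINEARRING type:
#
#     from shapely.geometry import LinearRing, LineString
#     LineString(LinearRing([(0, 0), (0, 1), (1, 1), (1, 0)]))
#     # -> LINESTRING (0 0, 0 1, 1 1, 1 0, 0 0), the ring's closing point kept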
def _convert_to_ewkb(gdf, geom_name, srid):
    """Convert geometries to EWKB."""
    geoms = shapely.to_wkb(
        shapely.set_srid(gdf[geom_name].values._data, srid=srid),
        hex=True,
        include_srid=True,
    )

    # The gdf will warn that the geometry column doesn't hold in-memory geometries
    # now that they are EWKB, so convert back to a regular dataframe to avoid warning
    # the user that the dtypes are unexpected.
    df = pd.DataFrame(gdf, copy=False)
    df[geom_name] = geoms
    return df

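# Illustrative sketch (added comment, not upstream text): EWKB is WKB extended
# with an embedded SRID, which is what PostGIS stores natively:
#
#     import shapely
#     geom = shapely.set_srid(shapely.Point(1, 2), 4326)
#     shapely.to_wkb(geom, hex=True, include_srid=True)
#     # -> '0101000020E6100000...' (E6100000 is 4326 in little-endian)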
def _psql_insert_copy(tbl, conn, keys, data_iter):
    import csv
    import io

    s_buf = io.StringIO()
    writer = csv.writer(s_buf)
    writer.writerows(data_iter)
    s_buf.seek(0)

    columns = ", ".join('"{}"'.format(k) for k in keys)

    dbapi_conn = conn.connection
    sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
        tbl.table.schema, tbl.table.name, columns
    )
    with dbapi_conn.cursor() as cur:
        # Use psycopg method if it's available
        if hasattr(cur, "copy") and callable(cur.copy):
            with cur.copy(sql) as copy:
                copy.write(s_buf.read())
        else:  # otherwise use psycopg2 method
            cur.copy_expert(sql, s_buf)

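# Note added for clarity (an assumption about intent, not upstream text): this
# callable matches the signature pandas expects for the ``method`` argument of
# ``DataFrame.to_sql``, so bulk inserts go through a single PostgreSQL COPY
# statement instead of row-wise INSERTs, e.g.:
#
#     df.to_sql("my_table", engine, method=_psql_insert_copy)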
def _write_postgis(
    gdf,
    name,
    con,
    schema=None,
    if_exists="fail",
    index=False,
    index_label=None,
    chunksize=None,
    dtype=None,
):
    """
    Upload GeoDataFrame into PostGIS database.

    This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
    Python driver (e.g. psycopg2) to be installed.

    Parameters
    ----------
    name : str
        Name of the target table.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the PostGIS database.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the table already exists:

        - fail: Raise a ValueError.
        - replace: Drop the table before inserting new values.
        - append: Insert new values to the existing table.
    schema : string, optional
        Specify the schema. If None, use default schema: 'public'.
    index : bool, default False
        Write DataFrame index as a column.
        Uses *index_label* as the column name in the table.
    index_label : string or sequence, default None
        Column label for index column(s).
        If None is given (default) and index is True,
        then the index names are used.
    chunksize : int, optional
        Rows will be written in batches of this size at a time.
        By default, all rows will be written at once.
    dtype : dict of column name to SQL type, default None
        Specifying the datatype for columns.
        The keys should be the column names and the values
        should be the SQLAlchemy types.

    Examples
    --------

    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
/mydatabase")  # doctest: +SKIP
    >>> gdf.to_postgis("my_table", engine)  # doctest: +SKIP
    """
    try:
        from geoalchemy2 import Geometry
        from sqlalchemy import text
    except ImportError:
        raise ImportError("'to_postgis()' requires geoalchemy2 package.")

    gdf = gdf.copy()
    geom_name = gdf.geometry.name

    # Get srid
    srid = _get_srid_from_crs(gdf)

    # Get geometry type and info whether data contains LinearRing.
    geometry_type, has_curve = _get_geometry_type(gdf)

    # Build dtype with Geometry
    if dtype is not None:
        dtype[geom_name] = Geometry(geometry_type=geometry_type, srid=srid)
    else:
        dtype = {geom_name: Geometry(geometry_type=geometry_type, srid=srid)}

    # Convert LinearRing geometries to LineString
    if has_curve:
        gdf = _convert_linearring_to_linestring(gdf, geom_name)

    # Convert geometries to EWKB
    gdf = _convert_to_ewkb(gdf, geom_name, srid)

    if schema is not None:
        schema_name = schema
    else:
        schema_name = "public"

    if if_exists == "append":
        # Check that the geometry srid matches with the current GeoDataFrame
        with _get_conn(con) as connection:
            # Only check SRID if table exists
            if connection.dialect.has_table(connection, name, schema):
                target_srid = connection.execute(
                    text(
                        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                            schema=schema_name, table=name, geom_col=geom_name
                        )
                    )
                ).fetchone()[0]

                if target_srid != srid:
                    msg = (
                        "The CRS of the target table (EPSG:{epsg_t}) differs from the "
                        "CRS of current GeoDataFrame (EPSG:{epsg_src}).".format(
                            epsg_t=target_srid, epsg_src=srid
                        )
                    )
                    raise ValueError(msg)

    with _get_conn(con) as connection:
        gdf.to_sql(
            name,
            connection,
            schema=schema_name,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            chunksize=chunksize,
            dtype=dtype,
            method=_psql_insert_copy,
        )

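# Illustrative usage sketch (added comment, not upstream text): appending to an
# existing table only succeeds when the SRIDs agree, e.g.:
#
#     gdf.to_postgis("roads", engine)              # create with gdf's SRID
#     gdf.to_crs("EPSG:3857").to_postgis(
#         "roads", engine, if_exists="append"
#     )                                            # -> ValueError (SRID mismatch)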
@lru_cache
def _get_spatial_ref_sys_df(con, srid):
    spatial_ref_sys_sql = (
        f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
    )
    return pd.read_sql(spatial_ref_sys_sql, con)
@@ -0,0 +1,100 @@
"""
Script to create the data and write legacy storage (pickle) files.

Based on pandas' generate_legacy_storage_files.py script.

To use this script, create an environment for which you want to
generate pickles, activate the environment, and run this script as:

$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
    geopandas/geopandas/io/tests/data/pickle/ pickle

This script generates a storage file for the current arch, system,
and python version.

The idea here is you are using the *current* version of the
generate_legacy_storage_files with an *older* version of geopandas to
generate a pickle file. We will then check this file into a current
branch, and test using test_pickle.py. This will load the *older*
pickles and test versus the current data that is generated
(with master). These are then compared.

"""

import os
import pickle
import platform
import sys

import pandas as pd

from shapely.geometry import Point

import geopandas


def create_pickle_data():
    """create the pickle data"""

    # custom geometry column name
    gdf_the_geom = geopandas.GeoDataFrame(
        {"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        geometry="the_geom",
    )

    # with crs
    gdf_crs = geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        crs="EPSG:4326",
    )

    return {"gdf_the_geom": gdf_the_geom, "gdf_crs": gdf_crs}


def platform_name():
    return "_".join(
        [
            str(geopandas.__version__),
            "pd-" + str(pd.__version__),
            "py-" + str(platform.python_version()),
            str(platform.machine()),
            str(platform.system().lower()),
        ]
    )


def write_legacy_pickles(output_dir):
    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    print("geopandas version: {}".format(geopandas.__version__))
    print("  output dir    : {}".format(output_dir))
    print("  storage format: pickle")

    pth = "{}.pickle".format(platform_name())

    fh = open(os.path.join(output_dir, pth), "wb")
    pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)
    fh.close()

    print("created pickle file: {}".format(pth))


def main():
    if len(sys.argv) != 3:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type> "
        )

    output_dir = str(sys.argv[1])
    storage_type = str(sys.argv[2])

    if storage_type == "pickle":
        write_legacy_pickles(output_dir=output_dir)
    else:
        sys.exit("storage_type must be one of {'pickle'}")


if __name__ == "__main__":
    main()
1332
.venv/lib/python3.12/site-packages/geopandas/io/tests/test_arrow.py
Normal file
File diff suppressed because it is too large
1438
.venv/lib/python3.12/site-packages/geopandas/io/tests/test_file.py
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,328 @@
import os

from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
)

import geopandas
from geopandas import GeoDataFrame

from .test_file import FIONA_MARK, PYOGRIO_MARK

import pytest
from geopandas.testing import assert_geodataframe_equal

# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(
    (
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    )
)
vauquelin_place = Polygon(
    (
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    )
)

city_hall_walls = [
    LineString(
        (
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        )
    ),
    LineString(
        (
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        )
    ),
]

city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

point_3D = Point(-73.553785, 45.508722, 300)


# *****************************************
# TEST TOOLING


class _ExpectedError:
    def __init__(self, error_type, error_message_match):
        self.type = error_type
        self.match = error_message_match


class _ExpectedErrorBuilder:
    def __init__(self, composite_key):
        self.composite_key = composite_key

    def to_raise(self, error_type, error_match):
        _expected_exceptions[self.composite_key] = _ExpectedError(
            error_type, error_match
        )


def _expect_writing(gdf, ogr_driver):
    return _ExpectedErrorBuilder(_composite_key(gdf, ogr_driver))


def _composite_key(gdf, ogr_driver):
    return frozenset([id(gdf), ogr_driver])


def _expected_error_on(gdf, ogr_driver):
    composite_key = _composite_key(gdf, ogr_driver)
    return _expected_exceptions.get(composite_key, None)


# *****************************************
# TEST CASES
_geodataframes_to_write = []
_expected_exceptions = {}
_CRS = "epsg:4326"

# ------------------
# gdf with Points
gdf = GeoDataFrame(
    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[
        MultiPoint([city_hall_balcony, city_hall_council_chamber]),
        MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
    ],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Points and MultiPoints
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
)
_geodataframes_to_write.append(gdf)
# 'ESRI Shapefile' driver supports writing LineString/MultiLinestring and
# Polygon/MultiPolygon but does not mention Point/MultiPoint
# see https://www.gdal.org/drv_shapefile.html
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# gdf with LineStrings
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with LineStrings and MultiLineStrings
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Polygons
gdf = GeoDataFrame(
    {"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with MultiPolygon
gdf = GeoDataFrame(
    {"a": [1]},
    crs=_CRS,
    geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with Polygon and MultiPolygon
gdf = GeoDataFrame(
    {"a": [1, 2]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_boundaries,
    ],
)
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometry and Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometry and 3D Point
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with null geometries only
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
_geodataframes_to_write.append(gdf)

# ------------------
# gdf with all shape types mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
    ],
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")

# ------------------
# gdf with all 2D shape types and 3D Point mixed together
gdf = GeoDataFrame(
    {"a": [1, 2, 3, 4, 5, 6, 7]},
    crs=_CRS,
    geometry=[
        MultiPolygon((city_hall_boundaries, vauquelin_place)),
        city_hall_entrance,
        MultiLineString(city_hall_walls),
        city_hall_walls[0],
        MultiPoint([city_hall_entrance, city_hall_balcony]),
        city_hall_balcony,
        point_3D,
    ],
)
_geodataframes_to_write.append(gdf)
# Not supported by 'ESRI Shapefile' driver
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")


@pytest.fixture(params=_geodataframes_to_write)
def geodataframe(request):
    return request.param


@pytest.fixture(
    params=[
        ("GeoJSON", ".geojson"),
        ("ESRI Shapefile", ".shp"),
        ("GPKG", ".gpkg"),
        ("SQLite", ".sqlite"),
    ]
)
def ogr_driver(request):
    return request.param


@pytest.fixture(
    params=[
        pytest.param("fiona", marks=FIONA_MARK),
        pytest.param("pyogrio", marks=PYOGRIO_MARK),
    ]
)
def engine(request):
    return request.param


def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
    driver, ext = ogr_driver
    output_file = os.path.join(str(tmpdir), "output_file" + ext)
    write_kwargs = {}
    if driver == "SQLite":
        write_kwargs["spatialite"] = True

        # This if statement can be removed once minimal fiona version >= 1.8.20
        if engine == "fiona":
            from packaging.version import Version

            import fiona

            if Version(fiona.__version__) < Version("1.8.20"):
                pytest.skip("SQLite driver only available from version 1.8.20")

        # If only 3D Points, geometry_type needs to be specified for spatialite at the
        # moment. This if can be removed once the following PR is released:
        # https://github.com/geopandas/pyogrio/pull/223
        if (
            engine == "pyogrio"
            and len(geodataframe) == 2
            and geodataframe.geometry[0] is None
            and geodataframe.geometry[1] is not None
            and geodataframe.geometry[1].has_z
        ):
            write_kwargs["geometry_type"] = "Point Z"

    expected_error = _expected_error_on(geodataframe, driver)
    if expected_error:
        with pytest.raises(
            RuntimeError, match="Failed to write record|Could not add feature to layer"
        ):
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )
    else:
        if driver == "SQLite" and engine == "pyogrio":
            try:
                geodataframe.to_file(
                    output_file, driver=driver, engine=engine, **write_kwargs
                )
            except ValueError as e:
                if "unrecognized option 'SPATIALITE'" in str(e):
                    pytest.xfail(
                        "pyogrio wheels from PyPI do not come with SpatiaLite support. "
                        f"Error: {e}"
                    )
                raise
        else:
            geodataframe.to_file(
                output_file, driver=driver, engine=engine, **write_kwargs
            )

        reloaded = geopandas.read_file(output_file, engine=engine)

        if driver == "GeoJSON" and engine == "pyogrio":
            # For GeoJSON files, the int64 column comes back as int32
            reloaded["a"] = reloaded["a"].astype("int64")

        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
@@ -0,0 +1,537 @@
import contextlib
import json
import os
import pathlib
from packaging.version import Version

import numpy as np

import shapely
from shapely import MultiPoint, Point, box

from geopandas import GeoDataFrame, GeoSeries

import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal

pytest.importorskip("pyarrow")
import pyarrow as pa
import pyarrow.compute as pc
from pyarrow import feather

DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


def pa_table(table):
    if Version(pa.__version__) < Version("14.0.0"):
        return table._pa_table
    else:
        return pa.table(table)


def pa_array(array):
    if Version(pa.__version__) < Version("14.0.0"):
        return array._pa_array
    else:
        return pa.array(array)


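# Note added for clarity (an assumption about intent, not upstream text): on
# pyarrow >= 14 the Arrow PyCapsule protocol lets pa.table()/pa.array() consume
# geopandas' Arrow wrappers directly; older pyarrow versions need the private
# ._pa_table/._pa_array attributes, hence the version switch above.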
def assert_table_equal(left, right, check_metadata=True):
    geom_type = left["geometry"].type
    # in case of Points (directly the inner fixed_size_list or struct type)
    # -> there are NaNs for empties -> we need to compare them separately
    # and then fill, because pyarrow.Table.equals considers NaNs as not equal
    if pa.types.is_fixed_size_list(geom_type):
        left_values = left["geometry"].chunk(0).values
        right_values = right["geometry"].chunk(0).values
        assert pc.is_nan(left_values).equals(pc.is_nan(right_values))
        left_geoms = pa.FixedSizeListArray.from_arrays(
            pc.replace_with_mask(left_values, pc.is_nan(left_values), 0.0),
            type=left["geometry"].type,
        )
        right_geoms = pa.FixedSizeListArray.from_arrays(
            pc.replace_with_mask(right_values, pc.is_nan(right_values), 0.0),
            type=right["geometry"].type,
        )
        left = left.set_column(1, left.schema.field("geometry"), left_geoms)
        right = right.set_column(1, right.schema.field("geometry"), right_geoms)

    elif pa.types.is_struct(geom_type):
        left_arr = left["geometry"].chunk(0)
        right_arr = right["geometry"].chunk(0)

        for i in range(left_arr.type.num_fields):
            assert pc.is_nan(left_arr.field(i)).equals(pc.is_nan(right_arr.field(i)))

        left_geoms = pa.StructArray.from_arrays(
            [
                pc.replace_with_mask(
                    left_arr.field(i), pc.is_nan(left_arr.field(i)), 0.0
                )
                for i in range(left_arr.type.num_fields)
            ],
            fields=list(left["geometry"].type),
        )
        right_geoms = pa.StructArray.from_arrays(
            [
                pc.replace_with_mask(
                    right_arr.field(i), pc.is_nan(right_arr.field(i)), 0.0
                )
                for i in range(right_arr.type.num_fields)
            ],
            fields=list(right["geometry"].type),
        )

        left = left.set_column(1, left.schema.field("geometry"), left_geoms)
        right = right.set_column(1, right.schema.field("geometry"), right_geoms)

    if left.equals(right, check_metadata=check_metadata):
        return

    if not left.schema.equals(right.schema):
        raise AssertionError(
            "Schema not equal\nLeft:\n{0}\nRight:\n{1}".format(
                left.schema, right.schema
            )
        )

    if check_metadata:
        if not left.schema.equals(right.schema, check_metadata=True):
            if not left.schema.metadata == right.schema.metadata:
                raise AssertionError(
                    "Metadata not equal\nLeft:\n{0}\nRight:\n{1}".format(
                        left.schema.metadata, right.schema.metadata
                    )
                )
        for col in left.schema.names:
            assert left.schema.field(col).equals(
                right.schema.field(col), check_metadata=True
            )

    for col in left.column_names:
        a_left = pa.concat_arrays(left.column(col).chunks)
        a_right = pa.concat_arrays(right.column(col).chunks)
        if not a_left.equals(a_right):
            raise AssertionError(
                "Column '{0}' not equal:\n{1}".format(col, a_left.diff(a_right))
            )

    raise AssertionError("Tables not equal for unknown reason")


@pytest.mark.skipif(
    shapely.geos_version < (3, 9, 0),
    reason="Checking for empty is buggy with GEOS<3.9",
)  # an old GEOS is installed in the CI builds with the defaults channel
@pytest.mark.parametrize(
    "dim",
    [
        "xy",
        pytest.param(
            "xyz",
            marks=pytest.mark.skipif(
                shapely.geos_version < (3, 10, 0),
                reason="Cannot write 3D geometries with GEOS<3.10",
            ),
        ),
    ],
)
@pytest.mark.parametrize(
    "geometry_type",
    ["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
)
@pytest.mark.parametrize(
    "geometry_encoding, interleaved",
    [("WKB", None), ("geoarrow", True), ("geoarrow", False)],
    ids=["WKB", "geoarrow-interleaved", "geoarrow-separated"],
)
def test_geoarrow_export(geometry_type, dim, geometry_encoding, interleaved):
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df["row_number"] = df["row_number"].astype("int32")
    df = GeoDataFrame(df)
    df.geometry.array.crs = None

    # Read the expected data
    if geometry_encoding == "WKB":
        filename = f"example-{suffix}-wkb.arrow"
    else:
        filename = f"example-{suffix}{'-interleaved' if interleaved else ''}.arrow"
    expected = feather.read_table(base_path / filename)

    # GeoDataFrame -> Arrow Table
    result = pa_table(
        df.to_arrow(geometry_encoding=geometry_encoding, interleaved=interleaved)
    )
    # remove the "pandas" metadata
    result = result.replace_schema_metadata(None)

    mask_nonempty = None
    if (
        geometry_encoding == "WKB"
        and dim == "xyz"
        and geometry_type.startswith("multi")
    ):
        # for collections with z dimension, drop the empties because those don't
        # roundtrip correctly to WKB
        # (https://github.com/libgeos/geos/issues/888)
        mask_nonempty = pa.array(np.asarray(~df.geometry.is_empty))
        result = result.filter(mask_nonempty)
        expected = expected.filter(mask_nonempty)

    assert_table_equal(result, expected)

    # GeoSeries -> Arrow array
    if geometry_encoding != "WKB" and geometry_type == "point":
        # for points, we again have to handle NaNs separately, we already did that
        # for table so let's just skip this part
        return
    result_arr = pa_array(
        df.geometry.to_arrow(
            geometry_encoding=geometry_encoding, interleaved=interleaved
        )
    )
    if mask_nonempty is not None:
        result_arr = result_arr.filter(mask_nonempty)
    assert result_arr.equals(expected["geometry"].chunk(0))


@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_multiple_geometry_crs(encoding):
    pytest.importorskip("pyproj")
    # ensure each geometry column has its own crs
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    gdf["geom2"] = gdf.geometry.to_crs("epsg:3857")

    result = pa_table(gdf.to_arrow(geometry_encoding=encoding))
    meta1 = json.loads(
        result.schema.field("geometry").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta1["crs"])["id"]["code"] == 4326
    meta2 = json.loads(
        result.schema.field("geom2").metadata[b"ARROW:extension:metadata"]
    )
    assert json.loads(meta2["crs"])["id"]["code"] == 3857

    roundtripped = GeoDataFrame.from_arrow(result)
    assert_geodataframe_equal(gdf, roundtripped)
    assert gdf.geometry.crs == "epsg:4326"
    assert gdf.geom2.crs == "epsg:3857"


@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
|
||||
def test_geoarrow_series_name_crs(encoding):
|
||||
pytest.importorskip("pyproj")
|
||||
pytest.importorskip("pyarrow", minversion="14.0.0")
|
||||
|
||||
gser = GeoSeries([box(0, 0, 10, 10)], crs="epsg:4326", name="geom")
|
||||
schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
|
||||
field = pa.Field._import_from_c_capsule(schema_capsule)
|
||||
assert field.name == "geom"
|
||||
assert (
|
||||
field.metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"
|
||||
if encoding == "WKB"
|
||||
else b"geoarrow.polygon"
|
||||
)
|
||||
meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
|
||||
assert json.loads(meta["crs"])["id"]["code"] == 4326
|
||||
|
||||
# ensure it also works without a name
|
||||
gser = GeoSeries([box(0, 0, 10, 10)])
|
||||
schema_capsule, _ = gser.to_arrow(geometry_encoding=encoding).__arrow_c_array__()
|
||||
field = pa.Field._import_from_c_capsule(schema_capsule)
|
||||
assert field.name == ""
|
||||
|
||||
|
||||
def test_geoarrow_unsupported_encoding():
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")

    with pytest.raises(ValueError, match="Expected geometry encoding"):
        gdf.to_arrow(geometry_encoding="invalid")

    with pytest.raises(ValueError, match="Expected geometry encoding"):
        gdf.geometry.to_arrow(geometry_encoding="invalid")


def test_geoarrow_mixed_geometry_types():
    gdf = GeoDataFrame(
        {"geometry": [Point(0, 0), box(0, 0, 10, 10)]},
        crs="epsg:4326",
    )

    with pytest.raises(ValueError, match="Geometry type combination is not supported"):
        gdf.to_arrow(geometry_encoding="geoarrow")

    gdf = GeoDataFrame(
        {"geometry": [Point(0, 0), MultiPoint([(0, 0), (1, 1)])]},
        crs="epsg:4326",
    )
    result = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
    assert (
        result.schema.field("geometry").metadata[b"ARROW:extension:name"]
        == b"geoarrow.multipoint"
    )


@pytest.mark.parametrize("geom_type", ["point", "polygon"])
|
||||
@pytest.mark.parametrize(
|
||||
"encoding, interleaved", [("WKB", True), ("geoarrow", True), ("geoarrow", False)]
|
||||
)
|
||||
def test_geoarrow_missing(encoding, interleaved, geom_type):
|
||||
# dummy test for single geometry type until missing values are included
|
||||
# in the test data for test_geoarrow_export
|
||||
gdf = GeoDataFrame(
|
||||
geometry=[Point(0, 0) if geom_type == "point" else box(0, 0, 10, 10), None],
|
||||
crs="epsg:4326",
|
||||
)
|
||||
if (
|
||||
encoding == "geoarrow"
|
||||
and geom_type == "point"
|
||||
and interleaved
|
||||
and Version(pa.__version__) < Version("15.0.0")
|
||||
):
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Converting point geometries with missing values is not supported",
|
||||
):
|
||||
gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved)
|
||||
return
|
||||
result = pa_table(gdf.to_arrow(geometry_encoding=encoding, interleaved=interleaved))
|
||||
assert result["geometry"].null_count == 1
|
||||
assert result["geometry"].is_null().to_pylist() == [False, True]
|
||||
|
||||
|
||||
def test_geoarrow_include_z():
|
||||
gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1), Point()]})
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert table["geometry"].type.value_field.name == "xy"
|
||||
assert table["geometry"].type.list_size == 2
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=True))
|
||||
assert table["geometry"].type.value_field.name == "xyz"
|
||||
assert table["geometry"].type.list_size == 3
|
||||
assert np.isnan(table["geometry"].chunk(0).values.to_numpy()[2::3]).all()
|
||||
|
||||
gdf = GeoDataFrame({"geometry": [Point(0, 0, 0), Point(1, 1, 1), Point()]})
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert table["geometry"].type.value_field.name == "xyz"
|
||||
assert table["geometry"].type.list_size == 3
|
||||
|
||||
table = pa_table(gdf.to_arrow(geometry_encoding="geoarrow", include_z=False))
|
||||
assert table["geometry"].type.value_field.name == "xy"
|
||||
assert table["geometry"].type.list_size == 2
|
||||
|
||||
|
||||
@contextlib.contextmanager
def with_geoarrow_extension_types():
    gp = pytest.importorskip("geoarrow.pyarrow")
    gp.register_extension_types()
    try:
        yield
    finally:
        gp.unregister_extension_types()


@pytest.mark.parametrize("dim", ["xy", "xyz"])
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_type",
|
||||
["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
|
||||
)
|
||||
def test_geoarrow_export_with_extension_types(geometry_type, dim):
|
||||
# ensure the exported data can be imported by geoarrow-pyarrow and are
|
||||
# recognized as extension types
|
||||
base_path = DATA_PATH / "geoarrow"
|
||||
suffix = geometry_type + ("_z" if dim == "xyz" else "")
|
||||
|
||||
# Read the example data
|
||||
df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
|
||||
df["geometry"] = GeoSeries.from_wkb(df["geometry"])
|
||||
df["row_number"] = df["row_number"].astype("int32")
|
||||
df = GeoDataFrame(df)
|
||||
df.geometry.array.crs = None
|
||||
|
||||
pytest.importorskip("geoarrow.pyarrow")
|
||||
|
||||
with with_geoarrow_extension_types():
|
||||
result1 = pa_table(df.to_arrow(geometry_encoding="WKB"))
|
||||
assert isinstance(result1["geometry"].type, pa.ExtensionType)
|
||||
|
||||
result2 = pa_table(df.to_arrow(geometry_encoding="geoarrow"))
|
||||
assert isinstance(result2["geometry"].type, pa.ExtensionType)
|
||||
|
||||
result3 = pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
|
||||
assert isinstance(result3["geometry"].type, pa.ExtensionType)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("dim", ["xy", "xyz"])
@pytest.mark.parametrize(
    "geometry_type",
    [
        "point",
        "linestring",
        "polygon",
        "multipoint",
        "multilinestring",
        "multipolygon",
    ],
)
def test_geoarrow_import(geometry_type, dim):
    base_path = DATA_PATH / "geoarrow"
    suffix = geometry_type + ("_z" if dim == "xyz" else "")

    # Read the example data
    df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
    df["geometry"] = GeoSeries.from_wkb(df["geometry"])
    df = GeoDataFrame(df)
    df.geometry.crs = None

    table1 = feather.read_table(base_path / f"example-{suffix}-wkb.arrow")
    result1 = GeoDataFrame.from_arrow(table1)
    assert_geodataframe_equal(result1, df)

    table2 = feather.read_table(base_path / f"example-{suffix}-interleaved.arrow")
    result2 = GeoDataFrame.from_arrow(table2)
    assert_geodataframe_equal(result2, df)

    table3 = feather.read_table(base_path / f"example-{suffix}.arrow")
    result3 = GeoDataFrame.from_arrow(table3)
    assert_geodataframe_equal(result3, df)


@pytest.mark.skipif(
    Version(shapely.__version__) < Version("2.0.2"),
    reason="from_ragged_array failing with read-only array input",
)
@pytest.mark.parametrize("encoding", ["WKB", "geoarrow"])
def test_geoarrow_import_geometry_column(encoding):
    pytest.importorskip("pyproj")
    # ensure we can choose which geometry column becomes the active one
    gdf = GeoDataFrame(geometry=[box(0, 0, 10, 10)])
    gdf["centroid"] = gdf.geometry.centroid

    result = GeoDataFrame.from_arrow(pa_table(gdf.to_arrow(geometry_encoding=encoding)))
    assert_geodataframe_equal(result, gdf)
    assert result.active_geometry_name == "geometry"

    result = GeoDataFrame.from_arrow(
        pa_table(gdf[["centroid"]].to_arrow(geometry_encoding=encoding))
    )
    assert result.active_geometry_name == "centroid"

    result = GeoDataFrame.from_arrow(
        pa_table(gdf.to_arrow(geometry_encoding=encoding)), geometry="centroid"
    )
    assert result.active_geometry_name == "centroid"
    assert_geodataframe_equal(result, gdf.set_geometry("centroid"))


def test_geoarrow_import_missing_geometry():
    pytest.importorskip("pyarrow", minversion="14.0.0")

    table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
    with pytest.raises(ValueError, match="No geometry column found"):
        GeoDataFrame.from_arrow(table)

    with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
        GeoSeries.from_arrow(table["a"].chunk(0))


def test_geoarrow_import_capsule_interface():
    # ensure we can import non-pyarrow object
    pytest.importorskip("pyarrow", minversion="14.0.0")
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])

    result = GeoDataFrame.from_arrow(gdf.to_arrow())
    assert_geodataframe_equal(result, gdf)


@pytest.mark.parametrize("dim", ["xy", "xyz"])
|
||||
@pytest.mark.parametrize(
|
||||
"geometry_type",
|
||||
["point", "linestring", "polygon", "multipoint", "multilinestring", "multipolygon"],
|
||||
)
|
||||
def test_geoarrow_import_from_extension_types(geometry_type, dim):
|
||||
# ensure the exported data can be imported by geoarrow-pyarrow and are
|
||||
# recognized as extension types
|
||||
pytest.importorskip("pyproj")
|
||||
base_path = DATA_PATH / "geoarrow"
|
||||
suffix = geometry_type + ("_z" if dim == "xyz" else "")
|
||||
|
||||
# Read the example data
|
||||
df = feather.read_feather(base_path / f"example-{suffix}-wkb.arrow")
|
||||
df["geometry"] = GeoSeries.from_wkb(df["geometry"])
|
||||
df = GeoDataFrame(df, crs="EPSG:3857")
|
||||
|
||||
pytest.importorskip("geoarrow.pyarrow")
|
||||
|
||||
with with_geoarrow_extension_types():
|
||||
result1 = GeoDataFrame.from_arrow(
|
||||
pa_table(df.to_arrow(geometry_encoding="WKB"))
|
||||
)
|
||||
assert_geodataframe_equal(result1, df)
|
||||
|
||||
result2 = GeoDataFrame.from_arrow(
|
||||
pa_table(df.to_arrow(geometry_encoding="geoarrow"))
|
||||
)
|
||||
assert_geodataframe_equal(result2, df)
|
||||
|
||||
result3 = GeoDataFrame.from_arrow(
|
||||
pa_table(df.to_arrow(geometry_encoding="geoarrow", interleaved=False))
|
||||
)
|
||||
assert_geodataframe_equal(result3, df)
|
||||
|
||||
|
||||
def test_geoarrow_import_geoseries():
|
||||
pytest.importorskip("pyproj")
|
||||
gp = pytest.importorskip("geoarrow.pyarrow")
|
||||
ser = GeoSeries.from_wkt(["POINT (1 1)", "POINT (2 2)"], crs="EPSG:3857")
|
||||
|
||||
with with_geoarrow_extension_types():
|
||||
arr = gp.array(ser.to_arrow(geometry_encoding="WKB"))
|
||||
result = GeoSeries.from_arrow(arr)
|
||||
assert_geoseries_equal(result, ser)
|
||||
|
||||
arr = gp.array(ser.to_arrow(geometry_encoding="geoarrow"))
|
||||
result = GeoSeries.from_arrow(arr)
|
||||
assert_geoseries_equal(result, ser)
|
||||
|
||||
# the name is lost when going through a pyarrow.Array
|
||||
ser.name = "name"
|
||||
arr = gp.array(ser.to_arrow())
|
||||
result = GeoSeries.from_arrow(arr)
|
||||
assert result.name is None
|
||||
# we can specify the name as one of the kwargs
|
||||
result = GeoSeries.from_arrow(arr, name="test")
|
||||
assert_geoseries_equal(result, ser)
|
||||
|
||||
|
||||
def test_geoarrow_import_unknown_geoarrow_type():
    gdf = GeoDataFrame({"col": [1]}, geometry=[box(0, 0, 10, 10)])
    table = pa_table(gdf.to_arrow())
    schema = table.schema
    new_field = schema.field("geometry").with_metadata(
        {
            b"ARROW:extension:name": b"geoarrow.unknown",
            b"ARROW:extension:metadata": b"{}",
        }
    )

    new_schema = pa.schema([schema.field(0), new_field])
    new_table = table.cast(new_schema)

    with pytest.raises(TypeError, match="Unknown GeoArrow extension type"):
        GeoDataFrame.from_arrow(new_table)
@@ -0,0 +1,306 @@
from collections import OrderedDict

import numpy as np
import pandas as pd

from shapely.geometry import (
    LineString,
    MultiLineString,
    MultiPoint,
    MultiPolygon,
    Point,
    Polygon,
)

from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema

import pytest

# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(
    (
        (-73.5541107525234, 45.5091983609661),
        (-73.5546126200639, 45.5086813829106),
        (-73.5540185061397, 45.5084409343852),
        (-73.5539986525799, 45.5084323044531),
        (-73.5535801792994, 45.5089539203786),
        (-73.5541107525234, 45.5091983609661),
    )
)
vauquelin_place = Polygon(
    (
        (-73.5542465586147, 45.5081555487952),
        (-73.5540185061397, 45.5084409343852),
        (-73.5546126200639, 45.5086813829106),
        (-73.5548825850032, 45.5084033554357),
        (-73.5542465586147, 45.5081555487952),
    )
)

city_hall_walls = [
    LineString(
        (
            (-73.5541107525234, 45.5091983609661),
            (-73.5546126200639, 45.5086813829106),
            (-73.5540185061397, 45.5084409343852),
        )
    ),
    LineString(
        (
            (-73.5539986525799, 45.5084323044531),
            (-73.5535801792994, 45.5089539203786),
            (-73.5541107525234, 45.5091983609661),
        )
    ),
]

city_hall_entrance = Point(-73.553785, 45.508722)
city_hall_balcony = Point(-73.554138, 45.509080)
city_hall_council_chamber = Point(-73.554246, 45.508931)

point_3D = Point(-73.553785, 45.508722, 300)
linestring_3D = LineString(
    (
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5546126200639, 45.5086813829106, 300),
        (-73.5540185061397, 45.5084409343852, 300),
    )
)
polygon_3D = Polygon(
    (
        (-73.5541107525234, 45.5091983609661, 300),
        (-73.5535801792994, 45.5089539203786, 300),
        (-73.5541107525234, 45.5091983609661, 300),
    )
)


def test_infer_schema_only_points():
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])

    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}


def test_infer_schema_points_and_multipoints():
    df = GeoDataFrame(
        geometry=[
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
        ]
    )

    assert infer_schema(df) == {
        "geometry": ["MultiPoint", "Point"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multipoints():
    df = GeoDataFrame(
        geometry=[
            MultiPoint(
                [city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
            )
        ]
    )

    assert infer_schema(df) == {"geometry": "MultiPoint", "properties": OrderedDict()}


def test_infer_schema_only_linestrings():
    df = GeoDataFrame(geometry=city_hall_walls)

    assert infer_schema(df) == {"geometry": "LineString", "properties": OrderedDict()}


def test_infer_schema_linestrings_and_multilinestrings():
    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]])

    assert infer_schema(df) == {
        "geometry": ["MultiLineString", "LineString"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multilinestrings():
    df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])

    assert infer_schema(df) == {
        "geometry": "MultiLineString",
        "properties": OrderedDict(),
    }


def test_infer_schema_only_polygons():
    df = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])

    assert infer_schema(df) == {"geometry": "Polygon", "properties": OrderedDict()}


def test_infer_schema_polygons_and_multipolygons():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
        ]
    )

    assert infer_schema(df) == {
        "geometry": ["MultiPolygon", "Polygon"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_multipolygons():
    df = GeoDataFrame(geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))])

    assert infer_schema(df) == {"geometry": "MultiPolygon", "properties": OrderedDict()}


def test_infer_schema_multiple_shape_types():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
            MultiLineString(city_hall_walls),
            city_hall_walls[0],
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
        ]
    )

    assert infer_schema(df) == {
        "geometry": [
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_shape_type():
    df = GeoDataFrame(
        geometry=[
            MultiPolygon((city_hall_boundaries, vauquelin_place)),
            city_hall_boundaries,
            MultiLineString(city_hall_walls),
            city_hall_walls[0],
            MultiPoint([city_hall_entrance, city_hall_balcony]),
            city_hall_balcony,
            point_3D,
        ]
    )

    assert infer_schema(df) == {
        "geometry": [
            "3D Point",
            "MultiPolygon",
            "Polygon",
            "MultiLineString",
            "LineString",
            "MultiPoint",
            "Point",
        ],
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_Point():
    df = GeoDataFrame(geometry=[city_hall_balcony, point_3D])

    assert infer_schema(df) == {
        "geometry": ["3D Point", "Point"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_Points():
    df = GeoDataFrame(geometry=[point_3D, point_3D])

    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}


def test_infer_schema_mixed_3D_linestring():
    df = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])

    assert infer_schema(df) == {
        "geometry": ["3D LineString", "LineString"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_linestrings():
    df = GeoDataFrame(geometry=[linestring_3D, linestring_3D])

    assert infer_schema(df) == {
        "geometry": "3D LineString",
        "properties": OrderedDict(),
    }


def test_infer_schema_mixed_3D_Polygon():
    df = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])

    assert infer_schema(df) == {
        "geometry": ["3D Polygon", "Polygon"],
        "properties": OrderedDict(),
    }


def test_infer_schema_only_3D_Polygons():
    df = GeoDataFrame(geometry=[polygon_3D, polygon_3D])

    assert infer_schema(df) == {"geometry": "3D Polygon", "properties": OrderedDict()}


def test_infer_schema_null_geometry_and_2D_point():
    df = GeoDataFrame(geometry=[None, city_hall_entrance])

    # None geometry type is then omitted
    assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}


def test_infer_schema_null_geometry_and_3D_point():
    df = GeoDataFrame(geometry=[None, point_3D])

    # None geometry type is then omitted
    assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}


def test_infer_schema_null_geometry_all():
    df = GeoDataFrame(geometry=[None, None])

    # None geometry type is then replaced by 'Unknown'
    # (default geometry type supported by Fiona)
    assert infer_schema(df) == {"geometry": "Unknown", "properties": OrderedDict()}


@pytest.mark.parametrize(
    "array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
)
def test_infer_schema_int32(array_data, dtype):
    int32col = pd.array(data=array_data, dtype=dtype)
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    df["int32_column"] = int32col

    assert infer_schema(df) == {
        "geometry": "Point",
        "properties": OrderedDict([("int32_column", "int32")]),
    }


def test_infer_schema_int64():
    int64col = pd.array([1, np.nan], dtype=pd.Int64Dtype())
    df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
    df["int64_column"] = int64col

    assert infer_schema(df) == {
        "geometry": "Point",
        "properties": OrderedDict([("int64_column", "int")]),
    }
@@ -0,0 +1,56 @@
"""
See generate_legacy_storage_files.py for the creation of the legacy files.

"""

import glob
import os
import pathlib

import pandas as pd

import pytest
from geopandas.testing import assert_geodataframe_equal

DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"


@pytest.fixture(scope="module")
def current_pickle_data():
    # our current version pickle data
    from .generate_legacy_storage_files import create_pickle_data

    return create_pickle_data()


files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))


@pytest.fixture(params=files, ids=[p.split("/")[-1] for p in files])
def legacy_pickle(request):
    return request.param


@pytest.mark.skip(
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    result = pd.read_pickle(legacy_pickle)

    for name, value in result.items():
        expected = current_pickle_data[name]
        assert_geodataframe_equal(value, expected)


def test_round_trip_current(tmpdir, current_pickle_data):
    data = current_pickle_data

    for name, value in data.items():
        path = str(tmpdir / "{}.pickle".format(name))
        value.to_pickle(path)
        result = pd.read_pickle(path)
        assert_geodataframe_equal(result, value)
        assert isinstance(result.has_sindex, bool)
@@ -0,0 +1,878 @@
"""
Tests here include reading/writing to different types of spatial databases.
The spatial database tests may not work without additional system
configuration. PostGIS tests require a test database to have been set up;
see geopandas.tests.util for more information.
"""

import os
import warnings
from importlib.util import find_spec

import pandas as pd

import geopandas
import geopandas._compat as compat
from geopandas import GeoDataFrame, read_file, read_postgis
from geopandas._compat import HAS_PYPROJ
from geopandas.io.sql import _get_conn as get_conn
from geopandas.io.sql import _write_postgis as write_postgis

import pytest
from geopandas.tests.util import (
    create_postgis,
    create_spatialite,
    mock,
    validate_boro_df,
)

try:
    from sqlalchemy import text
except ImportError:
    # Avoid local imports for text in all sqlalchemy tests
    # all tests using text use engine_postgis, which ensures sqlalchemy is available
    text = str


@pytest.fixture
def df_nybb(nybb_filename):
    df = read_file(nybb_filename)
    return df


def check_available_postgis_drivers() -> list[str]:
    """Work out which of psycopg2 and psycopg are available.

    This prevents tests from running if the relevant package isn't installed
    (rather than being skipped, as skips are treated as failures during postgis CI).
    """
    drivers = []
    if find_spec("psycopg"):
        drivers.append("psycopg")
    if find_spec("psycopg2"):
        drivers.append("psycopg2")
    return drivers


POSTGIS_DRIVERS = check_available_postgis_drivers()


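# Note added for clarity (an assumption about intent, not upstream text):
# POSTGIS_DRIVERS feeds pytest's indirect parametrization below, e.g.
#
#     @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
#
# so each test runs once per installed driver and does not run when none is
# installed.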
def prepare_database_credentials() -> dict:
    """Gather postgres connection credentials from environment variables."""
    return {
        "dbname": "test_geopandas",
        "user": os.environ.get("PGUSER"),
        "password": os.environ.get("PGPASSWORD"),
        "host": os.environ.get("PGHOST"),
        "port": os.environ.get("PGPORT"),
    }


@pytest.fixture()
def connection_postgis(request):
    """Create a postgres connection using either psycopg2 or psycopg.

    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
    psycopg = pytest.importorskip(request.param)

    try:
        con = psycopg.connect(**prepare_database_credentials())
    except psycopg.OperationalError:
        pytest.skip("Cannot connect with postgresql database")
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="pandas only supports SQLAlchemy connectable.*"
        )
        yield con
    con.close()


@pytest.fixture()
def engine_postgis(request):
    """
    Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.

    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")
    from sqlalchemy.engine.url import URL

    credentials = prepare_database_credentials()
    try:
        con = sqlalchemy.create_engine(
            URL.create(
                drivername=f"postgresql+{request.param}",
                username=credentials["user"],
                database=credentials["dbname"],
                password=credentials["password"],
                host=credentials["host"],
                port=credentials["port"],
            )
        )
        con.connect()
    except Exception:
        pytest.skip("Cannot connect with postgresql database")

    yield con
    con.dispose()


@pytest.fixture()
def connection_spatialite():
    """
    Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.

    `The sqlite3 module must be built with loadable extension support
    <https://docs.python.org/3/library/sqlite3.html#f1>`_ and
    `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
    must be available on the system as a SQLite module.
    Packages available on Anaconda meet requirements.

    Exceptions
    ----------
    ``AttributeError`` on missing support for loadable SQLite extensions
    ``sqlite3.OperationalError`` on missing SpatiaLite
    """
    sqlite3 = pytest.importorskip("sqlite3")
    try:
        with sqlite3.connect(":memory:") as con:
            con.enable_load_extension(True)
            con.load_extension("mod_spatialite")
            con.execute("SELECT InitSpatialMetaData(TRUE)")
    except Exception:
        con.close()
        pytest.skip("Cannot setup spatialite database")

    yield con
    con.close()


def drop_table_if_exists(conn_or_engine, table):
|
||||
sqlalchemy = pytest.importorskip("sqlalchemy")
|
||||
|
||||
if sqlalchemy.inspect(conn_or_engine).has_table(table):
|
||||
metadata = sqlalchemy.MetaData()
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="Did not recognize type 'geometry' of column.*"
|
||||
)
|
||||
metadata.reflect(conn_or_engine)
|
||||
table = metadata.tables.get(table)
|
||||
if table is not None:
|
||||
table.drop(conn_or_engine, checkfirst=True)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_mixed_single_and_multi():
|
||||
from shapely.geometry import LineString, MultiLineString, Point
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
LineString([(0, 0), (1, 1)]),
|
||||
MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
|
||||
Point(0, 1),
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_geom_collection():
|
||||
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
GeometryCollection(
|
||||
[
|
||||
Polygon([(0, 0), (1, 1), (0, 1)]),
|
||||
LineString([(0, 0), (1, 1)]),
|
||||
Point(0, 0),
|
||||
]
|
||||
)
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_linear_ring():
|
||||
from shapely.geometry import LinearRing
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{"geometry": [LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))]}, crs="epsg:4326"
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def df_3D_geoms():
|
||||
from shapely.geometry import LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
"geometry": [
|
||||
LineString([(0, 0, 0), (1, 1, 1)]),
|
||||
Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
|
||||
Point(0, 1, 2),
|
||||
]
|
||||
},
|
||||
crs="epsg:4326",
|
||||
)
|
||||
return df


class TestIO:
    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_get_conn(self, engine_postgis):
        Connection = pytest.importorskip("sqlalchemy.engine.base").Connection

        engine = engine_postgis
        with get_conn(engine) as output:
            assert isinstance(output, Connection)
        with engine.connect() as conn:
            with get_conn(conn) as output:
                assert isinstance(output, Connection)
        with pytest.raises(ValueError):
            with get_conn(object()):
                pass

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_default(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
        con = connection_postgis
        geom_col = "the_geom"
        create_postgis(con, df_nybb, geom_col=geom_col)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con, geom_col=geom_col)

        validate_boro_df(df)

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
        """Tests that a SELECT {geom} AS {some_other_geom} works."""
        con = connection_postgis
        orig_geom = "geom"
        out_geom = "the_geom"
        create_postgis(con, df_nybb, geom_col=orig_geom)

        sql = """SELECT borocode, boroname, shape_leng, shape_area,
            {} as {} FROM nybb;""".format(
            orig_geom, out_geom
        )
        df = read_postgis(sql, con, geom_col=out_geom)

        validate_boro_df(df)

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
        """Tests that an SRID can be read from a geodatabase (GH #451)."""
        con = connection_postgis
        crs = "epsg:4269"
        df_reproj = df_nybb.to_crs(crs)
        create_postgis(con, df_reproj, srid=4269)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)

        validate_boro_df(df)
        assert df.crs == crs

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
        """Tests that a user specified CRS overrides the geodatabase SRID."""
        con = connection_postgis
        orig_crs = df_nybb.crs
        create_postgis(con, df_nybb, srid=4269)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con, crs=orig_crs)

        validate_boro_df(df)
        assert df.crs == orig_crs

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_from_postgis_default(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = GeoDataFrame.from_postgis(sql, con)

        validate_boro_df(df, case_sensitive=False)

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
        con = connection_postgis
        geom_col = "the_geom"
        create_postgis(con, df_nybb, geom_col=geom_col)

        sql = "SELECT * FROM nybb;"
        df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)

        validate_boro_df(df, case_sensitive=False)

    def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
        """Tests that geometry with NULL is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        df_nybb.geometry.iat[0] = None
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    def test_read_postgis_binary(self, connection_spatialite, df_nybb):
        """Tests that geometry read as binary is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument"""
        chunksize = 2
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_default(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
        engine = engine_postgis
        table = "nybb"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
        """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
        engine = engine_postgis
        table = "aTestTable"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text('SELECT * FROM "{table}";'.format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS via an open connection."""
        with engine_postgis.begin() as con:
            table = "nybb_con"

            # If table exists, delete it before trying to write with defaults
            drop_table_if_exists(con, table)

            # Write to db
            write_postgis(df_nybb, con=con, name=table, if_exists="fail")
            # Validate
            sql = text("SELECT * FROM {table};".format(table=table))
            df = read_postgis(sql, con, geom_col="geometry")
            validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that uploading the same table raises an error when if_exists='fail'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        try:
            write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        except ValueError as e:
            if "already exists" in str(e):
                pass
            else:
                raise e

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that replacing a table is possible when if_exists='replace'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Overwrite
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that appending to an existing table produces correct results when
        if_exists='append'.
        """
        engine = engine_postgis

        table = "nybb"

        orig_rows, orig_cols = df_nybb.shape
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        write_postgis(df_nybb, con=engine, name=table, if_exists="append")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        new_rows, new_cols = df.shape

        # There should be twice as many rows in the new table
        assert new_rows == orig_rows * 2, (
            "There should be {target} rows, found: {current}".format(
                target=orig_rows * 2, current=new_rows
            )
        )
        # Number of columns should stay the same
        assert new_cols == orig_cols, (
            "There should be {target} columns, found: {current}".format(
                target=orig_cols, current=new_cols
            )
        )

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS without CRS information.
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb.geometry.array.crs = None
        with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate that srid is 0
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 0, "SRID should be 0, found %s" % target_srid

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
        CRS information (GH #2414).
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb_esri = df_nybb.to_crs("ESRI:102003")
        write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace")
        # Validate that srid is 102003
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_geometry_collection(
        self, engine_postgis, df_geom_collection
    ):
        """
        Tests that writing a mix of different geometry types is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace")

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")

        assert geom_type.upper() == "GEOMETRYCOLLECTION"
        assert df.geom_type.unique()[0] == "GeometryCollection"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_mixed_geometry_types(
        self, engine_postgis, df_mixed_single_and_multi
    ):
        """
        Tests that writing a mix of single and MultiGeometries is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi, con=engine, name=table, if_exists="replace"
        )

        # Validate geometry types
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
        """
        Tests that writing a LinearRing works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace")

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]

        assert geom_type.upper() == "LINESTRING"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
        """
        Tests that writing in chunks (chunksize=1) works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi,
            con=engine,
            name=table,
            if_exists="replace",
            chunksize=1,
        )
        # Validate row count
        sql = text("SELECT COUNT(geometry) FROM {table};".format(table=table))
        with engine.connect() as conn:
            row_cnt = conn.execute(sql).fetchone()[0]
        assert row_cnt == 3

        # Validate geometry types
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
        """
        Tests writing data to an alternative schema.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema_when_table_exists(
        self, engine_postgis, df_nybb
    ):
        """
        Tests writing data to an alternative schema when the table already exists.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        try:
            write_postgis(
                df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use
            )
            # Validate
            sql = text(
                "SELECT * FROM {schema}.{table};".format(
                    schema=schema_to_use, table=table
                )
            )

            df = read_postgis(sql, engine, geom_col="geometry")
            validate_boro_df(df)

        # Should raise a ValueError when table exists
        except ValueError:
            pass

        # Try with replace flag on
        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
        """
        Tests that writing geometries with 3 dimensions works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace")

        # Check that all geometries have 3 dimensions
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert list(df.geometry.has_z) == [True, True, True]

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_row_order(self, engine_postgis, df_nybb):
        """
        Tests that the row order in the db table follows the order of the original frame.
        """
        engine = engine_postgis

        table = "row_order_test"
        correct_order = df_nybb["BoroCode"].tolist()

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Check that the row order matches
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert df["BoroCode"].tolist() == correct_order

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_before_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that insert works with if_exists='append' when the table does not exist yet.
        """
        engine = engine_postgis

        table = "nybb"
        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        write_postgis(df_nybb, con=engine, name=table, if_exists="append")

        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_with_different_crs(self, engine_postgis, df_nybb):
        """
        Tests that an error is raised if the table CRS differs from the frame's.
        """
        engine = engine_postgis

        table = "nybb"
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Reproject
        df_nybb2 = df_nybb.to_crs(epsg=4326)

        # Should raise an error when appending
        with pytest.raises(ValueError, match="CRS of the target table"):
            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_without_crs(self, engine_postgis, df_nybb):
        # This test was included in #3328 when the default value for no
        # CRS was changed from an SRID of -1 to 0. This resolves issues
        # of appending dataframes to postgis that have no CRS, as postgis'
        # no-CRS value is 0.
        engine = engine_postgis
        df_nybb = df_nybb.set_crs(None, allow_override=True)
        table = "nybb"

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # append another dataframe with no crs
        df_nybb2 = df_nybb
        write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    @pytest.mark.xfail(
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
    )
    def test_duplicate_geometry_column_fails(self, engine_postgis):
        """
        Tests that a ValueError is raised if an SQL query returns two geometry columns.
        """
        engine = engine_postgis

        sql = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

        with pytest.raises(ValueError):
            read_postgis(sql, engine, geom_col="geom")

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")
        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)
        validate_boro_df(df)
        assert df.crs == "ESRI:54052"

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
    @mock.patch("shapely.get_srid")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
        # mock a non-existent srid for the edge case where shapely has an srid
        # not present in the postgis table.
        pyproj = pytest.importorskip("pyproj")

        mock_get_srid.return_value = 99999

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
            with pytest.warns(UserWarning, match="Could not find srid 99999"):
                read_postgis(sql, con)

    @mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_no_spatial_ref_sys_table_in_postgis(
        self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
    ):
        # mock for a non-existent spatial_ref_sys table in the database
        mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb, srid=4326)

        sql = "SELECT * FROM nybb;"
        with pytest.warns(
            UserWarning, match="Could not find the spatial reference system table"
        ):
            df = read_postgis(sql, con)

        assert df.crs == "EPSG:4326"

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument with a non-EPSG CRS"""
        chunksize = 2
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")

        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        assert df.crs == "ESRI:54052"
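
Taken together, the tests above document the public round-trip API built on
_read_postgis/_write_postgis. A minimal usage sketch distilled from them; the connection
string and input file name are illustrative assumptions, not part of this file:

import geopandas
from sqlalchemy import create_engine

# hypothetical DSN; any PostGIS-enabled database works
engine = create_engine("postgresql+psycopg://user:pass@localhost:5432/test_geopandas")

gdf = geopandas.read_file("nybb.shp")  # hypothetical input file
gdf.to_postgis("nybb", engine, if_exists="replace")  # public wrapper over _write_postgis
roundtrip = geopandas.read_postgis("SELECT * FROM nybb;", engine, geom_col="geometry")
assert roundtrip.crs == gdf.crs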
118
.venv/lib/python3.12/site-packages/geopandas/io/util.py
Normal file
@@ -0,0 +1,118 @@
"""Vendored, cut down version of pyogrio/util.py for use with fiona"""

import re
import sys
from urllib.parse import urlparse


def vsi_path(path: str) -> str:
    """Ensure path is a local path or a GDAL-compatible vsi path."""

    # path is already in GDAL format
    if path.startswith("/vsi"):
        return path

    # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
    # URL schemes
    if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
        if not path.split("!")[0].endswith(".zip"):
            return path

        # prefix then allow to proceed with remaining parsing
        path = f"zip://{path}"

    path, archive, scheme = _parse_uri(path)

    if scheme or archive or path.endswith(".zip"):
        return _construct_vsi_path(path, archive, scheme)

    return path


# Supported URI schemes and their mapping to GDAL's VSI suffix.
SCHEMES = {
    "file": "file",
    "zip": "zip",
    "tar": "tar",
    "gzip": "gzip",
    "http": "curl",
    "https": "curl",
    "ftp": "curl",
    "s3": "s3",
    "gs": "gs",
    "az": "az",
    "adls": "adls",
    "adl": "adls",  # fsspec uses this
    "hdfs": "hdfs",
    "webhdfs": "webhdfs",
    # GDAL additionally supports oss and swift for remote filesystems, but
    # those are for now not added as supported URIs
}

CURLSCHEMES = {k for k, v in SCHEMES.items() if v == "curl"}


def _parse_uri(path: str):
    """
    Parse a URI.

    Returns a tuple of (path, archive, scheme)

    path : str
        Parsed path. Includes the hostname and query string in the case
        of a URI.
    archive : str
        Parsed archive path.
    scheme : str
        URI scheme such as "https" or "zip+s3".
    """
    parts = urlparse(path, allow_fragments=False)

    # if the scheme is not one of GDAL's supported schemes, return raw path
    if parts.scheme and not all(p in SCHEMES for p in parts.scheme.split("+")):
        return path, "", ""

    # we have a URI
    path = parts.path
    scheme = parts.scheme or ""

    if parts.query:
        path += "?" + parts.query

    if parts.scheme and parts.netloc:
        path = parts.netloc + path

    parts = path.split("!")
    path = parts.pop() if parts else ""
    archive = parts.pop() if parts else ""
    return (path, archive, scheme)


def _construct_vsi_path(path, archive, scheme) -> str:
    """Convert a parsed path to a GDAL VSI path."""

    prefix = ""
    suffix = ""
    schemes = scheme.split("+")

    if "zip" not in schemes and (archive.endswith(".zip") or path.endswith(".zip")):
        schemes.insert(0, "zip")

    if schemes:
        prefix = "/".join(
            "vsi{0}".format(SCHEMES[p]) for p in schemes if p and p != "file"
        )

        if schemes[-1] in CURLSCHEMES:
            suffix = f"{schemes[-1]}://"

    if prefix:
        if archive:
            return "/{}/{}{}/{}".format(prefix, suffix, archive, path.lstrip("/"))
        else:
            return "/{}/{}{}".format(prefix, suffix, path)

    return path
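
The behaviour of vsi_path is easiest to see with a few concrete inputs; the expected
outputs below follow from _parse_uri and _construct_vsi_path as written (a sketch, not
part of the vendored file):

from geopandas.io.util import vsi_path

assert vsi_path("data/file.shp") == "data/file.shp"  # plain local paths pass through
assert vsi_path("zip://archive.zip!layer.shp") == "/vsizip/archive.zip/layer.shp"
assert vsi_path("s3://bucket/key.gpkg") == "/vsis3/bucket/key.gpkg"
# curl-backed schemes keep the scheme as a suffix after the /vsicurl/ prefix
assert vsi_path("https://example.com/data.geojson") == "/vsicurl/https://example.com/data.geojson"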
977
.venv/lib/python3.12/site-packages/geopandas/plotting.py
Normal file
@@ -0,0 +1,977 @@
import warnings

from packaging.version import Version

import numpy as np
import pandas as pd
from pandas import CategoricalDtype
from pandas.plotting import PlotAccessor

import geopandas

from ._decorator import doc


def _sanitize_geoms(geoms, prefix="Multi"):
    """
    Returns Series like geoms and index, except that any Multi geometries
    are split into their components and indices are repeated for all components
    in the same Multi geometry. At the same time, empty or missing geometries are
    filtered out. Maintains 1:1 matching of geometry to value.

    Prefix specifies the type of geometry to be flattened: 'Multi' for MultiPoint
    and similar, "Geom" for GeometryCollection.

    Returns
    -------
    components : list of geometry

    component_index : index array
        indices are repeated for all components in the same Multi geometry
    """
    # TODO(shapely) look into simplifying this with
    # shapely.get_parts(geoms, return_index=True) from shapely 2.0
    components, component_index = [], []

    if (
        not geoms.geom_type.str.startswith(prefix).any()
        and not geoms.is_empty.any()
        and not geoms.isna().any()
    ):
        return geoms, np.arange(len(geoms))

    for ix, geom in enumerate(geoms):
        if geom is not None and geom.geom_type.startswith(prefix) and not geom.is_empty:
            for poly in geom.geoms:
                components.append(poly)
                component_index.append(ix)
        elif geom is None or geom.is_empty:
            continue
        else:
            components.append(geom)
            component_index.append(ix)

    return components, np.array(component_index)
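
The TODO above points at shapely 2.0's vectorized equivalent of the explode loop; a
sketch of that core step (it does not replicate the empty/missing filtering that
_sanitize_geoms also performs):

import shapely
from shapely.geometry import MultiPoint, Point

geoms = [MultiPoint([(0, 0), (1, 1)]), Point(2, 2)]
parts, index = shapely.get_parts(geoms, return_index=True)
# parts -> [<POINT (0 0)>, <POINT (1 1)>, <POINT (2 2)>]
# index -> [0, 0, 1]: the original position repeated for each component,
# which is exactly the role of component_index above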


def _expand_kwargs(kwargs, multiindex):
    """
    Most arguments to the plot functions must be a (single) value, or a sequence
    of values. This function checks each key-value pair in 'kwargs' and expands
    it (in place) to the correct length/formats with help of 'multiindex', unless
    the value appears to already be a valid (single) value for the key.
    """
    from typing import Iterable

    from matplotlib.colors import is_color_like

    scalar_kwargs = ["marker", "path_effects"]
    for att, value in kwargs.items():
        if "color" in att:  # color(s), edgecolor(s), facecolor(s)
            if is_color_like(value):
                continue
        elif "linestyle" in att:  # linestyle(s)
            # A single linestyle can be 2-tuple of a number and an iterable.
            if (
                isinstance(value, tuple)
                and len(value) == 2
                and isinstance(value[1], Iterable)
            ):
                continue
        elif att in scalar_kwargs:
            # For these attributes, only a single value is allowed, so never expand.
            continue

        if pd.api.types.is_list_like(value):
            kwargs[att] = np.take(value, multiindex, axis=0)


def _PolygonPatch(polygon, **kwargs):
    """Constructs a matplotlib patch from a Polygon geometry

    The `kwargs` are those supported by the matplotlib.patches.PathPatch class
    constructor. Returns an instance of matplotlib.patches.PathPatch.

    Example (using Shapely Point and a matplotlib axes)::

        b = shapely.geometry.Point(0, 0).buffer(1.0)
        patch = _PolygonPatch(b, fc='blue', ec='blue', alpha=0.5)
        ax.add_patch(patch)

    GeoPandas originally relied on the descartes package by Sean Gillies
    (BSD license, https://pypi.org/project/descartes) for PolygonPatch, but
    this dependency was removed in favor of the below matplotlib code.
    """
    from matplotlib.patches import PathPatch
    from matplotlib.path import Path

    path = Path.make_compound_path(
        Path(np.asarray(polygon.exterior.coords)[:, :2]),
        *[Path(np.asarray(ring.coords)[:, :2]) for ring in polygon.interiors],
    )
    return PathPatch(path, **kwargs)


def _plot_polygon_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    autolim=True,
    **kwargs,
):
    """
    Plots a collection of Polygon and MultiPolygon geometries to `ax`

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : a sequence of `N` Polygons and/or MultiPolygons (can be mixed)

    values : a sequence of `N` values, optional
        Values will be mapped to colors using vmin/vmax/cmap. They should
        have 1:1 correspondence with the geometries (not their components).
        Otherwise follows `color` / `facecolor` kwargs.
    edgecolor : single color or sequence of `N` colors
        Color for the edge of the polygons
    facecolor : single color or sequence of `N` colors
        Color to fill the polygons. Cannot be used together with `values`.
    color : single color or sequence of `N` colors
        Sets both `edgecolor` and `facecolor`
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.
    **kwargs
        Additional keyword arguments passed to the collection

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    from matplotlib.collections import PatchCollection

    geoms, multiindex = _sanitize_geoms(geoms)
    if values is not None:
        values = np.take(values, multiindex, axis=0)

    # PatchCollection does not accept some kwargs.
    kwargs = {
        att: value
        for att, value in kwargs.items()
        if att not in ["markersize", "marker"]
    }

    # Add to kwargs for easier checking below.
    if color is not None:
        kwargs["color"] = color

    _expand_kwargs(kwargs, multiindex)

    collection = PatchCollection([_PolygonPatch(poly) for poly in geoms], **kwargs)

    if values is not None:
        collection.set_array(np.asarray(values))
        collection.set_cmap(cmap)
        if "norm" not in kwargs:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection, autolim=autolim)
    ax.autoscale_view()
    return collection


def _plot_linestring_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    autolim=True,
    **kwargs,
):
    """
    Plots a collection of LineString and MultiLineString geometries to `ax`

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : a sequence of `N` LineStrings and/or MultiLineStrings (can be
        mixed)
    values : a sequence of `N` values, optional
        Values will be mapped to colors using vmin/vmax/cmap. They should
        have 1:1 correspondence with the geometries (not their components).
    color : single color or sequence of `N` colors
        Cannot be used together with `values`.
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    from matplotlib.collections import LineCollection

    geoms, multiindex = _sanitize_geoms(geoms)
    if values is not None:
        values = np.take(values, multiindex, axis=0)

    # LineCollection does not accept some kwargs.
    kwargs = {
        att: value
        for att, value in kwargs.items()
        if att not in ["markersize", "marker"]
    }

    # Add to kwargs for easier checking below.
    if color is not None:
        kwargs["color"] = color

    _expand_kwargs(kwargs, multiindex)

    segments = [np.array(linestring.coords)[:, :2] for linestring in geoms]
    collection = LineCollection(segments, **kwargs)

    if values is not None:
        collection.set_array(np.asarray(values))
        collection.set_cmap(cmap)
        if "norm" not in kwargs:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection, autolim=autolim)
    ax.autoscale_view()
    return collection


def _plot_point_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    marker="o",
    markersize=None,
    **kwargs,
):
    """
    Plots a collection of Point and MultiPoint geometries to `ax`

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : sequence of `N` Points or MultiPoints

    values : a sequence of `N` values, optional
        Values mapped to colors using vmin, vmax, and cmap.
        Cannot be specified together with `color`.
    markersize : scalar or array-like, optional
        Size of the markers. Note that under the hood ``scatter`` is
        used, so the specified value will be proportional to the
        area of the marker (size in points^2).

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    if values is not None and color is not None:
        raise ValueError("Can only specify one of 'values' and 'color' kwargs")

    geoms, multiindex = _sanitize_geoms(geoms)
    # values are expanded below as kwargs["c"]

    x = [p.x if not p.is_empty else None for p in geoms]
    y = [p.y if not p.is_empty else None for p in geoms]

    # matplotlib 1.4 does not support c=None, and < 2.0 does not support s=None
    if values is not None:
        kwargs["c"] = values
    if markersize is not None:
        kwargs["s"] = markersize

    # Add to kwargs for easier checking below.
    if color is not None:
        kwargs["color"] = color
    if marker is not None:
        kwargs["marker"] = marker
    _expand_kwargs(kwargs, multiindex)

    if "norm" not in kwargs:
        collection = ax.scatter(x, y, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs)
    else:
        collection = ax.scatter(x, y, cmap=cmap, **kwargs)

    return collection


def plot_series(
    s,
    cmap=None,
    color=None,
    ax=None,
    figsize=None,
    aspect="auto",
    autolim=True,
    **style_kwds,
):
    """
    Plot a GeoSeries.

    Generate a plot of a GeoSeries geometry with matplotlib.

    Parameters
    ----------
    s : Series
        The GeoSeries to be plotted. Currently Polygon,
        MultiPolygon, LineString, MultiLineString, Point and MultiPoint
        geometries can be plotted.
    cmap : str (default None)
        The name of a colormap recognized by matplotlib. Any
        colormap will work, but categorical colormaps are
        generally recommended. Examples of useful discrete
        colormaps include:

            tab10, tab20, Accent, Dark2, Paired, Pastel1, Set1, Set2

    color : str, np.array, pd.Series, List (default None)
        If specified, all objects will be colored uniformly.
    ax : matplotlib.pyplot.Artist (default None)
        axes on which to draw the plot
    figsize : pair of floats (default None)
        Size of the resulting matplotlib.figure.Figure. If the argument
        ax is given explicitly, figsize is ignored.
    aspect : 'auto', 'equal', None or float (default 'auto')
        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
        however data are not projected (coordinates are long/lat), the aspect is by
        default set to 1/cos(s_y * pi/180) with s_y the y coordinate of the middle of
        the GeoSeries (the mean of the y range of bounding box) so that a long/lat
        square appears square in the middle of the plot. This implies an
        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
        also be set manually (float) as the ratio of y-unit to x-unit.
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.
    **style_kwds : dict
        Color options to be passed on to the actual plot function, such
        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
        ``alpha``.

    Returns
    -------
    ax : matplotlib axes instance
    """

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError(
            "The matplotlib package is required for plotting in geopandas. "
            "You can install it using 'conda install -c conda-forge matplotlib' or "
            "'pip install matplotlib'."
        )

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    if aspect == "auto":
        if s.crs and s.crs.is_geographic:
            bounds = s.total_bounds
            y_coord = np.mean([bounds[1], bounds[3]])
            ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
            # formula ported from R package sp
            # https://github.com/edzer/sp/blob/master/R/mapasp.R
        else:
            ax.set_aspect("equal")
    elif aspect is not None:
        ax.set_aspect(aspect)

    if s.empty:
        warnings.warn(
            "The GeoSeries you are attempting to plot is "
            "empty. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    if s.is_empty.all():
        warnings.warn(
            "The GeoSeries you are attempting to plot is "
            "composed of empty geometries. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    # have colors been given for all geometries?
    color_given = pd.api.types.is_list_like(color) and len(color) == len(s)

    # if cmap is specified, create range of colors based on cmap
    values = None
    if cmap is not None:
        values = np.arange(len(s))
        if hasattr(cmap, "N"):
            values = values % cmap.N
        style_kwds["vmin"] = style_kwds.get("vmin", values.min())
        style_kwds["vmax"] = style_kwds.get("vmax", values.max())

    # decompose GeometryCollections
    geoms, multiindex = _sanitize_geoms(s.geometry, prefix="Geom")
    values = np.take(values, multiindex, axis=0) if cmap else None
    # ensure indexes are consistent
    if color_given and isinstance(color, pd.Series):
        color = color.reindex(s.index)
    expl_color = np.take(color, multiindex, axis=0) if color_given else color
    expl_series = geopandas.GeoSeries(geoms)

    geom_types = expl_series.geom_type
    poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
    line_idx = np.asarray(
        (geom_types == "LineString")
        | (geom_types == "MultiLineString")
        | (geom_types == "LinearRing")
    )
    point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))

    # plot all Polygons and all MultiPolygon components in the same collection
    polys = expl_series[poly_idx]
    if not polys.empty:
        # color overrides both face and edgecolor. As we want people to be
        # able to use edgecolor as well, pass color to facecolor
        facecolor = style_kwds.pop("facecolor", None)
        color_ = expl_color[poly_idx] if color_given else color
        if color is not None:
            facecolor = color_

        values_ = values[poly_idx] if cmap else None
        _plot_polygon_collection(
            ax,
            polys,
            values_,
            facecolor=facecolor,
            cmap=cmap,
            autolim=autolim,
            **style_kwds,
        )

    # plot all LineStrings and MultiLineString components in same collection
    lines = expl_series[line_idx]
    if not lines.empty:
        values_ = values[line_idx] if cmap else None
        color_ = expl_color[line_idx] if color_given else color

        _plot_linestring_collection(
            ax, lines, values_, color=color_, cmap=cmap, autolim=autolim, **style_kwds
        )

    # plot all Points in the same collection
    points = expl_series[point_idx]
    if not points.empty:
        values_ = values[point_idx] if cmap else None
        color_ = expl_color[point_idx] if color_given else color

        _plot_point_collection(
            ax, points, values_, color=color_, cmap=cmap, **style_kwds
        )

    ax.figure.canvas.draw_idle()
    return ax
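
A short usage sketch of plot_series with synthetic data (normally reached via
GeoSeries.plot; the output path is hypothetical):

import geopandas
from geopandas.plotting import plot_series
from shapely.geometry import LineString, Point

s = geopandas.GeoSeries([Point(0, 0), LineString([(0, 0), (1, 1)])], crs="EPSG:3857")
ax = plot_series(s, color="steelblue", figsize=(4, 4))
ax.figure.savefig("series.png")  # hypothetical output file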


def plot_dataframe(
    df,
    column=None,
    cmap=None,
    color=None,
    ax=None,
    cax=None,
    categorical=False,
    legend=False,
    scheme=None,
    k=5,
    vmin=None,
    vmax=None,
    markersize=None,
    figsize=None,
    legend_kwds=None,
    categories=None,
    classification_kwds=None,
    missing_kwds=None,
    aspect="auto",
    autolim=True,
    **style_kwds,
):
    """
    Plot a GeoDataFrame.

    Generate a plot of a GeoDataFrame with matplotlib. If a
    column is specified, the plot coloring will be based on values
    in that column.

    Parameters
    ----------
    column : str, np.array, pd.Series (default None)
        The name of the dataframe column, np.array, or pd.Series to be plotted.
        If np.array or pd.Series are used then it must have same length as
        dataframe. Values are used to color the plot. Ignored if `color` is
        also set.
    kind : str
        The kind of plots to produce. The default is to create a map ("geo").
        Other supported kinds of plots from pandas:

        - 'line' : line plot
        - 'bar' : vertical bar plot
        - 'barh' : horizontal bar plot
        - 'hist' : histogram
        - 'box' : BoxPlot
        - 'kde' : Kernel Density Estimation plot
        - 'density' : same as 'kde'
        - 'area' : area plot
        - 'pie' : pie plot
        - 'scatter' : scatter plot
        - 'hexbin' : hexbin plot.
    cmap : str (default None)
        The name of a colormap recognized by matplotlib.
    color : str, np.array, pd.Series (default None)
        If specified, all objects will be colored uniformly.
    ax : matplotlib.pyplot.Artist (default None)
        axes on which to draw the plot
    cax : matplotlib.pyplot Artist (default None)
        axes on which to draw the legend in case of color map.
    categorical : bool (default False)
        If False, cmap will reflect numerical values of the
        column being plotted. For non-numerical columns, this
        will be set to True.
    legend : bool (default False)
        Plot a legend. Ignored if no `column` is given, or if `color` is given.
    scheme : str (default None)
        Name of a choropleth classification scheme (requires mapclassify).
        A mapclassify.MapClassifier object will be used
        under the hood. Supported are all schemes provided by mapclassify (e.g.
        'BoxPlot', 'EqualInterval', 'FisherJenks', 'FisherJenksSampled',
        'HeadTailBreaks', 'JenksCaspall', 'JenksCaspallForced',
        'JenksCaspallSampled', 'MaxP', 'MaximumBreaks',
        'NaturalBreaks', 'Quantiles', 'Percentiles', 'StdMean',
        'UserDefined'). Arguments can be passed in classification_kwds.
    k : int (default 5)
        Number of classes (ignored if scheme is None)
    vmin : None or float (default None)
        Minimum value of cmap. If None, the minimum data value
        in the column to be plotted is used.
    vmax : None or float (default None)
        Maximum value of cmap. If None, the maximum data value
        in the column to be plotted is used.
    markersize : str or float or sequence (default None)
        Only applies to point geometries within a frame.
        If a str, will use the values in the column of the frame specified
        by markersize to set the size of markers. Otherwise can be a value
        to apply to all points, or a sequence of the same length as the
        number of points.
    figsize : tuple of integers (default None)
        Size of the resulting matplotlib.figure.Figure. If the argument
        axes is given explicitly, figsize is ignored.
    legend_kwds : dict (default None)
        Keyword arguments to pass to :func:`matplotlib.pyplot.legend` or
        :func:`matplotlib.pyplot.colorbar`.
        Additional accepted keywords when `scheme` is specified:

        fmt : string
            A formatting specification for the bin edges of the classes in the
            legend. For example, to have no decimals: ``{"fmt": "{:.0f}"}``.
        labels : list-like
            A list of legend labels to override the auto-generated labels.
            Needs to have the same number of elements as the number of
            classes (`k`).
        interval : boolean (default False)
            An option to control brackets from mapclassify legend.
            If True, open/closed interval brackets are shown in the legend.
    categories : list-like
        Ordered list-like object of categories to be used for categorical plot.
    classification_kwds : dict (default None)
        Keyword arguments to pass to mapclassify
    missing_kwds : dict (default None)
        Keyword arguments specifying color options (as style_kwds)
        to be passed on to geometries with missing values in addition to
        or overwriting other style kwds. If None, geometries with missing
        values are not plotted.
    aspect : 'auto', 'equal', None or float (default 'auto')
        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
        however data are not projected (coordinates are long/lat), the aspect is by
        default set to 1/cos(df_y * pi/180) with df_y the y coordinate of the middle of
        the GeoDataFrame (the mean of the y range of bounding box) so that a long/lat
        square appears square in the middle of the plot. This implies an
        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
        also be set manually (float) as the ratio of y-unit to x-unit.
    autolim : bool (default True)
        Update axes data limits to contain the new geometries.
    **style_kwds : dict
        Style options to be passed on to the actual plot function, such
        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
        ``alpha``.

    Returns
    -------
    ax : matplotlib axes instance

    Examples
    --------
    >>> import geodatasets
    >>> df = geopandas.read_file(geodatasets.get_path("nybb"))
    >>> df.head()  # doctest: +SKIP
       BoroCode  ...                                           geometry
    0         5  ...  MULTIPOLYGON (((970217.022 145643.332, 970227....
    1         4  ...  MULTIPOLYGON (((1029606.077 156073.814, 102957...
    2         3  ...  MULTIPOLYGON (((1021176.479 151374.797, 102100...
    3         1  ...  MULTIPOLYGON (((981219.056 188655.316, 980940....
    4         2  ...  MULTIPOLYGON (((1012821.806 229228.265, 101278...

    >>> df.plot("BoroName", cmap="Set1")  # doctest: +SKIP

    See the User Guide page :doc:`../../user_guide/mapping` for details.

    """
    if column is not None and color is not None:
        warnings.warn(
            "Only specify one of 'column' or 'color'. Using 'color'.",
            UserWarning,
            stacklevel=3,
        )
        column = None

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError(
            "The matplotlib package is required for plotting in geopandas. "
            "You can install it using 'conda install -c conda-forge matplotlib' or "
            "'pip install matplotlib'."
        )

    if ax is None:
        if cax is not None:
            raise ValueError("'ax' can not be None if 'cax' is not.")
        fig, ax = plt.subplots(figsize=figsize)

    if aspect == "auto":
        if df.crs and df.crs.is_geographic:
            bounds = df.total_bounds
            y_coord = np.mean([bounds[1], bounds[3]])
            ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
            # formula ported from R package sp
            # https://github.com/edzer/sp/blob/master/R/mapasp.R
        else:
            ax.set_aspect("equal")
    elif aspect is not None:
        ax.set_aspect(aspect)

    # GH 1555
    # if legend_kwds set, copy so we don't update it in place
    if legend_kwds is not None:
        legend_kwds = legend_kwds.copy()

    if df.empty:
        warnings.warn(
            "The GeoDataFrame you are attempting to plot is "
            "empty. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    if isinstance(markersize, str):
        markersize = df[markersize].values

    if column is None:
        return plot_series(
            df.geometry,
            cmap=cmap,
            color=color,
            ax=ax,
            figsize=figsize,
            markersize=markersize,
            aspect=aspect,
            autolim=autolim,
            **style_kwds,
        )

    # To accept pd.Series and np.arrays as column
    if isinstance(column, (np.ndarray, pd.Series)):
        if column.shape[0] != df.shape[0]:
            raise ValueError(
                "The dataframe and given column have different number of rows."
            )
        else:
            values = column

            # Make sure index of a Series matches index of df
            if isinstance(values, pd.Series):
                values = values.reindex(df.index)
    else:
        values = df[column]

    if isinstance(values.dtype, CategoricalDtype):
        if categories is not None:
            raise ValueError(
                "Cannot specify 'categories' when column has categorical dtype"
            )
        categorical = True
    elif (
        pd.api.types.is_object_dtype(values.dtype)
        or pd.api.types.is_bool_dtype(values.dtype)
        or pd.api.types.is_string_dtype(values.dtype)
        or categories
    ):
        categorical = True

    nan_idx = np.asarray(pd.isna(values), dtype="bool")

    if scheme is not None:
        mc_err = (
            "The 'mapclassify' package (>= 2.4.0) is "
            "required to use the 'scheme' keyword."
        )
        try:
            import mapclassify

        except ImportError:
            raise ImportError(mc_err)

        if Version(mapclassify.__version__) < Version("2.4.0"):
            raise ImportError(mc_err)

        if classification_kwds is None:
            classification_kwds = {}
        if "k" not in classification_kwds:
            classification_kwds["k"] = k

        binning = mapclassify.classify(
            np.asarray(values[~nan_idx]), scheme, **classification_kwds
        )
        # set categorical to True for creating the legend
        categorical = True
        if legend_kwds is not None and "labels" in legend_kwds:
            if len(legend_kwds["labels"]) != binning.k:
                raise ValueError(
                    "Number of labels must match number of bins, "
                    "received {} labels for {} bins".format(
                        len(legend_kwds["labels"]), binning.k
                    )
                )
            else:
                labels = list(legend_kwds.pop("labels"))
        else:
            fmt = "{:.2f}"
            if legend_kwds is not None and "fmt" in legend_kwds:
                fmt = legend_kwds.pop("fmt")

            labels = binning.get_legend_classes(fmt)
            if legend_kwds is not None:
                show_interval = legend_kwds.pop("interval", False)
            else:
                show_interval = False
            if not show_interval:
                labels = [c[1:-1] for c in labels]

        values = pd.Categorical(
            [np.nan] * len(values), categories=binning.bins, ordered=True
        )
        values[~nan_idx] = pd.Categorical.from_codes(
            binning.yb, categories=binning.bins, ordered=True
        )
        if cmap is None:
            cmap = "viridis"

    # Define `values` as a Series
    if categorical:
        if cmap is None:
            cmap = "tab10"

        cat = pd.Categorical(values, categories=categories)
        categories = list(cat.categories)

        # values missing in the Categorical but not in original values
        missing = list(np.unique(values[~nan_idx & cat.isna()]))
        if missing:
            raise ValueError(
                "Column contains values not listed in categories. "
                "Missing categories: {}.".format(missing)
            )

        values = cat.codes[~nan_idx]
        vmin = 0 if vmin is None else vmin
        vmax = len(categories) - 1 if vmax is None else vmax

    # fill values with a placeholder where NaNs were originally, to map them properly
    # (after removing them in categorical or scheme)
    if categorical:
        for n in np.where(nan_idx)[0]:
            values = np.insert(values, n, values[0])

    mn = values[~np.isnan(values)].min() if vmin is None else vmin
    mx = values[~np.isnan(values)].max() if vmax is None else vmax

    # decompose GeometryCollections
    geoms, multiindex = _sanitize_geoms(df.geometry, prefix="Geom")
|
||||
values = np.take(values, multiindex, axis=0)
|
||||
nan_idx = np.take(nan_idx, multiindex, axis=0)
|
||||
expl_series = geopandas.GeoSeries(geoms)
|
||||
|
||||
geom_types = expl_series.geom_type
|
||||
poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
|
||||
line_idx = np.asarray(
|
||||
(geom_types == "LineString")
|
||||
| (geom_types == "MultiLineString")
|
||||
| (geom_types == "LinearRing")
|
||||
)
|
||||
point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))
|
||||
|
||||
# plot all Polygons and all MultiPolygon components in the same collection
|
||||
polys = expl_series[poly_idx & np.invert(nan_idx)]
|
||||
subset = values[poly_idx & np.invert(nan_idx)]
|
||||
if not polys.empty:
|
||||
_plot_polygon_collection(
|
||||
ax,
|
||||
polys,
|
||||
subset,
|
||||
vmin=mn,
|
||||
vmax=mx,
|
||||
cmap=cmap,
|
||||
autolim=autolim,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
# plot all LineStrings and MultiLineString components in same collection
|
||||
lines = expl_series[line_idx & np.invert(nan_idx)]
|
||||
subset = values[line_idx & np.invert(nan_idx)]
|
||||
if not lines.empty:
|
||||
_plot_linestring_collection(
|
||||
ax,
|
||||
lines,
|
||||
subset,
|
||||
vmin=mn,
|
||||
vmax=mx,
|
||||
cmap=cmap,
|
||||
autolim=autolim,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
# plot all Points in the same collection
|
||||
points = expl_series[point_idx & np.invert(nan_idx)]
|
||||
subset = values[point_idx & np.invert(nan_idx)]
|
||||
if not points.empty:
|
||||
if isinstance(markersize, np.ndarray):
|
||||
markersize = np.take(markersize, multiindex, axis=0)
|
||||
markersize = markersize[point_idx & np.invert(nan_idx)]
|
||||
_plot_point_collection(
|
||||
ax,
|
||||
points,
|
||||
subset,
|
||||
vmin=mn,
|
||||
vmax=mx,
|
||||
markersize=markersize,
|
||||
cmap=cmap,
|
||||
**style_kwds,
|
||||
)
|
||||
|
||||
missing_data = not expl_series[nan_idx].empty
|
||||
if missing_kwds is not None and missing_data:
|
||||
if color:
|
||||
if "color" not in missing_kwds:
|
||||
missing_kwds["color"] = color
|
||||
|
||||
merged_kwds = style_kwds.copy()
|
||||
merged_kwds.update(missing_kwds)
|
||||
|
||||
plot_series(expl_series[nan_idx], ax=ax, **merged_kwds)
|
||||
|
||||
if legend and not color:
|
||||
if legend_kwds is None:
|
||||
legend_kwds = {}
|
||||
if "fmt" in legend_kwds:
|
||||
legend_kwds.pop("fmt")
|
||||
|
||||
from matplotlib import cm
|
||||
from matplotlib.colors import Normalize
|
||||
from matplotlib.lines import Line2D
|
||||
|
||||
norm = style_kwds.get("norm", None)
|
||||
if not norm:
|
||||
norm = Normalize(vmin=mn, vmax=mx)
|
||||
n_cmap = cm.ScalarMappable(norm=norm, cmap=cmap)
|
||||
if categorical:
|
||||
if scheme is not None:
|
||||
categories = labels
|
||||
patches = []
|
||||
for i in range(len(categories)):
|
||||
patches.append(
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
linestyle="none",
|
||||
marker="o",
|
||||
alpha=style_kwds.get("alpha", 1),
|
||||
markersize=10,
|
||||
markerfacecolor=n_cmap.to_rgba(i),
|
||||
markeredgewidth=0,
|
||||
)
|
||||
)
|
||||
if missing_kwds is not None and missing_data:
|
||||
if "color" in merged_kwds:
|
||||
merged_kwds["facecolor"] = merged_kwds["color"]
|
||||
patches.append(
|
||||
Line2D(
|
||||
[0],
|
||||
[0],
|
||||
linestyle="none",
|
||||
marker="o",
|
||||
alpha=merged_kwds.get("alpha", 1),
|
||||
markersize=10,
|
||||
markerfacecolor=merged_kwds.get("facecolor", None),
|
||||
markeredgecolor=merged_kwds.get("edgecolor", None),
|
||||
markeredgewidth=merged_kwds.get(
|
||||
"linewidth", 1 if merged_kwds.get("edgecolor", False) else 0
|
||||
),
|
||||
)
|
||||
)
|
||||
categories.append(merged_kwds.get("label", "NaN"))
|
||||
legend_kwds.setdefault("numpoints", 1)
|
||||
legend_kwds.setdefault("loc", "best")
|
||||
legend_kwds.setdefault("handles", patches)
|
||||
legend_kwds.setdefault("labels", categories)
|
||||
ax.legend(**legend_kwds)
|
||||
else:
|
||||
if cax is not None:
|
||||
legend_kwds.setdefault("cax", cax)
|
||||
else:
|
||||
legend_kwds.setdefault("ax", ax)
|
||||
|
||||
n_cmap.set_array(np.array([]))
|
||||
ax.get_figure().colorbar(n_cmap, **legend_kwds)
|
||||
|
||||
ax.figure.canvas.draw_idle()
|
||||
return ax
|
||||
|
||||
|
||||
@doc(plot_dataframe)
|
||||
class GeoplotAccessor(PlotAccessor):
|
||||
_pandas_kinds = PlotAccessor._all_kinds
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
data = self._parent.copy()
|
||||
kind = kwargs.pop("kind", "geo")
|
||||
if kind == "geo":
|
||||
return plot_dataframe(data, *args, **kwargs)
|
||||
if kind in self._pandas_kinds:
|
||||
# Access pandas plots
|
||||
return PlotAccessor(data)(kind=kind, **kwargs)
|
||||
else:
|
||||
# raise error
|
||||
raise ValueError(f"{kind} is not a valid plot kind")
|
||||
|
||||
def geo(self, *args, **kwargs):
|
||||
return self(kind="geo", *args, **kwargs) # noqa: B026
|
||||
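A short usage sketch of the plotting entry points above (not part of the vendored file; it assumes matplotlib and the optional geodatasets package are installed, and that the nybb sample has a "Shape_Area" column in addition to the "BoroName" column used in the docstring):

import geodatasets
import geopandas

df = geopandas.read_file(geodatasets.get_path("nybb"))
# kind defaults to "geo", so this call dispatches to plot_dataframe
ax = df.plot(column="BoroName", legend=True, aspect="auto")
# any regular pandas plot kind falls through to PlotAccessor
df.plot(kind="hist", y="Shape_Area")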
505
.venv/lib/python3.12/site-packages/geopandas/sindex.py
Normal file
505
.venv/lib/python3.12/site-packages/geopandas/sindex.py
Normal file
@@ -0,0 +1,505 @@
import numpy as np

import shapely
from shapely.geometry.base import BaseGeometry

from . import _compat as compat
from . import array, geoseries

PREDICATES = {p.name for p in shapely.strtree.BinaryPredicate} | {None}

if compat.GEOS_GE_310:
    PREDICATES.update(["dwithin"])


class SpatialIndex:
    """A simple wrapper around Shapely's STRTree.


    Parameters
    ----------
    geometry : np.array of Shapely geometries
        Geometries from which to build the spatial index.
    """

    def __init__(self, geometry):
        # set empty geometries to None to avoid segfault on GEOS <= 3.6
        # see:
        # https://github.com/pygeos/pygeos/issues/146
        # https://github.com/pygeos/pygeos/issues/147
        non_empty = geometry.copy()
        non_empty[shapely.is_empty(non_empty)] = None
        # set empty geometries to None to maintain indexing
        self._tree = shapely.STRtree(non_empty)
        # store geometries, including empty geometries for user access
        self.geometries = geometry.copy()

    @property
    def valid_query_predicates(self):
        """Returns valid predicates for the spatial index.

        Returns
        -------
        set
            Set of valid predicates for this spatial index.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries([Point(0, 0), Point(1, 1)])
        >>> s.sindex.valid_query_predicates  # doctest: +SKIP
        {None, "contains", "contains_properly", "covered_by", "covers", \
"crosses", "dwithin", "intersects", "overlaps", "touches", "within"}
        """
        return PREDICATES

    def query(
        self, geometry, predicate=None, sort=False, distance=None, output_format="tuple"
    ):
        """
        Return the integer indices of all combinations of each input geometry
        and tree geometries where the bounding box of each input geometry
        intersects the bounding box of a tree geometry.

        If the input geometry is a scalar, this returns an array of shape (n, ) with
        the indices of the matching tree geometries. If the input geometry is an
        array_like, this returns an array with shape (2, n) where the subarrays
        correspond to the indices of the input geometries and indices of the
        tree geometries associated with each. To generate an array of pairs of
        input geometry index and tree geometry index, simply transpose the
        result.

        If a predicate is provided, the tree geometries are first queried based
        on the bounding box of the input geometry and then are further filtered
        to those that meet the predicate when comparing the input geometry to
        the tree geometry: ``predicate(geometry, tree_geometry)``.

        The 'dwithin' predicate requires GEOS >= 3.10.

        Bounding boxes are limited to two dimensions and are axis-aligned
        (equivalent to the ``bounds`` property of a geometry); any Z values
        present in input geometries are ignored when querying the tree.

        Any input geometry that is None or empty will never match geometries in
        the tree.

        Parameters
        ----------
        geometry : shapely.Geometry or array-like of geometries \
(numpy.ndarray, GeoSeries, GeometryArray)
            A single shapely geometry or array of geometries to query against
            the spatial index. For array-like, accepts both GeoPandas geometry
            iterables (GeoSeries, GeometryArray) or a numpy array of Shapely
            geometries.
        predicate : {None, "contains", "contains_properly", "covered_by", "covers", \
"crosses", "intersects", "overlaps", "touches", "within", "dwithin"}, optional
            If predicate is provided, the input geometries are tested
            using the predicate function against each item in the tree
            whose extent intersects the envelope of the input geometry:
            ``predicate(input_geometry, tree_geometry)``.
            If possible, prepared geometries are used to help speed up the
            predicate operation.
        sort : bool, default False
            If True, the results will be sorted in ascending order. In case
            of 2D array, the result is sorted lexicographically using the
            geometries' indexes as the primary key and the sindex's indexes
            as the secondary key.
            If False, no additional sorting is applied (results are often
            sorted but there is no guarantee).
        distance : number or array_like, optional
            Distances around each input geometry within which to query the tree for
            the 'dwithin' predicate. If array_like, shape must be broadcastable to
            shape of geometry. Required if ``predicate='dwithin'``.

        Returns
        -------
        ndarray with shape (n,) if geometry is a scalar
            Integer indices for matching geometries from the spatial index
            tree geometries.

        OR

        ndarray with shape (2, n) if geometry is an array_like
            The first subarray contains input geometry integer indices.
            The second subarray contains tree geometry integer indices.

        Examples
        --------
        >>> from shapely.geometry import Point, box
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        Querying the tree with a scalar geometry:

        >>> s.sindex.query(box(1, 1, 3, 3))
        array([1, 2, 3])

        >>> s.sindex.query(box(1, 1, 3, 3), predicate="contains")
        array([2])

        Querying the tree with an array of geometries:

        >>> s2 = geopandas.GeoSeries([box(2, 2, 4, 4), box(5, 5, 6, 6)])
        >>> s2
        0    POLYGON ((4 2, 4 4, 2 4, 2 2, 4 2))
        1    POLYGON ((6 5, 6 6, 5 6, 5 5, 6 5))
        dtype: geometry

        >>> s.sindex.query(s2)
        array([[0, 0, 0, 1, 1],
               [2, 3, 4, 5, 6]])

        >>> s.sindex.query(s2, predicate="contains")
        array([[0],
               [3]])

        >>> s.sindex.query(box(1, 1, 3, 3), predicate="dwithin", distance=0)
        array([1, 2, 3])

        >>> s.sindex.query(box(1, 1, 3, 3), predicate="dwithin", distance=2)
        array([0, 1, 2, 3, 4])

        Notes
        -----
        In the context of a spatial join, input geometries are the "left"
        geometries that determine the order of the results, and tree geometries
        are "right" geometries that are joined against the left geometries. This
        effectively performs an inner join, where only those combinations of
        geometries that can be joined based on overlapping bounding boxes or
        optional predicate are returned.
        """
        if predicate not in self.valid_query_predicates:
            if predicate == "dwithin":
                raise ValueError("predicate = 'dwithin' requires GEOS >= 3.10.0")

            raise ValueError(
                "Got predicate='{}'; ".format(predicate)
                + "`predicate` must be one of {}".format(self.valid_query_predicates)
            )

        # distance argument requirement of predicate `dwithin`
        # and only valid for predicate `dwithin`
        kwargs = {}
        if predicate == "dwithin":
            if distance is None:
                # the distance parameter is needed
                raise ValueError(
                    "'distance' parameter is required for 'dwithin' predicate"
                )
            # add distance to kwargs
            kwargs["distance"] = distance

        elif distance is not None:
            # distance parameter is invalid
            raise ValueError(
                "'distance' parameter is only supported in combination with "
                "'dwithin' predicate"
            )

        geometry = self._as_geometry_array(geometry)

        indices = self._tree.query(geometry, predicate=predicate, **kwargs)

        if output_format != "tuple":
            sort = True

        if sort:
            if indices.ndim == 1:
                indices = np.sort(indices)
            else:
                # sort by first array (geometry) and then second (tree)
                geo_idx, tree_idx = indices
                sort_indexer = np.lexsort((tree_idx, geo_idx))
                indices = np.vstack((geo_idx[sort_indexer], tree_idx[sort_indexer]))

        if output_format == "sparse":
            from scipy.sparse import coo_array

            return coo_array(
                (np.ones(len(indices[0]), dtype=np.bool_), indices),
                shape=(len(self.geometries), len(geometry)),
                dtype=np.bool_,
            )

        if output_format == "dense":
            dense = np.zeros((len(self.geometries), len(geometry)), dtype=bool)
            dense[indices] = True
            return dense

        if output_format == "tuple":
            return indices

        raise ValueError("Invalid output_format: {}".format(output_format))

    @staticmethod
    def _as_geometry_array(geometry):
        """Convert geometry into a numpy array of Shapely geometries.

        Parameters
        ----------
        geometry
            An array-like of Shapely geometries, a GeoPandas GeoSeries/GeometryArray,
            shapely.geometry or list of shapely geometries.

        Returns
        -------
        np.ndarray
            A numpy array of Shapely geometries.
        """
        if isinstance(geometry, np.ndarray):
            return array.from_shapely(geometry)._data
        elif isinstance(geometry, geoseries.GeoSeries):
            return geometry.values._data
        elif isinstance(geometry, array.GeometryArray):
            return geometry._data
        elif isinstance(geometry, BaseGeometry):
            return geometry
        elif geometry is None:
            return None
        else:
            return np.asarray(geometry)

    def nearest(
        self,
        geometry,
        return_all=True,
        max_distance=None,
        return_distance=False,
        exclusive=False,
    ):
        """
        Return the nearest geometry in the tree for each input geometry in
        ``geometry``.

        If multiple tree geometries have the same distance from an input geometry,
        multiple results will be returned for that input geometry by default.
        Specify ``return_all=False`` to only get a single nearest geometry
        (non-deterministic which nearest is returned).

        In the context of a spatial join, input geometries are the "left"
        geometries that determine the order of the results, and tree geometries
        are "right" geometries that are joined against the left geometries.
        If ``max_distance`` is not set, this will effectively be a left join
        because every geometry in ``geometry`` will have a nearest geometry in
        the tree. However, if ``max_distance`` is used, this becomes an
        inner join, since some geometries in ``geometry`` may not have a match
        in the tree.

        For performance reasons, it is highly recommended that you set
        the ``max_distance`` parameter.

        Parameters
        ----------
        geometry : {shapely.geometry, GeoSeries, GeometryArray, numpy.array of Shapely \
geometries}
            A single shapely geometry, one of the GeoPandas geometry iterables
            (GeoSeries, GeometryArray), or a numpy array of Shapely geometries to
            query against the spatial index.
        return_all : bool, default True
            If there are multiple equidistant or intersecting nearest
            geometries, return all those geometries instead of a single
            nearest geometry.
        max_distance : float, optional
            Maximum distance within which to query for nearest items in tree.
            Must be greater than 0. By default None, indicating no distance limit.
        return_distance : bool, optional
            If True, will return distances in addition to indexes. By default False.
        exclusive : bool, optional
            If True, the nearest geometries that are equal to the input geometry
            will not be returned. By default False. Requires Shapely >= 2.0.

        Returns
        -------
        Indices or tuple of (indices, distances)
            Indices is an ndarray of shape (2, n) and distances (if present) an
            ndarray of shape (n).
            The first subarray of indices contains input geometry indices.
            The second subarray of indices contains tree geometry indices.

        Examples
        --------
        >>> from shapely.geometry import Point, box
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s.head()
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        dtype: geometry

        >>> s.sindex.nearest(Point(1, 1))
        array([[0],
               [1]])

        >>> s.sindex.nearest([box(4.9, 4.9, 5.1, 5.1)])
        array([[0],
               [5]])

        >>> s2 = geopandas.GeoSeries(geopandas.points_from_xy([7.6, 10], [7.6, 10]))
        >>> s2
        0    POINT (7.6 7.6)
        1    POINT (10 10)
        dtype: geometry

        >>> s.sindex.nearest(s2)
        array([[0, 1],
               [8, 9]])
        """
        geometry = self._as_geometry_array(geometry)
        if isinstance(geometry, BaseGeometry) or geometry is None:
            geometry = [geometry]

        result = self._tree.query_nearest(
            geometry,
            max_distance=max_distance,
            return_distance=return_distance,
            all_matches=return_all,
            exclusive=exclusive,
        )
        if return_distance:
            indices, distances = result
        else:
            indices = result

        if return_distance:
            return indices, distances
        else:
            return indices

    def intersection(self, coordinates):
        """Compatibility wrapper for rtree.index.Index.intersection,
        use ``query`` instead.

        Parameters
        ----------
        coordinates : sequence or array
            Sequence of the form (min_x, min_y, max_x, max_y)
            to query a rectangle or (x, y) to query a point.

        Examples
        --------
        >>> from shapely.geometry import Point, box
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        >>> s.sindex.intersection(box(1, 1, 3, 3).bounds)
        array([1, 2, 3])

        Alternatively, you can use ``query``:

        >>> s.sindex.query(box(1, 1, 3, 3))
        array([1, 2, 3])

        """
        # TODO: we should deprecate this
        # convert bounds to geometry
        # the old API uses tuples of bound, but Shapely uses geometries
        try:
            iter(coordinates)
        except TypeError:
            # likely not an iterable
            # this is a check that rtree does, we mimic it
            # to ensure a useful failure message
            raise TypeError(
                "Invalid coordinates, must be iterable in format "
                "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
                "Got `coordinates` = {}.".format(coordinates)
            )

        # need to convert tuple of bounds to a geometry object
        if len(coordinates) == 4:
            indexes = self._tree.query(shapely.box(*coordinates))
        elif len(coordinates) == 2:
            indexes = self._tree.query(shapely.points(*coordinates))
        else:
            raise TypeError(
                "Invalid coordinates, must be iterable in format "
                "(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
                "Got `coordinates` = {}.".format(coordinates)
            )

        return indexes

    @property
    def size(self):
        """Size of the spatial index

        Number of leaves (input geometries) in the index.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        >>> s.sindex.size
        10
        """
        return len(self._tree)

    @property
    def is_empty(self):
        """Check if the spatial index is empty

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
        >>> s
        0    POINT (0 0)
        1    POINT (1 1)
        2    POINT (2 2)
        3    POINT (3 3)
        4    POINT (4 4)
        5    POINT (5 5)
        6    POINT (6 6)
        7    POINT (7 7)
        8    POINT (8 8)
        9    POINT (9 9)
        dtype: geometry

        >>> s.sindex.is_empty
        False

        >>> s2 = geopandas.GeoSeries()
        >>> s2.sindex.is_empty
        True
        """
        return len(self._tree) == 0

    def __len__(self):
        return len(self._tree)
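A minimal sketch of the SpatialIndex API defined above, mirroring its docstring examples (assumes Shapely 2.x; the commented outputs are the ones shown in those docstrings):

import geopandas
from shapely.geometry import box

s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
tree = s.sindex  # the GeoSeries builds the SpatialIndex on first access

tree.query(box(1, 1, 3, 3))                        # array([1, 2, 3])
tree.query(box(1, 1, 3, 3), predicate="contains")  # array([2])

# nearest returns a (2, n) array: input indices on top, tree indices below
s2 = geopandas.GeoSeries(geopandas.points_from_xy([7.6, 10], [7.6, 10]))
tree.nearest(s2)                                   # array([[0, 1], [8, 9]])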
358
.venv/lib/python3.12/site-packages/geopandas/testing.py
Normal file
358
.venv/lib/python3.12/site-packages/geopandas/testing.py
Normal file
@@ -0,0 +1,358 @@
"""
Testing functionality for geopandas objects.
"""

import warnings

import pandas as pd

from geopandas import GeoDataFrame, GeoSeries
from geopandas.array import GeometryDtype


def _isna(this):
    """isna version that works for both scalars and (Geo)Series"""
    with warnings.catch_warnings():
        # GeoSeries.isna will raise a warning about no longer returning True
        # for empty geometries. This helper is used below always in combination
        # with an is_empty check to preserve behaviour, and thus we ignore the
        # warning here to avoid it bubbling up to the user
        warnings.filterwarnings(
            "ignore", r"GeoSeries.isna\(\) previously returned", UserWarning
        )
        if hasattr(this, "isna"):
            return this.isna()
        elif hasattr(this, "isnull"):
            return this.isnull()
        else:
            return pd.isnull(this)


def _geom_equals_mask(this, that):
    """
    Test for geometric equality. Empty or missing geometries are considered
    equal.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
                 attribute)

    Returns
    -------
    Series
        boolean Series, True if geometries in left equal geometries in right
    """

    return (
        this.geom_equals(that)
        | (this.is_empty & that.is_empty)
        | (_isna(this) & _isna(that))
    )


def geom_equals(this, that):
    """
    Test for geometric equality. Empty or missing geometries are considered
    equal.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
                 attribute)

    Returns
    -------
    bool
        True if all geometries in left equal geometries in right
    """

    return _geom_equals_mask(this, that).all()


def _geom_almost_equals_mask(this, that):
    """
    Test for 'almost' geometric equality. Empty or missing geometries are
    considered equal.

    This method allows small differences in the coordinates, but it
    requires coordinates to be in the same order for all components of a geometry.

    Parameters
    ----------
    this, that : arrays of Geo objects

    Returns
    -------
    Series
        boolean Series, True if geometries in left almost equal geometries in right
    """

    return (
        this.geom_equals_exact(that, tolerance=0.5 * 10 ** (-6))
        | (this.is_empty & that.is_empty)
        | (_isna(this) & _isna(that))
    )


def geom_almost_equals(this, that):
    """
    Test for 'almost' geometric equality. Empty or missing geometries are
    considered equal.

    This method allows small differences in the coordinates, but it
    requires coordinates to be in the same order for all components of a geometry.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
                 property)

    Returns
    -------
    bool
        True if all geometries in left almost equal geometries in right
    """
    if isinstance(this, GeoDataFrame) and isinstance(that, GeoDataFrame):
        this = this.geometry
        that = that.geometry

    return _geom_almost_equals_mask(this, that).all()


def assert_geoseries_equal(
    left,
    right,
    check_dtype=True,
    check_index_type=False,
    check_series_type=True,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
    normalize=False,
):
    """
    Test util for checking that two GeoSeries are equal.

    Parameters
    ----------
    left, right : two GeoSeries
    check_dtype : bool, default False
        If True, check geo dtype [only included so it's a drop-in replacement
        for assert_series_equal].
    check_index_type : bool, default False
        Check that index types are equal.
    check_series_type : bool, default True
        Check that both are same type (*and* are GeoSeries). If False,
        will attempt to convert both into GeoSeries.
    check_less_precise : bool, default False
        If True, use geom_equals_exact with relative error of 0.5e-6.
        If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs : bool, default True
        If `check_series_type` is True, then also check that the
        crs matches.
    normalize : bool, default False
        If True, normalize the geometries before comparing equality.
        Typically useful with ``check_less_precise=True``, which uses
        ``geom_equals_exact`` and requires exact coordinate order.
    """
    assert len(left) == len(right), "%d != %d" % (len(left), len(right))

    if check_dtype:
        msg = "dtype should be a GeometryDtype, got {0}"
        assert isinstance(left.dtype, GeometryDtype), msg.format(left.dtype)
        assert isinstance(right.dtype, GeometryDtype), msg.format(right.dtype)

    if check_index_type:
        assert isinstance(left.index, type(right.index))

    if check_series_type:
        assert isinstance(left, GeoSeries)
        assert isinstance(left, type(right))

        if check_crs:
            assert left.crs == right.crs
    else:
        if not isinstance(left, GeoSeries):
            left = GeoSeries(left)
        if not isinstance(right, GeoSeries):
            right = GeoSeries(right, index=left.index)

    assert left.index.equals(right.index), "index: %s != %s" % (left.index, right.index)

    if check_geom_type:
        assert (left.geom_type == right.geom_type).all(), "type: %s != %s" % (
            left.geom_type,
            right.geom_type,
        )

    if normalize:
        left = GeoSeries(left.array.normalize())
        right = GeoSeries(right.array.normalize())

    if not check_crs:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "CRS mismatch", UserWarning)
            _check_equality(left, right, check_less_precise)
    else:
        _check_equality(left, right, check_less_precise)


def _truncated_string(geom):
    """Truncated WKT repr of geom"""
    s = str(geom)
    if len(s) > 100:
        return s[:100] + "..."
    else:
        return s


def _check_equality(left, right, check_less_precise):
    assert_error_message = (
        "{0} out of {1} geometries are not {3}equal.\n"
        "Indices where geometries are not {3}equal: {2} \n"
        "The first not {3}equal geometry:\n"
        "Left: {4}\n"
        "Right: {5}\n"
    )
    if check_less_precise:
        precise = "almost "
        equal = _geom_almost_equals_mask(left, right)
    else:
        precise = ""
        equal = _geom_equals_mask(left, right)

    if not equal.all():
        unequal_left_geoms = left[~equal]
        unequal_right_geoms = right[~equal]
        raise AssertionError(
            assert_error_message.format(
                len(unequal_left_geoms),
                len(left),
                unequal_left_geoms.index.to_list(),
                precise,
                _truncated_string(unequal_left_geoms.iloc[0]),
                _truncated_string(unequal_right_geoms.iloc[0]),
            )
        )


def assert_geodataframe_equal(
    left,
    right,
    check_dtype=True,
    check_index_type="equiv",
    check_column_type="equiv",
    check_frame_type=True,
    check_like=False,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
    normalize=False,
):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If True, ignore the order of rows & columns.
    check_less_precise : bool, default False
        If True, use geom_equals_exact. If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs : bool, default True
        If `check_frame_type` is True, then also check that the
        crs matches.
    normalize : bool, default False
        If True, normalize the geometries before comparing equality.
        Typically useful with ``check_less_precise=True``, which uses
        ``geom_equals_exact`` and requires exact coordinate order.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # allow if neither left nor right has an active geometry column
            if (
                left._geometry_column_name is None
                and right._geometry_column_name is None
            ):
                pass
            elif (
                left._geometry_column_name not in left.columns
                and right._geometry_column_name not in right.columns
            ):
                pass
            # no crs can be either None or {}
            elif not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        "GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n"
        "Left columns: {lcols!r}, right columns: {rcols!r}"
    ).format(
        lshape=left.shape, rshape=right.shape, lcols=left.columns, rcols=right.columns
    )

    if check_like:
        left = left.reindex_like(right)

    # column comparison
    assert_index_equal(
        left.columns, right.columns, exact=check_column_type, obj="GeoDataFrame.columns"
    )

    # geometry comparison
    for col, dtype in left.dtypes.items():
        if isinstance(dtype, GeometryDtype):
            assert_geoseries_equal(
                left[col],
                right[col],
                normalize=normalize,
                check_dtype=check_dtype,
                check_less_precise=check_less_precise,
                check_geom_type=check_geom_type,
                check_crs=check_crs,
            )

    # ensure the active geometry column is the same
    assert left._geometry_column_name == right._geometry_column_name

    # drop geometries and check remaining columns
    left2 = left.select_dtypes(exclude="geometry")
    right2 = right.select_dtypes(exclude="geometry")
    assert_frame_equal(
        left2,
        right2,
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        obj="GeoDataFrame",
    )
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,9 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },

"features": [
{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
]
}
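A small sketch of reading a fixture like the GeoJSON above (the file name is hypothetical; the vendored test data lives alongside the test modules):

import geopandas

df = geopandas.read_file("null_geometry.geojson")  # illustrative path
df.geometry.isna()
# 0     True   <- the "Null Geometry" feature comes back as a missing geometry
# 1    False
# dtype: bool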
@@ -0,0 +1,38 @@
import subprocess
import sys


def test_no_additional_imports():
    # test that 'import geopandas' does not import any of the optional or
    # development dependencies
    blacklist = {
        "pytest",
        "py",
        "ipython",
        # fiona actually gets imported if installed (but error suppressed until used)
        # "fiona",
        # "matplotlib",  # matplotlib gets imported by pandas, see below
        "mapclassify",
        "sqlalchemy",
        "psycopg",
        "psycopg2",
        "geopy",
        "geoalchemy2",
        "matplotlib",
    }

    code = """
import sys
import geopandas
blacklist = {0!r}

mods = blacklist & set(m.split('.')[0] for m in sys.modules)
if mods:
    sys.stderr.write('err: geopandas should not import: {{}}'.format(', '.join(mods)))
    sys.exit(len(mods))
""".format(
        blacklist
    )
    call = [sys.executable, "-c", code]
    returncode = subprocess.run(call, check=False).returncode
    assert returncode == 0
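The test above spawns a fresh interpreter so that modules already imported by pytest itself cannot mask the result; the same pattern works as a quick one-off check (a sketch, not part of the vendored file):

import subprocess
import sys

code = "import sys, geopandas; sys.exit('mapclassify' in sys.modules)"
# exit code 0 means the optional dependency was not pulled in at import time
assert subprocess.run([sys.executable, "-c", code], check=False).returncode == 0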
1035
.venv/lib/python3.12/site-packages/geopandas/tests/test_array.py
Normal file
1035
.venv/lib/python3.12/site-packages/geopandas/tests/test_array.py
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,30 @@
from geopandas._compat import import_optional_dependency

import pytest


def test_import_optional_dependency_present():
    # pandas is not optional, but we know it is present
    pandas = import_optional_dependency("pandas")
    assert pandas is not None

    # module imported normally must be same
    import pandas as pd

    assert pandas == pd


def test_import_optional_dependency_absent():
    with pytest.raises(ImportError, match="Missing optional dependency 'foo'"):
        import_optional_dependency("foo")

    with pytest.raises(ImportError, match="foo is required"):
        import_optional_dependency("foo", extra="foo is required")


@pytest.mark.parametrize(
    "bad_import", [["foo"], 0, False, True, {}, {"foo"}, {"foo": "bar"}]
)
def test_import_optional_dependency_invalid(bad_import):
    with pytest.raises(ValueError, match="Invalid module name"):
        import_optional_dependency(bad_import)
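These tests pin down the contract of import_optional_dependency; a usage sketch (the module names are illustrative):

from geopandas._compat import import_optional_dependency

pandas = import_optional_dependency("pandas")  # returns the imported module

try:
    import_optional_dependency("not_a_real_module", extra="needed for this demo")
except ImportError as err:
    print(err)  # formatted message that includes the 'extra' hint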
@@ -0,0 +1,47 @@
import geopandas

import pytest


def test_options():
    assert "display_precision: " in repr(geopandas.options)

    assert set(dir(geopandas.options)) == {
        "display_precision",
        "use_pygeos",
        "io_engine",
    }

    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option

    with pytest.raises(AttributeError):
        geopandas.options.non_existing_option = 10


def test_options_display_precision():
    assert geopandas.options.display_precision is None
    geopandas.options.display_precision = 5
    assert geopandas.options.display_precision == 5

    with pytest.raises(ValueError):
        geopandas.options.display_precision = "abc"

    with pytest.raises(ValueError):
        geopandas.options.display_precision = -1

    geopandas.options.display_precision = None


def test_options_io_engine():
    assert geopandas.options.io_engine is None
    geopandas.options.io_engine = "pyogrio"
    assert geopandas.options.io_engine == "pyogrio"

    with pytest.raises(ValueError):
        geopandas.options.io_engine = "abc"

    with pytest.raises(ValueError):
        geopandas.options.io_engine = -1

    geopandas.options.io_engine = None
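A sketch of the options surface exercised above (the values shown are the ones the tests accept):

import geopandas

geopandas.options.display_precision = 5       # coordinate digits in reprs
geopandas.options.io_engine = "pyogrio"       # select the IO engine
# invalid values raise ValueError; unknown options raise AttributeError
geopandas.options.display_precision = None    # restore defaults
geopandas.options.io_engine = None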
747
.venv/lib/python3.12/site-packages/geopandas/tests/test_crs.py
Normal file
747
.venv/lib/python3.12/site-packages/geopandas/tests/test_crs.py
Normal file
@@ -0,0 +1,747 @@
import random
import warnings

import numpy as np
import pandas as pd

from shapely.geometry import LineString, Point, Polygon

from geopandas import GeoDataFrame, GeoSeries, points_from_xy, read_file
from geopandas.array import GeometryArray, from_shapely, from_wkb, from_wkt

import pytest
from geopandas.testing import assert_geodataframe_equal

pyproj = pytest.importorskip("pyproj")


def _create_df(x, y=None, crs=None):
    y = y or x
    x = np.asarray(x)
    y = np.asarray(y)

    return GeoDataFrame(
        {"geometry": points_from_xy(x, y), "value1": x + y, "value2": x * y}, crs=crs
    )


def df_epsg26918():
    # EPSG:26918
    # Center coordinates
    # -1683723.64 6689139.23
    return _create_df(
        x=range(-1683723, -1683723 + 10, 1),
        y=range(6689139, 6689139 + 10, 1),
        crs="epsg:26918",
    )


def test_to_crs_transform():
    df = df_epsg26918()
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_transform__missing_data():
    # https://github.com/geopandas/geopandas/issues/1573
    df = df_epsg26918()
    df.loc[3, "geometry"] = None
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_transform__empty_data():
    df = df_epsg26918().iloc[:0]
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_inplace():
    df = df_epsg26918()
    lonlat = df.to_crs(epsg=4326)
    df.to_crs(epsg=4326, inplace=True)
    assert_geodataframe_equal(df, lonlat, check_less_precise=True)


def test_to_crs_geo_column_name():
    # Test to_crs() with different geometry column name (GH#339)
    df = df_epsg26918()
    df = df.rename(columns={"geometry": "geom"})
    df.set_geometry("geom", inplace=True)
    lonlat = df.to_crs(epsg=4326)
    utm = lonlat.to_crs(epsg=26918)
    assert lonlat.geometry.name == "geom"
    assert utm.geometry.name == "geom"
    assert_geodataframe_equal(df, utm, check_less_precise=True)


def test_to_crs_dimension_z():
    # preserve z dimension
    arr = points_from_xy([1, 2], [2, 3], [3, 4], crs=4326)
    assert arr.has_z.all()
    result = arr.to_crs(epsg=3857)
    assert result.has_z.all()


# pyproj + numpy 1.25 trigger a warning for single-element arrays -> recommendation
# is to ignore the warning for now (https://github.com/pyproj4/pyproj/issues/1307)
@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
def test_to_crs_dimension_mixed():
    s = GeoSeries([Point(1, 2), LineString([(1, 2, 3), (4, 5, 6)])], crs=2056)
    result = s.to_crs(epsg=4326)
    assert not result[0].is_empty
    assert result.has_z.tolist() == [False, True]
    roundtrip = result.to_crs(epsg=2056)
    # TODO replace with assert_geoseries_equal once we expose tolerance keyword
    # assert_geoseries_equal(roundtrip, s, check_less_precise=True)
    for a, b in zip(roundtrip, s):
        np.testing.assert_allclose(a.coords[:], b.coords[:], atol=0.01)


# -----------------------------------------------------------------------------
# Test different supported formats for CRS specification


@pytest.fixture(
    params=[
        4326,
        "epsg:4326",
        pytest.param(
            {"init": "epsg:4326"},
        ),
        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs",
        {"proj": "latlong", "ellps": "WGS84", "datum": "WGS84", "no_defs": True},
    ],
    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg4326(request):
    if isinstance(request.param, int):
        return {"epsg": request.param}
    return {"crs": request.param}


@pytest.fixture(
    params=[
        26918,
        "epsg:26918",
        pytest.param(
            {"init": "epsg:26918", "no_defs": True},
        ),
        "+proj=utm +zone=18 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ",
        {"proj": "utm", "zone": 18, "datum": "NAD83", "units": "m", "no_defs": True},
    ],
    ids=["epsg_number", "epsg_string", "epsg_dict", "proj4_string", "proj4_dict"],
)
def epsg26918(request):
    if isinstance(request.param, int):
        return {"epsg": request.param}
    return {"crs": request.param}


@pytest.mark.filterwarnings("ignore:'\\+init:DeprecationWarning")
@pytest.mark.filterwarnings("ignore:'\\+init:FutureWarning")
def test_transform2(epsg4326, epsg26918):
    # with PROJ >= 7, the transformation using EPSG code vs proj4 string is
    # slightly different due to use of grid files or not -> turn off network
    # to not use grid files at all for this test
    pyproj.network.set_network_enabled(False)

    df = df_epsg26918()
    lonlat = df.to_crs(**epsg4326)
    utm = lonlat.to_crs(**epsg26918)
    # can't check for CRS equality, as the formats differ although representing
    # the same CRS
    assert_geodataframe_equal(df, utm, check_less_precise=True, check_crs=False)


# pyproj + numpy 1.25 trigger a warning for single-element arrays -> recommendation
# is to ignore the warning for now (https://github.com/pyproj4/pyproj/issues/1307)
@pytest.mark.filterwarnings("ignore:Conversion of an array with:DeprecationWarning")
def test_crs_axis_order__always_xy():
    df = GeoDataFrame(geometry=[Point(-1683723, 6689139)], crs="epsg:26918")
    lonlat = df.to_crs("epsg:4326")
    test_lonlat = GeoDataFrame(
        geometry=[Point(-110.1399901, 55.1350011)], crs="epsg:4326"
    )
    assert_geodataframe_equal(lonlat, test_lonlat, check_less_precise=True)


def test_skip_exact_same():
    df = df_epsg26918()
    utm = df.to_crs(df.crs)
    assert_geodataframe_equal(df, utm, check_less_precise=True)


# Test CRS on GeometryArray level
class TestGeometryArrayCRS:
    def setup_method(self):
        self.osgb = pyproj.CRS(27700)
        self.wgs = pyproj.CRS(4326)

        self.geoms = [Point(0, 0), Point(1, 1)]
        self.polys = [
            Polygon([(random.random(), random.random()) for i in range(3)])
            for _ in range(10)
        ]
        self.arr = from_shapely(self.polys, crs=27700)

    def test_array(self):
        arr = from_shapely(self.geoms)
        arr.crs = 27700
        assert arr.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        assert arr.crs == self.osgb

        arr = GeometryArray(arr)
        assert arr.crs == self.osgb

        arr = GeometryArray(arr, crs=4326)
        assert arr.crs == self.wgs

    def test_series(self):
        s = GeoSeries(crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        # manually change CRS
        s = s.set_crs(4326, allow_override=True)
        assert s.crs == self.wgs
        assert s.values.crs == self.wgs

        s = GeoSeries(self.geoms, crs=27700)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(arr)
        assert s.crs == self.osgb
        assert s.values.crs == self.osgb

        with pytest.raises(
            ValueError,
            match="CRS mismatch between CRS of the passed geometries and 'crs'",
        ):
            s = GeoSeries(arr, crs=4326)
            assert s.crs == self.osgb

    def test_dataframe(self):
        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame(geometry=arr)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame(geometry=s)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # different passed CRS than array CRS is now an error
        match_str = "CRS mismatch between CRS of the passed geometries and 'crs'"
        with pytest.raises(ValueError, match=match_str):
            df = GeoDataFrame(geometry=s, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame(geometry=s, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326)
        with pytest.raises(ValueError, match=match_str):
            GeoDataFrame(df, crs=4326).crs

        # manually change CRS
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame(geometry=s)
        df = df.set_crs(crs="epsg:4326", allow_override=True)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            GeoDataFrame(self.geoms, columns=["geom"], crs=27700)
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            GeoDataFrame(crs=27700)

        df = GeoDataFrame(self.geoms, columns=["geom"])
        df = df.set_geometry("geom", crs=27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.geom.crs == self.osgb
        assert df.geom.values.crs == self.osgb

        df = GeoDataFrame(geometry=self.geoms, crs=27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # new geometry with set CRS has priority over GDF CRS
        df = GeoDataFrame(geometry=self.geoms, crs=27700)
        df = df.set_geometry(self.geoms, crs=4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame()
        df = df.set_geometry(s)
        assert df._geometry_column_name == "geometry"
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame()
        df = df.set_geometry(arr)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        arr = from_shapely(self.geoms, crs=4326)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr})
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

        # geometry column name None on init
        df = GeoDataFrame({"geometry": [0, 1]})
        with pytest.raises(
            ValueError,
            match="Assigning CRS to a GeoDataFrame without a geometry",
        ):
            df.crs = 27700

        # geometry column without geometry
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "Geometry column does not contain geometry", UserWarning
            )
            df = GeoDataFrame({"geometry": [Point(0, 1)]}).assign(geometry=[0])
        with pytest.raises(
            ValueError,
            match="Assigning CRS to a GeoDataFrame without an active geometry",
        ):
            df.crs = 27700
        with pytest.raises(
            AttributeError,
            match="The CRS attribute of a GeoDataFrame without an active",
        ):
            assert df.crs == self.osgb

    def test_dataframe_getitem_without_geometry_column(self):
        df = GeoDataFrame({"col": range(10)}, geometry=self.arr)
        df["geom2"] = df.geometry.centroid
        subset = df[["col", "geom2"]]
        with pytest.raises(
            AttributeError,
            match="The CRS attribute of a GeoDataFrame without an active",
        ):
            assert subset.crs == self.osgb

    def test_dataframe_setitem(self):
        # new geometry CRS has priority over GDF CRS
        arr = from_shapely(self.geoms)
        s = GeoSeries(arr, crs=27700)
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = s
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        arr = from_shapely(self.geoms, crs=27700)
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = arr
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # test to_crs case (GH1960)
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)
        df["geometry"] = df["geometry"].to_crs(27700)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb

        # test changing geometry crs not in the geometry column doesn't change the crs
        arr = from_shapely(self.geoms)
        df = GeoDataFrame(
            {"col1": [1, 2], "geometry": arr, "other_geom": arr}, crs=4326
        )
        df["other_geom"] = from_shapely(self.geoms, crs=27700)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df["geometry"].crs == self.wgs
        assert df["other_geom"].crs == self.osgb

    def test_dataframe_setitem_without_geometry_column(self):
        arr = from_shapely(self.geoms)
        df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326)

        # override geometry with non geometry
        with pytest.warns(UserWarning):
            df["geometry"] = 1

        # assigning a list of geometry objects doesn't have cached access to 4326
        df["geometry"] = self.geoms
        assert df.crs is None

    @pytest.mark.parametrize(
        "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])]
    )
    def test_scalar(self, scalar):
        df = GeoDataFrame()
        with pytest.warns(
            FutureWarning, match="You are adding a column named 'geometry'"
        ):
            df["geometry"] = scalar
        df = df.set_crs(4326)
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs

    @pytest.mark.filterwarnings("ignore:Accessing CRS")
    def test_crs_with_no_geom_fails(self):
        with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"):
            df = GeoDataFrame()
            df.crs = 4326

    def test_read_file(self, nybb_filename):
        df = read_file(nybb_filename)
        assert df.crs == pyproj.CRS(2263)
        assert df.geometry.crs == pyproj.CRS(2263)
        assert df.geometry.values.crs == pyproj.CRS(2263)

    def test_multiple_geoms(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.col1.crs == self.wgs
        assert df.col1.values.crs == self.wgs

    def test_multiple_geoms_set_geom(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df = df.set_geometry("col1")
        assert df.crs == self.wgs
        assert df.geometry.crs == self.wgs
        assert df.geometry.values.crs == self.wgs
        assert df["geometry"].crs == self.osgb
        assert df["geometry"].values.crs == self.osgb

    def test_assign_cols(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
        df = GeoDataFrame(s, geometry=arr, columns=["col1"])
        df["geom2"] = s
        df["geom3"] = s.values
        df["geom4"] = from_shapely(self.geoms)
        assert df.crs == self.osgb
        assert df.geometry.crs == self.osgb
        assert df.geometry.values.crs == self.osgb
        assert df.geom2.crs == self.wgs
        assert df.geom2.values.crs == self.wgs
        assert df.geom3.crs == self.wgs
        assert df.geom3.values.crs == self.wgs
        assert df.geom4.crs is None
        assert df.geom4.values.crs is None

    def test_copy(self):
        arr = from_shapely(self.geoms, crs=27700)
        s = GeoSeries(self.geoms, crs=4326)
|
||||
df = GeoDataFrame(s, geometry=arr, columns=["col1"])
|
||||
|
||||
arr_copy = arr.copy()
|
||||
assert arr_copy.crs == arr.crs
|
||||
|
||||
s_copy = s.copy()
|
||||
assert s_copy.crs == s.crs
|
||||
assert s_copy.values.crs == s.values.crs
|
||||
|
||||
df_copy = df.copy()
|
||||
assert df_copy.crs == df.crs
|
||||
assert df_copy.geometry.crs == df.geometry.crs
|
||||
assert df_copy.geometry.values.crs == df.geometry.values.crs
|
||||
assert df_copy.col1.crs == df.col1.crs
|
||||
assert df_copy.col1.values.crs == df.col1.values.crs
|
||||
|
||||
def test_rename(self):
|
||||
arr = from_shapely(self.geoms, crs=27700)
|
||||
s = GeoSeries(self.geoms, crs=4326)
|
||||
df = GeoDataFrame(s, geometry=arr, columns=["col1"])
|
||||
df = df.rename(columns={"geometry": "geom"}).set_geometry("geom")
|
||||
assert df.crs == self.osgb
|
||||
assert df.geometry.crs == self.osgb
|
||||
assert df.geometry.values.crs == self.osgb
|
||||
|
||||
df = df.rename_geometry("geom2")
|
||||
assert df.crs == self.osgb
|
||||
assert df.geometry.crs == self.osgb
|
||||
assert df.geometry.values.crs == self.osgb
|
||||
|
||||
df = df.rename(columns={"col1": "column1"})
|
||||
assert df.column1.crs == self.wgs
|
||||
assert df.column1.values.crs == self.wgs
|
||||
|
||||
def test_geoseries_to_crs(self):
|
||||
s = GeoSeries(self.geoms, crs=27700)
|
||||
s = s.to_crs(4326)
|
||||
assert s.crs == self.wgs
|
||||
assert s.values.crs == self.wgs
|
||||
|
||||
df = GeoDataFrame(geometry=s)
|
||||
assert df.crs == self.wgs
|
||||
df = df.to_crs(27700)
|
||||
assert df.crs == self.osgb
|
||||
assert df.geometry.crs == self.osgb
|
||||
assert df.geometry.values.crs == self.osgb
|
||||
|
||||
# make sure that only active geometry is transformed
|
||||
arr = from_shapely(self.geoms, crs=4326)
|
||||
df["col1"] = arr
|
||||
df = df.to_crs(3857)
|
||||
assert df.col1.crs == self.wgs
|
||||
assert df.col1.values.crs == self.wgs
|
||||
|
||||
def test_array_to_crs(self):
|
||||
arr = from_shapely(self.geoms, crs=27700)
|
||||
arr = arr.to_crs(4326)
|
||||
assert arr.crs == self.wgs
|
||||
|
||||
def test_from_shapely(self):
|
||||
arr = from_shapely(self.geoms, crs=27700)
|
||||
assert arr.crs == self.osgb
|
||||
|
||||
def test_from_wkb(self):
|
||||
L_wkb = [p.wkb for p in self.geoms]
|
||||
arr = from_wkb(L_wkb, crs=27700)
|
||||
assert arr.crs == self.osgb
|
||||
|
||||
def test_from_wkt(self):
|
||||
L_wkt = [p.wkt for p in self.geoms]
|
||||
arr = from_wkt(L_wkt, crs=27700)
|
||||
assert arr.crs == self.osgb
|
||||
|
||||
def test_points_from_xy(self):
|
||||
df = pd.DataFrame([{"x": x, "y": x, "z": x} for x in range(10)])
|
||||
arr = points_from_xy(df["x"], df["y"], crs=27700)
|
||||
assert arr.crs == self.osgb
|
||||
|
||||
# setting CRS in GeoSeries should not set it in passed array without CRS
|
||||
def test_original(self):
|
||||
arr = from_shapely(self.geoms)
|
||||
s = GeoSeries(arr, crs=27700)
|
||||
assert arr.crs is None
|
||||
assert s.crs == self.osgb
|
||||
|
||||
def test_ops(self):
|
||||
arr = self.arr
|
||||
bound = arr.boundary
|
||||
assert bound.crs == self.osgb
|
||||
|
||||
cent = arr.centroid
|
||||
assert cent.crs == self.osgb
|
||||
|
||||
hull = arr.convex_hull
|
||||
assert hull.crs == self.osgb
|
||||
|
||||
envelope = arr.envelope
|
||||
assert envelope.crs == self.osgb
|
||||
|
||||
exterior = arr.exterior
|
||||
assert exterior.crs == self.osgb
|
||||
|
||||
representative_point = arr.representative_point()
|
||||
assert representative_point.crs == self.osgb
|
||||
|
||||
def test_binary_ops(self):
|
||||
arr = self.arr
|
||||
quads = []
|
||||
while len(quads) < 10:
|
||||
geom = Polygon([(random.random(), random.random()) for i in range(4)])
|
||||
if geom.is_valid:
|
||||
quads.append(geom)
|
||||
|
||||
arr2 = from_shapely(quads, crs=27700)
|
||||
|
||||
difference = arr.difference(arr2)
|
||||
assert difference.crs == self.osgb
|
||||
|
||||
intersection = arr.intersection(arr2)
|
||||
assert intersection.crs == self.osgb
|
||||
|
||||
symmetric_difference = arr.symmetric_difference(arr2)
|
||||
assert symmetric_difference.crs == self.osgb
|
||||
|
||||
union = arr.union(arr2)
|
||||
assert union.crs == self.osgb
|
||||
|
||||
def test_other(self):
|
||||
arr = self.arr
|
||||
|
||||
buffer = arr.buffer(5)
|
||||
assert buffer.crs == self.osgb
|
||||
|
||||
interpolate = arr.exterior.interpolate(0.1)
|
||||
assert interpolate.crs == self.osgb
|
||||
|
||||
simplify = arr.simplify(5)
|
||||
assert simplify.crs == self.osgb
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"attr, arg",
|
||||
[
|
||||
("affine_transform", ([0, 1, 1, 0, 0, 0],)),
|
||||
("translate", ()),
|
||||
("rotate", (10,)),
|
||||
("scale", ()),
|
||||
("skew", ()),
|
||||
],
|
||||
)
|
||||
def test_affinity_methods(self, attr, arg):
|
||||
result = getattr(self.arr, attr)(*arg)
|
||||
|
||||
assert result.crs == self.osgb
|
||||
|
||||
def test_slice(self):
|
||||
s = GeoSeries(self.arr, crs=27700)
|
||||
assert s.iloc[1:].values.crs == self.osgb
|
||||
|
||||
df = GeoDataFrame({"col1": self.arr}, geometry=s)
|
||||
assert df.iloc[1:].geometry.values.crs == self.osgb
|
||||
assert df.iloc[1:].col1.values.crs == self.osgb
|
||||
|
||||
def test_concat(self):
|
||||
s = GeoSeries(self.arr, crs=27700)
|
||||
assert pd.concat([s, s]).values.crs == self.osgb
|
||||
|
||||
df = GeoDataFrame({"col1": from_shapely(self.geoms, crs=4326)}, geometry=s)
|
||||
assert pd.concat([df, df]).geometry.values.crs == self.osgb
|
||||
assert pd.concat([df, df]).col1.values.crs == self.wgs
|
||||
|
||||
def test_merge(self):
|
||||
arr = from_shapely(self.geoms, crs=27700)
|
||||
s = GeoSeries(self.geoms, crs=4326)
|
||||
df = GeoDataFrame({"col1": s}, geometry=arr)
|
||||
df2 = GeoDataFrame({"col2": s}, geometry=arr).rename_geometry("geom")
|
||||
merged = df.merge(df2, left_index=True, right_index=True)
|
||||
assert merged.col1.values.crs == self.wgs
|
||||
assert merged.geometry.values.crs == self.osgb
|
||||
assert merged.col2.values.crs == self.wgs
|
||||
assert merged.geom.values.crs == self.osgb
|
||||
assert merged.crs == self.osgb
|
||||
|
||||
# make sure that geometry column from list has CRS (__setitem__)
|
||||
def test_setitem_geometry(self):
|
||||
arr = from_shapely(self.geoms, crs=27700)
|
||||
df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
|
||||
|
||||
df["geometry"] = list(df.geometry)
|
||||
assert df.geometry.values.crs == self.osgb
|
||||
|
||||
df2 = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
|
||||
df2["geometry"] = from_shapely(self.geoms, crs=4326)
|
||||
assert df2.geometry.values.crs == self.wgs
|
||||
|
||||
def test_astype(self):
|
||||
arr = from_shapely(self.geoms, crs=27700)
|
||||
df = GeoDataFrame({"col1": [0, 1]}, geometry=arr)
|
||||
df2 = df.astype({"col1": str})
|
||||
assert df2.crs == self.osgb
|
||||
|
||||
def test_apply(self):
|
||||
s = GeoSeries(self.arr)
|
||||
assert s.crs == 27700
|
||||
|
||||
# apply preserves the CRS if the result is a GeoSeries
|
||||
result = s.apply(lambda x: x.centroid)
|
||||
assert result.crs == 27700
|
||||
|
||||
def test_apply_geodataframe(self):
|
||||
df = GeoDataFrame({"col1": [0, 1]}, geometry=self.geoms, crs=27700)
|
||||
assert df.crs == 27700
|
||||
|
||||
# apply preserves the CRS if the result is a GeoDataFrame
|
||||
result = df.apply(lambda col: col, axis=0)
|
||||
assert result.crs == 27700
|
||||
result = df.apply(lambda row: row, axis=1)
|
||||
assert result.crs == 27700
|
||||
|
||||
|
||||
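
A pattern running through the tests above: every derived GeoSeries or GeometryArray (centroids, buffers, slices, concat and merge results) carries the CRS of its source. A tiny standalone illustration of that propagation, using EPSG:27700 as in the tests:

import geopandas
from shapely.geometry import Point

s = geopandas.GeoSeries([Point(0, 0), Point(1, 1)], crs=27700)
assert s.centroid.crs == s.crs   # derived geometries keep the source CRS
assert s.buffer(1).crs == s.crs
assert s.iloc[1:].crs == s.crs   # so do slices
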
class TestSetCRS:
    @pytest.mark.parametrize(
        "constructor",
        [
            lambda geoms, crs: GeoSeries(geoms, crs=crs),
            lambda geoms, crs: GeoDataFrame(geometry=geoms, crs=crs),
        ],
        ids=["geoseries", "geodataframe"],
    )
    def test_set_crs(self, constructor):
        naive = constructor([Point(0, 0), Point(1, 1)], crs=None)
        assert naive.crs is None

        # by default returns a copy
        result = naive.set_crs(crs="EPSG:4326")
        assert result.crs == "EPSG:4326"
        assert naive.crs is None

        result = naive.set_crs(epsg=4326)
        assert result.crs == "EPSG:4326"
        assert naive.crs is None

        # with inplace=True
        result = naive.set_crs(crs="EPSG:4326", inplace=True)
        assert result is naive
        assert result.crs == naive.crs == "EPSG:4326"

        # raise for non-naive when crs would be overridden
        non_naive = constructor([Point(0, 0), Point(1, 1)], crs="EPSG:4326")
        assert non_naive.crs == "EPSG:4326"
        with pytest.raises(ValueError, match="already has a CRS"):
            non_naive.set_crs("EPSG:3857")

        # allow for equal crs
        result = non_naive.set_crs("EPSG:4326")
        assert result.crs == "EPSG:4326"

        # replace with allow_override=True
        result = non_naive.set_crs("EPSG:3857", allow_override=True)
        assert non_naive.crs == "EPSG:4326"
        assert result.crs == "EPSG:3857"

        result = non_naive.set_crs("EPSG:3857", allow_override=True, inplace=True)
        assert non_naive.crs == "EPSG:3857"
        assert result.crs == "EPSG:3857"

        # set CRS to None
        result = non_naive.set_crs(crs=None, allow_override=True)
        assert result.crs is None
        assert non_naive.crs == "EPSG:3857"
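
As the test above pins down, set_crs only attaches or replaces CRS metadata, while to_crs reprojects the coordinates themselves. A minimal sketch of the distinction (illustrative values; to_crs additionally requires pyproj):

import geopandas
from shapely.geometry import Point

s = geopandas.GeoSeries([Point(0, 51)])   # CRS-naive
labeled = s.set_crs(4326)                 # metadata only; coordinates unchanged
assert labeled.iloc[0] == s.iloc[0]
projected = labeled.to_crs(27700)         # reprojection; coordinates change
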
@@ -0,0 +1,15 @@
from geopandas import GeoDataFrame, read_file
from geopandas.datasets import get_path

import pytest


@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb", "foo"]
)
def test_read_paths(test_dataset):
    with pytest.raises(
        AttributeError,
        match=r"The geopandas\.dataset has been deprecated and was removed",
    ):
        assert isinstance(read_file(get_path(test_dataset)), GeoDataFrame)
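
The removal message this test matches points users at the separate geodatasets package. Assuming that package is installed, the replacement lookup would be along these lines (the "nybb" key comes from geodatasets' registry, not from this repo):

import geodatasets
import geopandas

path = geodatasets.get_path("nybb")   # stands in for geopandas.datasets.get_path
gdf = geopandas.read_file(path)
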
@@ -0,0 +1,87 @@
from textwrap import dedent

from geopandas._decorator import doc


@doc(method="cumsum", operation="sum")
def cumsum(whatever):
    """
    This is the {method} method.

    It computes the cumulative {operation}.
    """


@doc(
    cumsum,
    dedent(
        """
        Examples
        --------

        >>> cumavg([1, 2, 3])
        2
        """
    ),
    method="cumavg",
    operation="average",
)
def cumavg(whatever): ...


@doc(cumsum, method="cummax", operation="maximum")
def cummax(whatever): ...


@doc(cummax, method="cummin", operation="minimum")
def cummin(whatever): ...


def test_docstring_formatting():
    docstr = dedent(
        """
        This is the cumsum method.

        It computes the cumulative sum.
        """
    )
    assert cumsum.__doc__ == docstr


def test_docstring_appending():
    docstr = dedent(
        """
        This is the cumavg method.

        It computes the cumulative average.

        Examples
        --------

        >>> cumavg([1, 2, 3])
        2
        """
    )
    assert cumavg.__doc__ == docstr


def test_doc_template_from_func():
    docstr = dedent(
        """
        This is the cummax method.

        It computes the cumulative maximum.
        """
    )
    assert cummax.__doc__ == docstr


def test_inherit_doc_template():
    docstr = dedent(
        """
        This is the cummin method.

        It computes the cumulative minimum.
        """
    )
    assert cummin.__doc__ == docstr
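
These tests pin down three behaviors of the doc decorator: it dedents a template docstring, chains templates from already-decorated functions, and substitutes the keyword arguments into the {placeholders}. A toy reconstruction of that mechanism, for orientation only (the real decorator is vendored from pandas and differs in detail):

from textwrap import dedent

def simple_doc(*docstrings, **params):
    def decorator(func):
        components = []
        if func.__doc__:
            components.append(dedent(func.__doc__))
        for d in docstrings:
            if callable(d):
                # reuse the raw (unformatted) templates of a decorated function
                components.extend(getattr(d, "_components", [d.__doc__ or ""]))
            else:
                components.append(d)
        func._components = components   # keep raw templates for further chaining
        func.__doc__ = "".join(components).format(**params)
        return func
    return decorator
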
@@ -0,0 +1,372 @@
import warnings

import numpy as np
import pandas as pd

import geopandas
from geopandas import GeoDataFrame, read_file
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_15, PANDAS_GE_20, PANDAS_GE_30

import pytest
from geopandas.testing import assert_geodataframe_equal, geom_almost_equals
from pandas.testing import assert_frame_equal


@pytest.fixture
def nybb_polydf(nybb_filename):
    nybb_polydf = read_file(nybb_filename)
    nybb_polydf = nybb_polydf[["geometry", "BoroName", "BoroCode"]]
    nybb_polydf = nybb_polydf.rename(columns={"geometry": "myshapes"})
    nybb_polydf = nybb_polydf.set_geometry("myshapes")
    nybb_polydf["manhattan_bronx"] = 5
    nybb_polydf.loc[3:4, "manhattan_bronx"] = 6
    nybb_polydf["BoroCode"] = nybb_polydf["BoroCode"].astype("int64")
    return nybb_polydf


@pytest.fixture
def merged_shapes(nybb_polydf):
    # Merged geometry
    manhattan_bronx = nybb_polydf.loc[3:4]
    others = nybb_polydf.loc[0:2]

    collapsed = [others.geometry.union_all(), manhattan_bronx.geometry.union_all()]
    merged_shapes = GeoDataFrame(
        {"myshapes": collapsed},
        geometry="myshapes",
        index=pd.Index([5, 6], name="manhattan_bronx"),
        crs=nybb_polydf.crs,
    )

    return merged_shapes


@pytest.fixture
def first(merged_shapes):
    first = merged_shapes.copy()
    first["BoroName"] = ["Staten Island", "Manhattan"]
    first["BoroCode"] = [5, 1]
    return first


@pytest.fixture
def expected_mean(merged_shapes):
    test_mean = merged_shapes.copy()
    test_mean["BoroCode"] = [4, 1.5]
    return test_mean


def test_geom_dissolve(nybb_polydf, first):
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert test.geometry.name == "myshapes"
    assert geom_almost_equals(test, first)


@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_dissolve_retains_existing_crs(nybb_polydf):
    assert nybb_polydf.crs is not None
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert test.crs is not None


def test_dissolve_retains_nonexisting_crs(nybb_polydf):
    nybb_polydf.geometry.array.crs = None
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert test.crs is None


def test_first_dissolve(nybb_polydf, first):
    test = nybb_polydf.dissolve("manhattan_bronx")
    assert_frame_equal(first, test, check_column_type=False)


def test_mean_dissolve(nybb_polydf, first, expected_mean):
    if not PANDAS_GE_15:
        test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
        test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
    elif PANDAS_GE_15 and not PANDAS_GE_20:
        with pytest.warns(FutureWarning, match=".*used in dissolve is deprecated.*"):
            test = nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")
            test2 = nybb_polydf.dissolve("manhattan_bronx", aggfunc=np.mean)
    else:  # pandas 2.0
        test = nybb_polydf.dissolve(
            "manhattan_bronx", aggfunc="mean", numeric_only=True
        )
        # for a non-pandas "mean", numeric_only cannot be applied; drop columns manually
        test2 = nybb_polydf.drop(columns=["BoroName"]).dissolve(
            "manhattan_bronx", aggfunc="mean"
        )

    assert_frame_equal(expected_mean, test, check_column_type=False)
    assert_frame_equal(expected_mean, test2, check_column_type=False)


@pytest.mark.skipif(not PANDAS_GE_15 or PANDAS_GE_20, reason="warning for pandas 1.5.x")
def test_mean_dissolve_warning_capture(nybb_polydf, first, expected_mean):
    with pytest.warns(
        FutureWarning,
        match=".*used in dissolve is deprecated.*",
    ):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="mean")

    # no warning for aggfunc="first", which doesn't have numeric-only semantics
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        nybb_polydf.dissolve("manhattan_bronx", aggfunc="first")


def test_dissolve_emits_other_warnings(nybb_polydf):
    # we only do something special for pandas 1.5.x, but expect this
    # test to be true on any version
    def sum_and_warn(group):
        warnings.warn("foo")  # noqa: B028
        if PANDAS_GE_20:
            return group.sum(numeric_only=False)
        else:
            return group.sum()

    with pytest.warns(UserWarning, match="foo"):
        nybb_polydf.dissolve("manhattan_bronx", aggfunc=sum_and_warn)


def test_multicolumn_dissolve(nybb_polydf, first):
    multi = nybb_polydf.copy()
    multi["dup_col"] = multi.manhattan_bronx
    multi_test = multi.dissolve(["manhattan_bronx", "dup_col"], aggfunc="first")

    first_copy = first.copy()
    first_copy["dup_col"] = first_copy.index
    first_copy = first_copy.set_index([first_copy.index, "dup_col"])

    assert_frame_equal(multi_test, first_copy, check_column_type=False)


def test_reset_index(nybb_polydf, first):
    test = nybb_polydf.dissolve("manhattan_bronx", as_index=False)
    comparison = first.reset_index()
    assert_frame_equal(comparison, test, check_column_type=False)


def test_dissolve_none(nybb_polydf):
    test = nybb_polydf.dissolve(by=None)
    expected = GeoDataFrame(
        {
            nybb_polydf.geometry.name: [nybb_polydf.geometry.union_all()],
            "BoroName": ["Staten Island"],
            "BoroCode": [5],
            "manhattan_bronx": [5],
        },
        geometry=nybb_polydf.geometry.name,
        crs=nybb_polydf.crs,
    )
    assert_frame_equal(expected, test, check_column_type=False)


def test_dissolve_none_mean(nybb_polydf):
    test = nybb_polydf.dissolve(aggfunc="mean", numeric_only=True)
    expected = GeoDataFrame(
        {
            nybb_polydf.geometry.name: [nybb_polydf.geometry.union_all()],
            "BoroCode": [3.0],
            "manhattan_bronx": [5.4],
        },
        geometry=nybb_polydf.geometry.name,
        crs=nybb_polydf.crs,
    )
    assert_frame_equal(expected, test, check_column_type=False)


def test_dissolve_level():
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2, 2],
            "b": [3, 4, 4, 4],
            "c": [3, 4, 5, 6],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    ).set_index(["a", "b", "c"])

    expected_a = geopandas.GeoDataFrame(
        {
            "a": [1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index("a")
    expected_b = geopandas.GeoDataFrame(
        {
            "b": [3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2, 3 3)"]
            ),
        }
    ).set_index("b")
    expected_ab = geopandas.GeoDataFrame(
        {
            "a": [1, 1, 2],
            "b": [3, 4, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "MULTIPOINT (2 2, 3 3)"]
            ),
        }
    ).set_index(["a", "b"])

    assert_frame_equal(expected_a, gdf.dissolve(level=0))
    assert_frame_equal(expected_a, gdf.dissolve(level="a"))
    assert_frame_equal(expected_b, gdf.dissolve(level=1))
    assert_frame_equal(expected_b, gdf.dissolve(level="b"))
    assert_frame_equal(expected_ab, gdf.dissolve(level=[0, 1]))
    assert_frame_equal(expected_ab, gdf.dissolve(level=["a", "b"]))


def test_dissolve_sort():
    gdf = geopandas.GeoDataFrame(
        {
            "a": [2, 1, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )

    expected_unsorted = geopandas.GeoDataFrame(
        {
            "a": [2, 1],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "MULTIPOINT (1 1, 2 2)"]
            ),
        }
    ).set_index("a")
    expected_sorted = expected_unsorted.sort_index()

    assert_frame_equal(expected_sorted, gdf.dissolve("a"))
    assert_frame_equal(expected_unsorted, gdf.dissolve("a", sort=False))


def test_dissolve_categorical():
    gdf = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 1, 1, 2],
            "to_agg": [1, 2, 3, 4],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
        }
    )

    # when observed=False we get an additional observation
    # that wasn't in the original data
    none_val = "GEOMETRYCOLLECTION EMPTY" if PANDAS_GE_30 else None
    expected_gdf_observed_false = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "a", "b", "b"]),
            "noncat": [1, 2, 1, 2],
            "geometry": geopandas.array.from_wkt(
                [
                    "MULTIPOINT (0 0, 1 1)",
                    none_val,
                    "POINT (2 2)",
                    "POINT (3 3)",
                ]
            ),
            "to_agg": [1, None, 3, 4],
        }
    ).set_index(["cat", "noncat"])

    # when observed=True we do not get any additional observations
    expected_gdf_observed_true = geopandas.GeoDataFrame(
        {
            "cat": pd.Categorical(["a", "b", "b"]),
            "noncat": [1, 1, 2],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)", "POINT (3 3)"]
            ),
            "to_agg": [1, 3, 4],
        }
    ).set_index(["cat", "noncat"])

    assert_frame_equal(expected_gdf_observed_false, gdf.dissolve(["cat", "noncat"]))
    assert_frame_equal(
        expected_gdf_observed_true, gdf.dissolve(["cat", "noncat"], observed=True)
    )


def test_dissolve_dropna():
    gdf = geopandas.GeoDataFrame(
        {
            "a": [1, 1, None],
            "geometry": geopandas.array.from_wkt(
                ["POINT (0 0)", "POINT (1 1)", "POINT (2 2)"]
            ),
        }
    )

    expected_with_na = geopandas.GeoDataFrame(
        {
            "a": [1.0, np.nan],
            "geometry": geopandas.array.from_wkt(
                ["MULTIPOINT (0 0, 1 1)", "POINT (2 2)"]
            ),
        }
    ).set_index("a")
    expected_no_na = geopandas.GeoDataFrame(
        {
            "a": [1.0],
            "geometry": geopandas.array.from_wkt(["MULTIPOINT (0 0, 1 1)"]),
        }
    ).set_index("a")

    assert_frame_equal(expected_with_na, gdf.dissolve("a", dropna=False))
    assert_frame_equal(expected_no_na, gdf.dissolve("a"))


def test_dissolve_dropna_warn(nybb_polydf):
    # No warning with default params
    with warnings.catch_warnings(record=True) as record:
        nybb_polydf.dissolve()

    for r in record:
        assert "dropna kwarg is not supported" not in str(r.message)


def test_dissolve_multi_agg(nybb_polydf, merged_shapes):
    merged_shapes[("BoroCode", "min")] = [3, 1]
    merged_shapes[("BoroCode", "max")] = [5, 2]
    merged_shapes[("BoroName", "count")] = [3, 2]

    with warnings.catch_warnings(record=True) as record:
        test = nybb_polydf.dissolve(
            by="manhattan_bronx",
            aggfunc={
                "BoroCode": ["min", "max"],
                "BoroName": "count",
            },
        )
    assert_geodataframe_equal(test, merged_shapes)
    assert len(record) == 0


def test_coverage_dissolve(nybb_polydf):
    manhattan_bronx = nybb_polydf.loc[3:4]
    others = nybb_polydf.loc[0:2]

    collapsed = [
        others.geometry.union_all(method="coverage"),
        manhattan_bronx.geometry.union_all(method="coverage"),
    ]
    merged_shapes = GeoDataFrame(
        {"myshapes": collapsed},
        geometry="myshapes",
        index=pd.Index([5, 6], name="manhattan_bronx"),
        crs=nybb_polydf.crs,
    )

    merged_shapes["BoroName"] = ["Staten Island", "Manhattan"]
    merged_shapes["BoroCode"] = [5, 1]

    test = nybb_polydf.dissolve("manhattan_bronx", method="coverage")
    assert_frame_equal(merged_shapes, test, check_column_type=False)
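
Every dissolve test above reduces to the same core: group rows by a key, aggregate the attribute columns, and union the geometries within each group. A minimal sketch of the geometric half (illustrative only; GeoDataFrame.dissolve additionally handles aggfunc, level, sort, observed, dropna, and the union method):

import geopandas
import shapely
from shapely.geometry import Point

gdf = geopandas.GeoDataFrame(
    {"a": [1, 1, 2], "geometry": [Point(0, 0), Point(1, 1), Point(2, 2)]}
)
# per-group union of geometries, the same shapes gdf.dissolve("a") would produce
merged = gdf.groupby("a")["geometry"].apply(
    lambda geoms: shapely.union_all(list(geoms))
)
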
1048
.venv/lib/python3.12/site-packages/geopandas/tests/test_explore.py
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,648 @@
"""
This file contains a minimal set of tests for compliance with the extension
array interface test suite (by inheriting the pandas test suite), and should
contain no other tests.
Other tests (e.g. related to the spatial functionality or integration
with GeoSeries/GeoDataFrame) should be added to test_array.py and others.

The tests in this file are inherited from the BaseExtensionTests, and only
minimal tweaks should be applied to get the tests passing (by overwriting a
parent method).

A set of fixtures is defined to provide data for the tests (the fixtures
expected to be available to pytest by the inherited pandas tests).

"""

import itertools
import operator

import numpy as np
import pandas as pd
from pandas.tests.extension import base as extension_tests

import shapely.geometry
from shapely.geometry import Point

from geopandas._compat import PANDAS_GE_15, PANDAS_GE_21, PANDAS_GE_22
from geopandas.array import GeometryArray, GeometryDtype, from_shapely

import pytest
from pandas.testing import assert_frame_equal, assert_series_equal

# -----------------------------------------------------------------------------
# Compat with extension tests in older pandas versions
# -----------------------------------------------------------------------------


not_yet_implemented = pytest.mark.skip(reason="Not yet implemented")
no_minmax = pytest.mark.skip(reason="Min/max not supported")


# -----------------------------------------------------------------------------
# Required fixtures
# -----------------------------------------------------------------------------


@pytest.fixture
def dtype():
    """A fixture providing the ExtensionDtype to validate."""
    return GeometryDtype()


def make_data():
    a = np.empty(100, dtype=object)
    a[:] = [shapely.geometry.Point(i, i) for i in range(100)]
    ga = from_shapely(a)
    return ga


@pytest.fixture
def data():
    """Length-100 array for this type.

    * data[0] and data[1] should both be non-missing
    * data[0] and data[1] should not be equal
    """
    return make_data()


@pytest.fixture
def data_for_twos():
    """Length-100 array in which all the elements are two."""
    raise NotImplementedError


@pytest.fixture
def data_missing():
    """Length-2 array with [NA, Valid]"""
    return from_shapely([None, shapely.geometry.Point(1, 1)])


@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture giving 'data' and 'data_missing'"""
    if request.param == "data":
        return data
    elif request.param == "data_missing":
        return data_missing


@pytest.fixture
def data_repeated(data):
    """
    Generate many datasets.

    Parameters
    ----------
    data : fixture implementing `data`

    Returns
    -------
    Callable[[int], Generator]:
        A callable that takes a `count` argument and
        returns a generator yielding `count` datasets.
    """

    def gen(count):
        for _ in range(count):
            yield data

    return gen


@pytest.fixture
def data_for_sorting():
    """Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C
    """
    return from_shapely([Point(0, 1), Point(1, 1), Point(0, 0)])


@pytest.fixture
def data_missing_for_sorting():
    """Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.
    """
    return from_shapely([Point(1, 2), None, Point(0, 0)])


@pytest.fixture
def na_cmp():
    """Binary operator for comparing NA values.
    Should return a function of two arguments that returns
    True if both arguments are (scalar) NA for your type.
    By default, uses ``operator.is_``
    """
    return lambda x, y: x is None and y is None


@pytest.fixture
def na_value():
    """The scalar missing value for this type. Default 'None'"""
    return None


@pytest.fixture
def data_for_grouping():
    """Data for factorization, grouping, and unique tests.

    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing
    """
    return from_shapely(
        [
            shapely.geometry.Point(1, 1),
            shapely.geometry.Point(1, 1),
            None,
            None,
            shapely.geometry.Point(0, 0),
            shapely.geometry.Point(0, 0),
            shapely.geometry.Point(1, 1),
            shapely.geometry.Point(2, 2),
        ]
    )


@pytest.fixture(params=[True, False])
def box_in_series(request):
    """Whether to box the data in a Series"""
    return request.param


@pytest.fixture(
    params=[
        lambda x: 1,
        lambda x: [1] * len(x),
        lambda x: pd.Series([1] * len(x)),
        lambda x: x,
    ],
    ids=["scalar", "list", "series", "object"],
)
def groupby_apply_op(request):
    """
    Functions to test groupby.apply().
    """
    return request.param


@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Boolean fixture to support Series and Series.to_frame() comparison testing.
    """
    return request.param


@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Boolean fixture to support arr and Series(arr) comparison testing.
    """
    return request.param


@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Boolean fixture to support comparison testing of ExtensionDtype array
    and numpy array.
    """
    return request.param


@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
    """
    Parametrized fixture giving method parameters 'ffill' and 'bfill' for
    Series.fillna(method=<method>) testing.
    """
    return request.param


@pytest.fixture(params=[True, False])
def as_array(request):
    """
    Boolean fixture to support ExtensionDtype _from_sequence method testing.
    """
    return request.param


@pytest.fixture
def invalid_scalar(data):
    """
    A scalar that *cannot* be held by this ExtensionArray.

    The default should work for most subclasses, but is not guaranteed.

    If the array can hold any item (i.e. object dtype), then use pytest.skip.
    """
    return object.__new__(object)


# Fixtures defined in pandas/conftest.py that are also needed: defining them
# here instead of importing for compatibility


@pytest.fixture(
    params=["sum", "max", "min", "mean", "prod", "std", "var", "median", "kurt", "skew"]
)
def all_numeric_reductions(request):
    """
    Fixture for numeric reduction names
    """
    return request.param


@pytest.fixture(params=["all", "any"])
def all_boolean_reductions(request):
    """
    Fixture for boolean reduction names
    """
    return request.param


# only == and != are supported for GeometryArray
# @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
@pytest.fixture(params=["__eq__", "__ne__"])
def all_compare_operators(request):
    """
    Fixture for dunder names for common compare operations

    * >=
    * >
    * ==
    * !=
    * <
    * <=
    """
    return request.param


@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Simple fixture for testing keys in sorting methods.
    Tests None (no key) and the identity key.
    """
    return request.param


# -----------------------------------------------------------------------------
# Inherited tests
# -----------------------------------------------------------------------------


class TestDtype(extension_tests.BaseDtypeTests):
    # additional tests

    def test_array_type_with_arg(self, data, dtype):
        assert dtype.construct_array_type() is GeometryArray

    def test_registry(self, data, dtype):
        s = pd.Series(np.asarray(data), dtype=object)
        result = s.astype("geometry")
        assert isinstance(result.array, GeometryArray)
        expected = pd.Series(data)
        assert_series_equal(result, expected)


class TestInterface(extension_tests.BaseInterfaceTests):
    def test_contains(self, data, data_missing):
        # overridden due to the inconsistency between
        # GeometryDtype.na_value = np.nan
        # and None being used as NA in array

        # ensure data without missing values
        data = data[~data.isna()]

        # first elements are non-missing
        assert data[0] in data
        assert data_missing[0] in data_missing

        assert None in data_missing
        assert None not in data
        assert pd.NaT not in data_missing


class TestConstructors(extension_tests.BaseConstructorsTests):
    pass


class TestReshaping(extension_tests.BaseReshapingTests):

    # NOTE: this test is copied from pandas/tests/extension/base/reshaping.py
    # because starting with pandas 3.0 the assert_frame_equal is strict regarding
    # the exact missing value (None vs NaN)
    # Our `result` uses None, but the way the `expected` is created results in
    # NaNs (and specifying to use None as fill value in unstack also does not
    # help)
    # -> the only change compared to the upstream test is marked
    @pytest.mark.parametrize(
        "index",
        [
            # Two levels, uniform.
            pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]),
            # non-uniform
            pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]),
            # three levels, non-uniform
            pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]),
            pd.MultiIndex.from_tuples(
                [
                    ("A", "a", 1),
                    ("A", "b", 0),
                    ("A", "a", 0),
                    ("B", "a", 0),
                    ("B", "c", 1),
                ]
            ),
        ],
    )
    @pytest.mark.parametrize("obj", ["series", "frame"])
    def test_unstack(self, data, index, obj):
        data = data[: len(index)]
        if obj == "series":
            ser = pd.Series(data, index=index)
        else:
            ser = pd.DataFrame({"A": data, "B": data}, index=index)

        n = index.nlevels
        levels = list(range(n))
        # [0, 1, 2]
        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
        combinations = itertools.chain.from_iterable(
            itertools.permutations(levels, i) for i in range(1, n)
        )

        for level in combinations:
            result = ser.unstack(level=level)
            assert all(
                isinstance(result[col].array, type(data)) for col in result.columns
            )

            if obj == "series":
                # We should get the same result with to_frame+unstack+droplevel
                df = ser.to_frame()

                alt = df.unstack(level=level).droplevel(0, axis=1)
                assert_frame_equal(result, alt)

            obj_ser = ser.astype(object)

            expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
            if obj == "series":
                assert (expected.dtypes == object).all()
            # <------------ next line is added
            expected[expected.isna()] = None
            # ------------->

            result = result.astype(object)
            assert_frame_equal(result, expected)


class TestGetitem(extension_tests.BaseGetitemTests):
    pass


class TestSetitem(extension_tests.BaseSetitemTests):
    pass


class TestMissing(extension_tests.BaseMissingTests):
    def test_fillna_series(self, data_missing):
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        # Fill with a scalar
        result = ser.fillna(fill_value)
        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
        assert_series_equal(result, expected)

        # Fill with a series
        filler = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(1, 1),
                    shapely.geometry.Point(2, 2),
                ],
            )
        )
        result = ser.fillna(filler)
        expected = pd.Series(data_missing._from_sequence([fill_value, fill_value]))
        assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        filler = pd.Series(
            from_shapely(
                [
                    shapely.geometry.Point(2, 2),
                    shapely.geometry.Point(1, 1),
                ]
            ),
            index=[10, 11],
        )
        result = ser.fillna(filler)
        assert_series_equal(result, ser)

        # More `GeoSeries.fillna` testcases are in
        # `geopandas\tests\test_pandas_methods.py::test_fillna_scalar`
        # and `geopandas\tests\test_pandas_methods.py::test_fillna_series`.

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_limit_pad(self, data_missing):
        super().test_fillna_limit_pad(data_missing)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_limit_backfill(self, data_missing):
        super().test_fillna_limit_backfill(data_missing)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_series_method(self, data_missing, fillna_method):
        super().test_fillna_series_method(data_missing, fillna_method)

    @pytest.mark.skipif(
        not PANDAS_GE_21, reason="fillna method not supported with older pandas"
    )
    def test_fillna_no_op_returns_copy(self, data):
        super().test_fillna_no_op_returns_copy(data)


if PANDAS_GE_22:
    from pandas.tests.extension.base import BaseReduceTests
else:
    from pandas.tests.extension.base import BaseNoReduceTests as BaseReduceTests


class TestReduce(BaseReduceTests):
    @pytest.mark.skip("boolean reduce (any/all) tested in test_pandas_methods")
    def test_reduce_series_boolean(self):
        pass


_all_arithmetic_operators = [
    "__add__",
    "__radd__",
    # '__sub__', '__rsub__',
    "__mul__",
    "__rmul__",
    "__floordiv__",
    "__rfloordiv__",
    "__truediv__",
    "__rtruediv__",
    "__pow__",
    "__rpow__",
    "__mod__",
    "__rmod__",
]


@pytest.fixture(params=_all_arithmetic_operators)
def all_arithmetic_operators(request):
    """
    Fixture for dunder names for common arithmetic operations

    Adapted to exclude __sub__, as this is implemented as "difference".
    """
    return request.param


# an inherited test from pandas creates a Series from a list of geometries, which
# triggers a warning from Shapely that is out of GeoPandas' control, so ignore it here
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestArithmeticOps(extension_tests.BaseArithmeticOpsTests):
    @pytest.mark.skip(reason="not applicable")
    def test_divmod_series_array(self, data, data_for_twos):
        pass

    @pytest.mark.skip(reason="not applicable")
    def test_add_series_with_extension_array(self, data):
        pass


# an inherited test from pandas creates a Series from a list of geometries, which
# triggers a warning from Shapely that is out of GeoPandas' control, so ignore it here
@pytest.mark.filterwarnings(
    "ignore:The array interface is deprecated and will no longer work in Shapely 2.0"
)
class TestComparisonOps(extension_tests.BaseComparisonOpsTests):
    def _compare_other(self, s, data, op_name, other):
        op = getattr(operator, op_name.strip("_"))
        result = op(s, other)
        expected = s.combine(other, op)
        assert_series_equal(result, expected)

    def test_compare_scalar(self, data, all_compare_operators):
        op_name = all_compare_operators
        s = pd.Series(data)
        self._compare_other(s, data, op_name, data[0])

    def test_compare_array(self, data, all_compare_operators):
        op_name = all_compare_operators
        s = pd.Series(data)
        other = pd.Series([data[0]] * len(data))
        self._compare_other(s, data, op_name, other)


class TestMethods(extension_tests.BaseMethodsTests):
    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        pass

    @pytest.mark.skipif(
        not PANDAS_GE_15, reason="sorting index not yet working with older pandas"
    )
    def test_value_counts_with_normalize(self, data):
        pass

    @pytest.mark.parametrize("ascending", [True, False])
    def test_sort_values_frame(self, data_for_sorting, ascending):
        super().test_sort_values_frame(data_for_sorting, ascending)

    @pytest.mark.skip(reason="searchsorted not supported")
    def test_searchsorted(self, data_for_sorting, as_series):
        pass

    @not_yet_implemented
    def test_combine_le(self):
        pass

    @pytest.mark.skip(reason="addition not supported")
    def test_combine_add(self):
        pass

    @not_yet_implemented
    def test_fillna_length_mismatch(self, data_missing):
        msg = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg):
            data_missing.fillna(data_missing.take([1]))

    @no_minmax
    def test_argmin_argmax(self):
        pass

    @no_minmax
    def test_argmin_argmax_empty_array(self):
        pass

    @no_minmax
    def test_argmin_argmax_all_na(self):
        pass

    @no_minmax
    def test_argreduce_series(self):
        pass

    @no_minmax
    def test_argmax_argmin_no_skipna_notimplemented(self):
        pass


class TestCasting(extension_tests.BaseCastingTests):
    pass


class TestGroupby(extension_tests.BaseGroupbyTests):
    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        super().test_groupby_extension_agg(as_index, data_for_grouping)

    def test_groupby_extension_transform(self, data_for_grouping):
        super().test_groupby_extension_transform(data_for_grouping)

    @pytest.mark.parametrize(
        "op",
        [
            lambda x: 1,
            lambda x: [1] * len(x),
            lambda x: pd.Series([1] * len(x)),
            lambda x: x,
        ],
        ids=["scalar", "list", "series", "object"],
    )
    def test_groupby_extension_apply(self, data_for_grouping, op):
        super().test_groupby_extension_apply(data_for_grouping, op)


class TestPrinting(extension_tests.BasePrintingTests):
    pass


@not_yet_implemented
class TestParsing(extension_tests.BaseParsingTests):
    pass
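
TestDtype.test_registry above works because importing geopandas registers GeometryDtype with pandas' extension-dtype registry, which lets plain pandas code cast by dtype name. A short standalone illustration of that behavior:

import pandas as pd
from shapely.geometry import Point
import geopandas  # noqa: F401  -- the import registers the "geometry" dtype

ser = pd.Series([Point(0, 0), Point(1, 1)], dtype=object).astype("geometry")
print(ser.dtype)  # geometry
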
@@ -0,0 +1,170 @@
import pandas as pd

from shapely.geometry import Point

from geopandas import GeoDataFrame, GeoSeries
from geopandas._compat import HAS_PYPROJ
from geopandas.tools import geocode, reverse_geocode
from geopandas.tools.geocoding import _prepare_geocode_result

import pytest
from geopandas.testing import assert_geodataframe_equal
from geopandas.tests.util import assert_geoseries_equal, mock
from pandas.testing import assert_series_equal

geopy = pytest.importorskip("geopy")


class ForwardMock(mock.MagicMock):
    """
    Mock the forward geocoding function.
    Returns the passed-in address and (p, p+.5), where p increases
    at each call

    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._n = 0.0

    def __call__(self, *args, **kwargs):
        self.return_value = args[0], (self._n, self._n + 0.5)
        self._n += 1
        return super().__call__(*args, **kwargs)


class ReverseMock(mock.MagicMock):
    """
    Mock the reverse geocoding function.
    Returns the passed-in point and 'address{p}', where p increases
    at each call

    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._n = 0

    def __call__(self, *args, **kwargs):
        self.return_value = "address{0}".format(self._n), args[0]
        self._n += 1
        return super().__call__(*args, **kwargs)


@pytest.fixture
def locations():
    locations = ["260 Broadway, New York, NY", "77 Massachusetts Ave, Cambridge, MA"]
    return locations


@pytest.fixture
def points():
    points = [Point(-71.0597732, 42.3584308), Point(-77.0365305, 38.8977332)]
    return points


def test_prepare_result():
    # Calls _prepare_result with sample results from the geocoder call
    # loop
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    p1 = Point(-23.4, 56.7)
    d = {"a": ("address0", p0.coords[0]), "b": ("address1", p1.coords[0])}

    df = _prepare_geocode_result(d)
    assert type(df) is GeoDataFrame
    if HAS_PYPROJ:
        assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df

    coords = df.loc["a"]["geometry"].coords[0]
    test = p0.coords[0]
    # Output from the df should be lon/lat
    assert coords[0] == pytest.approx(test[1])
    assert coords[1] == pytest.approx(test[0])

    coords = df.loc["b"]["geometry"].coords[0]
    test = p1.coords[0]
    assert coords[0] == pytest.approx(test[1])
    assert coords[1] == pytest.approx(test[0])


def test_prepare_result_none():
    p0 = Point(12.3, -45.6)  # Treat these as lat/lon
    d = {"a": ("address0", p0.coords[0]), "b": (None, None)}

    df = _prepare_geocode_result(d)
    assert type(df) is GeoDataFrame
    if HAS_PYPROJ:
        assert df.crs == "EPSG:4326"
    assert len(df) == 2
    assert "address" in df

    row = df.loc["b"]

    # TODO we should probably replace this with a missing value instead of point?
    assert len(row["geometry"].coords) == 0
    assert row["geometry"].is_empty
    assert row["address"] is None


@pytest.mark.parametrize("geocode_result", (None, (None, None)))
def test_prepare_geocode_result_when_result_is(geocode_result):
    result = {0: geocode_result}
    expected_output = GeoDataFrame(
        {"geometry": [Point()], "address": [None]},
        crs="EPSG:4326",
    )

    output = _prepare_geocode_result(result)

    assert_geodataframe_equal(output, expected_output)


def test_bad_provider_forward():
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        geocode(["cambridge, ma"], "badprovider")


def test_bad_provider_reverse():
    from geopy.exc import GeocoderNotFound

    with pytest.raises(GeocoderNotFound):
        reverse_geocode([Point(0, 0)], "badprovider")


def test_forward(locations, points):
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.geocode", ForwardMock()) as m:
            g = geocode(locations, provider=provider, timeout=2)
            assert len(locations) == m.call_count

        n = len(locations)
        assert isinstance(g, GeoDataFrame)
        expected = GeoSeries(
            [Point(float(x) + 0.5, float(x)) for x in range(n)], crs="EPSG:4326"
        )
        assert_geoseries_equal(expected, g["geometry"])
        assert_series_equal(g["address"], pd.Series(locations, name="address"))


def test_reverse(locations, points):
    from geopy.geocoders import Photon

    for provider in ["photon", Photon]:
        with mock.patch("geopy.geocoders.Photon.reverse", ReverseMock()) as m:
            g = reverse_geocode(points, provider=provider, timeout=2)
            assert len(points) == m.call_count

        assert isinstance(g, GeoDataFrame)

        expected = GeoSeries(points, crs="EPSG:4326")
        assert_geoseries_equal(expected, g["geometry"])
        address = pd.Series(
            ["address" + str(x) for x in range(len(points))], name="address"
        )
        assert_series_equal(g["address"], address)
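
The mocks above patch geopy's Photon geocoder so these tests run offline. With geopy installed and network access, the real call the tests stand in for looks like this (photon is the default provider the tests also exercise):

import geopandas.tools

result = geopandas.tools.geocode(
    ["260 Broadway, New York, NY"], provider="photon", timeout=10
)
# result: a GeoDataFrame with 'geometry' (EPSG:4326) and 'address' columns
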
Some files were not shown because too many files have changed in this diff