that's too much!
This commit is contained in:
28
.venv/lib/python3.12/site-packages/geopandas/__init__.py
Normal file
28
.venv/lib/python3.12/site-packages/geopandas/__init__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from geopandas._config import options
|
||||
|
||||
from geopandas.geoseries import GeoSeries
|
||||
from geopandas.geodataframe import GeoDataFrame
|
||||
from geopandas.array import points_from_xy
|
||||
|
||||
from geopandas.io.file import _read_file as read_file
|
||||
from geopandas.io.arrow import _read_parquet as read_parquet
|
||||
from geopandas.io.arrow import _read_feather as read_feather
|
||||
from geopandas.io.sql import _read_postgis as read_postgis
|
||||
from geopandas.tools import sjoin, sjoin_nearest
|
||||
from geopandas.tools import overlay
|
||||
from geopandas.tools._show_versions import show_versions
|
||||
from geopandas.tools import clip
|
||||
|
||||
|
||||
import geopandas.datasets
|
||||
|
||||
|
||||
# make the interactive namespace easier to use
|
||||
# for `from geopandas import *` demos.
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from . import _version
|
||||
|
||||
__version__ = _version.get_versions()["version"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
251
.venv/lib/python3.12/site-packages/geopandas/_compat.py
Normal file
251
.venv/lib/python3.12/site-packages/geopandas/_compat.py
Normal file
@@ -0,0 +1,251 @@
|
||||
import contextlib
|
||||
from packaging.version import Version
|
||||
import importlib
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import shapely
|
||||
import shapely.geos
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# pandas compat
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
PANDAS_GE_14 = Version(pd.__version__) >= Version("1.4.0rc0")
|
||||
PANDAS_GE_15 = Version(pd.__version__) >= Version("1.5.0")
|
||||
PANDAS_GE_20 = Version(pd.__version__) >= Version("2.0.0")
|
||||
PANDAS_GE_21 = Version(pd.__version__) >= Version("2.1.0")
|
||||
PANDAS_GE_22 = Version(pd.__version__) >= Version("2.2.0.dev0")
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Shapely / PyGEOS compat
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
SHAPELY_GE_182 = Version(shapely.__version__) >= Version("1.8.2")
|
||||
SHAPELY_GE_20 = Version(shapely.__version__) >= Version("2.0.0.dev0")
|
||||
SHAPELY_G_20a1 = Version(shapely.__version__) > Version("2.0a1")
|
||||
|
||||
GEOS_GE_390 = shapely.geos.geos_version >= (3, 9, 0)
|
||||
|
||||
|
||||
HAS_PYGEOS = None
|
||||
USE_PYGEOS = None
|
||||
USE_SHAPELY_20 = None
|
||||
PYGEOS_SHAPELY_COMPAT = None
|
||||
|
||||
PYGEOS_GE_09 = None
|
||||
PYGEOS_GE_010 = None
|
||||
|
||||
INSTALL_PYGEOS_ERROR = "To use PyGEOS within GeoPandas, you need to install PyGEOS: \
|
||||
'conda install pygeos' or 'pip install pygeos'"
|
||||
|
||||
try:
|
||||
import pygeos
|
||||
|
||||
# only automatically use pygeos if version is high enough
|
||||
if Version(pygeos.__version__) >= Version("0.8"):
|
||||
HAS_PYGEOS = True
|
||||
PYGEOS_GE_09 = Version(pygeos.__version__) >= Version("0.9")
|
||||
PYGEOS_GE_010 = Version(pygeos.__version__) >= Version("0.10")
|
||||
else:
|
||||
warnings.warn(
|
||||
"The installed version of PyGEOS is too old ({0} installed, 0.8 required),"
|
||||
" and thus GeoPandas will not use PyGEOS.".format(pygeos.__version__),
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
HAS_PYGEOS = False
|
||||
except ImportError:
|
||||
HAS_PYGEOS = False
|
||||
|
||||
|
||||
def set_use_pygeos(val=None):
|
||||
"""
|
||||
Set the global configuration on whether to use PyGEOS or not.
|
||||
|
||||
The default is use PyGEOS if it is installed. This can be overridden
|
||||
with an environment variable USE_PYGEOS (this is only checked at
|
||||
first import, cannot be changed during interactive session).
|
||||
|
||||
Alternatively, pass a value here to force a True/False value.
|
||||
"""
|
||||
global USE_PYGEOS
|
||||
global USE_SHAPELY_20
|
||||
global PYGEOS_SHAPELY_COMPAT
|
||||
|
||||
env_use_pygeos = os.getenv("USE_PYGEOS", None)
|
||||
|
||||
if val is not None:
|
||||
USE_PYGEOS = bool(val)
|
||||
else:
|
||||
if USE_PYGEOS is None:
|
||||
if SHAPELY_GE_20:
|
||||
USE_PYGEOS = False
|
||||
else:
|
||||
USE_PYGEOS = HAS_PYGEOS
|
||||
|
||||
if env_use_pygeos is not None:
|
||||
USE_PYGEOS = bool(int(env_use_pygeos))
|
||||
|
||||
# validate the pygeos version
|
||||
if USE_PYGEOS:
|
||||
try:
|
||||
import pygeos
|
||||
|
||||
# validate the pygeos version
|
||||
if not Version(pygeos.__version__) >= Version("0.8"):
|
||||
if SHAPELY_GE_20:
|
||||
USE_PYGEOS = False
|
||||
warnings.warn(
|
||||
"The PyGEOS version is too old, and Shapely >= 2 is installed, "
|
||||
"thus using Shapely by default and not PyGEOS.",
|
||||
stacklevel=2,
|
||||
)
|
||||
else:
|
||||
raise ImportError(
|
||||
"PyGEOS >= 0.8 is required, version {0} is installed".format(
|
||||
pygeos.__version__
|
||||
)
|
||||
)
|
||||
|
||||
# Check whether Shapely and PyGEOS use the same GEOS version.
|
||||
# Based on PyGEOS from_shapely implementation.
|
||||
|
||||
from shapely.geos import geos_version_string as shapely_geos_version
|
||||
from pygeos import geos_capi_version_string
|
||||
|
||||
# shapely has something like: "3.6.2-CAPI-1.10.2 4d2925d6"
|
||||
# pygeos has something like: "3.6.2-CAPI-1.10.2"
|
||||
if not shapely_geos_version.startswith(geos_capi_version_string):
|
||||
warnings.warn(
|
||||
"The Shapely GEOS version ({}) is incompatible with the GEOS "
|
||||
"version PyGEOS was compiled with ({}). Conversions between both "
|
||||
"will be slow.".format(
|
||||
shapely_geos_version, geos_capi_version_string
|
||||
),
|
||||
stacklevel=2,
|
||||
)
|
||||
PYGEOS_SHAPELY_COMPAT = False
|
||||
else:
|
||||
PYGEOS_SHAPELY_COMPAT = True
|
||||
|
||||
except ImportError:
|
||||
raise ImportError(INSTALL_PYGEOS_ERROR)
|
||||
|
||||
if USE_PYGEOS:
|
||||
warnings.warn(
|
||||
"GeoPandas is set to use PyGEOS over Shapely. PyGEOS support is deprecated"
|
||||
"and will be removed in GeoPandas 1.0, released in the Q1 of 2024. "
|
||||
"Please migrate to Shapely 2.0 "
|
||||
"(https://geopandas.org/en/stable/docs/user_guide/pygeos_to_shapely.html).",
|
||||
DeprecationWarning,
|
||||
stacklevel=6,
|
||||
)
|
||||
|
||||
USE_SHAPELY_20 = (not USE_PYGEOS) and SHAPELY_GE_20
|
||||
|
||||
|
||||
set_use_pygeos()
|
||||
|
||||
|
||||
# compat related to deprecation warnings introduced in Shapely 1.8
|
||||
# -> creating a numpy array from a list-like of Multi-part geometries,
|
||||
# although doing the correct thing (not expanding in its parts), still raises
|
||||
# the warning about iteration being deprecated
|
||||
# This adds a context manager to explicitly ignore this warning
|
||||
|
||||
|
||||
try:
|
||||
from shapely.errors import ShapelyDeprecationWarning as shapely_warning
|
||||
except ImportError:
|
||||
shapely_warning = None
|
||||
|
||||
|
||||
if shapely_warning is not None and not SHAPELY_GE_20:
|
||||
|
||||
@contextlib.contextmanager
|
||||
def ignore_shapely2_warnings():
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", "Iteration|The array interface|__len__", shapely_warning
|
||||
)
|
||||
yield
|
||||
|
||||
elif (Version(np.__version__) >= Version("1.21")) and not SHAPELY_GE_20:
|
||||
|
||||
@contextlib.contextmanager
|
||||
def ignore_shapely2_warnings():
|
||||
with warnings.catch_warnings():
|
||||
# warning from numpy for existing Shapely releases (this is fixed
|
||||
# with Shapely 1.8)
|
||||
warnings.filterwarnings(
|
||||
"ignore", "An exception was ignored while fetching", DeprecationWarning
|
||||
)
|
||||
yield
|
||||
|
||||
else:
|
||||
|
||||
@contextlib.contextmanager
|
||||
def ignore_shapely2_warnings():
|
||||
yield
|
||||
|
||||
|
||||
def import_optional_dependency(name: str, extra: str = ""):
|
||||
"""
|
||||
Import an optional dependency.
|
||||
|
||||
Adapted from pandas.compat._optional::import_optional_dependency
|
||||
|
||||
Raises a formatted ImportError if the module is not present.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : str
|
||||
The module name.
|
||||
extra : str
|
||||
Additional text to include in the ImportError message.
|
||||
Returns
|
||||
-------
|
||||
module
|
||||
"""
|
||||
msg = """Missing optional dependency '{name}'. {extra} "
|
||||
"Use pip or conda to install {name}.""".format(
|
||||
name=name, extra=extra
|
||||
)
|
||||
|
||||
if not isinstance(name, str):
|
||||
raise ValueError(
|
||||
"Invalid module name: '{name}'; must be a string".format(name=name)
|
||||
)
|
||||
|
||||
try:
|
||||
module = importlib.import_module(name)
|
||||
|
||||
except ImportError:
|
||||
raise ImportError(msg) from None
|
||||
|
||||
return module
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# RTree compat
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
HAS_RTREE = None
|
||||
RTREE_GE_094 = False
|
||||
try:
|
||||
import rtree # noqa: F401
|
||||
|
||||
HAS_RTREE = True
|
||||
except ImportError:
|
||||
HAS_RTREE = False
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# pyproj compat
|
||||
# -----------------------------------------------------------------------------
|
||||
144
.venv/lib/python3.12/site-packages/geopandas/_config.py
Normal file
144
.venv/lib/python3.12/site-packages/geopandas/_config.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
Lightweight options machinery.
|
||||
|
||||
Based on https://github.com/topper-123/optioneer, but simplified (don't deal
|
||||
with nested options, deprecated options, ..), just the attribute-style dict
|
||||
like holding the options and giving a nice repr.
|
||||
"""
|
||||
from collections import namedtuple
|
||||
import textwrap
|
||||
|
||||
|
||||
Option = namedtuple("Option", "key default_value doc validator callback")
|
||||
|
||||
|
||||
class Options(object):
|
||||
"""Provide attribute-style access to configuration dict."""
|
||||
|
||||
def __init__(self, options):
|
||||
super().__setattr__("_options", options)
|
||||
# populate with default values
|
||||
config = {}
|
||||
for key, option in options.items():
|
||||
config[key] = option.default_value
|
||||
|
||||
super().__setattr__("_config", config)
|
||||
|
||||
def __setattr__(self, key, value):
|
||||
# you can't set new keys
|
||||
if key in self._config:
|
||||
option = self._options[key]
|
||||
if option.validator:
|
||||
option.validator(value)
|
||||
self._config[key] = value
|
||||
if option.callback:
|
||||
option.callback(key, value)
|
||||
else:
|
||||
msg = "You can only set the value of existing options"
|
||||
raise AttributeError(msg)
|
||||
|
||||
def __getattr__(self, key):
|
||||
try:
|
||||
return self._config[key]
|
||||
except KeyError:
|
||||
raise AttributeError("No such option")
|
||||
|
||||
def __dir__(self):
|
||||
return list(self._config.keys())
|
||||
|
||||
def __repr__(self):
|
||||
cls = self.__class__.__name__
|
||||
description = ""
|
||||
for key, option in self._options.items():
|
||||
descr = "{key}: {cur!r} [default: {default!r}]\n".format(
|
||||
key=key, cur=self._config[key], default=option.default_value
|
||||
)
|
||||
description += descr
|
||||
|
||||
if option.doc:
|
||||
doc_text = "\n".join(textwrap.wrap(option.doc, width=70))
|
||||
else:
|
||||
doc_text = "No description available."
|
||||
doc_text = textwrap.indent(doc_text, prefix=" ")
|
||||
description += doc_text + "\n"
|
||||
space = "\n "
|
||||
description = description.replace("\n", space)
|
||||
return "{}({}{})".format(cls, space, description)
|
||||
|
||||
|
||||
def _validate_display_precision(value):
|
||||
if value is not None:
|
||||
if not isinstance(value, int) or not (0 <= value <= 16):
|
||||
raise ValueError("Invalid value, needs to be an integer [0-16]")
|
||||
|
||||
|
||||
display_precision = Option(
|
||||
key="display_precision",
|
||||
default_value=None,
|
||||
doc=(
|
||||
"The precision (maximum number of decimals) of the coordinates in "
|
||||
"the WKT representation in the Series/DataFrame display. "
|
||||
"By default (None), it tries to infer and use 3 decimals for projected "
|
||||
"coordinates and 5 decimals for geographic coordinates."
|
||||
),
|
||||
validator=_validate_display_precision,
|
||||
callback=None,
|
||||
)
|
||||
|
||||
|
||||
def _validate_bool(value):
|
||||
if not isinstance(value, bool):
|
||||
raise TypeError("Expected bool value, got {0}".format(type(value)))
|
||||
|
||||
|
||||
def _default_use_pygeos():
|
||||
import geopandas._compat as compat
|
||||
|
||||
return compat.USE_PYGEOS
|
||||
|
||||
|
||||
def _callback_use_pygeos(key, value):
|
||||
assert key == "use_pygeos"
|
||||
import geopandas._compat as compat
|
||||
|
||||
compat.set_use_pygeos(value)
|
||||
|
||||
|
||||
use_pygeos = Option(
|
||||
key="use_pygeos",
|
||||
default_value=_default_use_pygeos(),
|
||||
doc=(
|
||||
"Whether to use PyGEOS to speed up spatial operations. The default is True "
|
||||
"if PyGEOS is installed, and follows the USE_PYGEOS environment variable "
|
||||
"if set."
|
||||
),
|
||||
validator=_validate_bool,
|
||||
callback=_callback_use_pygeos,
|
||||
)
|
||||
|
||||
|
||||
def _validate_io_engine(value):
|
||||
if value is not None:
|
||||
if value not in ("pyogrio", "fiona"):
|
||||
raise ValueError(f"Expected 'pyogrio' or 'fiona', got '{value}'")
|
||||
|
||||
|
||||
io_engine = Option(
|
||||
key="io_engine",
|
||||
default_value=None,
|
||||
doc=(
|
||||
"The default engine for ``read_file`` and ``to_file``. "
|
||||
"Options are 'pyogrio' and 'fiona'."
|
||||
),
|
||||
validator=_validate_io_engine,
|
||||
callback=None,
|
||||
)
|
||||
|
||||
|
||||
options = Options(
|
||||
{
|
||||
"display_precision": display_precision,
|
||||
"use_pygeos": use_pygeos,
|
||||
"io_engine": io_engine,
|
||||
}
|
||||
)
|
||||
51
.venv/lib/python3.12/site-packages/geopandas/_decorator.py
Normal file
51
.venv/lib/python3.12/site-packages/geopandas/_decorator.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from textwrap import dedent
|
||||
from typing import Callable, Union
|
||||
|
||||
|
||||
# doc decorator function ported with modifications from Pandas
|
||||
# https://github.com/pandas-dev/pandas/blob/master/pandas/util/_decorators.py
|
||||
|
||||
|
||||
def doc(*docstrings: Union[str, Callable], **params) -> Callable:
|
||||
"""
|
||||
A decorator take docstring templates, concatenate them and perform string
|
||||
substitution on it.
|
||||
This decorator will add a variable "_docstring_components" to the wrapped
|
||||
callable to keep track the original docstring template for potential usage.
|
||||
If it should be consider as a template, it will be saved as a string.
|
||||
Otherwise, it will be saved as callable, and later user __doc__ and dedent
|
||||
to get docstring.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
*docstrings : str or callable
|
||||
The string / docstring / docstring template to be appended in order
|
||||
after default docstring under callable.
|
||||
**params
|
||||
The string which would be used to format docstring template.
|
||||
"""
|
||||
|
||||
def decorator(decorated: Callable) -> Callable:
|
||||
# collecting docstring and docstring templates
|
||||
docstring_components: list[Union[str, Callable]] = []
|
||||
if decorated.__doc__:
|
||||
docstring_components.append(dedent(decorated.__doc__))
|
||||
|
||||
for docstring in docstrings:
|
||||
if hasattr(docstring, "_docstring_components"):
|
||||
docstring_components.extend(docstring._docstring_components)
|
||||
elif isinstance(docstring, str) or docstring.__doc__:
|
||||
docstring_components.append(docstring)
|
||||
|
||||
# formatting templates and concatenating docstring
|
||||
decorated.__doc__ = "".join(
|
||||
component.format(**params)
|
||||
if isinstance(component, str)
|
||||
else dedent(component.__doc__ or "")
|
||||
for component in docstring_components
|
||||
)
|
||||
|
||||
decorated._docstring_components = docstring_components
|
||||
return decorated
|
||||
|
||||
return decorator
|
||||
1309
.venv/lib/python3.12/site-packages/geopandas/_vectorized.py
Normal file
1309
.venv/lib/python3.12/site-packages/geopandas/_vectorized.py
Normal file
File diff suppressed because it is too large
Load Diff
21
.venv/lib/python3.12/site-packages/geopandas/_version.py
Normal file
21
.venv/lib/python3.12/site-packages/geopandas/_version.py
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
# This file was generated by 'versioneer.py' (0.29) from
|
||||
# revision-control system data, or from the parent directory name of an
|
||||
# unpacked source archive. Distribution tarballs contain a pre-generated copy
|
||||
# of this file.
|
||||
|
||||
import json
|
||||
|
||||
version_json = '''
|
||||
{
|
||||
"date": "2023-11-11T10:29:16+0100",
|
||||
"dirty": false,
|
||||
"error": null,
|
||||
"full-revisionid": "9a9f0974db087ce303b94bfbeabc8ea136be0914",
|
||||
"version": "0.14.1"
|
||||
}
|
||||
''' # END VERSION_JSON
|
||||
|
||||
|
||||
def get_versions():
|
||||
return json.loads(version_json)
|
||||
1555
.venv/lib/python3.12/site-packages/geopandas/array.py
Normal file
1555
.venv/lib/python3.12/site-packages/geopandas/array.py
Normal file
File diff suppressed because it is too large
Load Diff
4508
.venv/lib/python3.12/site-packages/geopandas/base.py
Normal file
4508
.venv/lib/python3.12/site-packages/geopandas/base.py
Normal file
File diff suppressed because it is too large
Load Diff
27
.venv/lib/python3.12/site-packages/geopandas/conftest.py
Normal file
27
.venv/lib/python3.12/site-packages/geopandas/conftest.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
import geopandas
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def add_geopandas(doctest_namespace):
|
||||
doctest_namespace["geopandas"] = geopandas
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
config.addinivalue_line(
|
||||
"markers",
|
||||
"skip_no_sindex: skips the tests if there is no spatial index backend",
|
||||
)
|
||||
|
||||
|
||||
try:
|
||||
geopandas.sindex._get_sindex_class()
|
||||
has_sindex_backend = True
|
||||
except ImportError:
|
||||
has_sindex_backend = False
|
||||
|
||||
|
||||
def pytest_runtest_setup(item):
|
||||
skip_no_sindex = any(mark for mark in item.iter_markers(name="skip_no_sindex"))
|
||||
if skip_no_sindex and not has_sindex_backend:
|
||||
pytest.skip("Skipped because there is no spatial index backend available")
|
||||
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
|
||||
from warnings import warn
|
||||
|
||||
__all__ = ["available", "get_path"]
|
||||
|
||||
_module_path = os.path.dirname(__file__)
|
||||
_available_dir = [p for p in next(os.walk(_module_path))[1] if not p.startswith("__")]
|
||||
_available_zip = {"nybb": "nybb_16a.zip"}
|
||||
available = _available_dir + list(_available_zip.keys())
|
||||
|
||||
|
||||
def get_path(dataset):
|
||||
"""
|
||||
Get the path to the data file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dataset : str
|
||||
The name of the dataset. See ``geopandas.datasets.available`` for
|
||||
all options.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> geopandas.datasets.get_path("naturalearth_lowres") # doctest: +SKIP
|
||||
'.../python3.8/site-packages/geopandas/datasets/\
|
||||
naturalearth_lowres/naturalearth_lowres.shp'
|
||||
|
||||
"""
|
||||
ne_message = "https://www.naturalearthdata.com/downloads/110m-cultural-vectors/."
|
||||
nybb_message = (
|
||||
"the geodatasets package.\n\nfrom geodatasets import get_path\n"
|
||||
"path_to_file = get_path('nybb')\n"
|
||||
)
|
||||
depr_warning = (
|
||||
"The geopandas.dataset module is deprecated and will be removed in GeoPandas "
|
||||
f"1.0. You can get the original '{dataset}' data from "
|
||||
f"{ne_message if 'natural' in dataset else nybb_message}"
|
||||
)
|
||||
|
||||
if dataset in _available_dir:
|
||||
warn(
|
||||
depr_warning,
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return os.path.abspath(os.path.join(_module_path, dataset, dataset + ".shp"))
|
||||
elif dataset in _available_zip:
|
||||
warn(
|
||||
depr_warning,
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
fpath = os.path.abspath(os.path.join(_module_path, _available_zip[dataset]))
|
||||
return "zip://" + fpath
|
||||
else:
|
||||
msg = "The dataset '{data}' is not available. ".format(data=dataset)
|
||||
msg += "Available datasets are {}".format(", ".join(available))
|
||||
raise ValueError(msg)
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,336 @@
|
||||
|
||||
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" dir="ltr" lang="en-US">
|
||||
|
||||
<head profile="http://gmpg.org/xfn/11">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
|
||||
<title>Populated Places | Natural Earth</title>
|
||||
|
||||
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
|
||||
<link rel="alternate" type="application/rss+xml" title="Natural Earth RSS Feed" href="http://www.naturalearthdata.com/feed/" />
|
||||
<link rel="pingback" href="http://www.naturalearthdata.com/xmlrpc.php" />
|
||||
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/themes/NEV/includes/js/suckerfish.js"></script>
|
||||
<!--[if lt IE 7]>
|
||||
<script src="http://ie7-js.googlecode.com/svn/version/2.0(beta3)/IE7.js" type="text/javascript"></script>
|
||||
<script defer="defer" type="text/javascript" src="http://www.naturalearthdata.com/wp-content/themes/NEV/includes/js/pngfix.js"></script>
|
||||
<![endif]-->
|
||||
<link rel="stylesheet" href="http://www.naturalearthdata.com/wp-content/themes/NEV/style.css" type="text/css" media="screen" />
|
||||
|
||||
<meta name='Admin Management Xtended WordPress plugin' content='2.1.1' />
|
||||
<link rel="alternate" type="application/rss+xml" title="Natural Earth » Populated Places Comments Feed" href="http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-populated-places/feed/" />
|
||||
<link rel='stylesheet' id='sociable-front-css-css' href='http://www.naturalearthdata.com/wp-content/plugins/sociable/sociable.css?ver=2.9.2' type='text/css' media='' />
|
||||
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://www.naturalearthdata.com/xmlrpc.php?rsd" />
|
||||
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://www.naturalearthdata.com/wp-includes/wlwmanifest.xml" />
|
||||
<link rel='index' title='Natural Earth' href='http://www.naturalearthdata.com' />
|
||||
<link rel='start' title='Welcome to the Natural Earth Blog' href='http://www.naturalearthdata.com/blog/miscellaneous/test/' />
|
||||
<link rel='prev' title='Antarctic Ice Shelves' href='http://www.naturalearthdata.com/downloads/10m-physical-vectors/10m-antarctic-ice-shelves/' />
|
||||
<link rel='next' title='Admin 1 – States, Provinces' href='http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-1-states-provinces/' />
|
||||
<meta name="generator" content="WordPress 2.9.2" />
|
||||
|
||||
<!-- All in One SEO Pack 1.6.10.2 by Michael Torbert of Semper Fi Web Design[309,448] -->
|
||||
<meta name="description" content="City and town points, from Tokyo to Wasilla, Cairo to Kandahar About Point symbols with name attributes. Includes all admin-0 and many" />
|
||||
<link rel="canonical" href="http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-populated-places/" />
|
||||
<!-- /all in one seo pack -->
|
||||
|
||||
<!-- begin gallery scripts -->
|
||||
<link rel="stylesheet" href="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/css/jd.gallery.css.php" type="text/css" media="screen" charset="utf-8"/>
|
||||
<link rel="stylesheet" href="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/css/jd.gallery.css" type="text/css" media="screen" charset="utf-8"/>
|
||||
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/scripts/mootools.v1.11.js"></script>
|
||||
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/scripts/jd.gallery.js.php"></script>
|
||||
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/scripts/jd.gallery.transitions.js"></script>
|
||||
<!-- end gallery scripts -->
|
||||
<style type="text/css">.broken_link, a.broken_link {
|
||||
text-decoration: line-through;
|
||||
}</style><link href="http://www.naturalearthdata.com/wp-content/themes/NEV/css/default.css" rel="stylesheet" type="text/css" />
|
||||
<style type="text/css">.recentcomments a{display:inline !important;padding:0 !important;margin:0 !important;}</style>
|
||||
<!--[if lte IE 7]>
|
||||
<link rel="stylesheet" type="text/css" href="http://www.naturalearthdata.com/wp-content/themes/NEV/ie.css" />
|
||||
<![endif]-->
|
||||
<script src="http://www.naturalearthdata.com/wp-content/themes/NEV/js/jquery-1.2.6.min.js" type="text/javascript" charset="utf-8"></script>
|
||||
<script>
|
||||
jQuery.noConflict();
|
||||
</script>
|
||||
<script type="text/javascript" charset="utf-8">
|
||||
$(function(){
|
||||
var tabContainers = $('div#maintabdiv > div');
|
||||
tabContainers.hide().filter('#comments').show();
|
||||
|
||||
$('div#maintabdiv ul#tabnav a').click(function () {
|
||||
tabContainers.hide();
|
||||
tabContainers.filter(this.hash).show();
|
||||
$('div#maintabdiv ul#tabnav a').removeClass('current');
|
||||
$(this).addClass('current');
|
||||
return false;
|
||||
}).filter('#comments').click();
|
||||
|
||||
|
||||
});
|
||||
</script>
|
||||
|
||||
<script type="text/javascript" language="javascript" src="http://www.naturalearthdata.com/dataTables/media/js/jquery.dataTables.js"></script>
|
||||
<script type="text/javascript" charset="utf-8">
|
||||
$(document).ready(function() {
|
||||
$('#ne_table').dataTable();
|
||||
} );
|
||||
</script>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<div id="page">
|
||||
<div id="header">
|
||||
<div id="headerimg">
|
||||
<h1><a href="http://www.naturalearthdata.com/"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/nev_logo.png" alt="Natural Earth title="Natural Earth" /></a></h1>
|
||||
<div class="description">Free vector and raster map data at 1:10m, 1:50m, and 1:110m scales</div>
|
||||
<div class="header_search"><form method="get" id="searchform" action="http://www.naturalearthdata.com/">
|
||||
<label class="hidden" for="s">Search for:</label>
|
||||
<div><input type="text" value="" name="s" id="s" />
|
||||
<input type="submit" id="searchsubmit" value="Search" />
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<!--<div class="translate_panel" style="align:top; margin-left:650px; top:50px;">
|
||||
<div id="google_translate_element" style="float:left;"></div>
|
||||
<script>
|
||||
function googleTranslateElementInit() {
|
||||
new google.translate.TranslateElement({
|
||||
pageLanguage: 'en'
|
||||
}, 'google_translate_element');
|
||||
}
|
||||
</script>
|
||||
<script src="http://translate.google.com/translate_a/element.js?cb=googleTranslateElementInit"></script>
|
||||
</div>-->
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div id="pagemenu" style="align:bottom;">
|
||||
<ul id="page-list" class="clearfix"><li class="page_item page-item-4"><a href="http://www.naturalearthdata.com" title="Home">Home</a></li>
|
||||
<li class="page_item page-item-10"><a href="http://www.naturalearthdata.com/features/" title="Features">Features</a></li>
|
||||
<li class="page_item page-item-12"><a href="http://www.naturalearthdata.com/downloads/" title="Downloads">Downloads</a></li>
|
||||
<li class="page_item page-item-6 current_page_parent"><a href="http://www.naturalearthdata.com/blog/" title="Blog">Blog</a></li>
|
||||
<li class="page_item page-item-14"><a href="http://www.naturalearthdata.com/forums" title="Forums">Forums</a></li>
|
||||
<li class="page_item page-item-366"><a href="http://www.naturalearthdata.com/corrections" title="Corrections">Corrections</a></li>
|
||||
<li class="page_item page-item-16"><a href="http://www.naturalearthdata.com/about/" title="About">About</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<hr /> <div id="main">
|
||||
<div id="content" class="narrowcolumn">
|
||||
|
||||
|
||||
« <a href="http://www.naturalearthdata.com/downloads/10m-cultural-vectors/">1:10m Cultural Vectors</a>
|
||||
<div class="post" id="post-472">
|
||||
<h2>Populated Places</h2>
|
||||
<div class="entry">
|
||||
<div class="downloadPromoBlock" style="float: left;">
|
||||
<div style="float: left; width: 170px;"><img class="alignnone size-full wp-image-1918" title="pop_thumb" src="http://www.naturalearthdata.com/wp-content/uploads/2009/09/pop_thumb.png" alt="pop_thumb" width="150" height="97" /></div>
|
||||
<div style="float: left; width: 410px;"><em>City and town points, from Tokyo to Wasilla, Cairo to Kandahar</em>
|
||||
<div class="download-link-div">
|
||||
<a class="download-link" rel="nofollow" title="Downloaded 26754 times (Shapefile, geoDB, or TIFF format)" onclick="if (window.urchinTracker) urchinTracker ('http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip');" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_populated_places.zip');">Download populated places</a> <span class="download-link-span">(1.51 MB) version 2.0.0</span>
|
||||
</div> <div class="download-link-div">
|
||||
<a class="download-link" rel="nofollow" title="Downloaded 2515 times (Shapefile, geoDB, or TIFF format)" onclick="if (window.urchinTracker) urchinTracker ('http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip');" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_populated_places_simple.zip');">Download simple (less columns)</a> <span class="download-link-span">(719.87 KB) version 2.0.0</span>
|
||||
</div>
|
||||
<span id="more-472"></span></div>
|
||||
</div>
|
||||
<div class="downloadMainBlock" style="float: left;">
|
||||
<p><strong>About</strong></p>
|
||||
<p>Point symbols with name attributes. Includes all admin-0 and many admin-1 capitals, major cities and towns, plus a sampling of smaller towns in sparsely inhabited regions. We favor regional significance over population census in determining our selection of places. Use the scale rankings to filter the number of towns that appear on your map.</p>
|
||||
<p><img class="alignnone size-full wp-image-1920" title="pop_banner" src="http://www.naturalearthdata.com/wp-content/uploads/2009/09/pop_banner.png" alt="pop_banner" width="580" height="150" /></p>
|
||||
<p><a href="http://www.ornl.gov/sci/landscan/" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.ornl.gov/sci/landscan/');">LandScan</a> derived population estimates are provided for 90% of our cities. Those lacking population estimates are often in sparsely inhabited areas. We provide a range of population values that account for the total “metropolitan” population rather than its administrative boundary population. Use the PopMax column to size your town labels. Starting in version 1.1, popMax has been throttled down to the UN estimated metro population for the ~500 largest urban areas in the world. This affects towns in China, India, and parts of Africa where our Landscan counting method usually overestimated.</p>
|
||||
<p>Population estimates were derived from the LANDSCAN dataset maintained and distributed by the Oak Ridge National Laboratory. These data were converted from raster to vector and pixels with fewer than 200 persons per square kilometer were removed from the dataset as they were classified as rural. Once urban pixels were selected, these pixels were aggregated into contiguous units. Concurrently Thiessen polygons were created based on the selected city points. The Thiessen polygons were used to intersect the contiguous city boundaries to produce bounded areas for the cities. As a result, our estimates capture metropolitan and micropolitan populations per city regardless of administrative units.</p>
|
||||
<p>Once intersected, the contiguous polygons were recalculated, using areal interpolation assuming uniform population distribution within each pixel, to determine the population total. This process was conducted multiple times, for each scale level, to produce population estimates for each city at nested scales of 1:300 million, 1:110 million, 1:50 million, 1:20 million, and 1:10 million. </p>
|
||||
<div class="download-link-div">
|
||||
<a class="download-link" rel="nofollow" title="Downloaded 481 times (Shapefile, geoDB, or TIFF format)" onclick="if (window.urchinTracker) urchinTracker ('http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_urban_areas_landscan.zip');" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_urban_areas_landscan.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_urban_areas_landscan.zip');">Download landscan urban areas</a> <span class="download-link-span">(13.01 MB) version 2.0.0</span>
|
||||
</div>
|
||||
<p><strong>Population ranks</strong></p>
|
||||
<p>Are calculated as rank_max and rank_min using this general VB formula that can be pasted into ArcMap Field Calculator advanced area (set your output to x):</p>
|
||||
<blockquote><p>
|
||||
a = [pop_max]</p>
|
||||
<p>if( a > 10000000 ) then
|
||||
x = 14
|
||||
elseif( a > 5000000 ) then
|
||||
x = 13
|
||||
elseif( a > 1000000 ) then
|
||||
x = 12
|
||||
elseif( a > 500000 ) then
|
||||
x = 11
|
||||
elseif( a > 200000 ) then
|
||||
x = 10
|
||||
elseif( a > 100000 ) then
|
||||
x = 9
|
||||
elseif( a > 50000 ) then
|
||||
x = 8
|
||||
elseif( a > 20000 ) then
|
||||
x = 7
|
||||
elseif( a > 10000 ) then
|
||||
x = 6
|
||||
elseif( a > 5000 ) then
|
||||
x = 5
|
||||
elseif( a > 2000 ) then
|
||||
x = 4
|
||||
elseif( a > 1000 ) then
|
||||
x = 3
|
||||
elseif( a > 200 ) then
|
||||
x = 2
|
||||
elseif( a > 0 ) then
|
||||
x = 1
|
||||
else
|
||||
x = 0
|
||||
end if</p></blockquote>
|
||||
<p><strong>Issues</strong></p>
|
||||
<p>While we don’t want to show every admin-1 capital, for those countries where we show most admin-1 capitals, we should have a complete set. If you find we are missing one, please log it in the Cx tool at right.</p>
|
||||
<p><strong>Version History</strong></p>
|
||||
<ul>
|
||||
<li>
|
||||
<a rel="nofollow" title="Download version 2.0.0 of ne_10m_populated_places.zip" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_populated_places.zip');">2.0.0</a>
|
||||
</li>
|
||||
<li>
|
||||
1.4.0
|
||||
</li>
|
||||
<li>
|
||||
1.3.0
|
||||
</li>
|
||||
<li>
|
||||
1.1.0
|
||||
</li>
|
||||
<li>
|
||||
0.9.0
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p><a href="https://github.com/nvkelso/natural-earth-vector/blob/master/CHANGELOG" onclick="javascript:pageTracker._trackPageview('/outbound/article/https://github.com/nvkelso/natural-earth-vector/blob/master/CHANGELOG');">The master changelog is available on Github »</a>
|
||||
</div>
|
||||
|
||||
<div class="sociable">
|
||||
<div class="sociable_tagline">
|
||||
<strong>Share and Enjoy:</strong>
|
||||
</div>
|
||||
<ul>
|
||||
<li class="sociablefirst"><a rel="nofollow" target="_blank" href="http://twitter.com/home?status=Populated%20Places%20-%20http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://twitter.com/home?status=Populated%20Places%20-%20http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F');" title="Twitter"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Twitter" alt="Twitter" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-343px -55px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://www.facebook.com/share.php?u=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&t=Populated%20Places" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.facebook.com/share.php?u=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&t=Populated%20Places');" title="Facebook"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Facebook" alt="Facebook" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-343px -1px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://digg.com/submit?phase=2&url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&bodytext=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://digg.com/submit?phase=2&url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&bodytext=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="Digg"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Digg" alt="Digg" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-235px -1px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://delicious.com/post?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&notes=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://delicious.com/post?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&notes=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="del.icio.us"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="del.icio.us" alt="del.icio.us" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-199px -1px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://www.google.com/bookmarks/mark?op=edit&bkmk=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&annotation=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.google.com/bookmarks/mark?op=edit&bkmk=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&annotation=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="Google Bookmarks"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Google Bookmarks" alt="Google Bookmarks" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-91px -19px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://slashdot.org/bookmark.pl?title=Populated%20Places&url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://slashdot.org/bookmark.pl?title=Populated%20Places&url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F');" title="Slashdot"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Slashdot" alt="Slashdot" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-145px -55px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.stumbleupon.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places');" title="StumbleUpon"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="StumbleUpon" alt="StumbleUpon" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-217px -55px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="mailto:?subject=Populated%20Places&body=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F" title="email"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="email" alt="email" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-325px -1px" class="sociable-hovers" /></a></li>
|
||||
<li><a rel="nofollow" target="_blank" href="http://www.linkedin.com/shareArticle?mini=true&url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&source=Natural+Earth+Free+vector+and+raster+map+data+at+1%3A10m%2C+1%3A50m%2C+and+1%3A110m+scales&summary=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.linkedin.com/shareArticle?mini=true&url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places&source=Natural+Earth+Free+vector+and+raster+map+data+at+1%3A10m%2C+1%3A50m%2C+and+1%3A110m+scales&summary=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="LinkedIn"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="LinkedIn" alt="LinkedIn" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-1px -37px" class="sociable-hovers" /></a></li>
|
||||
<li class="sociablelast"><a rel="nofollow" target="_blank" href="http://reddit.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://reddit.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&title=Populated%20Places');" title="Reddit"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Reddit" alt="Reddit" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-55px -55px" class="sociable-hovers" /></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
<div id="sidebar">
|
||||
<ul><li id='text-5' class='widget widget_text'><h2 class="widgettitle">Stay up to Date</h2>
|
||||
<div class="textwidget"> Know when a new version of Natural Earth is released by subscribing to our <a href="http://www.naturalearthdata.com/updates/" class="up-to-date-link" >announcement list</a>.</div>
|
||||
</li></ul><ul><li id='text-2' class='widget widget_text'><h2 class="widgettitle">Find a Problem?</h2>
|
||||
<div class="textwidget"><div>
|
||||
<div style="float:left; width:65px;"><a href="/corrections/index.php?a=add"><img class="alignleft" title="New Ticket" src="http://www.naturalearthdata.com/corrections/img/newticket.png" alt="" width="60" height="60" /></a></div><div class="textwidget" style="float:left;width:120px; font-size:1.2em; font-size-adjust:none; font-style:normal;
|
||||
font-variant:normal; font-weight:normal; line-height:normal;">Submit suggestions and bug reports via our <a href="/corrections/index.php?a=add">correction system</a> and track the progress of your edits.</div>
|
||||
</div></div>
|
||||
</li></ul><ul><li id='text-3' class='widget widget_text'><h2 class="widgettitle">Join Our Community</h2>
|
||||
<div class="textwidget"><div>
|
||||
<div style="float:left; width:65px;"><a href="/forums/"><img src="http://www.naturalearthdata.com/wp-content/uploads/2009/08/green_globe_chat_bubble_562e.png" alt="forums" title="Chat in the forum!" width="50" height="50" /></a></div><div class="textwidget" style="float:left;width:120px; font-size:1.2em; font-size-adjust:none; font-style:normal;
|
||||
font-variant:normal; font-weight:normal; line-height:normal;">Talk back and discuss Natural Earth in the <a href="/forums/">Forums</a>.</div>
|
||||
</div></div>
|
||||
</li></ul><ul><li id='text-4' class='widget widget_text'><h2 class="widgettitle">Thank You</h2>
|
||||
<div class="textwidget">Our data downloads are generously hosted by Florida State University.</div>
|
||||
</li></ul> </div>
|
||||
|
||||
</div>
|
||||
|
||||
<hr />
|
||||
<div id="footer">
|
||||
<div id="footerarea">
|
||||
<div id="footerlogos">
|
||||
<p>Supported by:</p>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.nacis.org" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/nacis.png" alt="NACIS" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.cartotalk.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/cartotalk_ad.png" alt="Cartotalk" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.mapgiving.org" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/mapgiving.png" alt="Mapgiving" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.geography.wisc.edu/cartography/" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/wisconsin.png" alt="University of Wisconsin Madison - Cartography Dept." /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.shadedrelief.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/shaded_relief.png" alt="Shaded Relief" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.xnrproductions.com " target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/xnr.png" alt="XNR Productions" /></a>
|
||||
</div>
|
||||
|
||||
<p style="clear:both;"></p>
|
||||
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.freac.fsu.edu" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/fsu.png" alt="Florida State University - FREAC" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.springercartographics.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/scllc.png" alt="Springer Cartographics LLC" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.washingtonpost.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/wpost.png" alt="Washington Post" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.redgeographics.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/redgeo.png" alt="Red Geographics" /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://kelsocartography.com/blog " target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/kelso.png" alt="Kelso Cartography" /></a>
|
||||
</div>
|
||||
|
||||
<p style="clear:both;"></p>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.avenza.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/avenza.png" alt="Avenza Systems Inc." /></a>
|
||||
</div>
|
||||
<div class="footer-ad-box">
|
||||
<a href="http://www.stamen.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/stamen_ne_logo.png" alt="Stamen Design" /></a>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
<p style="clear:both;"></p>
|
||||
<span id="footerleft">
|
||||
© 2012. Natural Earth. All rights reserved.
|
||||
</span>
|
||||
<span id="footerright">
|
||||
<!-- Please help promote WordPress and simpleX. Do not remove -->
|
||||
<div>Powered by <a href="http://wordpress.org/">WordPress</a></div>
|
||||
<div><a href="http://www.naturalearthdata.com/wp-admin">Staff Login »</a></div>
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Google Analytics for WordPress | http://yoast.com/wordpress/google-analytics/ -->
|
||||
<script type="text/javascript">
|
||||
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
|
||||
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
|
||||
</script>
|
||||
<script type="text/javascript">
|
||||
try {
|
||||
var pageTracker = _gat._getTracker("UA-10168306-1");
|
||||
} catch(err) {}
|
||||
</script>
|
||||
<script src="http://www.naturalearthdata.com/wp-content/plugins/google-analytics-for-wordpress/custom_se.js" type="text/javascript"></script>
|
||||
<script type="text/javascript">
|
||||
try {
|
||||
// Cookied already:
|
||||
pageTracker._trackPageview();
|
||||
} catch(err) {}
|
||||
</script>
|
||||
<!-- End of Google Analytics code -->
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1 @@
|
||||
2.0.0
|
||||
@@ -0,0 +1 @@
|
||||
ISO-8859-1
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,128 @@
|
||||
"""
|
||||
Script that generates the included dataset 'naturalearth_lowres.shp'
|
||||
and 'naturalearth_cities.shp'.
|
||||
|
||||
Raw data: https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip
|
||||
Current version used: see code
|
||||
"""
|
||||
|
||||
import geopandas as gpd
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
import tempfile
|
||||
from shapely.geometry import box
|
||||
|
||||
# Natural Earth release to fetch; "latest" skips the version query parameter.
version = "latest"
# Base download URL. The odd "http//" (no colon) path segment is intentional:
# naturalearthdata.com really serves its downloads under that literal path.
urlbase = "https://www.naturalearthdata.com/"
urlbase += "http//www.naturalearthdata.com/download/110m/cultural/"
|
||||
|
||||
|
||||
def countries_override(world_raw):
    """Patch known issues in the raw Natural Earth countries table.

    Rows carrying the "-99" placeholder ISO code whose TYPE is a
    sovereign country or country get ISO_A3 backfilled from ADM0_A3,
    and the GDP column is renamed to the historical GDP_MD_EST name.

    NOTE: mutates *world_raw* in place before returning the renamed frame.
    """
    # not ideal - fix some country codes
    placeholder = world_raw["ISO_A3"] == "-99"
    is_country = world_raw["TYPE"].isin(["Sovereign country", "Country"])
    needs_fix = placeholder & is_country
    world_raw.loc[needs_fix, "ISO_A3"] = world_raw.loc[needs_fix, "ADM0_A3"]
    # backwards compatibility: keep the column name geopandas has always shipped
    return world_raw.rename(columns={"GDP_MD": "GDP_MD_EST"})
|
||||
|
||||
|
||||
# any change between versions?
def df_same(new, old, dataset, log):
    """Return True when *new* and *old* hold identical data.

    Both frames must share the same columns (asserted). Any difference —
    a changed row count or changed cell values — is appended to *log* as
    a markdown fragment suitable for pasting into a PR description.
    """
    assert (new.columns == old.columns).all(), "columns should be the same"

    if new.shape != old.shape:
        # rows were added/removed: surface the non-matching rows of an outer merge
        merged = old.merge(new, on="name", how="outer", suffixes=("_old", "_new"))
        dfc = merged.loc[lambda d: d.isna().any(axis=1)]
        log.append(f"### {dataset} row count changed ###\n{dfc.to_markdown()}")
        return False

    dfc = new.compare(old)
    if not dfc.empty:
        log.append(f"### {dataset} data changed ###\n{dfc.to_markdown()}")
    return len(dfc) == 0
|
||||
|
||||
|
||||
# One entry per distributed dataset: the upstream zip to download, the
# columns (in order) that the bundled shapefile ships, an optional
# post-processing hook, and the path of the currently bundled file to
# diff the fresh download against.
config = [
    {
        "file": "ne_110m_populated_places.zip",
        "cols": ["NAME", "geometry"],
        "current": gpd.datasets.get_path("naturalearth_cities"),
    },
    {
        "file": "ne_110m_admin_0_countries.zip",
        "cols": ["POP_EST", "CONTINENT", "NAME", "ISO_A3", "GDP_MD_EST", "geometry"],
        # fixes placeholder ISO codes and renames GDP_MD -> GDP_MD_EST
        "override": countries_override,
        "current": gpd.datasets.get_path("naturalearth_lowres"),
    },
]
|
||||
|
||||
# Datasets that changed relative to the bundled copy, keyed by zip name.
downloads = {}
# Markdown fragments describing each detected change (see df_same).
log = []
for dl in config:
    with tempfile.TemporaryDirectory() as tmpdirname:
        url = urlbase + dl["file"]
        # naturalearthdata.com rejects requests without a User-Agent header,
        # hence the dummy "XY" value. A specific release can be requested via
        # the "version" query parameter; "latest" sends no parameter.
        r = requests.get(
            url,
            stream=True,
            headers={"User-Agent": "XY"},
            params=None if version == "latest" else {"version": version},
        )
        assert (
            r.status_code == 200
        ), f"version: {version} does not exist. status: {r.status_code}"

        # Stream the zip to a temp file on disk.
        f = Path(tmpdirname).joinpath(dl["file"])
        with open(f, "wb") as fd:
            for chunk in r.iter_content(chunk_size=128):
                fd.write(chunk)
        # extract the natural earth version
        z = ZipFile(f)
        version_f = [i for i in z.infolist() if "VERSION" in i.filename]
        assert len(version_f) == 1, "failed to find VERSION file"
        # NOTE(review): the second argument of ZipFile.extract() is the
        # directory to extract into, so "v.txt" here becomes a directory
        # name, not a file name — works, but looks unintentional; confirm.
        with open(z.extract(version_f[0], Path(tmpdirname).joinpath("v.txt"))) as f_:
            dl_version = f_.read().strip()

        # extract geodataframe from zip
        gdf = gpd.read_file(f)
        # maintain structure that geopandas distributes
        if "override" in dl.keys():
            gdf = dl["override"](gdf)
        gdf = gdf.loc[:, dl["cols"]]
        gdf = gdf.rename(columns={c: c.lower() for c in gdf.columns})

        # override Crimea #2382: move the Crimea region (clipped by a fixed
        # bounding box) from Russia's geometry into Ukraine's.
        if dl["file"] == "ne_110m_admin_0_countries.zip":
            crimean_bbox = box(32.274, 44.139, 36.65, 46.704)
            # the part of Russia's polygon that falls inside the Crimea bbox
            crimea_only = (
                gdf.loc[gdf.name == "Russia", "geometry"]
                .iloc[0]
                .intersection(crimean_bbox)
            )
            complete_ukraine = (
                gdf.loc[gdf.name == "Ukraine", "geometry"].iloc[0].union(crimea_only)
            )
            # Russia minus everything inside the bbox
            correct_russia = (
                gdf.loc[gdf.name == "Russia", "geometry"]
                .iloc[0]
                .difference(crimean_bbox)
            )
            r_ix = gdf.loc[gdf.name == "Russia"].index[0]
            gdf.at[r_ix, "geometry"] = correct_russia

            u_ix = gdf.loc[gdf.name == "Ukraine"].index[0]
            gdf.at[u_ix, "geometry"] = complete_ukraine

        # get changes between current version and new version
        if not df_same(gdf, gpd.read_file(dl["current"]), dl["file"], log):
            downloads[dl["file"]] = gdf


# create change log that can be pasted into PR
# NOTE(review): dl_version is the version of the *last* dataset processed;
# presumably all datasets in one release share a version — confirm.
with open(f"CHANGE_{dl_version}.md", "w") as f:
    f.write("\n\n".join(log))

# save downloaded geodataframe to appropriate place
for k, gdf_ in downloads.items():
    # path of the bundled file this download replaces
    f = [Path(c["current"]) for c in config if c["file"] == k][0]
    # writes to a path relative to the CWD built from the bundled file's
    # parent directory name and file name
    gdf_.to_file(driver="ESRI Shapefile", filename=Path(f.parent.name).joinpath(f.name))
|
||||
@@ -0,0 +1 @@
|
||||
ISO-8859-1
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
1008
.venv/lib/python3.12/site-packages/geopandas/explore.py
Normal file
1008
.venv/lib/python3.12/site-packages/geopandas/explore.py
Normal file
File diff suppressed because it is too large
Load Diff
2484
.venv/lib/python3.12/site-packages/geopandas/geodataframe.py
Normal file
2484
.venv/lib/python3.12/site-packages/geopandas/geodataframe.py
Normal file
File diff suppressed because it is too large
Load Diff
1390
.venv/lib/python3.12/site-packages/geopandas/geoseries.py
Normal file
1390
.venv/lib/python3.12/site-packages/geopandas/geoseries.py
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,73 @@
|
||||
from packaging.version import Version
|
||||
|
||||
import pyarrow
|
||||
|
||||
|
||||
# Error text shown when a forbidden 'arrow.py_extension_type' column is
# deserialized.  This is a runtime string with .format() placeholders
# (storage_type / serialized / pickle_disassembly) — do not alter.
_ERROR_MSG = """\
Disallowed deserialization of 'arrow.py_extension_type':
storage_type = {storage_type}
serialized = {serialized}
pickle disassembly:\n{pickle_disassembly}

Reading of untrusted Parquet or Feather files with a PyExtensionType column
allows arbitrary code execution.
If you trust this file, you can enable reading the extension type by one of:

- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)`
- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running
  `import pyarrow_hotfix; pyarrow_hotfix.uninstall()`

We strongly recommend updating your Parquet/Feather files to use extension types
derived from `pyarrow.ExtensionType` instead, and register this type explicitly.
See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types
for more details.
"""


def patch_pyarrow():
    """Block unsafe deserialization of pyarrow's 'arrow.py_extension_type'.

    Deserializing that extension type unpickles attacker-controlled bytes,
    which allows arbitrary code execution when reading untrusted
    Parquet/Feather files.  This replaces its registration with a type whose
    deserializer always raises.  No-op when a safe pyarrow (>= 14.0.1) or the
    pyarrow-hotfix package already provides protection, or when this patch
    was already applied.
    """
    # starting from pyarrow 14.0.1, it has its own mechanism
    if Version(pyarrow.__version__) >= Version("14.0.1"):
        return

    # if the user has pyarrow_hotfix (https://github.com/pitrou/pyarrow-hotfix)
    # installed, use this instead (which also ensures it works if they had
    # called `pyarrow_hotfix.uninstall()`)
    try:
        import pyarrow_hotfix  # noqa: F401
    except ImportError:
        pass
    else:
        return

    # if the hotfix is already installed and enabled
    if getattr(pyarrow, "_hotfix_installed", False):
        return

    class ForbiddenExtensionType(pyarrow.ExtensionType):
        # Serialization must stay possible so registration itself works.
        def __arrow_ext_serialize__(self):
            return b""

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            # Never deserialize: report what was attempted (including a
            # pickle disassembly to aid forensics) and abort.
            import io
            import pickletools

            out = io.StringIO()
            pickletools.dis(serialized, out)
            raise RuntimeError(
                _ERROR_MSG.format(
                    storage_type=storage_type,
                    serialized=serialized,
                    pickle_disassembly=out.getvalue(),
                )
            )

    # Swap pyarrow's built-in (unsafe) registration for the raising one.
    pyarrow.unregister_extension_type("arrow.py_extension_type")
    pyarrow.register_extension_type(
        ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type")
    )

    # flag so repeated imports of this module don't double-patch
    pyarrow._hotfix_installed = True


# apply the protection on import
patch_pyarrow()
|
||||
687
.venv/lib/python3.12/site-packages/geopandas/io/arrow.py
Normal file
687
.venv/lib/python3.12/site-packages/geopandas/io/arrow.py
Normal file
@@ -0,0 +1,687 @@
|
||||
from packaging.version import Version
|
||||
import json
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame, Series
|
||||
|
||||
import geopandas._compat as compat
|
||||
from geopandas._compat import import_optional_dependency
|
||||
from geopandas.array import from_wkb
|
||||
from geopandas import GeoDataFrame
|
||||
import geopandas
|
||||
from .file import _expand_user
|
||||
|
||||
METADATA_VERSION = "1.0.0"
|
||||
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0"]
|
||||
# reference: https://github.com/opengeospatial/geoparquet
|
||||
|
||||
# Metadata structure:
|
||||
# {
|
||||
# "geo": {
|
||||
# "columns": {
|
||||
# "<name>": {
|
||||
# "encoding": "WKB"
|
||||
# "geometry_types": <list of str: REQUIRED>
|
||||
# "crs": "<PROJJSON or None: OPTIONAL>",
|
||||
# "orientation": "<'counterclockwise' or None: OPTIONAL>"
|
||||
# "edges": "planar"
|
||||
# "bbox": <list of [xmin, ymin, xmax, ymax]: OPTIONAL>
|
||||
# "epoch": <float: OPTIONAL>
|
||||
# }
|
||||
# },
|
||||
# "primary_column": "<str: REQUIRED>",
|
||||
# "version": "<METADATA_VERSION>",
|
||||
#
|
||||
# # Additional GeoPandas specific metadata (not in metadata spec)
|
||||
# "creator": {
|
||||
# "library": "geopandas",
|
||||
# "version": "<geopandas.__version__>"
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
def _is_fsspec_url(url):
|
||||
return (
|
||||
isinstance(url, str)
|
||||
and "://" in url
|
||||
and not url.startswith(("http://", "https://"))
|
||||
)
|
||||
|
||||
|
||||
def _remove_id_from_member_of_ensembles(json_dict):
|
||||
"""
|
||||
Older PROJ versions will not recognize IDs of datum ensemble members that
|
||||
were added in more recent PROJ database versions.
|
||||
|
||||
Cf https://github.com/opengeospatial/geoparquet/discussions/110
|
||||
and https://github.com/OSGeo/PROJ/pull/3221
|
||||
|
||||
Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872
|
||||
"""
|
||||
for key, value in json_dict.items():
|
||||
if isinstance(value, dict):
|
||||
_remove_id_from_member_of_ensembles(value)
|
||||
elif key == "members" and isinstance(value, list):
|
||||
for member in value:
|
||||
member.pop("id", None)
|
||||
|
||||
|
||||
def _create_metadata(df, schema_version=None):
    """Create and encode geo metadata dict.

    Builds the "geo" metadata structure documented at the top of this module
    for every geometry-dtype column of *df*.

    Parameters
    ----------
    df : GeoDataFrame
    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', None}
        GeoParquet specification version; if not provided will default to
        latest supported version.

    Returns
    -------
    dict

    Raises
    ------
    ValueError
        If ``schema_version`` is not one of ``SUPPORTED_VERSIONS``.
    """

    schema_version = schema_version or METADATA_VERSION

    if schema_version not in SUPPORTED_VERSIONS:
        raise ValueError(
            f"schema_version must be one of: {', '.join(SUPPORTED_VERSIONS)}"
        )

    # Construct metadata for each geometry
    column_metadata = {}
    for col in df.columns[df.dtypes == "geometry"]:
        series = df[col]
        # unique geometry type names present in the column, NA dropped, sorted
        geometry_types = sorted(Series(series.geom_type.unique()).dropna())
        if schema_version[0] == "0":
            # 0.x spec used singular key name, and a bare string (not a
            # one-element list) when only a single type is present
            geometry_types_name = "geometry_type"
            if len(geometry_types) == 1:
                geometry_types = geometry_types[0]
        else:
            geometry_types_name = "geometry_types"

        crs = None
        if series.crs:
            if schema_version == "0.1.0":
                # 0.1.0 stored the CRS as WKT
                crs = series.crs.to_wkt()
            else:  # version >= 0.4.0
                # later versions store PROJJSON; strip ensemble-member ids
                # for compatibility with older PROJ (see helper above)
                crs = series.crs.to_json_dict()
                _remove_id_from_member_of_ensembles(crs)

        column_metadata[col] = {
            "encoding": "WKB",
            "crs": crs,
            geometry_types_name: geometry_types,
        }

        bbox = series.total_bounds.tolist()
        if np.isfinite(bbox).all():
            # don't add bbox with NaNs for empty / all-NA geometry column
            column_metadata[col]["bbox"] = bbox

    return {
        "primary_column": df._geometry_column_name,
        "columns": column_metadata,
        # NOTE: schema_version was already defaulted above, so the
        # "or METADATA_VERSION" here is redundant but harmless
        "version": schema_version or METADATA_VERSION,
        "creator": {"library": "geopandas", "version": geopandas.__version__},
    }
|
||||
|
||||
|
||||
def _encode_metadata(metadata):
|
||||
"""Encode metadata dict to UTF-8 JSON string
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metadata : dict
|
||||
|
||||
Returns
|
||||
-------
|
||||
UTF-8 encoded JSON string
|
||||
"""
|
||||
return json.dumps(metadata).encode("utf-8")
|
||||
|
||||
|
||||
def _decode_metadata(metadata_str):
|
||||
"""Decode a UTF-8 encoded JSON string to dict
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metadata_str : string (UTF-8 encoded)
|
||||
|
||||
Returns
|
||||
-------
|
||||
dict
|
||||
"""
|
||||
if metadata_str is None:
|
||||
return None
|
||||
|
||||
return json.loads(metadata_str.decode("utf-8"))
|
||||
|
||||
|
||||
def _validate_dataframe(df):
|
||||
"""Validate that the GeoDataFrame conforms to requirements for writing
|
||||
to Parquet format.
|
||||
|
||||
Raises `ValueError` if the GeoDataFrame is not valid.
|
||||
|
||||
copied from `pandas.io.parquet`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : GeoDataFrame
|
||||
"""
|
||||
|
||||
if not isinstance(df, DataFrame):
|
||||
raise ValueError("Writing to Parquet/Feather only supports IO with DataFrames")
|
||||
|
||||
# must have value column names (strings only)
|
||||
if df.columns.inferred_type not in {"string", "unicode", "empty"}:
|
||||
raise ValueError("Writing to Parquet/Feather requires string column names")
|
||||
|
||||
# index level names must be strings
|
||||
valid_names = all(
|
||||
isinstance(name, str) for name in df.index.names if name is not None
|
||||
)
|
||||
if not valid_names:
|
||||
raise ValueError("Index level names must be strings")
|
||||
|
||||
|
||||
def _validate_metadata(metadata):
|
||||
"""Validate geo metadata.
|
||||
Must not be empty, and must contain the structure specified above.
|
||||
|
||||
Raises ValueError if metadata is not valid.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metadata : dict
|
||||
"""
|
||||
|
||||
if not metadata:
|
||||
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
|
||||
|
||||
# version was schema_version in 0.1.0
|
||||
version = metadata.get("version", metadata.get("schema_version"))
|
||||
if not version:
|
||||
raise ValueError(
|
||||
"'geo' metadata in Parquet/Feather file is missing required key: "
|
||||
"'version'"
|
||||
)
|
||||
|
||||
required_keys = ("primary_column", "columns")
|
||||
for key in required_keys:
|
||||
if metadata.get(key, None) is None:
|
||||
raise ValueError(
|
||||
"'geo' metadata in Parquet/Feather file is missing required key: "
|
||||
"'{key}'".format(key=key)
|
||||
)
|
||||
|
||||
if not isinstance(metadata["columns"], dict):
|
||||
raise ValueError("'columns' in 'geo' metadata must be a dict")
|
||||
|
||||
# Validate that geometry columns have required metadata and values
|
||||
# leaving out "geometry_type" for compatibility with 0.1
|
||||
required_col_keys = ("encoding",)
|
||||
for col, column_metadata in metadata["columns"].items():
|
||||
for key in required_col_keys:
|
||||
if key not in column_metadata:
|
||||
raise ValueError(
|
||||
"'geo' metadata in Parquet/Feather file is missing required key "
|
||||
"'{key}' for column '{col}'".format(key=key, col=col)
|
||||
)
|
||||
|
||||
if column_metadata["encoding"] != "WKB":
|
||||
raise ValueError("Only WKB geometry encoding is supported")
|
||||
|
||||
if column_metadata.get("edges", "planar") == "spherical":
|
||||
warnings.warn(
|
||||
f"The geo metadata indicate that column '{col}' has spherical edges, "
|
||||
"but because GeoPandas currently does not support spherical "
|
||||
"geometry, it ignores this metadata and will interpret the edges of "
|
||||
"the geometries as planar.",
|
||||
UserWarning,
|
||||
stacklevel=4,
|
||||
)
|
||||
|
||||
|
||||
def _geopandas_to_arrow(df, index=None, schema_version=None):
    """
    Helper function with main, shared logic for to_parquet/to_feather.

    Converts *df* to a pyarrow Table with geometry columns encoded as WKB
    and the "geo" metadata attached to the table schema.

    Parameters
    ----------
    df : GeoDataFrame
    index : bool, optional
        Forwarded to ``Table.from_pandas(preserve_index=...)``.
    schema_version : str, optional
        GeoParquet metadata version, forwarded to ``_create_metadata``.
    """
    from pyarrow import Table

    _validate_dataframe(df)

    # create geo metadata before altering incoming data frame
    geo_metadata = _create_metadata(df, schema_version=schema_version)

    kwargs = {}
    if compat.USE_SHAPELY_20:
        # shapely >= 2.0 can write spec-conforming ISO WKB
        kwargs = {"flavor": "iso"}
    else:
        # shapely < 2.0 cannot produce ISO WKB; warn once if any 3D
        # geometries are present, since those deviate from the spec
        for col in df.columns[df.dtypes == "geometry"]:
            series = df[col]
            if series.has_z.any():
                warnings.warn(
                    "The GeoDataFrame contains 3D geometries, and when using "
                    "shapely < 2.0, such geometries will be written not exactly "
                    "following to the GeoParquet spec (not using ISO WKB). For "
                    "most use cases this should not be a problem (GeoPandas can "
                    "read such files fine).",
                    stacklevel=2,
                )
                break
    # replaces geometry columns with their WKB representation
    df = df.to_wkb(**kwargs)

    table = Table.from_pandas(df, preserve_index=index)

    # Store geopandas specific file-level metadata
    # This must be done AFTER creating the table or it is not persisted
    metadata = table.schema.metadata
    metadata.update({b"geo": _encode_metadata(geo_metadata)})

    return table.replace_schema_metadata(metadata)
|
||||
|
||||
|
||||
def _to_parquet(
    df, path, index=None, compression="snappy", schema_version=None, **kwargs
):
    """
    Write a GeoDataFrame to the Parquet format.

    Geometry columns are serialized to WKB. Tracks version 1.0.0 of the
    GeoParquet specification (https://github.com/opengeospatial/geoparquet);
    older versions can be written via ``schema_version``. Requires 'pyarrow'.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    index : bool, default None
        True: always write the index as columns; False: never write it;
        None: write it except for a RangeIndex (stored as metadata only).
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Compression codec; ``None`` disables compression.
    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
        GeoParquet specification version; defaults to the latest supported.
    **kwargs
        Passed through to ``pyarrow.parquet.write_table()``.
    """
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )

    # Backwards compatibility: `version` previously meant the GeoParquet
    # schema version. It is now forwarded to the parquet writer, unless it
    # names a supported schema version and no explicit schema_version was
    # provided (in which case it is consumed here with a deprecation warning).
    legacy_version = kwargs.get("version", None)
    if legacy_version is not None:
        if schema_version is None and legacy_version in SUPPORTED_VERSIONS:
            warnings.warn(
                "the `version` parameter has been replaced with `schema_version`. "
                "`version` will instead be passed directly to the underlying "
                "parquet writer unless `version` is 0.1.0 or 0.4.0.",
                FutureWarning,
                stacklevel=2,
            )
            schema_version = kwargs.pop("version")

    table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
    parquet.write_table(table, _expand_user(path), compression=compression, **kwargs)
|
||||
|
||||
|
||||
def _to_feather(df, path, index=None, compression=None, schema_version=None, **kwargs):
    """
    Write a GeoDataFrame to the Feather format.

    Geometry columns are serialized to WKB. Tracks version 1.0.0 of the
    GeoParquet specification for the metadata
    (https://github.com/opengeospatial/geoparquet); older versions can be
    written via ``schema_version``. Requires 'pyarrow' >= 0.17.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    index : bool, default None
        True: always write the index as columns; False: never write it;
        None: write it except for a RangeIndex (stored as metadata only).
    compression : {'zstd', 'lz4', 'uncompressed'}, optional
        Compression codec; ``"uncompressed"`` disables compression. By
        default uses LZ4 if available, otherwise uncompressed.
    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
        GeoParquet metadata version; defaults to the latest supported.
    kwargs
        Passed through to ``pyarrow.feather.write_feather()``.
    """
    feather = import_optional_dependency(
        "pyarrow.feather", extra="pyarrow is required for Feather support."
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    if Version(pyarrow.__version__) < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    # Backwards compatibility: `version` previously meant the GeoParquet
    # schema version (see the matching logic in _to_parquet).
    legacy_version = kwargs.get("version", None)
    if legacy_version is not None:
        if schema_version is None and legacy_version in SUPPORTED_VERSIONS:
            warnings.warn(
                "the `version` parameter has been replaced with `schema_version`. "
                "`version` will instead be passed directly to the underlying "
                "feather writer unless `version` is 0.1.0 or 0.4.0.",
                FutureWarning,
                stacklevel=2,
            )
            schema_version = kwargs.pop("version")

    table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
    feather.write_feather(table, _expand_user(path), compression=compression, **kwargs)
|
||||
|
||||
|
||||
def _arrow_to_geopandas(table, metadata=None):
    """
    Helper function with main, shared logic for read_parquet/read_feather.

    Converts a pyarrow Table (with "geo" metadata) into a GeoDataFrame,
    decoding WKB geometry columns back into geometry dtype.

    Parameters
    ----------
    table : pyarrow.Table
    metadata : dict of bytes keys, optional
        Raw file metadata to use instead of ``table.schema.metadata``
        (used for files where pyarrow does not expose the schema metadata).
    """
    df = table.to_pandas()

    # fall back to the schema-level metadata if none was passed explicitly
    metadata = metadata or table.schema.metadata

    if metadata is None or b"geo" not in metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead."""
        )

    try:
        metadata = _decode_metadata(metadata.get(b"geo", b""))

    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    _validate_metadata(metadata)

    # Find all geometry columns that were read from the file. May
    # be a subset if 'columns' parameter is used.
    geometry_columns = df.columns.intersection(metadata["columns"])

    if not len(geometry_columns):
        raise ValueError(
            """No geometry columns are included in the columns read from
            the Parquet/Feather file. To read this file without geometry columns,
            use pandas.read_parquet/read_feather() instead."""
        )

    geometry = metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
    if len(geometry_columns) and geometry not in geometry_columns:
        geometry = geometry_columns[0]

        # if there are multiple non-primary geometry columns, raise a warning
        if len(geometry_columns) > 1:
            warnings.warn(
                "Multiple non-primary geometry columns read from Parquet/Feather "
                "file. The first column read was promoted to the primary geometry.",
                stacklevel=3,
            )

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        col_metadata = metadata["columns"][col]
        if "crs" in col_metadata:
            crs = col_metadata["crs"]
            if isinstance(crs, dict):
                # PROJJSON: strip ensemble-member ids for older PROJ compat
                _remove_id_from_member_of_ensembles(crs)
        else:
            # per the GeoParquet spec, missing CRS is to be interpreted as
            # OGC:CRS84
            crs = "OGC:CRS84"

        df[col] = from_wkb(df[col].values, crs=crs)

    return GeoDataFrame(df, geometry=geometry)
|
||||
|
||||
|
||||
def _get_filesystem_path(path, filesystem=None, storage_options=None):
    """
    Get the filesystem and path for a given filesystem and path.

    If the filesystem is not None then it's just returned as is.

    Parameters
    ----------
    path : str or path object
    filesystem : filesystem object, optional
        An already-instantiated (pyarrow or fsspec) filesystem; returned
        unchanged when provided.
    storage_options : dict, optional
        Options forwarded to fsspec when it is used to resolve *path*.

    Returns
    -------
    (filesystem or None, path)

    Raises
    ------
    ValueError
        If ``storage_options`` is given for a path that is not an
        fsspec-style URL.
    """
    import pyarrow

    if (
        isinstance(path, str)
        and storage_options is None
        and filesystem is None
        and Version(pyarrow.__version__) >= Version("5.0.0")
    ):
        # Use the native pyarrow filesystem if possible.
        try:
            from pyarrow.fs import FileSystem

            filesystem, path = FileSystem.from_uri(path)
        except Exception:
            # fallback to use get_handle / fsspec for filesystems
            # that pyarrow doesn't support
            pass

    if _is_fsspec_url(path) and filesystem is None:
        # BUG FIX: error message previously read "requred"
        fsspec = import_optional_dependency(
            "fsspec", extra="fsspec is required for 'storage_options'."
        )
        filesystem, path = fsspec.core.url_to_fs(path, **(storage_options or {}))

    if filesystem is None and storage_options:
        raise ValueError(
            "Cannot provide 'storage_options' with non-fsspec path '{}'".format(path)
        )

    return filesystem, path
|
||||
|
||||
|
||||
def _ensure_arrow_fs(filesystem):
    """
    Simplified version of pyarrow.fs._ensure_filesystem. This is only needed
    below because `pyarrow.parquet.read_metadata` does not yet accept a
    filesystem keyword (https://issues.apache.org/jira/browse/ARROW-16719)

    Wraps an fsspec filesystem in a pyarrow PyFileSystem; pyarrow
    filesystems (and anything unrecognized) are returned unchanged.
    """
    from pyarrow import fs

    if isinstance(filesystem, fs.FileSystem):
        return filesystem

    # handle fsspec-compatible filesystems
    try:
        import fsspec
    except ImportError:
        return filesystem

    if isinstance(filesystem, fsspec.AbstractFileSystem):
        return fs.PyFileSystem(fs.FSSpecHandler(filesystem))

    return filesystem
|
||||
|
||||
|
||||
def _read_parquet(path, columns=None, storage_options=None, **kwargs):
    """
    Load a Parquet object from the file path, returning a GeoDataFrame.

    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet specification
    (https://github.com/opengeospatial/geoparquet). If the 'crs' key is
    absent from the GeoParquet metadata, "OGC:CRS84" is assumed per the
    specification. Requires 'pyarrow'.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        Subset of columns to read. If the primary geometry column is not
        included, the first secondary geometry column read is promoted to
        the geometry column of the result; if no geometry columns are read,
        a ``ValueError`` is raised (use ``pandas.read_parquet`` instead).
    storage_options : dict, optional
        Options for the storage connection, forwarded to fsspec (or to
        urllib as header options for HTTP(S) URLs). When omitted and a
        filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``,
        the ``pyarrow.fs`` implementation is preferred; pass an
        instantiated fsspec filesystem via the ``filesystem`` keyword to
        force fsspec.
    **kwargs
        Passed through to ``pyarrow.parquet.read_table()``.

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> df = geopandas.read_parquet("data.parquet")  # doctest: +SKIP
    """
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    # installs protection against the unsafe 'arrow.py_extension_type'
    import geopandas.io._pyarrow_hotfix  # noqa: F401

    # TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
    # adds filesystem as a keyword and match that.
    filesystem, path = _get_filesystem_path(
        path,
        filesystem=kwargs.pop("filesystem", None),
        storage_options=storage_options,
    )
    path = _expand_user(path)

    kwargs["use_pandas_metadata"] = True
    table = parquet.read_table(path, columns=columns, filesystem=filesystem, **kwargs)

    # Files created by GDAL may not expose the "geo" entry through
    # schema.metadata (https://issues.apache.org/jira/browse/ARROW-16688);
    # in that case fall back to the raw Parquet FileMetaData.
    metadata = None
    schema_metadata = table.schema.metadata
    if schema_metadata is None or b"geo" not in schema_metadata:
        try:
            if filesystem is None:
                metadata = parquet.read_metadata(path).metadata
            else:
                # read_metadata does not accept a filesystem keyword, so need
                # to handle this manually
                # (https://issues.apache.org/jira/browse/ARROW-16719)
                arrow_fs = _ensure_arrow_fs(filesystem)
                with arrow_fs.open_input_file(path) as source:
                    metadata = parquet.read_metadata(source).metadata
        except Exception:
            pass

    return _arrow_to_geopandas(table, metadata)
|
||||
|
||||
|
||||
def _read_feather(path, columns=None, **kwargs):
    """
    Load a Feather object from the file path, returning a GeoDataFrame.

    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet metadata
    specification (https://github.com/opengeospatial/geoparquet). If the
    'crs' key is absent from the metadata, "OGC:CRS84" is assumed per the
    specification. Requires 'pyarrow' >= 0.17.

    .. versionadded:: 0.8

    Parameters
    ----------
    path : str, path object
    columns : list-like of strings, default=None
        Subset of columns to read. If the primary geometry column is not
        included, the first secondary geometry column read is promoted to
        the geometry column of the result; if no geometry columns are read,
        a ``ValueError`` is raised (use ``pandas.read_feather`` instead).
    **kwargs
        Passed through to ``pyarrow.feather.read_table()``.

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    >>> df = geopandas.read_feather("data.feather")  # doctest: +SKIP
    """
    feather = import_optional_dependency(
        "pyarrow.feather", extra="pyarrow is required for Feather support."
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    # installs protection against the unsafe 'arrow.py_extension_type'
    import geopandas.io._pyarrow_hotfix  # noqa: F401

    if Version(pyarrow.__version__) < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    table = feather.read_table(_expand_user(path), columns=columns, **kwargs)
    return _arrow_to_geopandas(table)
|
||||
734
.venv/lib/python3.12/site-packages/geopandas/io/file.py
Normal file
734
.venv/lib/python3.12/site-packages/geopandas/io/file.py
Normal file
@@ -0,0 +1,734 @@
|
||||
import os
|
||||
from packaging.version import Version
|
||||
from pathlib import Path
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.api.types import is_integer_dtype
|
||||
|
||||
import pyproj
|
||||
from shapely.geometry import mapping
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
|
||||
# Adapted from pandas.io.common
|
||||
from urllib.parse import urlparse as parse_url
|
||||
from urllib.parse import uses_netloc, uses_params, uses_relative
|
||||
import urllib.request
|
||||
|
||||
|
||||
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
|
||||
_VALID_URLS.discard("")
|
||||
# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
|
||||
_VALID_URLS.discard("file")
|
||||
|
||||
|
||||
fiona = None
|
||||
fiona_env = None
|
||||
fiona_import_error = None
|
||||
FIONA_GE_19 = False
|
||||
|
||||
|
||||
def _import_fiona():
    """Lazily import fiona, recording the result in module globals.

    After this runs, the module-level `fiona` is either the imported module
    or False (import failed; the error text is kept in
    `fiona_import_error`). `fiona_env` holds `fiona.Env` (or the older
    `fiona.drivers`, or None), and FIONA_GE_19 records whether the
    installed fiona is >= 1.9.0. Idempotent: does nothing once `fiona` is
    no longer None.
    """
    global fiona
    global fiona_env
    global fiona_import_error
    global FIONA_GE_19

    if fiona is None:
        try:
            import fiona

            # only try to import fiona.Env if the main fiona import succeeded
            # (otherwise you can get confusing "AttributeError: module 'fiona'
            # has no attribute '_loading'" / partially initialized module errors)
            try:
                from fiona import Env as fiona_env
            except ImportError:
                try:
                    from fiona import drivers as fiona_env
                except ImportError:
                    fiona_env = None

            # base_version strips pre/dev suffixes before comparison
            FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
                "1.9.0"
            )
        except ImportError as err:
            # False (not None) marks "tried and failed" so we don't retry
            fiona = False
            fiona_import_error = str(err)
|
||||
|
||||
|
||||
pyogrio = None
|
||||
pyogrio_import_error = None
|
||||
|
||||
|
||||
def _import_pyogrio():
    """Lazily import pyogrio, recording the result in module globals.

    After this runs, the module-level `pyogrio` is either the imported
    module or False (import failed; the error text is kept in
    `pyogrio_import_error`). Idempotent: does nothing once `pyogrio` is no
    longer None.
    """
    global pyogrio
    global pyogrio_import_error

    if pyogrio is None:
        try:
            import pyogrio
        except ImportError as err:
            # False (not None) marks "tried and failed" so we don't retry
            pyogrio = False
            pyogrio_import_error = str(err)
|
||||
|
||||
|
||||
def _check_fiona(func):
    """Raise an informative ImportError when fiona is unavailable.

    Parameters
    ----------
    func : str
        Human-readable name of the calling functionality, interpolated into
        the error message.
    """
    # BUG FIX: `_import_fiona` sets the module-level `fiona` to False (not
    # None) when the import fails, so the previous `fiona is None` check
    # never fired after a failed import. Treat both sentinels (None = never
    # attempted, False = failed) as "unavailable".
    if not fiona:
        raise ImportError(
            f"the {func} requires the 'fiona' package, but it is not installed or does "
            f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
        )
|
||||
|
||||
|
||||
def _check_pyogrio(func):
    """Raise an informative ImportError when pyogrio is unavailable.

    Parameters
    ----------
    func : str
        Human-readable name of the calling functionality, interpolated into
        the error message.
    """
    # BUG FIX 1: `_import_pyogrio` sets the module-level `pyogrio` to False
    # (not None) on import failure, so the previous `pyogrio is None` check
    # never fired after a failed import.
    # BUG FIX 2: the final message fragment was missing its f-prefix, so
    # "{pyogrio_import_error}" was emitted literally instead of interpolated.
    if not pyogrio:
        raise ImportError(
            f"the {func} requires the 'pyogrio' package, but it is not installed "
            "or does not import correctly."
            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
        )
|
||||
|
||||
|
||||
def _check_engine(engine, func):
    """Resolve and validate the IO engine ("fiona" or "pyogrio") to use.

    Resolution order: explicit *engine* argument, then the
    ``geopandas.options.io_engine`` option, then whichever of fiona /
    pyogrio imports successfully (fiona preferred). Raises ImportError via
    the per-engine checkers (or directly, when neither engine is
    importable).

    Parameters
    ----------
    engine : str or None
        "fiona", "pyogrio", or None to auto-detect.
    func : str
        Human-readable name of the calling functionality, used in error
        messages.

    Returns
    -------
    str or None
        The resolved engine name. NOTE(review): when both engines fail to
        import, `engine` stays None and the final `elif` raises; an
        unrecognized engine string falls through and is returned unchanged
        — confirm that is intended.
    """
    # if not specified through keyword or option, then default to "fiona" if
    # installed, otherwise try pyogrio
    if engine is None:
        import geopandas

        engine = geopandas.options.io_engine

        if engine is None:
            _import_fiona()
            if fiona:
                engine = "fiona"
            else:
                _import_pyogrio()
                if pyogrio:
                    engine = "pyogrio"

    if engine == "fiona":
        _import_fiona()
        _check_fiona(func)
    elif engine == "pyogrio":
        _import_pyogrio()
        _check_pyogrio(func)
    elif engine is None:
        # neither engine could be imported during auto-detection
        raise ImportError(
            f"The {func} requires the 'pyogrio' or 'fiona' package, "
            "but neither is installed or imports correctly."
            f"\nImporting fiona resulted in: {fiona_import_error}"
            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
        )

    return engine
|
||||
|
||||
|
||||
# Map of lowercase file extensions to the OGR driver name used when writing
# (consulted by _detect_driver when no explicit driver is given).
_EXTENSION_TO_DRIVER = {
    ".bna": "BNA",
    ".dxf": "DXF",
    ".csv": "CSV",
    ".shp": "ESRI Shapefile",
    ".dbf": "ESRI Shapefile",
    ".json": "GeoJSON",
    ".geojson": "GeoJSON",
    ".geojsonl": "GeoJSONSeq",
    ".geojsons": "GeoJSONSeq",
    ".gpkg": "GPKG",
    ".gml": "GML",
    ".xml": "GML",
    ".gpx": "GPX",
    ".gtm": "GPSTrackMaker",
    ".gtz": "GPSTrackMaker",
    ".tab": "MapInfo File",
    ".mif": "MapInfo File",
    ".mid": "MapInfo File",
    ".dgn": "DGN",
    ".fgb": "FlatGeobuf",
}
|
||||
|
||||
|
||||
def _expand_user(path):
|
||||
"""Expand paths that use ~."""
|
||||
if isinstance(path, str):
|
||||
path = os.path.expanduser(path)
|
||||
elif isinstance(path, Path):
|
||||
path = path.expanduser()
|
||||
return path
|
||||
|
||||
|
||||
def _is_url(url):
    """Check to see if *url* has a valid protocol."""
    try:
        scheme = parse_url(url).scheme
    except Exception:
        # Anything unparsable is treated as "not a URL".
        return False
    return scheme in _VALID_URLS
|
||||
|
||||
|
||||
def _is_zip(path):
    """Check if a given path is a zipfile."""
    parsed = fiona.path.ParsedPath.from_uri(path)
    # When the path points inside an archive, inspect the archive name;
    # otherwise inspect the path itself.
    candidate = parsed.archive if parsed.archive else parsed.path
    return candidate.endswith(".zip")
|
||||
|
||||
|
||||
def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs):
    """
    Returns a GeoDataFrame from a file or URL.

    .. note::

        GeoPandas currently defaults to use Fiona as the engine in ``read_file``.
        However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
        pyogrio can provide a significant speedup compared to Fiona. We recommend to
        already install pyogrio and specify the engine by using the ``engine`` keyword
        (``geopandas.read_file(..., engine="pyogrio")``), or by setting the default for
        the ``engine`` keyword globally with::

            geopandas.options.io_engine = "pyogrio"

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely
        geometry. With engine="fiona", CRS mis-matches are resolved if given a GeoSeries
        or GeoDataFrame. With engine="pyogrio", bbox must be in the same CRS as the
        dataset. Tuple is (minx, miny, maxx, maxy) to match the bounds property of
        shapely geometry objects. Cannot be used with mask.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter for features that intersect with the given dict-like geojson
        geometry, GeoSeries, GeoDataFrame or shapely geometry.
        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        Cannot be used with bbox.
    rows : int or slice, default None
        Load in specific rows by passing an integer (first `n` rows) or a
        slice() object.
    engine : str, "fiona" or "pyogrio"
        The underlying library that is used to read the file. Currently, the
        supported options are "fiona" and "pyogrio". Defaults to "fiona" if
        installed, otherwise tries "pyogrio".
    **kwargs :
        Keyword args to be passed to the engine. In case of the "fiona" engine,
        the keyword arguments are passed to :func:`fiona.open` or
        :class:`fiona.collection.BytesCollection` when opening the file.
        For more information on possible keywords, type:
        ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
        the keyword arguments are passed to :func:`pyogrio.read_dataframe`.


    Examples
    --------
    >>> df = geopandas.read_file("nybb.shp")  # doctest: +SKIP

    Specifying layer of GPKG:

    >>> df = geopandas.read_file("file.gpkg", layer='cities')  # doctest: +SKIP

    Reading only first 10 rows:

    >>> df = geopandas.read_file("nybb.shp", rows=10)  # doctest: +SKIP

    Reading only geometries intersecting ``mask``:

    >>> df = geopandas.read_file("nybb.shp", mask=polygon)  # doctest: +SKIP

    Reading only geometries intersecting ``bbox``:

    >>> df = geopandas.read_file("nybb.shp", bbox=(0, 0, 10, 20))  # doctest: +SKIP

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
        If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.

    When specifying a URL, geopandas will check if the server supports reading
    partial data and in that case pass the URL as is to the underlying engine,
    which will then use the network file system handler of GDAL to read from
    the URL. Otherwise geopandas will download the data from the URL and pass
    all data in-memory to the underlying engine.
    If you need more control over how the URL is read, you can specify the
    GDAL virtual filesystem manually (e.g. ``/vsicurl/https://...``). See the
    GDAL documentation on filesystems for more details
    (https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access).

    """
    # Resolve (and validate availability of) the backing IO library first so
    # failures surface before any network access happens below.
    engine = _check_engine(engine, "'read_file' function")

    filename = _expand_user(filename)

    from_bytes = False
    if _is_url(filename):
        # if it is a url that supports random access -> pass through to
        # pyogrio/fiona as is (to support downloading only part of the file)
        # otherwise still download manually because pyogrio/fiona don't support
        # all types of urls (https://github.com/geopandas/geopandas/issues/2908)
        with urllib.request.urlopen(filename) as response:
            # Servers advertising "Accept-Ranges: bytes" support partial reads;
            # anything else is downloaded fully into memory here.
            if not response.headers.get("Accept-Ranges") == "bytes":
                filename = response.read()
                from_bytes = True

    if engine == "pyogrio":
        return _read_file_pyogrio(filename, bbox=bbox, mask=mask, rows=rows, **kwargs)

    elif engine == "fiona":
        if pd.api.types.is_file_like(filename):
            # Drain file-like objects up front; fiona needs bytes, not handles.
            data = filename.read()
            path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data
            from_bytes = True
        else:
            path_or_bytes = filename

        return _read_file_fiona(
            path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
        )

    else:
        raise ValueError(f"unknown engine '{engine}'")
|
||||
|
||||
|
||||
def _read_file_fiona(
    path_or_bytes, from_bytes, bbox=None, mask=None, rows=None, where=None, **kwargs
):
    """Read a data source with fiona and return a (Geo)DataFrame.

    ``path_or_bytes`` is either a path/URI string (``from_bytes=False``) or the
    raw file contents as bytes (``from_bytes=True``). ``bbox``/``mask``/``rows``/
    ``where`` narrow what is read; remaining kwargs go to ``fiona.open`` /
    ``fiona.BytesCollection``.
    """
    if where is not None and not FIONA_GE_19:
        raise NotImplementedError("where requires fiona 1.9+")

    if not from_bytes:
        # Opening a file via URL or file-like-object above automatically detects a
        # zipped file. In order to match that behavior, attempt to add a zip scheme
        # if missing.
        if _is_zip(str(path_or_bytes)):
            parsed = fiona.parse_path(str(path_or_bytes))
            if isinstance(parsed, fiona.path.ParsedPath):
                # If fiona is able to parse the path, we can safely look at the scheme
                # and update it to have a zip scheme if necessary.
                schemes = (parsed.scheme or "").split("+")
                if "zip" not in schemes:
                    parsed.scheme = "+".join(["zip"] + schemes)
                path_or_bytes = parsed.name
            elif isinstance(parsed, fiona.path.UnparsedPath) and not str(
                path_or_bytes
            ).startswith("/vsi"):
                # If fiona is unable to parse the path, it might have a Windows drive
                # scheme. Try adding zip:// to the front. If the path starts with "/vsi"
                # it is a legacy GDAL path type, so let it pass unmodified.
                path_or_bytes = "zip://" + parsed.name

    if from_bytes:
        reader = fiona.BytesCollection
    else:
        reader = fiona.open

    with fiona_env():
        with reader(path_or_bytes, **kwargs) as features:
            # Fall back to the WKT string; replaced by an EPSG code when one
            # can be determined below.
            crs = features.crs_wkt
            # attempt to get EPSG code
            try:
                # fiona 1.9+
                epsg = features.crs.to_epsg(confidence_threshold=100)
                if epsg is not None:
                    crs = epsg
            except AttributeError:
                # fiona <= 1.8
                try:
                    crs = features.crs["init"]
                except (TypeError, KeyError):
                    pass

            # handle loading the bounding box
            if bbox is not None:
                if isinstance(bbox, (GeoDataFrame, GeoSeries)):
                    # Reproject the filter geometry into the dataset CRS first.
                    bbox = tuple(bbox.to_crs(crs).total_bounds)
                elif isinstance(bbox, BaseGeometry):
                    bbox = bbox.bounds
                assert len(bbox) == 4
            # handle loading the mask
            elif isinstance(mask, (GeoDataFrame, GeoSeries)):
                mask = mapping(mask.to_crs(crs).unary_union)
            elif isinstance(mask, BaseGeometry):
                mask = mapping(mask)

            filters = {}
            if bbox is not None:
                filters["bbox"] = bbox
            if mask is not None:
                filters["mask"] = mask
            if where is not None:
                filters["where"] = where

            # setup the data loading filter
            if rows is not None:
                if isinstance(rows, int):
                    rows = slice(rows)
                elif not isinstance(rows, slice):
                    raise TypeError("'rows' must be an integer or a slice.")
                f_filt = features.filter(rows.start, rows.stop, rows.step, **filters)
            elif filters:
                f_filt = features.filter(**filters)
            else:
                f_filt = features
            # get list of columns
            columns = list(features.schema["properties"])
            datetime_fields = [
                k for (k, v) in features.schema["properties"].items() if v == "datetime"
            ]
            if kwargs.get("ignore_geometry", False):
                df = pd.DataFrame(
                    [record["properties"] for record in f_filt], columns=columns
                )
            else:
                df = GeoDataFrame.from_features(
                    f_filt, crs=crs, columns=columns + ["geometry"]
                )
            # NOTE(review): errors="ignore" is deprecated in pandas >= 2.2 and
            # will need replacing with an explicit try/except — confirm against
            # the pandas version this is pinned to.
            for k in datetime_fields:
                as_dt = pd.to_datetime(df[k], errors="ignore")
                # if to_datetime failed, try again for mixed timezone offsets
                if as_dt.dtype == "object":
                    # This can still fail if there are invalid datetimes
                    as_dt = pd.to_datetime(df[k], errors="ignore", utc=True)
                # if to_datetime succeeded, round datetimes as
                # fiona only supports up to ms precision (any microseconds are
                # floating point rounding error)
                if not (as_dt.dtype == "object"):
                    df[k] = as_dt.dt.round(freq="ms")
            return df
|
||||
|
||||
|
||||
def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs):
    """Read a data source with pyogrio, translating geopandas keywords
    (``rows``, ``bbox``, ``ignore_geometry``) into pyogrio's
    ``skip_features``/``max_features``/``bbox``/``read_geometry`` options."""
    import pyogrio

    if rows is not None:
        if isinstance(rows, int):
            kwargs["max_features"] = rows
        elif isinstance(rows, slice):
            start = rows.start
            if start is not None:
                if start < 0:
                    raise ValueError(
                        "Negative slice start not supported with the 'pyogrio' engine."
                    )
                kwargs["skip_features"] = start
            if rows.stop is not None:
                kwargs["max_features"] = rows.stop - (start or 0)
            if rows.step is not None:
                raise ValueError("slice with step is not supported")
        else:
            raise TypeError("'rows' must be an integer or a slice.")

    if bbox is not None:
        # Normalize GeoSeries/GeoDataFrame/geometry filters to a plain 4-tuple.
        if isinstance(bbox, (GeoDataFrame, GeoSeries)):
            bbox = tuple(bbox.total_bounds)
        elif isinstance(bbox, BaseGeometry):
            bbox = bbox.bounds
        if len(bbox) != 4:
            raise ValueError("'bbox' should be a length-4 tuple.")

    if mask is not None:
        raise ValueError(
            "The 'mask' keyword is not supported with the 'pyogrio' engine. "
            "You can use 'bbox' instead."
        )

    if kwargs.pop("ignore_geometry", False):
        kwargs["read_geometry"] = False

    # TODO: if bbox is not None, check its CRS vs the CRS of the file
    return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
|
||||
|
||||
|
||||
def read_file(*args, **kwargs):
    """Soft-deprecated public alias; forwards everything to ``_read_file``."""
    message = (
        "geopandas.io.file.read_file() is intended for internal "
        "use only, and will be deprecated. Use geopandas.read_file() instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=2)
    return _read_file(*args, **kwargs)
|
||||
|
||||
|
||||
def to_file(*args, **kwargs):
    """Soft-deprecated public alias; forwards everything to ``_to_file``."""
    message = (
        "geopandas.io.file.to_file() is intended for internal "
        "use only, and will be deprecated. Use GeoDataFrame.to_file() "
        "or GeoSeries.to_file() instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=2)
    return _to_file(*args, **kwargs)
|
||||
|
||||
|
||||
def _detect_driver(path):
    """
    Attempt to auto-detect the OGR driver based on the file extension.
    """
    # File handles carry the underlying path in .name; plain paths pass through.
    path = getattr(path, "name", path)
    suffix = Path(path).suffix.lower()
    # Unknown extensions are assumed to be a shapefile folder for now. In the
    # future, this will likely raise an exception when the expected folder
    # writing behavior is more clearly defined.
    return _EXTENSION_TO_DRIVER.get(suffix, "ESRI Shapefile")
|
||||
|
||||
|
||||
def _to_file(
    df,
    filename,
    driver=None,
    schema=None,
    index=None,
    mode="w",
    crs=None,
    engine=None,
    **kwargs,
):
    """
    Write this GeoDataFrame to an OGR data source

    A dictionary of supported OGR providers is available via:
    >>> import fiona
    >>> fiona.supported_drivers  # doctest: +SKIP

    .. note::

        GeoPandas currently defaults to use Fiona as the engine in ``to_file``.
        However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
        pyogrio can provide a significant speedup compared to Fiona. We recommend to
        already install pyogrio and specify the engine by using the ``engine`` keyword
        (``df.to_file(..., engine="pyogrio")``), or by setting the default for
        the ``engine`` keyword globally with::

            geopandas.options.io_engine = "pyogrio"

    Parameters
    ----------
    df : GeoDataFrame to be written
    filename : string
        File path or file handle to write to. The path may specify a
        GDAL VSI scheme.
    driver : string, default None
        The OGR format driver used to write the vector file.
        If not specified, it attempts to infer it from the file extension.
        If no extension is specified, it saves ESRI Shapefile to a folder.
    schema : dict, default None
        If specified, the schema dictionary is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the schema based on each column's dtype.
        Not supported for the "pyogrio" engine.
    index : bool, default None
        If True, write index into one or more columns (for MultiIndex).
        Default None writes the index into one or more columns only if
        the index is named, is a MultiIndex, or has a non-integer data
        type. If False, no index is written.

        .. versionadded:: 0.7
            Previously the index was not written.
    mode : string, default 'w'
        The write mode, 'w' to overwrite the existing file and 'a' to append;
        when using the pyogrio engine, you can also pass ``append=True``.
        Not all drivers support appending. For the fiona engine, the drivers
        that support appending are listed in fiona.supported_drivers or
        https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py.
        For the pyogrio engine, you should be able to use any driver that
        is available in your installation of GDAL that supports append
        capability; see the specific driver entry at
        https://gdal.org/drivers/vector/index.html for more information.
    crs : pyproj.CRS, default None
        If specified, the CRS is passed to Fiona to
        better control how the file is written. If None, GeoPandas
        will determine the crs based on crs df attribute.
        The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
    engine : str, "fiona" or "pyogrio"
        The underlying library that is used to write the file. Currently, the
        supported options are "fiona" and "pyogrio". Defaults to "fiona" if
        installed, otherwise tries "pyogrio".
    **kwargs :
        Keyword args to be passed to the engine, and can be used to write
        to multi-layer data, store data within archives (zip files), etc.
        In case of the "fiona" engine, the keyword arguments are passed to
        fiona.open`. For more information on possible keywords, type:
        ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
        the keyword arguments are passed to `pyogrio.write_dataframe`.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
    """
    engine = _check_engine(engine, "'to_file' method")

    filename = _expand_user(filename)

    if index is None:
        # Determine if index attribute(s) should be saved to file
        # (only if they are named or are non-integer)
        index = list(df.index.names) != [None] or not is_integer_dtype(df.index.dtype)
    if index:
        # Materialize the index as regular column(s) so the driver writes it.
        df = df.reset_index(drop=False)

    if driver is None:
        driver = _detect_driver(filename)

    # Shapefile DBF field names are limited to 10 characters; warn before the
    # driver silently truncates.
    if driver == "ESRI Shapefile" and any(len(c) > 10 for c in df.columns.tolist()):
        warnings.warn(
            "Column names longer than 10 characters will be truncated when saved to "
            "ESRI Shapefile.",
            stacklevel=3,
        )

    if (df.dtypes == "geometry").sum() > 1:
        raise ValueError(
            "GeoDataFrame contains multiple geometry columns but GeoDataFrame.to_file "
            "supports only a single geometry column. Use a GeoDataFrame.to_parquet or "
            "GeoDataFrame.to_feather, drop additional geometry columns or convert them "
            "to a supported format like a well-known text (WKT) using "
            "`GeoSeries.to_wkt()`.",
        )

    if mode not in ("w", "a"):
        raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")

    if engine == "fiona":
        _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs)
    elif engine == "pyogrio":
        _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs)
    else:
        raise ValueError(f"unknown engine '{engine}'")
|
||||
|
||||
|
||||
def _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs):
    """Write *df* through fiona, inferring a schema and CRS when not given."""
    if schema is None:
        schema = infer_schema(df)

    # An explicit crs argument overrides the frame's own CRS.
    crs = pyproj.CRS.from_user_input(crs) if crs else df.crs

    with fiona_env():
        try:
            gdal_version = fiona.env.get_gdal_release_name()
        except AttributeError:
            gdal_version = "2.0.0"  # just assume it is not the latest

        crs_wkt = None
        if crs:
            # GDAL 3+ understands modern WKT; older GDAL needs WKT1_GDAL.
            if Version(gdal_version) >= Version("3.0.0"):
                crs_wkt = crs.to_wkt()
            else:
                crs_wkt = crs.to_wkt("WKT1_GDAL")

        with fiona.open(
            filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
        ) as colxn:
            colxn.writerecords(df.iterfeatures())
|
||||
|
||||
|
||||
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
    """Write *df* with pyogrio, rejecting fiona-only keywords.

    ``schema`` and ``crs`` are fiona-specific and raise ValueError here;
    ``mode == "a"`` is translated to pyogrio's ``append=True``.
    """
    import pyogrio

    if schema is not None:
        raise ValueError(
            "The 'schema' argument is not supported with the 'pyogrio' engine."
        )

    if mode == "a":
        kwargs["append"] = True

    if crs is not None:
        # Message typo fixed: "it not supported" -> "is not supported".
        raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")

    # for the fiona engine, this check is done in gdf.iterfeatures()
    if not df.columns.is_unique:
        raise ValueError("GeoDataFrame cannot contain duplicated column names.")

    pyogrio.write_dataframe(df, filename, driver=driver, **kwargs)
|
||||
|
||||
|
||||
def infer_schema(df):
    """Infer a fiona schema dict ({"geometry": ..., "properties": ...}) from
    a GeoDataFrame's dtypes.

    The unused ``column`` parameter of the internal dtype-mapping helper has
    been removed; behavior is otherwise unchanged.
    """
    from collections import OrderedDict

    # TODO: test pandas string type and boolean type once released
    types = {
        "Int32": "int32",
        "int32": "int32",
        "Int64": "int",
        "string": "str",
        "boolean": "bool",
    }

    def convert_type(in_type):
        # Map a pandas/numpy dtype to the fiona field-type name.
        if in_type == object:
            return "str"
        if in_type.name.startswith("datetime64"):
            # numpy datetime type regardless of frequency
            return "datetime"
        if str(in_type) in types:
            out_type = types[str(in_type)]
        else:
            # Fall back to the Python scalar type name of a zero value.
            out_type = type(np.zeros(1, in_type).item()).__name__
        if out_type == "long":
            out_type = "int"
        return out_type

    properties = OrderedDict(
        [
            (col, convert_type(_type))
            for col, _type in zip(df.columns, df.dtypes)
            if col != df._geometry_column_name
        ]
    )

    if df.empty:
        warnings.warn(
            "You are attempting to write an empty DataFrame to file. "
            "For some drivers, this operation may fail.",
            UserWarning,
            stacklevel=3,
        )

    # Since https://github.com/Toblerity/Fiona/issues/446 resolution,
    # Fiona allows a list of geometry types
    geom_types = _geometry_types(df)

    schema = {"geometry": geom_types, "properties": properties}

    return schema
|
||||
|
||||
|
||||
def _geometry_types(df):
    """
    Determine the geometry types in the GeoDataFrame for the schema.
    """
    has_z = df.geometry.has_z
    flat_types = [t for t in df[~has_z].geometry.geom_type.unique() if t is not None]
    z_types = ["3D " + t for t in df[has_z].geometry.geom_type.unique() if t is not None]
    geom_types = z_types + flat_types

    if not geom_types:
        # Default geometry type supported by Fiona
        # (Since https://github.com/Toblerity/Fiona/issues/446 resolution)
        return "Unknown"

    # A single type is returned as a scalar string, multiple as a list.
    if len(geom_types) == 1:
        return geom_types[0]
    return geom_types
|
||||
471
.venv/lib/python3.12/site-packages/geopandas/io/sql.py
Normal file
471
.venv/lib/python3.12/site-packages/geopandas/io/sql.py
Normal file
@@ -0,0 +1,471 @@
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
import shapely.wkb
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
from geopandas import _compat as compat
|
||||
|
||||
|
||||
@contextmanager
def _get_conn(conn_or_engine):
    """
    Yield a connection within a transaction context.

    Engine.begin() returns a Connection with an implicit Transaction while
    Connection.begin() returns the Transaction. This helper will always return a
    Connection with an implicit (possibly nested) Transaction.

    Parameters
    ----------
    conn_or_engine : Connection or Engine
        A sqlalchemy Connection or Engine instance
    Returns
    -------
    Connection
    """
    from sqlalchemy.engine.base import Engine, Connection

    if isinstance(conn_or_engine, Connection):
        if conn_or_engine.in_transaction():
            # Already inside a transaction; reuse it as-is.
            yield conn_or_engine
        else:
            with conn_or_engine.begin():
                yield conn_or_engine
    elif isinstance(conn_or_engine, Engine):
        with conn_or_engine.begin() as conn:
            yield conn
    else:
        raise ValueError(f"Unknown Connectable: {conn_or_engine}")
|
||||
|
||||
|
||||
def _df_to_geodf(df, geom_col="geom", crs=None):
    """
    Transforms a pandas DataFrame into a GeoDataFrame.
    The column 'geom_col' must be a geometry column in WKB representation.
    To be used to convert df based on pd.read_sql to gdf.

    Parameters
    ----------
    df : DataFrame
        pandas DataFrame with geometry column in WKB representation.
    geom_col : string, default 'geom'
        column name to convert to shapely geometries
    crs : pyproj.CRS, optional
        CRS to use for the returned GeoDataFrame. The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
        If not set, tries to determine CRS from the SRID associated with the
        first geometry in the database, and assigns that to all geometries.

    Returns
    -------
    GeoDataFrame
    """

    if geom_col not in df:
        raise ValueError("Query missing geometry column '{}'".format(geom_col))

    if df.columns.to_list().count(geom_col) > 1:
        # Message fixed: the previous concatenation produced "Onlyone".
        raise ValueError(
            f"Duplicate geometry column '{geom_col}' detected in SQL query output. "
            "Only one geometry column is allowed."
        )

    geoms = df[geom_col].dropna()

    if not geoms.empty:
        # Geometries arrive either as raw WKB bytes or as hex-encoded WKB text.
        # (The former Python-2 buffer loader was dead code and has been removed.)
        load_geom_bytes = shapely.wkb.loads

        def load_geom_text(x):
            """Load from binary encoded as text."""
            return shapely.wkb.loads(str(x), hex=True)

        if isinstance(geoms.iat[0], bytes):
            load_geom = load_geom_bytes
        else:
            load_geom = load_geom_text

        df[geom_col] = geoms = geoms.apply(load_geom)
        if crs is None:
            if compat.SHAPELY_GE_20:
                srid = shapely.get_srid(geoms.iat[0])
            else:
                srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom)
            # if no defined SRID in geodatabase, returns SRID of 0
            if srid != 0:
                crs = "epsg:{}".format(srid)

    return GeoDataFrame(df, crs=crs, geometry=geom_col)
|
||||
|
||||
|
||||
def _read_postgis(
    sql,
    con,
    geom_col="geom",
    crs=None,
    index_col=None,
    coerce_float=True,
    parse_dates=None,
    params=None,
    chunksize=None,
):
    """
    Returns a GeoDataFrame corresponding to the result of the query
    string, which must contain a geometry column in WKB representation.

    It is also possible to use :meth:`~GeoDataFrame.read_file` to read from a database.
    Especially for file geodatabases like GeoPackage or SpatiaLite this can be easier.

    Parameters
    ----------
    sql : string
        SQL query to execute in selecting entries from database, or name
        of the table to read from the database.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the database to query.
    geom_col : string, default 'geom'
        column name to convert to shapely geometries
    crs : dict or str, optional
        CRS to use for the returned GeoDataFrame; if not set, tries to
        determine CRS from the SRID associated with the first geometry in
        the database, and assigns that to all geometries.
    chunksize : int, default None
        If specified, return an iterator where chunksize is the number of rows to
        include in each chunk.

    See the documentation for pandas.read_sql for further explanation
    of the following parameters:
    index_col, coerce_float, parse_dates, params, chunksize

    Returns
    -------
    GeoDataFrame

    Examples
    --------
    PostGIS

    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydatabase"
    >>> con = create_engine(db_connection_url)  # doctest: +SKIP
    >>> sql = "SELECT geom, highway FROM roads"
    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP

    SpatiaLite

    >>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
    >>> df = geopandas.read_postgis(sql, con)  # doctest: +SKIP
    """
    # The pd.read_sql call was duplicated in both branches; issue it once.
    # With chunksize=None pandas returns a DataFrame, otherwise an iterator
    # of DataFrames.
    result = pd.read_sql(
        sql,
        con,
        index_col=index_col,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        params=params,
        chunksize=chunksize,
    )

    if chunksize is None:
        # read all in one chunk and return a single GeoDataFrame
        return _df_to_geodf(result, geom_col=geom_col, crs=crs)

    # read data in chunks and return a generator
    return (_df_to_geodf(df, geom_col=geom_col, crs=crs) for df in result)
|
||||
|
||||
|
||||
def read_postgis(*args, **kwargs):
    """Soft-deprecated public alias; forwards everything to ``_read_postgis``.

    The redundant function-local ``import warnings`` was removed — the module
    already imports ``warnings`` at the top level.
    """
    warnings.warn(
        "geopandas.io.sql.read_postgis() is intended for internal "
        "use only, and will be deprecated. Use geopandas.read_postgis() instead.",
        FutureWarning,
        stacklevel=2,
    )

    return _read_postgis(*args, **kwargs)
|
||||
|
||||
|
||||
def _get_geometry_type(gdf):
    """
    Get basic geometry type of a GeoDataFrame. See more info from:
    https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType

    Following rules apply:
        - if geometries all share the same geometry-type,
          geometries are inserted with the given GeometryType with following types:
            - Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
              GeometryCollection.
            - LinearRing geometries will be converted into LineString -objects.
        - in all other cases, geometries will be inserted with type GEOMETRY:
            - a mix of Polygons and MultiPolygons in GeoSeries
            - a mix of Points and LineStrings in GeoSeries
            - geometry is of type GeometryCollection,
              such as GeometryCollection([Point, LineStrings])
        - if any of the geometries has Z-coordinate, all records will
          be written with 3D.
    """
    geom_types = list(gdf.geometry.geom_type.unique())
    has_curve = any(gt is not None and "LinearRing" in gt for gt in geom_types)

    if len(geom_types) != 1:
        # Mixed types: fall back to the generic GEOMETRY type.
        target_geom_type = "GEOMETRY"
    elif has_curve:
        target_geom_type = "LINESTRING"
    elif geom_types[0] is None:
        raise ValueError("No valid geometries in the data.")
    else:
        target_geom_type = geom_types[0].upper()

    # Check for 3D-coordinates
    if any(gdf.geometry.has_z):
        target_geom_type += "Z"

    return target_geom_type, has_curve
|
||||
|
||||
|
||||
def _get_srid_from_crs(gdf):
|
||||
"""
|
||||
Get EPSG code from CRS if available. If not, return -1.
|
||||
"""
|
||||
|
||||
# Use geoalchemy2 default for srid
|
||||
# Note: undefined srid in PostGIS is 0
|
||||
srid = None
|
||||
warning_msg = (
|
||||
"Could not parse CRS from the GeoDataFrame. "
|
||||
"Inserting data without defined CRS."
|
||||
)
|
||||
if gdf.crs is not None:
|
||||
try:
|
||||
for confidence in (100, 70, 25):
|
||||
srid = gdf.crs.to_epsg(min_confidence=confidence)
|
||||
if srid is not None:
|
||||
break
|
||||
auth_srid = gdf.crs.to_authority(
|
||||
auth_name="ESRI", min_confidence=confidence
|
||||
)
|
||||
if auth_srid is not None:
|
||||
srid = int(auth_srid[1])
|
||||
break
|
||||
except Exception:
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=2)
|
||||
|
||||
if srid is None:
|
||||
srid = -1
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=2)
|
||||
|
||||
return srid
|
||||
|
||||
|
||||
def _convert_linearring_to_linestring(gdf, geom_name):
    """Replace LinearRing geometries in column *geom_name* with LineStrings.

    Mutates *gdf* in place and also returns it.
    """
    from shapely.geometry import LineString

    # Todo: Use Pygeos function once it's implemented:
    # https://github.com/pygeos/pygeos/issues/76
    is_ring = gdf.geom_type == "LinearRing"
    gdf.loc[is_ring, geom_name] = gdf.loc[is_ring, geom_name].apply(LineString)
    return gdf
def _convert_to_ewkb(gdf, geom_name, srid):
    """Convert geometries to ewkb."""
    if compat.USE_SHAPELY_20:
        tagged = shapely.set_srid(gdf[geom_name].values._data, srid=srid)
        geoms = shapely.to_wkb(tagged, hex=True, include_srid=True)
    elif compat.USE_PYGEOS:
        from pygeos import set_srid, to_wkb

        tagged = set_srid(gdf[geom_name].values._data, srid=srid)
        geoms = to_wkb(tagged, hex=True, include_srid=True)
    else:
        from shapely.wkb import dumps

        geoms = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]

    # The gdf will warn that the geometry column doesn't hold in-memory geometries
    # now that they are EWKB, so convert back to a regular dataframe to avoid warning
    # the user that the dtypes are unexpected.
    plain = pd.DataFrame(gdf, copy=False)
    plain[geom_name] = geoms
    return plain
def _psql_insert_copy(tbl, conn, keys, data_iter):
|
||||
import io
|
||||
import csv
|
||||
|
||||
s_buf = io.StringIO()
|
||||
writer = csv.writer(s_buf)
|
||||
writer.writerows(data_iter)
|
||||
s_buf.seek(0)
|
||||
|
||||
columns = ", ".join('"{}"'.format(k) for k in keys)
|
||||
|
||||
dbapi_conn = conn.connection
|
||||
with dbapi_conn.cursor() as cur:
|
||||
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
|
||||
tbl.table.schema, tbl.table.name, columns
|
||||
)
|
||||
cur.copy_expert(sql=sql, file=s_buf)
|
||||
|
||||
|
||||
def _write_postgis(
    gdf,
    name,
    con,
    schema=None,
    if_exists="fail",
    index=False,
    index_label=None,
    chunksize=None,
    dtype=None,
):
    """
    Upload GeoDataFrame into PostGIS database.

    This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
    Python driver (e.g. psycopg2) to be installed.

    Parameters
    ----------
    gdf : GeoDataFrame
        The GeoDataFrame to be written to the database.
    name : str
        Name of the target table.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the PostGIS database.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the table already exists:

        - fail: Raise a ValueError.
        - replace: Drop the table before inserting new values.
        - append: Insert new values to the existing table.
    schema : string, optional
        Specify the schema. If None, use default schema: 'public'.
    index : bool, default False
        Write DataFrame index as a column.
        Uses *index_label* as the column name in the table.
    index_label : string or sequence, default None
        Column label for index column(s).
        If None is given (default) and index is True,
        then the index names are used.
    chunksize : int, optional
        Rows will be written in batches of this size at a time.
        By default, all rows will be written at once.
    dtype : dict of column name to SQL type, default None
        Specifying the datatype for columns.
        The keys should be the column names and the values
        should be the SQLAlchemy types.

    Examples
    --------

    >>> from sqlalchemy import create_engine  # doctest: +SKIP
    >>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
/mydatabase")  # doctest: +SKIP
    >>> gdf.to_postgis("my_table", engine)  # doctest: +SKIP
    """
    try:
        from geoalchemy2 import Geometry
        from sqlalchemy import text
    except ImportError:
        raise ImportError("'to_postgis()' requires geoalchemy2 package.")

    # Work on a copy: the EWKB conversion below replaces the geometry column.
    gdf = gdf.copy()
    geom_name = gdf.geometry.name

    # Get srid
    srid = _get_srid_from_crs(gdf)

    # Get geometry type and info whether data contains LinearRing.
    geometry_type, has_curve = _get_geometry_type(gdf)

    # Build dtype with Geometry
    if dtype is not None:
        dtype[geom_name] = Geometry(geometry_type=geometry_type, srid=srid)
    else:
        dtype = {geom_name: Geometry(geometry_type=geometry_type, srid=srid)}

    # Convert LinearRing geometries to LineString
    if has_curve:
        gdf = _convert_linearring_to_linestring(gdf, geom_name)

    # Convert geometries to EWKB
    gdf = _convert_to_ewkb(gdf, geom_name, srid)

    if schema is not None:
        schema_name = schema
    else:
        schema_name = "public"

    if if_exists == "append":
        # Check that the geometry srid matches with the current GeoDataFrame
        with _get_conn(con) as connection:
            # Only check SRID if table exists
            if connection.dialect.has_table(connection, name, schema):
                # NOTE(review): schema/table/column names are interpolated
                # directly into the SQL string. They come from the caller, not
                # from end users, but bound parameters would be safer here.
                target_srid = connection.execute(
                    text(
                        "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                            schema=schema_name, table=name, geom_col=geom_name
                        )
                    )
                ).fetchone()[0]

                if target_srid != srid:
                    msg = (
                        "The CRS of the target table (EPSG:{epsg_t}) differs from the "
                        "CRS of current GeoDataFrame (EPSG:{epsg_src}).".format(
                            epsg_t=target_srid, epsg_src=srid
                        )
                    )
                    raise ValueError(msg)

    # Delegate the actual insert to pandas, using the COPY-based fast path.
    with _get_conn(con) as connection:
        gdf.to_sql(
            name,
            connection,
            schema=schema_name,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            chunksize=chunksize,
            dtype=dtype,
            method=_psql_insert_copy,
        )
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
Script to create the data and write legacy storage (pickle) files.
|
||||
|
||||
Based on pandas' generate_legacy_storage_files.py script.
|
||||
|
||||
To use this script, create an environment for which you want to
|
||||
generate pickles, activate the environment, and run this script as:
|
||||
|
||||
$ python geopandas/geopandas/io/tests/generate_legacy_storage_files.py \
|
||||
geopandas/geopandas/io/tests/data/pickle/ pickle
|
||||
|
||||
This script generates a storage file for the current arch, system,
|
||||
|
||||
The idea here is you are using the *current* version of the
|
||||
generate_legacy_storage_files with an *older* version of geopandas to
|
||||
generate a pickle file. We will then check this file into a current
|
||||
branch, and test using test_pickle.py. This will load the *older*
|
||||
pickles and test versus the current data that is generated
|
||||
(with master). These are then compared.
|
||||
|
||||
"""
|
||||
import os
|
||||
import pickle
|
||||
import platform
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import geopandas
|
||||
from shapely.geometry import Point
|
||||
|
||||
|
||||
def create_pickle_data():
    """create the pickle data

    Returns a dict of GeoDataFrames covering the cases the legacy-pickle
    tests need: a custom geometry column name and an explicit CRS.
    """
    # custom geometry column name
    gdf_the_geom = geopandas.GeoDataFrame(
        {"a": [1, 2, 3], "the_geom": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        geometry="the_geom",
    )

    # with crs
    gdf_crs = geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
        crs="EPSG:4326",
    )

    return {"gdf_the_geom": gdf_the_geom, "gdf_crs": gdf_crs}
def platform_name():
    """Return a filename stem encoding library versions and platform info."""
    parts = [
        str(geopandas.__version__),
        "pd-" + str(pd.__version__),
        "py-" + str(platform.python_version()),
        str(platform.machine()),
        str(platform.system().lower()),
    ]
    return "_".join(parts)
def write_legacy_pickles(output_dir):
    """Pickle the test data into *output_dir* under a platform-derived name."""
    print(
        "This script generates a storage file for the current arch, system, "
        "and python version"
    )
    # BUG FIX: this line used to read `print("geopandas version: {}").format(...)`,
    # which called .format() on print()'s None return value and raised
    # AttributeError before anything was written.
    print("geopandas version: {}".format(geopandas.__version__))
    print(" output dir : {}".format(output_dir))
    print(" storage format: pickle")

    pth = "{}.pickle".format(platform_name())

    # Context manager guarantees the file handle is closed even on error
    # (the original used a bare open()/close() pair).
    with open(os.path.join(output_dir, pth), "wb") as fh:
        pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)

    print("created pickle file: {}".format(pth))
def main():
    """Parse command-line arguments and dispatch to the storage writer."""
    if len(sys.argv) != 3:
        sys.exit(
            "Specify output directory and storage type: generate_legacy_"
            "storage_files.py <output_dir> <storage_type> "
        )

    output_dir, storage_type = str(sys.argv[1]), str(sys.argv[2])

    # Only pickle output is supported at the moment.
    if storage_type != "pickle":
        sys.exit("storage_type must be one of {'pickle'}")
    write_legacy_pickles(output_dir=output_dir)
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,914 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from itertools import product
|
||||
import json
|
||||
from packaging.version import Version
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
from pandas import DataFrame, read_parquet as pd_read_parquet
|
||||
from pandas.testing import assert_frame_equal
|
||||
import numpy as np
|
||||
import pyproj
|
||||
from shapely.geometry import box, Point, MultiPolygon
|
||||
|
||||
|
||||
import geopandas
|
||||
import geopandas._compat as compat
|
||||
from geopandas import GeoDataFrame, read_file, read_parquet, read_feather
|
||||
from geopandas.array import to_wkb
|
||||
from geopandas.datasets import get_path
|
||||
from geopandas.io.arrow import (
|
||||
SUPPORTED_VERSIONS,
|
||||
_create_metadata,
|
||||
_decode_metadata,
|
||||
_encode_metadata,
|
||||
_geopandas_to_arrow,
|
||||
_get_filesystem_path,
|
||||
_remove_id_from_member_of_ensembles,
|
||||
_validate_dataframe,
|
||||
_validate_metadata,
|
||||
METADATA_VERSION,
|
||||
)
|
||||
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
|
||||
from geopandas.tests.util import mock
|
||||
|
||||
|
||||
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
|
||||
|
||||
|
||||
# Skip all tests in this module if pyarrow is not available
|
||||
pyarrow = pytest.importorskip("pyarrow")
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        "parquet",
        pytest.param(
            "feather",
            marks=pytest.mark.skipif(
                Version(pyarrow.__version__) < Version("0.17.0"),
                reason="needs pyarrow >= 0.17",
            ),
        ),
    ]
)
def file_format(request):
    # Parametrized (reader, writer) pair so each consuming test runs against
    # both the Parquet and (when supported) Feather code paths.
    if request.param == "parquet":
        return read_parquet, GeoDataFrame.to_parquet
    elif request.param == "feather":
        return read_feather, GeoDataFrame.to_feather
def test_create_metadata():
    """The generated 'geo' metadata fully describes the geometry column."""
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    metadata = _create_metadata(df)

    assert isinstance(metadata, dict)
    assert metadata["version"] == METADATA_VERSION
    assert metadata["primary_column"] == "geometry"
    assert "geometry" in metadata["columns"]
    # CRS is stored as PROJJSON, with ensemble-member ids stripped on write.
    crs_expected = df.crs.to_json_dict()
    _remove_id_from_member_of_ensembles(crs_expected)
    assert metadata["columns"]["geometry"]["crs"] == crs_expected
    assert metadata["columns"]["geometry"]["encoding"] == "WKB"
    assert metadata["columns"]["geometry"]["geometry_types"] == [
        "MultiPolygon",
        "Polygon",
    ]

    assert np.array_equal(
        metadata["columns"]["geometry"]["bbox"], df.geometry.total_bounds
    )

    assert metadata["creator"]["library"] == "geopandas"
    assert metadata["creator"]["version"] == geopandas.__version__
def test_crs_metadata_datum_ensemble():
    """Stripping datum-ensemble member ids must keep the CRS equivalent."""
    # compatibility for older PROJ versions using PROJJSON with datum ensembles
    # https://github.com/geopandas/geopandas/pull/2453
    crs = pyproj.CRS("EPSG:4326")
    crs_json = crs.to_json_dict()
    check_ensemble = False
    if "datum_ensemble" in crs_json:
        # older version of PROJ don't yet have datum ensembles
        check_ensemble = True
        assert "id" in crs_json["datum_ensemble"]["members"][0]
    _remove_id_from_member_of_ensembles(crs_json)
    if check_ensemble:
        assert "id" not in crs_json["datum_ensemble"]["members"][0]
    # ensure roundtrip still results in an equivalent CRS
    assert pyproj.CRS(crs_json) == crs
def test_write_metadata_invalid_spec_version():
    """An unknown schema_version must be rejected with a ValueError."""
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
    with pytest.raises(ValueError, match="schema_version must be one of"):
        _create_metadata(gdf, schema_version="invalid")
def test_encode_metadata():
    """Metadata dicts serialize to JSON bytes."""
    assert _encode_metadata({"a": "b"}) == b'{"a": "b"}'
def test_decode_metadata():
    """JSON bytes decode back to a dict; None passes through unchanged."""
    assert _decode_metadata(b'{"a": "b"}') == {"a": "b"}

    assert _decode_metadata(None) is None
def test_validate_dataframe():
    """Only DataFrames with all-string column/index labels are writable."""
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # valid: should not raise ValueError
    _validate_dataframe(df)
    _validate_dataframe(df.set_index("iso_a3"))

    # add column with non-string type
    df[0] = 1

    # invalid: should raise ValueError
    with pytest.raises(ValueError):
        _validate_dataframe(df)

    with pytest.raises(ValueError):
        _validate_dataframe(df.set_index(0))

    # not a DataFrame: should raise ValueError
    with pytest.raises(ValueError):
        _validate_dataframe("not a dataframe")
def test_validate_metadata_valid():
    """Well-formed 'geo' metadata passes validation without raising."""
    # current spec key name
    _validate_metadata(
        {
            "primary_column": "geometry",
            "columns": {"geometry": {"crs": None, "encoding": "WKB"}},
            "schema_version": "0.1.0",
        }
    )

    # legacy "version" key is also accepted
    _validate_metadata(
        {
            "primary_column": "geometry",
            "columns": {"geometry": {"crs": None, "encoding": "WKB"}},
            "version": "<version>",
        }
    )

    _validate_metadata(
        {
            "primary_column": "geometry",
            "columns": {
                "geometry": {
                    "crs": {
                        # truncated PROJJSON for testing, as PROJJSON contents
                        # not validated here
                        "id": {"authority": "EPSG", "code": 4326},
                    },
                    "encoding": "WKB",
                }
            },
            "version": "0.4.0",
        }
    )
# Each case pairs a malformed 'geo' metadata payload with the expected
# ValueError message fragment.
@pytest.mark.parametrize(
    "metadata,error",
    [
        (None, "Missing or malformed geo metadata in Parquet/Feather file"),
        ({}, "Missing or malformed geo metadata in Parquet/Feather file"),
        # missing "version" key:
        (
            {"primary_column": "foo", "columns": None},
            "'geo' metadata in Parquet/Feather file is missing required key",
        ),
        # missing "columns" key:
        (
            {"primary_column": "foo", "version": "<version>"},
            "'geo' metadata in Parquet/Feather file is missing required key:",
        ),
        # missing "primary_column"
        (
            {"columns": [], "version": "<version>"},
            "'geo' metadata in Parquet/Feather file is missing required key:",
        ),
        (
            {"primary_column": "foo", "columns": [], "version": "<version>"},
            "'columns' in 'geo' metadata must be a dict",
        ),
        # missing "encoding" for column
        (
            {"primary_column": "foo", "columns": {"foo": {}}, "version": "<version>"},
            (
                "'geo' metadata in Parquet/Feather file is missing required key "
                "'encoding' for column 'foo'"
            ),
        ),
        # invalid column encoding
        (
            {
                "primary_column": "foo",
                "columns": {"foo": {"crs": None, "encoding": None}},
                "version": "<version>",
            },
            "Only WKB geometry encoding is supported",
        ),
        (
            {
                "primary_column": "foo",
                "columns": {"foo": {"crs": None, "encoding": "BKW"}},
                "version": "<version>",
            },
            "Only WKB geometry encoding is supported",
        ),
    ],
)
def test_validate_metadata_invalid(metadata, error):
    """Each malformed payload must raise ValueError with the matching message."""
    with pytest.raises(ValueError, match=error):
        _validate_metadata(metadata)
def test_validate_metadata_edges():
    """Spherical-edges metadata is accepted but must emit a UserWarning."""
    metadata = {
        "primary_column": "geometry",
        "columns": {"geometry": {"crs": None, "encoding": "WKB", "edges": "spherical"}},
        "version": "1.0.0-beta.1",
    }
    with pytest.warns(
        UserWarning,
        match="The geo metadata indicate that column 'geometry' has spherical edges",
    ):
        _validate_metadata(metadata)
def test_to_parquet_fails_on_invalid_engine(tmpdir):
    """Only the pyarrow engine is supported; other engines raise ValueError."""
    df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)])

    with pytest.raises(
        ValueError,
        match=(
            "GeoPandas only supports using pyarrow as the engine for "
            "to_parquet: 'fastparquet' passed instead."
        ),
    ):
        df.to_parquet(tmpdir / "test.parquet", engine="fastparquet")
@mock.patch("geopandas.io.arrow._to_parquet")
def test_to_parquet_does_not_pass_engine_along(mock_to_parquet):
    """`engine="pyarrow"` is consumed by the wrapper, not forwarded."""
    df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)])
    df.to_parquet("", engine="pyarrow")
    # assert that engine keyword is not passed through to _to_parquet (and thus
    # parquet.write_table)
    mock_to_parquet.assert_called_with(
        df, "", compression="snappy", index=None, schema_version=None
    )
# TEMPORARY: used to determine if pyarrow fails for roundtripping pandas data
# without geometries
def test_pandas_parquet_roundtrip1(tmpdir):
    """Plain pandas DataFrames roundtrip through parquet unchanged."""
    df = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    pq_df = pd_read_parquet(filename)

    assert_frame_equal(df, pq_df)
@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"]
)
def test_pandas_parquet_roundtrip2(test_dataset, tmpdir):
    """Each test dataset (geometry dropped) roundtrips via pandas parquet."""
    # BUG FIX: the parametrized `test_dataset` argument was previously
    # overwritten with the constant "naturalearth_lowres", so the
    # "naturalearth_cities" and "nybb" parameter cases never actually ran.
    df = DataFrame(read_file(get_path(test_dataset)).drop(columns=["geometry"]))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename)

    pq_df = pd_read_parquet(filename)

    assert_frame_equal(df, pq_df)
@pytest.mark.parametrize(
    "test_dataset", ["naturalearth_lowres", "naturalearth_cities", "nybb"]
)
def test_roundtrip(tmpdir, file_format, test_dataset):
    """Writing to parquet should not raise errors, and should not alter original
    GeoDataFrame
    """
    reader, writer = file_format

    df = read_file(get_path(test_dataset))
    orig = df.copy()

    filename = os.path.join(str(tmpdir), "test.pq")

    writer(df, filename)

    assert os.path.exists(filename)

    # make sure that the original data frame is unaltered
    assert_geodataframe_equal(df, orig)

    # make sure that we can roundtrip the data frame
    pq_df = reader(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
def test_index(tmpdir, file_format):
    """Setting index=`True` should preserve index in output, and
    setting index=`False` should drop index from output.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset)).set_index("iso_a3")

    # index=True: the named index survives the roundtrip
    filename = os.path.join(str(tmpdir), "test_with_index.pq")
    writer(df, filename, index=True)
    pq_df = reader(filename)
    assert_geodataframe_equal(df, pq_df)

    # index=False: a default RangeIndex comes back instead
    filename = os.path.join(str(tmpdir), "drop_index.pq")
    writer(df, filename, index=False)
    pq_df = reader(filename)
    assert_geodataframe_equal(df.reset_index(drop=True), pq_df)
@pytest.mark.parametrize("compression", ["snappy", "gzip", "brotli", None])
def test_parquet_compression(compression, tmpdir):
    """Using compression options should not raise errors, and should
    return identical GeoDataFrame.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    df.to_parquet(filename, compression=compression)
    pq_df = read_parquet(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
@pytest.mark.skipif(
    Version(pyarrow.__version__) < Version("0.17.0"),
    reason="Feather only supported for pyarrow >= 0.17",
)
@pytest.mark.parametrize("compression", ["uncompressed", "lz4", "zstd"])
def test_feather_compression(compression, tmpdir):
    """Using compression options should not raise errors, and should
    return identical GeoDataFrame.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.feather")
    df.to_feather(filename, compression=compression)
    pq_df = read_feather(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
def test_parquet_multiple_geom_cols(tmpdir, file_format):
    """If multiple geometry columns are present when written to parquet,
    they should all be returned as such when read from parquet.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df["geom2"] = df.geometry.copy()

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)

    assert os.path.exists(filename)

    pq_df = reader(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)

    # the secondary geometry column must also decode as real geometries
    assert_geoseries_equal(df.geom2, pq_df.geom2, check_geom_type=True)
def test_parquet_missing_metadata(tmpdir):
    """Missing geo metadata, such as from a parquet file created
    from a pandas DataFrame, will raise a ValueError.
    """
    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame
    df = DataFrame(df)

    # convert the geometry column so we can extract later
    df["geometry"] = to_wkb(df["geometry"].values)

    filename = os.path.join(str(tmpdir), "test.pq")

    # use pandas to_parquet (no geo metadata)
    df.to_parquet(filename)

    # missing metadata will raise ValueError
    with pytest.raises(
        ValueError, match="Missing geo metadata in Parquet/Feather file."
    ):
        read_parquet(filename)
def test_parquet_missing_metadata2(tmpdir):
    """Missing geo metadata, such as from a parquet file created
    from a pyarrow Table (which will also not contain pandas metadata),
    will raise a ValueError.
    """
    import pyarrow.parquet as pq

    table = pyarrow.table({"a": [1, 2, 3]})
    filename = os.path.join(str(tmpdir), "test.pq")

    # use pyarrow.parquet write_table (no geo metadata, but also no pandas metadata)
    pq.write_table(table, filename)

    # missing metadata will raise ValueError
    with pytest.raises(
        ValueError, match="Missing geo metadata in Parquet/Feather file."
    ):
        read_parquet(filename)
@pytest.mark.parametrize(
    "geo_meta,error",
    [
        ({"geo": b""}, "Missing or malformed geo metadata in Parquet/Feather file"),
        (
            {"geo": _encode_metadata({})},
            "Missing or malformed geo metadata in Parquet/Feather file",
        ),
        (
            {"geo": _encode_metadata({"foo": "bar"})},
            "'geo' metadata in Parquet/Feather file is missing required key",
        ),
    ],
)
def test_parquet_invalid_metadata(tmpdir, geo_meta, error):
    """Has geo metadata with missing required fields will raise a ValueError.

    This requires writing the parquet file directly below, so that we can
    control the metadata that is written for this test.
    """
    from pyarrow import parquet, Table

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    # convert to DataFrame and encode geometry to WKB
    df = DataFrame(df)
    df["geometry"] = to_wkb(df["geometry"].values)

    # inject the malformed 'geo' entry alongside the pandas schema metadata
    table = Table.from_pandas(df)
    metadata = table.schema.metadata
    metadata.update(geo_meta)
    table = table.replace_schema_metadata(metadata)

    filename = os.path.join(str(tmpdir), "test.pq")
    parquet.write_table(table, filename)

    with pytest.raises(ValueError, match=error):
        read_parquet(filename)
def test_subset_columns(tmpdir, file_format):
    """Reading a subset of columns should correctly decode selected geometry
    columns.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename, columns=["name", "geometry"])

    assert_geodataframe_equal(df[["name", "geometry"]], pq_df)

    # a column subset without any geometry column is an error
    with pytest.raises(
        ValueError, match="No geometry columns are included in the columns read"
    ):
        reader(filename, columns=["name"])
def test_promote_secondary_geometry(tmpdir, file_format):
    """Reading a subset of columns that does not include the primary geometry
    column should promote the first geometry column present.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))
    df["geom2"] = df.geometry.copy()

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename, columns=["name", "geom2"])

    assert_geodataframe_equal(df.set_geometry("geom2")[["name", "geom2"]], pq_df)

    df["geom3"] = df.geometry.copy()

    # with several non-primary geometry columns, promotion warns and picks
    # the first one
    writer(df, filename)
    with pytest.warns(
        UserWarning,
        match="Multiple non-primary geometry columns read from Parquet/Feather file.",
    ):
        pq_df = reader(filename, columns=["name", "geom2", "geom3"])

    assert_geodataframe_equal(
        df.set_geometry("geom2")[["name", "geom2", "geom3"]], pq_df
    )
def test_columns_no_geometry(tmpdir, file_format):
    """Reading a parquet file that is missing all of the geometry columns
    should raise a ValueError"""
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)

    with pytest.raises(ValueError):
        reader(filename, columns=["name"])
def test_missing_crs(tmpdir, file_format):
    """If CRS is `None`, it should be properly handled
    and remain `None` when read from parquet`.
    """
    reader, writer = file_format

    test_dataset = "naturalearth_lowres"

    df = read_file(get_path(test_dataset))
    df.crs = None

    filename = os.path.join(str(tmpdir), "test.pq")
    writer(df, filename)
    pq_df = reader(filename)

    assert pq_df.crs is None

    assert_geodataframe_equal(df, pq_df, check_crs=True)
def test_default_geo_col_writes(tmp_path):
    """A GeoDataFrame without any geometry column still writes successfully."""
    # edge case geo col name None writes successfully
    df = GeoDataFrame({"a": [1, 2]})
    df.to_parquet(tmp_path / "test.pq")
    # cannot be round tripped as gdf due to invalid geom col
    pq_df = pd_read_parquet(tmp_path / "test.pq")
    assert_frame_equal(df, pq_df)
# NOTE: inverse of the other skipif markers — this test only runs on OLD
# pyarrow, asserting that to_feather refuses to operate there.
@pytest.mark.skipif(
    Version(pyarrow.__version__) >= Version("0.17.0"),
    reason="Feather only supported for pyarrow >= 0.17",
)
def test_feather_arrow_version(tmpdir):
    df = read_file(get_path("naturalearth_lowres"))
    filename = os.path.join(str(tmpdir), "test.feather")

    with pytest.raises(
        ImportError, match="pyarrow >= 0.17 required for Feather support"
    ):
        df.to_feather(filename)
def test_fsspec_url():
    """fsspec URLs work with both storage_options and an explicit filesystem."""
    fsspec = pytest.importorskip("fsspec")
    import fsspec.implementations.memory

    class MyMemoryFileSystem(fsspec.implementations.memory.MemoryFileSystem):
        # Simple fsspec filesystem that adds a required keyword.
        # Attempting to use this filesystem without the keyword will raise an exception.
        def __init__(self, is_set, *args, **kwargs):
            self.is_set = is_set
            super().__init__(*args, **kwargs)

    # temporarily override the "memory" protocol with the keyword-requiring FS
    fsspec.register_implementation("memory", MyMemoryFileSystem, clobber=True)
    memfs = MyMemoryFileSystem(is_set=True)

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    with memfs.open("data.parquet", "wb") as f:
        df.to_parquet(f)

    # storage_options are forwarded to the filesystem constructor
    result = read_parquet("memory://data.parquet", storage_options={"is_set": True})
    assert_geodataframe_equal(result, df)

    # a pre-constructed filesystem instance is used as-is
    result = read_parquet("memory://data.parquet", filesystem=memfs)
    assert_geodataframe_equal(result, df)

    # reset fsspec registry
    fsspec.register_implementation(
        "memory", fsspec.implementations.memory.MemoryFileSystem, clobber=True
    )
def test_non_fsspec_url_with_storage_options_raises():
    """``storage_options`` only make sense for fsspec URLs; a local path must raise."""
    with pytest.raises(ValueError, match="storage_options"):
        read_parquet(
            get_path("naturalearth_lowres"), storage_options={"foo": "bar"}
        )
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    Version(pyarrow.__version__) < Version("5.0.0"),
    reason="pyarrow.fs requires pyarrow>=5.0.0",
)
def test_prefers_pyarrow_fs():
    """A ``file://`` URI should resolve to pyarrow's native local filesystem."""
    fs, _ = _get_filesystem_path("file:///data.parquet")
    assert isinstance(fs, pyarrow.fs.LocalFileSystem)
|
||||
|
||||
|
||||
def test_write_read_parquet_expand_user():
    """A path containing ``~`` must be expanded to the user's home directory
    for both Parquet writing and reading."""
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    test_file = "~/test_file.parquet"
    try:
        gdf.to_parquet(test_file)
        pq_df = geopandas.read_parquet(test_file)
        assert_geodataframe_equal(gdf, pq_df, check_crs=True)
    finally:
        # This test writes into the real home directory; always clean up,
        # even when the round trip fails (the original leaked the file on
        # assertion/read failure).
        expanded = os.path.expanduser(test_file)
        if os.path.exists(expanded):
            os.remove(expanded)
|
||||
|
||||
|
||||
def test_write_read_feather_expand_user():
    """A path containing ``~`` must be expanded to the user's home directory
    for both Feather writing and reading."""
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    test_file = "~/test_file.feather"
    try:
        gdf.to_feather(test_file)
        f_df = geopandas.read_feather(test_file)
        assert_geodataframe_equal(gdf, f_df, check_crs=True)
    finally:
        # This test writes into the real home directory; always clean up,
        # even when the round trip fails (the original leaked the file on
        # assertion/read failure).
        expanded = os.path.expanduser(test_file)
        if os.path.exists(expanded):
            os.remove(expanded)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("geometry", [[], [None]])
def test_write_empty_bbox(tmpdir, geometry):
    """An empty frame or all-missing geometries must not write a NaN bbox."""
    frame = geopandas.GeoDataFrame({"col": [1] * len(geometry)}, geometry=geometry)
    frame.to_parquet(tmpdir / "test.parquet")

    from pyarrow.parquet import read_table

    table = read_table(tmpdir / "test.parquet")
    geom_meta = json.loads(table.schema.metadata[b"geo"])["columns"]["geometry"]
    assert "encoding" in geom_meta
    assert "bbox" not in geom_meta
|
||||
|
||||
|
||||
@pytest.mark.parametrize("format", ["feather", "parquet"])
def test_write_read_default_crs(tmpdir, format):
    # A file whose "geo" metadata has no 'crs' entry must be read back with
    # the default CRS OGC:CRS84 (checked by the final assertion).
    if format == "feather":
        from pyarrow.feather import write_feather as write
    else:
        from pyarrow.parquet import write_table as write

    filename = os.path.join(str(tmpdir), f"test.{format}")
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)])
    table = _geopandas_to_arrow(gdf)

    # update the geo metadata to strip 'crs' entry
    metadata = table.schema.metadata
    geo_metadata = _decode_metadata(metadata[b"geo"])
    del geo_metadata["columns"]["geometry"]["crs"]
    metadata.update({b"geo": _encode_metadata(geo_metadata)})
    table = table.replace_schema_metadata(metadata)

    # write with the raw pyarrow writer so geopandas cannot re-add the crs
    write(table, filename)

    read = getattr(geopandas, f"read_{format}")
    df = read(filename)
    assert df.crs.equals(pyproj.CRS("OGC:CRS84"))
|
||||
|
||||
|
||||
def test_write_iso_wkb(tmpdir):
    # With shapely 2.0 the writer emits ISO-flavor WKB for 3D geometries;
    # older stacks fall back to the legacy flavor and emit a UserWarning.
    gdf = geopandas.GeoDataFrame(
        geometry=geopandas.GeoSeries.from_wkt(["POINT Z (1 2 3)"])
    )
    if compat.USE_SHAPELY_20:
        gdf.to_parquet(tmpdir / "test.parquet")
    else:
        with pytest.warns(UserWarning, match="The GeoDataFrame contains 3D geometries"):
            gdf.to_parquet(tmpdir / "test.parquet")

    from pyarrow.parquet import read_table

    # inspect the raw WKB bytes stored in the file
    table = read_table(tmpdir / "test.parquet")
    wkb = table["geometry"][0].as_py().hex()

    if compat.USE_SHAPELY_20:
        # correct ISO flavor
        assert wkb == "01e9030000000000000000f03f00000000000000400000000000000840"
    else:
        # legacy flavor (high-bit Z flag on the geometry type)
        assert wkb == "0101000080000000000000f03f00000000000000400000000000000840"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "format,schema_version",
    product(["feather", "parquet"], [None] + SUPPORTED_VERSIONS),
)
def test_write_spec_version(tmpdir, format, schema_version):
    # Write with each supported geo-metadata schema version (None = default)
    # and verify both the round trip and the per-version metadata layout.
    if format == "feather":
        from pyarrow.feather import read_table
    else:
        from pyarrow.parquet import read_table

    filename = os.path.join(str(tmpdir), f"test.{format}")
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
    write = getattr(gdf, f"to_{format}")
    write(filename, schema_version=schema_version)

    # ensure that we can roundtrip data regardless of version
    read = getattr(geopandas, f"read_{format}")
    df = read(filename)
    assert_geodataframe_equal(df, gdf)

    # verify the correct version is written in the metadata
    schema_version = schema_version or METADATA_VERSION
    table = read_table(filename)
    metadata = json.loads(table.schema.metadata[b"geo"])
    assert metadata["version"] == schema_version

    # verify that CRS is correctly handled between versions:
    # 0.1.0 stored WKT, newer versions store a PROJJSON dict
    if schema_version == "0.1.0":
        assert metadata["columns"]["geometry"]["crs"] == gdf.crs.to_wkt()

    else:
        crs_expected = gdf.crs.to_json_dict()
        _remove_id_from_member_of_ensembles(crs_expected)
        assert metadata["columns"]["geometry"]["crs"] == crs_expected

    # verify that geometry_type(s) is correctly handled between versions:
    # the key was renamed from singular string to plural list after 0.4.0
    if Version(schema_version) <= Version("0.4.0"):
        assert "geometry_type" in metadata["columns"]["geometry"]
        assert metadata["columns"]["geometry"]["geometry_type"] == "Polygon"
    else:
        assert "geometry_types" in metadata["columns"]["geometry"]
        assert metadata["columns"]["geometry"]["geometry_types"] == ["Polygon"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "format,version", product(["feather", "parquet"], [None] + SUPPORTED_VERSIONS)
)
def test_write_deprecated_version_parameter(tmpdir, format, version):
    # ``version`` used to select the geo-metadata schema version; passing a
    # schema version through it must warn (FutureWarning) but still be honored,
    # while any other value is forwarded to the underlying pyarrow writer.
    if format == "feather":
        from pyarrow.feather import read_table

        # default Feather file-format version
        version = version or 2

    else:
        from pyarrow.parquet import read_table

        # default Parquet file-format version
        version = version or "2.6"

    filename = os.path.join(str(tmpdir), f"test.{format}")
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
    write = getattr(gdf, f"to_{format}")

    if version in SUPPORTED_VERSIONS:
        with pytest.warns(
            FutureWarning,
            match="the `version` parameter has been replaced with `schema_version`",
        ):
            write(filename, version=version)

    else:
        # no warning raised if not one of the captured versions
        write(filename, version=version)

    table = read_table(filename)
    metadata = json.loads(table.schema.metadata[b"geo"])

    if version in SUPPORTED_VERSIONS:
        # version is captured as a parameter
        assert metadata["version"] == version
    else:
        # version is passed to underlying writer
        assert metadata["version"] == METADATA_VERSION
|
||||
|
||||
|
||||
@pytest.mark.parametrize("version", ["0.1.0", "0.4.0", "1.0.0-beta.1"])
def test_read_versioned_file(version):
    """
    Verify that files for different metadata spec versions can be read
    created for each supported version:

    # small dummy test dataset (not naturalearth_lowres, as this can change over time)
    from shapely.geometry import box, MultiPolygon
    df = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5,5)],
        crs="EPSG:4326",
    )
    df.to_feather(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.feather')
    df.to_parquet(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.parquet')
    """
    # mirrors the snippet above that generated the checked-in files
    expected = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)],
        crs="EPSG:4326",
    )

    df = geopandas.read_feather(DATA_PATH / "arrow" / f"test_data_v{version}.feather")
    assert_geodataframe_equal(df, expected, check_crs=True)

    df = geopandas.read_parquet(DATA_PATH / "arrow" / f"test_data_v{version}.parquet")
    assert_geodataframe_equal(df, expected, check_crs=True)
|
||||
|
||||
|
||||
def test_read_gdal_files():
    """
    Verify that files written by GDAL can be read by geopandas.
    Since it is currently not yet straightforward to install GDAL with
    Parquet/Arrow enabled in our conda setup, we are testing with some
    generated files included in the repo (using GDAL 3.5.0):

    # small dummy test dataset (not naturalearth_lowres, as this can change over time)
    from shapely.geometry import box, MultiPolygon
    df = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5,5)],
        crs="EPSG:4326",
    )
    df.to_file("test_data.gpkg", GEOMETRY_NAME="geometry")
    and then the gpkg file is converted to Parquet/Arrow with:
    $ ogr2ogr -f Parquet -lco FID= test_data_gdal350.parquet test_data.gpkg
    $ ogr2ogr -f Arrow -lco FID= -lco GEOMETRY_ENCODING=WKB test_data_gdal350.arrow test_data.gpkg
    """  # noqa: E501
    # mirrors the dataset described in the docstring
    expected = geopandas.GeoDataFrame(
        {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]},
        geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)],
        crs="EPSG:4326",
    )

    df = geopandas.read_parquet(DATA_PATH / "arrow" / "test_data_gdal350.parquet")
    assert_geodataframe_equal(df, expected, check_crs=True)

    # the GDAL ".arrow" file uses the Feather/IPC format, hence read_feather
    df = geopandas.read_feather(DATA_PATH / "arrow" / "test_data_gdal350.arrow")
    assert_geodataframe_equal(df, expected, check_crs=True)
|
||||
|
||||
|
||||
def test_parquet_read_partitioned_dataset(tmpdir):
    # we don't yet explicitly support this (in writing), but for Parquet it
    # works for reading (by relying on pyarrow.read_table)
    full = read_file(get_path("naturalearth_lowres"))

    # manually create partitioned dataset
    dataset_dir = tmpdir / "partitioned_dataset"
    dataset_dir.mkdir()
    pieces = {"data1.parquet": full[:100], "data2.parquet": full[100:]}
    for fname, piece in pieces.items():
        piece.to_parquet(dataset_dir / fname)

    result = read_parquet(dataset_dir)
    assert_geodataframe_equal(result, full)
|
||||
|
||||
|
||||
def test_parquet_read_partitioned_dataset_fsspec(tmpdir):
    # Same as test_parquet_read_partitioned_dataset, but with the partition
    # pieces on an fsspec in-memory filesystem addressed by "memory://" URL.
    fsspec = pytest.importorskip("fsspec")

    df = read_file(get_path("naturalearth_lowres"))

    # manually create partitioned dataset
    memfs = fsspec.filesystem("memory")
    memfs.mkdir("partitioned_dataset")
    with memfs.open("partitioned_dataset/data1.parquet", "wb") as f:
        df[:100].to_parquet(f)
    with memfs.open("partitioned_dataset/data2.parquet", "wb") as f:
        df[100:].to_parquet(f)

    result = read_parquet("memory://partitioned_dataset")
    assert_geodataframe_equal(result, df)
|
||||
1178
.venv/lib/python3.12/site-packages/geopandas/io/tests/test_file.py
Normal file
1178
.venv/lib/python3.12/site-packages/geopandas/io/tests/test_file.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,307 @@
|
||||
import os
|
||||
|
||||
from shapely.geometry import (
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
import pytest
|
||||
|
||||
from .test_file import FIONA_MARK, PYOGRIO_MARK
|
||||
|
||||
|
||||
# Credit: Polygons below come from Montreal city Open Data portal
|
||||
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
|
||||
city_hall_boundaries = Polygon(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
)
|
||||
vauquelin_place = Polygon(
|
||||
(
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5548825850032, 45.5084033554357),
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
)
|
||||
)
|
||||
|
||||
city_hall_walls = [
|
||||
LineString(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
)
|
||||
),
|
||||
LineString(
|
||||
(
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
city_hall_entrance = Point(-73.553785, 45.508722)
|
||||
city_hall_balcony = Point(-73.554138, 45.509080)
|
||||
city_hall_council_chamber = Point(-73.554246, 45.508931)
|
||||
|
||||
point_3D = Point(-73.553785, 45.508722, 300)
|
||||
|
||||
|
||||
# *****************************************
|
||||
# TEST TOOLING
|
||||
|
||||
|
||||
class _ExpectedError:
    """Record of an exception type plus the message pattern it should match."""

    def __init__(self, error_type, error_message_match):
        self.match = error_message_match
        self.type = error_type
|
||||
|
||||
|
||||
class _ExpectedErrorBuilder:
    """Fluent half of ``_expect_writing(gdf, driver).to_raise(...)``."""

    def __init__(self, composite_key):
        self.composite_key = composite_key

    def to_raise(self, error_type, error_match):
        # record the expectation in the module-level registry
        expectation = _ExpectedError(error_type, error_match)
        _expected_exceptions[self.composite_key] = expectation
|
||||
|
||||
|
||||
def _expect_writing(gdf, ogr_driver):
    """Begin declaring an expected write failure for this (gdf, driver) pair."""
    key = _composite_key(gdf, ogr_driver)
    return _ExpectedErrorBuilder(key)
|
||||
|
||||
|
||||
def _composite_key(gdf, ogr_driver):
    """Hashable key identifying a (GeoDataFrame, driver) pair.

    Uses ``id(gdf)`` because GeoDataFrames themselves are not hashable.
    """
    return frozenset({id(gdf), ogr_driver})
|
||||
|
||||
|
||||
def _expected_error_on(gdf, ogr_driver):
    """Return the registered _ExpectedError for this pair, or None if absent."""
    return _expected_exceptions.get(_composite_key(gdf, ogr_driver), None)
|
||||
|
||||
|
||||
# *****************************************
|
||||
# TEST CASES
|
||||
_geodataframes_to_write = []
|
||||
_expected_exceptions = {}
|
||||
_CRS = "epsg:4326"
|
||||
|
||||
# ------------------
|
||||
# gdf with Points
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]}, crs=_CRS, geometry=[city_hall_entrance, city_hall_balcony]
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with MultiPoints
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPoint([city_hall_balcony, city_hall_council_chamber]),
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony, city_hall_council_chamber]),
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with Points and MultiPoints
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiPoint([city_hall_entrance, city_hall_balcony]), city_hall_balcony],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
# 'ESRI Shapefile' driver supports writing LineString/MultiLinestring and
|
||||
# Polygon/MultiPolygon but does not mention Point/MultiPoint
|
||||
# see https://www.gdal.org/drv_shapefile.html
|
||||
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
# ------------------
|
||||
# gdf with LineStrings
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=city_hall_walls)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with MultiLineStrings
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiLineString(city_hall_walls), MultiLineString(city_hall_walls)],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with LineStrings and MultiLineStrings
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with Polygons
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]}, crs=_CRS, geometry=[city_hall_boundaries, vauquelin_place]
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with MultiPolygon
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1]},
|
||||
crs=_CRS,
|
||||
geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with Polygon and MultiPolygon
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with null geometry and Point
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, city_hall_entrance])
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with null geometry and 3D Point
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, point_3D])
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with null geometries only
|
||||
gdf = GeoDataFrame({"a": [1, 2]}, crs=_CRS, geometry=[None, None])
|
||||
_geodataframes_to_write.append(gdf)
|
||||
|
||||
# ------------------
|
||||
# gdf with all shape types mixed together
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2, 3, 4, 5, 6]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_entrance,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
# Not supported by 'ESRI Shapefile' driver
|
||||
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
# ------------------
|
||||
# gdf with all 2D shape types and 3D Point mixed together
|
||||
gdf = GeoDataFrame(
|
||||
{"a": [1, 2, 3, 4, 5, 6, 7]},
|
||||
crs=_CRS,
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_entrance,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
point_3D,
|
||||
],
|
||||
)
|
||||
_geodataframes_to_write.append(gdf)
|
||||
# Not supported by 'ESRI Shapefile' driver
|
||||
_expect_writing(gdf, "ESRI Shapefile").to_raise(RuntimeError, "Failed to write record")
|
||||
|
||||
|
||||
@pytest.fixture(params=_geodataframes_to_write)
|
||||
def geodataframe(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["GeoJSON", "ESRI Shapefile", "GPKG", "SQLite"])
|
||||
def ogr_driver(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
pytest.param("fiona", marks=FIONA_MARK),
|
||||
pytest.param("pyogrio", marks=PYOGRIO_MARK),
|
||||
]
|
||||
)
|
||||
def engine(request):
|
||||
return request.param
|
||||
|
||||
|
||||
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
    """Write each fixture GeoDataFrame with the given driver/engine and
    verify it either round-trips unchanged or fails as registered."""
    output_file = os.path.join(str(tmpdir), "output_file")
    write_kwargs = {}
    if ogr_driver == "SQLite":
        write_kwargs["spatialite"] = True

        # This if statement can be removed once minimal fiona version >= 1.8.20
        if engine == "fiona":
            import fiona
            from packaging.version import Version

            if Version(fiona.__version__) < Version("1.8.20"):
                pytest.skip("SQLite driver only available from version 1.8.20")

        # If only 3D Points, geometry_type needs to be specified for spatialite
        # at the moment. This if can be removed once the following PR is released:
        # https://github.com/geopandas/pyogrio/pull/223
        # BUGFIX: the condition used ``len(geodataframe == 2)`` — the misplaced
        # parenthesis compared the frame to 2 elementwise instead of checking
        # the row count; corrected to ``len(geodataframe) == 2``.
        if (
            engine == "pyogrio"
            and len(geodataframe) == 2
            and geodataframe.geometry[0] is None
            and geodataframe.geometry[1] is not None
            and geodataframe.geometry[1].has_z
        ):
            write_kwargs["geometry_type"] = "Point Z"

    expected_error = _expected_error_on(geodataframe, ogr_driver)
    if expected_error:
        # match covers both fiona ("Failed to write record") and pyogrio
        # ("Could not add feature to layer") phrasings
        with pytest.raises(
            RuntimeError, match="Failed to write record|Could not add feature to layer"
        ):
            geodataframe.to_file(
                output_file, driver=ogr_driver, engine=engine, **write_kwargs
            )
    else:
        geodataframe.to_file(
            output_file, driver=ogr_driver, engine=engine, **write_kwargs
        )

        reloaded = geopandas.read_file(output_file, engine=engine)

        if ogr_driver == "GeoJSON" and engine == "pyogrio":
            # For GeoJSON files, the int64 column comes back as int32
            reloaded["a"] = reloaded["a"].astype("int64")

        assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv")
|
||||
@@ -0,0 +1,304 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
from shapely.geometry import (
|
||||
LineString,
|
||||
MultiLineString,
|
||||
MultiPoint,
|
||||
MultiPolygon,
|
||||
Point,
|
||||
Polygon,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import numpy as np
|
||||
from geopandas import GeoDataFrame
|
||||
from geopandas.io.file import infer_schema
|
||||
|
||||
# Credit: Polygons below come from Montreal city Open Data portal
|
||||
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
|
||||
city_hall_boundaries = Polygon(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
)
|
||||
vauquelin_place = Polygon(
|
||||
(
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5548825850032, 45.5084033554357),
|
||||
(-73.5542465586147, 45.5081555487952),
|
||||
)
|
||||
)
|
||||
|
||||
city_hall_walls = [
|
||||
LineString(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
(-73.5546126200639, 45.5086813829106),
|
||||
(-73.5540185061397, 45.5084409343852),
|
||||
)
|
||||
),
|
||||
LineString(
|
||||
(
|
||||
(-73.5539986525799, 45.5084323044531),
|
||||
(-73.5535801792994, 45.5089539203786),
|
||||
(-73.5541107525234, 45.5091983609661),
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
city_hall_entrance = Point(-73.553785, 45.508722)
|
||||
city_hall_balcony = Point(-73.554138, 45.509080)
|
||||
city_hall_council_chamber = Point(-73.554246, 45.508931)
|
||||
|
||||
point_3D = Point(-73.553785, 45.508722, 300)
|
||||
linestring_3D = LineString(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661, 300),
|
||||
(-73.5546126200639, 45.5086813829106, 300),
|
||||
(-73.5540185061397, 45.5084409343852, 300),
|
||||
)
|
||||
)
|
||||
polygon_3D = Polygon(
|
||||
(
|
||||
(-73.5541107525234, 45.5091983609661, 300),
|
||||
(-73.5535801792994, 45.5089539203786, 300),
|
||||
(-73.5541107525234, 45.5091983609661, 300),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def test_infer_schema_only_points():
|
||||
df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_points_and_multipoints():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["MultiPoint", "Point"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_multipoints():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPoint(
|
||||
[city_hall_entrance, city_hall_balcony, city_hall_council_chamber]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {"geometry": "MultiPoint", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_only_linestrings():
|
||||
df = GeoDataFrame(geometry=city_hall_walls)
|
||||
|
||||
assert infer_schema(df) == {"geometry": "LineString", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_linestrings_and_multilinestrings():
|
||||
df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls), city_hall_walls[0]])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["MultiLineString", "LineString"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_multilinestrings():
|
||||
df = GeoDataFrame(geometry=[MultiLineString(city_hall_walls)])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "MultiLineString",
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_polygons():
|
||||
df = GeoDataFrame(geometry=[city_hall_boundaries, vauquelin_place])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "Polygon", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_polygons_and_multipolygons():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["MultiPolygon", "Polygon"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_multipolygons():
|
||||
df = GeoDataFrame(geometry=[MultiPolygon((city_hall_boundaries, vauquelin_place))])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "MultiPolygon", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_multiple_shape_types():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": [
|
||||
"MultiPolygon",
|
||||
"Polygon",
|
||||
"MultiLineString",
|
||||
"LineString",
|
||||
"MultiPoint",
|
||||
"Point",
|
||||
],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_shape_type():
|
||||
df = GeoDataFrame(
|
||||
geometry=[
|
||||
MultiPolygon((city_hall_boundaries, vauquelin_place)),
|
||||
city_hall_boundaries,
|
||||
MultiLineString(city_hall_walls),
|
||||
city_hall_walls[0],
|
||||
MultiPoint([city_hall_entrance, city_hall_balcony]),
|
||||
city_hall_balcony,
|
||||
point_3D,
|
||||
]
|
||||
)
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": [
|
||||
"3D Point",
|
||||
"MultiPolygon",
|
||||
"Polygon",
|
||||
"MultiLineString",
|
||||
"LineString",
|
||||
"MultiPoint",
|
||||
"Point",
|
||||
],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_Point():
|
||||
df = GeoDataFrame(geometry=[city_hall_balcony, point_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["3D Point", "Point"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_Points():
|
||||
df = GeoDataFrame(geometry=[point_3D, point_3D])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_linestring():
|
||||
df = GeoDataFrame(geometry=[city_hall_walls[0], linestring_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["3D LineString", "LineString"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_linestrings():
|
||||
df = GeoDataFrame(geometry=[linestring_3D, linestring_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "3D LineString",
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_mixed_3D_Polygon():
|
||||
df = GeoDataFrame(geometry=[city_hall_boundaries, polygon_3D])
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": ["3D Polygon", "Polygon"],
|
||||
"properties": OrderedDict(),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_only_3D_Polygons():
|
||||
df = GeoDataFrame(geometry=[polygon_3D, polygon_3D])
|
||||
|
||||
assert infer_schema(df) == {"geometry": "3D Polygon", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_and_2D_point():
|
||||
df = GeoDataFrame(geometry=[None, city_hall_entrance])
|
||||
|
||||
# None geometry type is then omitted
|
||||
assert infer_schema(df) == {"geometry": "Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_and_3D_point():
|
||||
df = GeoDataFrame(geometry=[None, point_3D])
|
||||
|
||||
# None geometry type is then omitted
|
||||
assert infer_schema(df) == {"geometry": "3D Point", "properties": OrderedDict()}
|
||||
|
||||
|
||||
def test_infer_schema_null_geometry_all():
|
||||
df = GeoDataFrame(geometry=[None, None])
|
||||
|
||||
# None geometry type in then replaced by 'Unknown'
|
||||
# (default geometry type supported by Fiona)
|
||||
assert infer_schema(df) == {"geometry": "Unknown", "properties": OrderedDict()}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"array_data,dtype", [([1, 2**31 - 1], np.int32), ([1, np.nan], pd.Int32Dtype())]
|
||||
)
|
||||
def test_infer_schema_int32(array_data, dtype):
|
||||
int32col = pd.array(data=array_data, dtype=dtype)
|
||||
df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
|
||||
df["int32_column"] = int32col
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "Point",
|
||||
"properties": OrderedDict([("int32_column", "int32")]),
|
||||
}
|
||||
|
||||
|
||||
def test_infer_schema_int64():
|
||||
int64col = pd.array([1, np.nan], dtype=pd.Int64Dtype())
|
||||
df = GeoDataFrame(geometry=[city_hall_entrance, city_hall_balcony])
|
||||
df["int64_column"] = int64col
|
||||
|
||||
assert infer_schema(df) == {
|
||||
"geometry": "Point",
|
||||
"properties": OrderedDict([("int64_column", "int")]),
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
See generate_legacy_storage_files.py for the creation of the legacy files.
|
||||
|
||||
"""
|
||||
from contextlib import contextmanager
|
||||
import glob
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import pytest
|
||||
from geopandas.testing import assert_geodataframe_equal
|
||||
from geopandas import _compat as compat
|
||||
import geopandas
|
||||
from shapely.geometry import Point
|
||||
|
||||
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def current_pickle_data():
|
||||
# our current version pickle data
|
||||
from .generate_legacy_storage_files import create_pickle_data
|
||||
|
||||
return create_pickle_data()
|
||||
|
||||
|
||||
files = glob.glob(str(DATA_PATH / "pickle" / "*.pickle"))
|
||||
|
||||
|
||||
@pytest.fixture(params=files, ids=[p.split("/")[-1] for p in files])
|
||||
def legacy_pickle(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@contextmanager
def with_use_pygeos(option):
    # Temporarily set the global ``use_pygeos`` option for the duration of the
    # ``with`` body, restoring the previous value even if the body raises.
    orig = geopandas.options.use_pygeos
    geopandas.options.use_pygeos = option
    try:
        yield
    finally:
        geopandas.options.use_pygeos = orig
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    compat.USE_SHAPELY_20 or compat.USE_PYGEOS,
    reason=(
        "shapely 2.0/pygeos-based unpickling currently only works for "
        "shapely-2.0/pygeos-written files"
    ),
)
def test_legacy_pickles(current_pickle_data, legacy_pickle):
    """Pickles written by older geopandas versions still load correctly."""
    result = pd.read_pickle(legacy_pickle)

    # Each legacy file stores a dict of named GeoDataFrames; compare every
    # entry against the equivalent data built by the current version.
    for name, value in result.items():
        expected = current_pickle_data[name]
        assert_geodataframe_equal(value, expected)
|
||||
|
||||
|
||||
def test_round_trip_current(tmpdir, current_pickle_data):
    """Pickling and unpickling with the current version round-trips exactly."""
    for name, frame in current_pickle_data.items():
        target = str(tmpdir / "{}.pickle".format(name))
        frame.to_pickle(target)
        restored = pd.read_pickle(target)
        assert_geodataframe_equal(restored, frame)
        # has_sindex must survive the round trip as a plain bool.
        assert isinstance(restored.has_sindex, bool)
|
||||
|
||||
|
||||
def _create_gdf():
    """Return a small three-point GeoDataFrame in EPSG:4326 used across tests."""
    points = [Point(1, 1), Point(2, 2), Point(3, 3)]
    return geopandas.GeoDataFrame(
        {"a": [0.1, 0.2, 0.3], "geometry": points},
        crs="EPSG:4326",
    )
|
||||
|
||||
|
||||
@pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745")
def test_pygeos_switch(tmpdir):
    """Pickles round-trip across the pygeos/shapely backend switch (GH #1745)."""
    # writing and reading with pygeos disabled
    with with_use_pygeos(False):
        gdf = _create_gdf()
        path = str(tmpdir / "gdf_crs1.pickle")
        gdf.to_pickle(path)
        result = pd.read_pickle(path)
        assert_geodataframe_equal(result, gdf)

    # writing without pygeos, reading with pygeos
    with with_use_pygeos(False):
        gdf = _create_gdf()
        path = str(tmpdir / "gdf_crs1.pickle")
        gdf.to_pickle(path)

    with with_use_pygeos(True):
        result = pd.read_pickle(path)
        # recreate the expected frame under the *reading* backend so both
        # sides of the comparison use the same geometry representation
        gdf = _create_gdf()
        assert_geodataframe_equal(result, gdf)

    # writing with pygeos, reading without pygeos
    with with_use_pygeos(True):
        gdf = _create_gdf()
        path = str(tmpdir / "gdf_crs1.pickle")
        gdf.to_pickle(path)

    with with_use_pygeos(False):
        result = pd.read_pickle(path)
        gdf = _create_gdf()
        assert_geodataframe_equal(result, gdf)
|
||||
@@ -0,0 +1,752 @@
|
||||
"""
|
||||
Tests here include reading/writing to different types of spatial databases.
|
||||
The spatial database tests may not work without additional system
|
||||
configuration. postGIS tests require a test database to have been setup;
|
||||
see geopandas.tests.util for more information.
|
||||
"""
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import geopandas
|
||||
from geopandas import GeoDataFrame, read_file, read_postgis
|
||||
|
||||
import geopandas._compat as compat
|
||||
from geopandas.io.sql import _get_conn as get_conn, _write_postgis as write_postgis
|
||||
from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df
|
||||
import pytest
|
||||
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
except ImportError:
|
||||
# Avoid local imports for text in all sqlalchemy tests
|
||||
# all tests using text use engine_postgis, which ensures sqlalchemy is available
|
||||
text = str
|
||||
|
||||
|
||||
@pytest.fixture
def df_nybb():
    """NYC boroughs sample dataset loaded as a GeoDataFrame."""
    return read_file(geopandas.datasets.get_path("nybb"))
|
||||
|
||||
|
||||
@pytest.fixture()
def connection_postgis():
    """
    Initiates a connection to a postGIS database that must already exist.
    See create_postgis for more information.
    """
    psycopg2 = pytest.importorskip("psycopg2")
    from psycopg2 import OperationalError

    # Connection parameters come from the standard libpq environment
    # variables; only the database name is fixed.
    dbname = "test_geopandas"
    user = os.environ.get("PGUSER")
    password = os.environ.get("PGPASSWORD")
    host = os.environ.get("PGHOST")
    port = os.environ.get("PGPORT")
    try:
        con = psycopg2.connect(
            dbname=dbname, user=user, password=password, host=host, port=port
        )
    except OperationalError:
        pytest.skip("Cannot connect with postgresql database")
    with warnings.catch_warnings():
        # pandas warns when handed a raw DBAPI connection instead of a
        # SQLAlchemy connectable; that warning is expected in these tests.
        warnings.filterwarnings(
            "ignore", message="pandas only supports SQLAlchemy connectable.*"
        )
        yield con
    con.close()
|
||||
|
||||
|
||||
@pytest.fixture()
def engine_postgis():
    """
    Initiates a connection engine to a postGIS database that must already exist.
    """
    sqlalchemy = pytest.importorskip("sqlalchemy")
    from sqlalchemy.engine.url import URL

    # Same libpq environment variables as connection_postgis, but wrapped
    # in a SQLAlchemy engine.
    user = os.environ.get("PGUSER")
    password = os.environ.get("PGPASSWORD")
    host = os.environ.get("PGHOST")
    port = os.environ.get("PGPORT")
    dbname = "test_geopandas"

    try:
        con = sqlalchemy.create_engine(
            URL.create(
                drivername="postgresql+psycopg2",
                username=user,
                database=dbname,
                password=password,
                host=host,
                port=port,
            )
        )
        # create_engine is lazy; force a real connection attempt so a
        # missing database skips here rather than mid-test.
        con.connect()
    except Exception:
        pytest.skip("Cannot connect with postgresql database")

    yield con
    con.dispose()
|
||||
|
||||
|
||||
@pytest.fixture()
def connection_spatialite():
    """
    Return a memory-based SQLite3 connection with SpatiaLite enabled & initialized.

    `The sqlite3 module must be built with loadable extension support
    <https://docs.python.org/3/library/sqlite3.html#f1>`_ and
    `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_
    must be available on the system as a SQLite module.
    Packages available on Anaconda meet requirements.

    Exceptions
    ----------
    ``AttributeError`` on missing support for loadable SQLite extensions
    ``sqlite3.OperationalError`` on missing SpatiaLite
    """
    sqlite3 = pytest.importorskip("sqlite3")
    con = None
    try:
        con = sqlite3.connect(":memory:")
        # The connection context manager only scopes a transaction; it does
        # not close the connection.
        with con:
            con.enable_load_extension(True)
            con.load_extension("mod_spatialite")
            con.execute("SELECT InitSpatialMetaData(TRUE)")
    except Exception:
        # BUG FIX: previously `con` was unbound when sqlite3.connect() itself
        # failed, so `con.close()` raised NameError instead of skipping.
        if con is not None:
            con.close()
        pytest.skip("Cannot setup spatialite database")

    yield con
    con.close()
|
||||
|
||||
|
||||
def drop_table_if_exists(conn_or_engine, table):
    """Drop ``table`` from the database if present; no-op otherwise."""
    sqlalchemy = pytest.importorskip("sqlalchemy")

    if sqlalchemy.inspect(conn_or_engine).has_table(table):
        metadata = sqlalchemy.MetaData()
        with warnings.catch_warnings():
            # SQLAlchemy does not know the PostGIS 'geometry' type; the
            # reflection warning is expected and harmless here.
            warnings.filterwarnings(
                "ignore", message="Did not recognize type 'geometry' of column.*"
            )
            metadata.reflect(conn_or_engine)
        table = metadata.tables.get(table)
        if table is not None:
            table.drop(conn_or_engine, checkfirst=True)
|
||||
|
||||
|
||||
@pytest.fixture
def df_mixed_single_and_multi():
    """GeoDataFrame mixing single-part and multi-part line/point geometries."""
    from shapely.geometry import Point, LineString, MultiLineString

    geoms = [
        LineString([(0, 0), (1, 1)]),
        MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
        Point(0, 1),
    ]
    return geopandas.GeoDataFrame({"geometry": geoms}, crs="epsg:4326")
|
||||
|
||||
|
||||
@pytest.fixture
def df_geom_collection():
    """Single-row GeoDataFrame containing one GeometryCollection."""
    from shapely.geometry import Point, LineString, Polygon, GeometryCollection

    collection = GeometryCollection(
        [
            Polygon([(0, 0), (1, 1), (0, 1)]),
            LineString([(0, 0), (1, 1)]),
            Point(0, 0),
        ]
    )
    return geopandas.GeoDataFrame({"geometry": [collection]}, crs="epsg:4326")
|
||||
|
||||
|
||||
@pytest.fixture
def df_linear_ring():
    """Single-row GeoDataFrame containing one LinearRing."""
    from shapely.geometry import LinearRing

    ring = LinearRing(((0, 0), (0, 1), (1, 1), (1, 0)))
    return geopandas.GeoDataFrame({"geometry": [ring]}, crs="epsg:4326")
|
||||
|
||||
|
||||
@pytest.fixture
def df_3D_geoms():
    """GeoDataFrame whose geometries all carry a Z coordinate."""
    from shapely.geometry import Point, LineString, Polygon

    geoms = [
        LineString([(0, 0, 0), (1, 1, 1)]),
        Polygon([(0, 0, 0), (1, 1, 1), (0, 1, 1)]),
        Point(0, 1, 2),
    ]
    return geopandas.GeoDataFrame({"geometry": geoms}, crs="epsg:4326")
|
||||
|
||||
|
||||
class TestIO:
    """Round-trip tests for reading/writing GeoDataFrames to PostGIS/SpatiaLite."""

    def test_get_conn(self, engine_postgis):
        Connection = pytest.importorskip("sqlalchemy.engine.base").Connection

        engine = engine_postgis
        with get_conn(engine) as output:
            assert isinstance(output, Connection)
        with engine.connect() as conn:
            with get_conn(conn) as output:
                assert isinstance(output, Connection)
        # anything that is neither an Engine nor a Connection is rejected
        with pytest.raises(ValueError):
            with get_conn(object()):
                pass

    def test_read_postgis_default(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None

    def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
        con = connection_postgis
        geom_col = "the_geom"
        create_postgis(con, df_nybb, geom_col=geom_col)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con, geom_col=geom_col)

        validate_boro_df(df)

    def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
        """Tests that a SELECT {geom} AS {some_other_geom} works."""
        con = connection_postgis
        orig_geom = "geom"
        out_geom = "the_geom"
        create_postgis(con, df_nybb, geom_col=orig_geom)

        sql = """SELECT borocode, boroname, shape_leng, shape_area,
            {} as {} FROM nybb;""".format(
            orig_geom, out_geom
        )
        df = read_postgis(sql, con, geom_col=out_geom)

        validate_boro_df(df)

    def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
        """Tests that an SRID can be read from a geodatabase (GH #451)."""
        con = connection_postgis
        crs = "epsg:4269"
        df_reproj = df_nybb.to_crs(crs)
        create_postgis(con, df_reproj, srid=4269)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)

        validate_boro_df(df)
        assert df.crs == crs

    def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
        """Tests that a user specified CRS overrides the geodatabase SRID."""
        con = connection_postgis
        orig_crs = df_nybb.crs
        create_postgis(con, df_nybb, srid=4269)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con, crs=orig_crs)

        validate_boro_df(df)
        assert df.crs == orig_crs

    def test_from_postgis_default(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = GeoDataFrame.from_postgis(sql, con)

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
        con = connection_postgis
        geom_col = "the_geom"
        create_postgis(con, df_nybb, geom_col=geom_col)

        sql = "SELECT * FROM nybb;"
        df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)

        validate_boro_df(df, case_sensitive=False)

    def test_read_postgis_null_geom(self, connection_spatialite, df_nybb):
        """Tests that geometry with NULL is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        df_nybb.geometry.iat[0] = None
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'AsEWKB("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    def test_read_postgis_binary(self, connection_spatialite, df_nybb):
        """Tests that geometry read as binary is accepted."""
        con = connection_spatialite
        geom_col = df_nybb.geometry.name
        create_spatialite(con, df_nybb)
        sql = (
            "SELECT ogc_fid, borocode, boroname, shape_leng, shape_area, "
            'ST_AsBinary("{0}") AS "{0}" FROM nybb'.format(geom_col)
        )
        df = read_postgis(sql, con, geom_col=geom_col)
        validate_boro_df(df)

    def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument"""
        chunksize = 2
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        # no crs defined on the created geodatabase, and none specified
        # by user; should not be set to 0, as from get_srid failure
        assert df.crs is None

    def test_read_postgis_privacy(self, connection_postgis, df_nybb):
        con = connection_postgis
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        # the private module-level function warns; the public API does not
        with pytest.warns(FutureWarning):
            geopandas.io.sql.read_postgis(sql, con)

    def test_write_postgis_default(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
        engine = engine_postgis
        table = "nybb"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
        """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
        engine = engine_postgis
        table = "aTestTable"

        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        # Write to db
        write_postgis(df_nybb, con=engine, name=table, if_exists="fail")
        # Validate
        sql = text('SELECT * FROM "{table}";'.format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
        with engine_postgis.begin() as con:
            table = "nybb_con"

            # If table exists, delete it before trying to write with defaults
            drop_table_if_exists(con, table)

            # Write to db
            write_postgis(df_nybb, con=con, name=table, if_exists="fail")
            # Validate
            sql = text("SELECT * FROM {table};".format(table=table))
            df = read_postgis(sql, con, geom_col="geometry")
            validate_boro_df(df)

    def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that uploading the same table raises error when: if_replace='fail'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # BUG FIX: the old try/except pattern silently passed when no error
        # was raised at all; require the ValueError explicitly.
        with pytest.raises(ValueError, match="already exists"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="fail")

    def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that replacing a table is possible when: if_replace='replace'.
        """
        engine = engine_postgis

        table = "nybb"

        # Ensure table exists
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Overwrite
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that appending to existing table produces correct results when:
        if_replace='append'.
        """
        engine = engine_postgis

        table = "nybb"

        orig_rows, orig_cols = df_nybb.shape
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        write_postgis(df_nybb, con=engine, name=table, if_exists="append")
        # Validate
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        new_rows, new_cols = df.shape

        # There should be twice as many rows in the new table
        # (BUG FIX: the assert messages were accidentally wrapped in 1-tuples)
        assert new_rows == orig_rows * 2, (
            "There should be {target} rows,"
            "found: {current}".format(target=orig_rows * 2, current=new_rows)
        )
        # Number of columns should stay the same
        assert new_cols == orig_cols, (
            "There should be {target} columns,"
            "found: {current}".format(target=orig_cols, current=new_cols)
        )

    def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS without CRS information.
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db (dropping CRS should warn, and result in SRID 0)
        df_nybb.crs = None
        with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate that srid is -1
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 0, "SRID should be 0, found %s" % target_srid

    def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
        CRS information (GH #2414).
        """
        engine = engine_postgis

        table = "nybb"

        # Write to db
        df_nybb_esri = df_nybb.to_crs("ESRI:102003")
        write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace")
        # Validate that srid is 102003
        sql = text(
            "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
                schema="public", table=table, geom_col="geometry"
            )
        )
        with engine.connect() as conn:
            target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid

    def test_write_postgis_geometry_collection(
        self, engine_postgis, df_geom_collection
    ):
        """
        Tests that writing a mix of different geometry types is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_geom_collection, con=engine, name=table, if_exists="replace")

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")

        assert geom_type.upper() == "GEOMETRYCOLLECTION"
        assert df.geom_type.unique()[0] == "GeometryCollection"

    def test_write_postgis_mixed_geometry_types(
        self, engine_postgis, df_mixed_single_and_multi
    ):
        """
        Tests that writing a mix of single and MultiGeometries is possible.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi, con=engine, name=table, if_exists="replace"
        )

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
        """
        Tests that writing a LinearRing.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_linear_ring, con=engine, name=table, if_exists="replace")

        # Validate geometry type (LinearRings are stored as LineStrings)
        sql = text(
            "SELECT DISTINCT(GeometryType(geometry)) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            geom_type = conn.execute(sql).fetchone()[0]

        assert geom_type.upper() == "LINESTRING"

    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
        """
        Tests writing a LinearRing works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(
            df_mixed_single_and_multi,
            con=engine,
            name=table,
            if_exists="replace",
            chunksize=1,
        )
        # Validate row count
        sql = text("SELECT COUNT(geometry) FROM {table};".format(table=table))
        with engine.connect() as conn:
            row_cnt = conn.execute(sql).fetchone()[0]
        assert row_cnt == 3

        # Validate geometry type
        sql = text(
            "SELECT DISTINCT GeometryType(geometry) FROM {table} ORDER BY 1;".format(
                table=table
            )
        )
        with engine.connect() as conn:
            res = conn.execute(sql).fetchall()
        assert res[0][0].upper() == "LINESTRING"
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
        """
        Tests writing data to alternative schema.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_to_different_schema_when_table_exists(
        self, engine_postgis, df_nybb
    ):
        """
        Tests writing data to alternative schema.
        """
        engine = engine_postgis

        table = "nybb"
        schema_to_use = "test"
        sql = text("CREATE SCHEMA IF NOT EXISTS {schema};".format(schema=schema_to_use))
        with engine.begin() as conn:
            conn.execute(sql)

        try:
            write_postgis(
                df_nybb, con=engine, name=table, if_exists="fail", schema=schema_to_use
            )
            # Validate
            sql = text(
                "SELECT * FROM {schema}.{table};".format(
                    schema=schema_to_use, table=table
                )
            )

            df = read_postgis(sql, engine, geom_col="geometry")
            validate_boro_df(df)

        # Should raise a ValueError when table exists
        except ValueError:
            pass

        # Try with replace flag on
        write_postgis(
            df_nybb, con=engine, name=table, if_exists="replace", schema=schema_to_use
        )
        # Validate
        sql = text(
            "SELECT * FROM {schema}.{table};".format(schema=schema_to_use, table=table)
        )

        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
        """
        Tests writing a geometries with 3 dimensions works.
        """
        engine = engine_postgis

        table = "geomtype_tests"

        write_postgis(df_3D_geoms, con=engine, name=table, if_exists="replace")

        # Check that all geometries have 3 dimensions
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert list(df.geometry.has_z) == [True, True, True]

    def test_row_order(self, engine_postgis, df_nybb):
        """
        Tests that the row order in db table follows the order of the original frame.
        """
        engine = engine_postgis

        table = "row_order_test"
        correct_order = df_nybb["BoroCode"].tolist()

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Check that the row order matches
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        assert df["BoroCode"].tolist() == correct_order

    def test_append_before_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that insert works with if_exists='append' when table does not exist yet.
        """
        engine = engine_postgis

        table = "nybb"
        # If table exists, delete it before trying to write with defaults
        drop_table_if_exists(engine, table)

        write_postgis(df_nybb, con=engine, name=table, if_exists="append")

        # Check that the row order matches
        sql = text("SELECT * FROM {table};".format(table=table))
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    def test_append_with_different_crs(self, engine_postgis, df_nybb):
        """
        Tests that the warning is raised if table CRS differs from frame.
        """
        engine = engine_postgis

        table = "nybb"
        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")

        # Reproject
        df_nybb2 = df_nybb.to_crs(epsg=4326)

        # Should raise error when appending
        with pytest.raises(ValueError, match="CRS of the target table"):
            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.xfail(
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_21,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.x",
    )
    def test_duplicate_geometry_column_fails(self, engine_postgis):
        """
        Tests that a ValueError is raised if an SQL query returns two geometry columns.
        """
        engine = engine_postgis

        sql = "select ST_MakePoint(0, 0) as geom, ST_MakePoint(0, 0) as geom;"

        with pytest.raises(ValueError):
            read_postgis(sql, engine, geom_col="geom")
|
||||
988
.venv/lib/python3.12/site-packages/geopandas/plotting.py
Normal file
988
.venv/lib/python3.12/site-packages/geopandas/plotting.py
Normal file
@@ -0,0 +1,988 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas.plotting import PlotAccessor
|
||||
from pandas import CategoricalDtype
|
||||
|
||||
import geopandas
|
||||
|
||||
from packaging.version import Version
|
||||
|
||||
from ._decorator import doc
|
||||
|
||||
|
||||
def deprecated(new, warning_type=FutureWarning):
    """Helper to provide deprecation warning.

    Wraps *new* so that each call first emits a warning (built from the
    wrapped function's name, minus its leading underscore) and then forwards
    all arguments to *new*, discarding its return value.
    """

    def old(*args, **kwargs):
        message = (
            "{} is intended for internal use only, "
            "and will be deprecated.".format(new.__name__[1:])
        )
        warnings.warn(message, warning_type, stacklevel=2)
        new(*args, **kwargs)

    return old
|
||||
|
||||
|
||||
def _sanitize_geoms(geoms, prefix="Multi"):
    """
    Returns Series like geoms and index, except that any Multi geometries
    are split into their components and indices are repeated for all component
    in the same Multi geometry. At the same time, empty or missing geometries are
    filtered out. Maintains 1:1 matching of geometry to value.

    Prefix specifies type of geometry to be flatten. 'Multi' for MultiPoint and similar,
    "Geom" for GeometryCollection.

    Returns
    -------
    components : list of geometry

    component_index : index array
        indices are repeated for all components in the same Multi geometry
    """
    # TODO(shapely) look into simplifying this with
    # shapely.get_parts(geoms, return_index=True) from shapely 2.0
    components, component_index = [], []

    # Fast path: nothing to explode and nothing to drop, so the input is
    # returned as-is with a trivial identity index.
    if (
        not geoms.geom_type.str.startswith(prefix).any()
        and not geoms.is_empty.any()
        and not geoms.isna().any()
    ):
        return geoms, np.arange(len(geoms))

    for ix, geom in enumerate(geoms):
        if geom is not None and geom.geom_type.startswith(prefix) and not geom.is_empty:
            # Explode the multi-part geometry, repeating the original
            # position once per component so values stay matched.
            for poly in geom.geoms:
                components.append(poly)
                component_index.append(ix)
        elif geom is None or geom.is_empty:
            # Missing or empty geometries are dropped entirely.
            continue
        else:
            components.append(geom)
            component_index.append(ix)

    return components, np.array(component_index)
|
||||
|
||||
|
||||
def _expand_kwargs(kwargs, multiindex):
    """
    Most arguments to the plot functions must be a (single) value, or a sequence
    of values. This function checks each key-value pair in 'kwargs' and expands
    it (in place) to the correct length/formats with help of 'multiindex', unless
    the value appears to already be a valid (single) value for the key.
    """
    import matplotlib
    from matplotlib.colors import is_color_like
    # FIX: typing.Iterable is a deprecated alias for isinstance() checks;
    # collections.abc.Iterable is the canonical (and identical) ABC.
    from collections.abc import Iterable

    mpl = Version(matplotlib.__version__)
    if mpl >= Version("3.4"):
        # alpha is supported as array argument with matplotlib 3.4+
        scalar_kwargs = ["marker", "path_effects"]
    else:
        scalar_kwargs = ["marker", "alpha", "path_effects"]

    for att, value in kwargs.items():
        if "color" in att:  # color(s), edgecolor(s), facecolor(s)
            if is_color_like(value):
                continue
        elif "linestyle" in att:  # linestyle(s)
            # A single linestyle can be 2-tuple of a number and an iterable.
            if (
                isinstance(value, tuple)
                and len(value) == 2
                and isinstance(value[1], Iterable)
            ):
                continue
        elif att in scalar_kwargs:
            # For these attributes, only a single value is allowed, so never expand.
            continue

        # Anything list-like at this point is per-geometry data: repeat each
        # entry to line up with the exploded (multi-part) geometries.
        if pd.api.types.is_list_like(value):
            kwargs[att] = np.take(value, multiindex, axis=0)
|
||||
|
||||
|
||||
def _PolygonPatch(polygon, **kwargs):
    """Constructs a matplotlib patch from a Polygon geometry

    The `kwargs` are those supported by the matplotlib.patches.PathPatch class
    constructor. Returns an instance of matplotlib.patches.PathPatch.

    Example (using Shapely Point and a matplotlib axes)::

        b = shapely.geometry.Point(0, 0).buffer(1.0)
        patch = _PolygonPatch(b, fc='blue', ec='blue', alpha=0.5)
        ax.add_patch(patch)

    GeoPandas originally relied on the descartes package by Sean Gillies
    (BSD license, https://pypi.org/project/descartes) for PolygonPatch, but
    this dependency was removed in favor of the below matplotlib code.
    """
    from matplotlib.patches import PathPatch
    from matplotlib.path import Path

    # Exterior ring plus all interior rings (holes) are combined into one
    # compound path; the [:, :2] slice drops any Z coordinate.
    path = Path.make_compound_path(
        Path(np.asarray(polygon.exterior.coords)[:, :2]),
        *[Path(np.asarray(ring.coords)[:, :2]) for ring in polygon.interiors],
    )
    return PathPatch(path, **kwargs)
|
||||
|
||||
|
||||
def _plot_polygon_collection(
    ax, geoms, values=None, color=None, cmap=None, vmin=None, vmax=None, **kwargs
):
    """
    Plot a collection of Polygon and MultiPolygon geometries to `ax`.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : a sequence of `N` Polygons and/or MultiPolygons (can be mixed)
    values : a sequence of `N` values, optional
        Values will be mapped to colors using vmin/vmax/cmap. They should
        have 1:1 correspondence with the geometries (not their components).
        Otherwise follows `color` / `facecolor` kwargs.
    edgecolor : single color or sequence of `N` colors
        Color for the edge of the polygons
    facecolor : single color or sequence of `N` colors
        Color to fill the polygons. Cannot be used together with `values`.
    color : single color or sequence of `N` colors
        Sets both `edgecolor` and `facecolor`
    **kwargs
        Additional keyword arguments passed to the collection

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    from matplotlib.collections import PatchCollection

    # Explode multi-part geometries; `mapping` indexes each part back to
    # its original geometry so per-geometry values/kwargs can be repeated.
    geoms, mapping = _sanitize_geoms(geoms)
    if values is not None:
        values = np.take(values, mapping, axis=0)

    # PatchCollection does not accept point-style kwargs.
    kwargs = {
        key: val for key, val in kwargs.items() if key not in ["markersize", "marker"]
    }

    # Stash `color` in kwargs so _expand_kwargs can broadcast it per part.
    if color is not None:
        kwargs["color"] = color

    _expand_kwargs(kwargs, mapping)

    patches = [_PolygonPatch(poly) for poly in geoms]
    collection = PatchCollection(patches, **kwargs)

    if values is not None:
        collection.set_array(np.asarray(values))
        collection.set_cmap(cmap)
        # An explicit `norm` kwarg takes precedence over vmin/vmax.
        if "norm" not in kwargs:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection, autolim=True)
    ax.autoscale_view()
    return collection
|
||||
|
||||
|
||||
# Public alias wrapped by this module's `deprecated` helper (defined
# elsewhere in the file) — presumably emits a deprecation warning pointing
# at the underscore-prefixed implementation; confirm against the helper.
plot_polygon_collection = deprecated(_plot_polygon_collection)
|
||||
|
||||
|
||||
def _plot_linestring_collection(
    ax, geoms, values=None, color=None, cmap=None, vmin=None, vmax=None, **kwargs
):
    """
    Plot a collection of LineString and MultiLineString geometries to `ax`.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : a sequence of `N` LineStrings and/or MultiLineStrings (can be
        mixed)
    values : a sequence of `N` values, optional
        Values will be mapped to colors using vmin/vmax/cmap. They should
        have 1:1 correspondence with the geometries (not their components).
    color : single color or sequence of `N` colors
        Cannot be used together with `values`.

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    from matplotlib.collections import LineCollection

    # Explode multi-part geometries; `mapping` indexes each part back to
    # the original geometry so values/kwargs can be repeated per part.
    geoms, mapping = _sanitize_geoms(geoms)
    if values is not None:
        values = np.take(values, mapping, axis=0)

    # LineCollection does not accept point-style kwargs.
    kwargs = {
        key: val for key, val in kwargs.items() if key not in ["markersize", "marker"]
    }

    # Stash `color` in kwargs so _expand_kwargs can broadcast it per part.
    if color is not None:
        kwargs["color"] = color

    _expand_kwargs(kwargs, mapping)

    # Keep only the first two coordinate columns (drop a possible z).
    segments = [np.array(line.coords)[:, :2] for line in geoms]
    collection = LineCollection(segments, **kwargs)

    if values is not None:
        collection.set_array(np.asarray(values))
        collection.set_cmap(cmap)
        # An explicit `norm` kwarg takes precedence over vmin/vmax.
        if "norm" not in kwargs:
            collection.set_clim(vmin, vmax)

    ax.add_collection(collection, autolim=True)
    ax.autoscale_view()
    return collection
|
||||
|
||||
|
||||
# Public alias wrapped by this module's `deprecated` helper (defined
# elsewhere in the file) — presumably emits a deprecation warning pointing
# at the underscore-prefixed implementation; confirm against the helper.
plot_linestring_collection = deprecated(_plot_linestring_collection)
|
||||
|
||||
|
||||
def _plot_point_collection(
    ax,
    geoms,
    values=None,
    color=None,
    cmap=None,
    vmin=None,
    vmax=None,
    marker="o",
    markersize=None,
    **kwargs,
):
    """
    Plot a collection of Point and MultiPoint geometries to `ax`.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        where shapes will be plotted
    geoms : sequence of `N` Points or MultiPoints
    values : a sequence of `N` values, optional
        Values mapped to colors using vmin, vmax, and cmap.
        Cannot be specified together with `color`.
    markersize : scalar or array-like, optional
        Size of the markers. Note that under the hood ``scatter`` is
        used, so the specified value will be proportional to the
        area of the marker (size in points^2).

    Returns
    -------
    collection : matplotlib.collections.Collection that was plotted
    """
    if values is not None and color is not None:
        raise ValueError("Can only specify one of 'values' and 'color' kwargs")

    # Explode MultiPoints into individual points; `mapping` indexes each
    # part back to its original geometry.
    geoms, mapping = _sanitize_geoms(geoms)
    # values are expanded below as kwargs["c"]

    # Empty points contribute a None coordinate (skipped by scatter).
    xs = []
    ys = []
    for pt in geoms:
        if pt.is_empty:
            xs.append(None)
            ys.append(None)
        else:
            xs.append(pt.x)
            ys.append(pt.y)

    # matplotlib 1.4 does not support c=None, and < 2.0 does not support s=None
    if values is not None:
        kwargs["c"] = values
    if markersize is not None:
        kwargs["s"] = markersize

    # Add to kwargs so _expand_kwargs can broadcast them per exploded part.
    if color is not None:
        kwargs["color"] = color
    if marker is not None:
        kwargs["marker"] = marker
    _expand_kwargs(kwargs, mapping)

    # scatter rejects vmin/vmax when an explicit norm is supplied.
    if "norm" in kwargs:
        collection = ax.scatter(xs, ys, cmap=cmap, **kwargs)
    else:
        collection = ax.scatter(xs, ys, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs)

    return collection
|
||||
|
||||
|
||||
# Public alias wrapped by this module's `deprecated` helper (defined
# elsewhere in the file) — presumably emits a deprecation warning pointing
# at the underscore-prefixed implementation; confirm against the helper.
plot_point_collection = deprecated(_plot_point_collection)
|
||||
|
||||
|
||||
def plot_series(
    s, cmap=None, color=None, ax=None, figsize=None, aspect="auto", **style_kwds
):
    """
    Plot a GeoSeries.

    Generate a plot of a GeoSeries geometry with matplotlib.

    Parameters
    ----------
    s : Series
        The GeoSeries to be plotted. Currently Polygon,
        MultiPolygon, LineString, MultiLineString, Point and MultiPoint
        geometries can be plotted.
    cmap : str (default None)
        The name of a colormap recognized by matplotlib. Any
        colormap will work, but categorical colormaps are
        generally recommended. Examples of useful discrete
        colormaps include:

            tab10, tab20, Accent, Dark2, Paired, Pastel1, Set1, Set2

    color : str, np.array, pd.Series, List (default None)
        If specified, all objects will be colored uniformly.
    ax : matplotlib.pyplot.Artist (default None)
        axes on which to draw the plot
    figsize : pair of floats (default None)
        Size of the resulting matplotlib.figure.Figure. If the argument
        ax is given explicitly, figsize is ignored.
    aspect : 'auto', 'equal', None or float (default 'auto')
        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
        however data are not projected (coordinates are long/lat), the aspect is by
        default set to 1/cos(s_y * pi/180) with s_y the y coordinate of the middle of
        the GeoSeries (the mean of the y range of bounding box) so that a long/lat
        square appears square in the middle of the plot. This implies an
        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
        also be set manually (float) as the ratio of y-unit to x-unit.
    **style_kwds : dict
        Color options to be passed on to the actual plot function, such
        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
        ``alpha``.

    Returns
    -------
    ax : matplotlib axes instance
    """
    # Backwards-compatible handling of the deprecated 'colormap'/'axes'
    # keyword spellings: warn, then map onto cmap/ax.
    if "colormap" in style_kwds:
        warnings.warn(
            "'colormap' is deprecated, please use 'cmap' instead "
            "(for consistency with matplotlib)",
            FutureWarning,
            stacklevel=3,
        )
        cmap = style_kwds.pop("colormap")
    if "axes" in style_kwds:
        warnings.warn(
            "'axes' is deprecated, please use 'ax' instead "
            "(for consistency with pandas)",
            FutureWarning,
            stacklevel=3,
        )
        ax = style_kwds.pop("axes")

    # matplotlib is an optional dependency of geopandas; fail with an
    # actionable message rather than a bare ImportError.
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError(
            "The matplotlib package is required for plotting in geopandas. "
            "You can install it using 'conda install -c conda-forge matplotlib' or "
            "'pip install matplotlib'."
        )

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    # Aspect handling: for geographic (long/lat) CRS, approximate a locally
    # square appearance at the mid-latitude of the data's bounding box.
    if aspect == "auto":
        if s.crs and s.crs.is_geographic:
            bounds = s.total_bounds
            y_coord = np.mean([bounds[1], bounds[3]])
            ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
            # formula ported from R package sp
            # https://github.com/edzer/sp/blob/master/R/mapasp.R
        else:
            ax.set_aspect("equal")
    elif aspect is not None:
        ax.set_aspect(aspect)

    # Nothing to draw: warn and return the (possibly newly created) axes.
    if s.empty:
        warnings.warn(
            "The GeoSeries you are attempting to plot is "
            "empty. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    if s.is_empty.all():
        warnings.warn(
            "The GeoSeries you are attempting to plot is "
            "composed of empty geometries. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    # have colors been given for all geometries?
    color_given = pd.api.types.is_list_like(color) and len(color) == len(s)

    # if cmap is specified, create range of colors based on cmap
    values = None
    if cmap is not None:
        values = np.arange(len(s))
        if hasattr(cmap, "N"):
            values = values % cmap.N
        style_kwds["vmin"] = style_kwds.get("vmin", values.min())
        style_kwds["vmax"] = style_kwds.get("vmax", values.max())

    # decompose GeometryCollections
    geoms, multiindex = _sanitize_geoms(s.geometry, prefix="Geom")
    # repeat per-geometry values/colors for each exploded component
    values = np.take(values, multiindex, axis=0) if cmap else None
    # ensure indexes are consistent
    if color_given and isinstance(color, pd.Series):
        color = color.reindex(s.index)
    expl_color = np.take(color, multiindex, axis=0) if color_given else color
    expl_series = geopandas.GeoSeries(geoms)

    # Split by geometry type; each family is drawn as one collection.
    geom_types = expl_series.geom_type
    poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
    line_idx = np.asarray(
        (geom_types == "LineString")
        | (geom_types == "MultiLineString")
        | (geom_types == "LinearRing")
    )
    point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))

    # plot all Polygons and all MultiPolygon components in the same collection
    polys = expl_series[poly_idx]
    if not polys.empty:
        # color overrides both face and edgecolor. As we want people to be
        # able to use edgecolor as well, pass color to facecolor
        facecolor = style_kwds.pop("facecolor", None)
        color_ = expl_color[poly_idx] if color_given else color
        if color is not None:
            facecolor = color_

        values_ = values[poly_idx] if cmap else None
        _plot_polygon_collection(
            ax, polys, values_, facecolor=facecolor, cmap=cmap, **style_kwds
        )

    # plot all LineStrings and MultiLineString components in same collection
    lines = expl_series[line_idx]
    if not lines.empty:
        values_ = values[line_idx] if cmap else None
        color_ = expl_color[line_idx] if color_given else color

        _plot_linestring_collection(
            ax, lines, values_, color=color_, cmap=cmap, **style_kwds
        )

    # plot all Points in the same collection
    points = expl_series[point_idx]
    if not points.empty:
        values_ = values[point_idx] if cmap else None
        color_ = expl_color[point_idx] if color_given else color

        _plot_point_collection(
            ax, points, values_, color=color_, cmap=cmap, **style_kwds
        )

    plt.draw()
    return ax
|
||||
|
||||
|
||||
def plot_dataframe(
    df,
    column=None,
    cmap=None,
    color=None,
    ax=None,
    cax=None,
    categorical=False,
    legend=False,
    scheme=None,
    k=5,
    vmin=None,
    vmax=None,
    markersize=None,
    figsize=None,
    legend_kwds=None,
    categories=None,
    classification_kwds=None,
    missing_kwds=None,
    aspect="auto",
    **style_kwds,
):
    """
    Plot a GeoDataFrame.

    Generate a plot of a GeoDataFrame with matplotlib. If a
    column is specified, the plot coloring will be based on values
    in that column.

    Parameters
    ----------
    column : str, np.array, pd.Series (default None)
        The name of the dataframe column, np.array, or pd.Series to be plotted.
        If np.array or pd.Series are used then it must have same length as
        dataframe. Values are used to color the plot. Ignored if `color` is
        also set.
    kind: str
        The kind of plots to produce. The default is to create a map ("geo").
        Other supported kinds of plots from pandas:

        - 'line' : line plot
        - 'bar' : vertical bar plot
        - 'barh' : horizontal bar plot
        - 'hist' : histogram
        - 'box' : BoxPlot
        - 'kde' : Kernel Density Estimation plot
        - 'density' : same as 'kde'
        - 'area' : area plot
        - 'pie' : pie plot
        - 'scatter' : scatter plot
        - 'hexbin' : hexbin plot.
    cmap : str (default None)
        The name of a colormap recognized by matplotlib.
    color : str, np.array, pd.Series (default None)
        If specified, all objects will be colored uniformly.
    ax : matplotlib.pyplot.Artist (default None)
        axes on which to draw the plot
    cax : matplotlib.pyplot Artist (default None)
        axes on which to draw the legend in case of color map.
    categorical : bool (default False)
        If False, cmap will reflect numerical values of the
        column being plotted. For non-numerical columns, this
        will be set to True.
    legend : bool (default False)
        Plot a legend. Ignored if no `column` is given, or if `color` is given.
    scheme : str (default None)
        Name of a choropleth classification scheme (requires mapclassify).
        A mapclassify.MapClassifier object will be used
        under the hood. Supported are all schemes provided by mapclassify (e.g.
        'BoxPlot', 'EqualInterval', 'FisherJenks', 'FisherJenksSampled',
        'HeadTailBreaks', 'JenksCaspall', 'JenksCaspallForced',
        'JenksCaspallSampled', 'MaxP', 'MaximumBreaks',
        'NaturalBreaks', 'Quantiles', 'Percentiles', 'StdMean',
        'UserDefined'). Arguments can be passed in classification_kwds.
    k : int (default 5)
        Number of classes (ignored if scheme is None)
    vmin : None or float (default None)
        Minimum value of cmap. If None, the minimum data value
        in the column to be plotted is used.
    vmax : None or float (default None)
        Maximum value of cmap. If None, the maximum data value
        in the column to be plotted is used.
    markersize : str or float or sequence (default None)
        Only applies to point geometries within a frame.
        If a str, will use the values in the column of the frame specified
        by markersize to set the size of markers. Otherwise can be a value
        to apply to all points, or a sequence of the same length as the
        number of points.
    figsize : tuple of integers (default None)
        Size of the resulting matplotlib.figure.Figure. If the argument
        axes is given explicitly, figsize is ignored.
    legend_kwds : dict (default None)
        Keyword arguments to pass to :func:`matplotlib.pyplot.legend` or
        :func:`matplotlib.pyplot.colorbar`.
        Additional accepted keywords when `scheme` is specified:

        fmt : string
            A formatting specification for the bin edges of the classes in the
            legend. For example, to have no decimals: ``{"fmt": "{:.0f}"}``.
        labels : list-like
            A list of legend labels to override the auto-generated labels.
            Needs to have the same number of elements as the number of
            classes (`k`).
        interval : boolean (default False)
            An option to control brackets from mapclassify legend.
            If True, open/closed interval brackets are shown in the legend.
    categories : list-like
        Ordered list-like object of categories to be used for categorical plot.
    classification_kwds : dict (default None)
        Keyword arguments to pass to mapclassify
    missing_kwds : dict (default None)
        Keyword arguments specifying color options (as style_kwds)
        to be passed on to geometries with missing values in addition to
        or overwriting other style kwds. If None, geometries with missing
        values are not plotted.
    aspect : 'auto', 'equal', None or float (default 'auto')
        Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
        however data are not projected (coordinates are long/lat), the aspect is by
        default set to 1/cos(df_y * pi/180) with df_y the y coordinate of the middle of
        the GeoDataFrame (the mean of the y range of bounding box) so that a long/lat
        square appears square in the middle of the plot. This implies an
        Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
        also be set manually (float) as the ratio of y-unit to x-unit.

    **style_kwds : dict
        Style options to be passed on to the actual plot function, such
        as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
        ``alpha``.

    Returns
    -------
    ax : matplotlib axes instance

    Examples
    --------
    >>> import geodatasets
    >>> df = geopandas.read_file(geodatasets.get_path("nybb"))
    >>> df.head()  # doctest: +SKIP
       BoroCode  ...                                           geometry
    0         5  ...  MULTIPOLYGON (((970217.022 145643.332, 970227....
    1         4  ...  MULTIPOLYGON (((1029606.077 156073.814, 102957...
    2         3  ...  MULTIPOLYGON (((1021176.479 151374.797, 102100...
    3         1  ...  MULTIPOLYGON (((981219.056 188655.316, 980940....
    4         2  ...  MULTIPOLYGON (((1012821.806 229228.265, 101278...

    >>> df.plot("BoroName", cmap="Set1")  # doctest: +SKIP

    See the User Guide page :doc:`../../user_guide/mapping` for details.

    """
    # Backwards-compatible handling of deprecated keyword spellings.
    if "colormap" in style_kwds:
        warnings.warn(
            "'colormap' is deprecated, please use 'cmap' instead "
            "(for consistency with matplotlib)",
            FutureWarning,
            stacklevel=3,
        )
        cmap = style_kwds.pop("colormap")
    if "axes" in style_kwds:
        warnings.warn(
            "'axes' is deprecated, please use 'ax' instead "
            "(for consistency with pandas)",
            FutureWarning,
            stacklevel=3,
        )
        ax = style_kwds.pop("axes")
    # `color` wins over `column`; warn so the user knows `column` is ignored.
    if column is not None and color is not None:
        warnings.warn(
            "Only specify one of 'column' or 'color'. Using 'color'.",
            UserWarning,
            stacklevel=3,
        )
        column = None

    # matplotlib is an optional dependency; give an actionable message.
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError(
            "The matplotlib package is required for plotting in geopandas. "
            "You can install it using 'conda install -c conda-forge matplotlib' or "
            "'pip install matplotlib'."
        )

    if ax is None:
        if cax is not None:
            raise ValueError("'ax' can not be None if 'cax' is not.")
        fig, ax = plt.subplots(figsize=figsize)

    # Aspect handling: for geographic (long/lat) CRS, approximate a locally
    # square appearance at the mid-latitude of the data's bounding box.
    if aspect == "auto":
        if df.crs and df.crs.is_geographic:
            bounds = df.total_bounds
            y_coord = np.mean([bounds[1], bounds[3]])
            ax.set_aspect(1 / np.cos(y_coord * np.pi / 180))
            # formula ported from R package sp
            # https://github.com/edzer/sp/blob/master/R/mapasp.R
        else:
            ax.set_aspect("equal")
    elif aspect is not None:
        ax.set_aspect(aspect)

    # GH 1555
    # if legend_kwds set, copy so we don't update it in place
    if legend_kwds is not None:
        legend_kwds = legend_kwds.copy()

    if df.empty:
        warnings.warn(
            "The GeoDataFrame you are attempting to plot is "
            "empty. Nothing has been displayed.",
            UserWarning,
            stacklevel=3,
        )
        return ax

    # A string markersize names a column of the frame.
    if isinstance(markersize, str):
        markersize = df[markersize].values

    # No column to color by: delegate to the uniform GeoSeries plotter.
    if column is None:
        return plot_series(
            df.geometry,
            cmap=cmap,
            color=color,
            ax=ax,
            figsize=figsize,
            markersize=markersize,
            aspect=aspect,
            **style_kwds,
        )

    # To accept pd.Series and np.arrays as column
    if isinstance(column, (np.ndarray, pd.Series)):
        if column.shape[0] != df.shape[0]:
            raise ValueError(
                "The dataframe and given column have different number of rows."
            )
        else:
            values = column

            # Make sure index of a Series matches index of df
            if isinstance(values, pd.Series):
                values = values.reindex(df.index)
    else:
        values = df[column]

    # Non-numeric dtypes (object/bool/string) force categorical coloring.
    if isinstance(values.dtype, CategoricalDtype):
        if categories is not None:
            raise ValueError(
                "Cannot specify 'categories' when column has categorical dtype"
            )
        categorical = True
    elif (
        pd.api.types.is_object_dtype(values.dtype)
        or pd.api.types.is_bool_dtype(values.dtype)
        or pd.api.types.is_string_dtype(values.dtype)
        or categories
    ):
        categorical = True

    # Boolean mask of rows with missing values (drawn via missing_kwds).
    nan_idx = np.asarray(pd.isna(values), dtype="bool")

    # Choropleth classification via the optional mapclassify package.
    if scheme is not None:
        mc_err = (
            "The 'mapclassify' package (>= 2.4.0) is "
            "required to use the 'scheme' keyword."
        )
        try:
            import mapclassify

        except ImportError:
            raise ImportError(mc_err)

        if Version(mapclassify.__version__) < Version("2.4.0"):
            raise ImportError(mc_err)

        if classification_kwds is None:
            classification_kwds = {}
        if "k" not in classification_kwds:
            classification_kwds["k"] = k

        binning = mapclassify.classify(
            np.asarray(values[~nan_idx]), scheme, **classification_kwds
        )
        # set categorical to True for creating the legend
        categorical = True
        if legend_kwds is not None and "labels" in legend_kwds:
            if len(legend_kwds["labels"]) != binning.k:
                raise ValueError(
                    "Number of labels must match number of bins, "
                    "received {} labels for {} bins".format(
                        len(legend_kwds["labels"]), binning.k
                    )
                )
            else:
                labels = list(legend_kwds.pop("labels"))
        else:
            # Auto-generate legend labels from the bin edges.
            fmt = "{:.2f}"
            if legend_kwds is not None and "fmt" in legend_kwds:
                fmt = legend_kwds.pop("fmt")

            labels = binning.get_legend_classes(fmt)
            if legend_kwds is not None:
                show_interval = legend_kwds.pop("interval", False)
            else:
                show_interval = False
            if not show_interval:
                # strip the surrounding interval brackets from each label
                labels = [c[1:-1] for c in labels]

        # Rebuild `values` as an ordered Categorical of bin upper bounds.
        values = pd.Categorical(
            [np.nan] * len(values), categories=binning.bins, ordered=True
        )
        values[~nan_idx] = pd.Categorical.from_codes(
            binning.yb, categories=binning.bins, ordered=True
        )
        if cmap is None:
            cmap = "viridis"

    # Define `values` as a Series
    if categorical:
        if cmap is None:
            cmap = "tab10"

        cat = pd.Categorical(values, categories=categories)
        categories = list(cat.categories)

        # values missing in the Categorical but not in original values
        missing = list(np.unique(values[~nan_idx & cat.isna()]))
        if missing:
            raise ValueError(
                "Column contains values not listed in categories. "
                "Missing categories: {}.".format(missing)
            )

        # Integer category codes become the values mapped through cmap.
        values = cat.codes[~nan_idx]
        vmin = 0 if vmin is None else vmin
        vmax = len(categories) - 1 if vmax is None else vmax

    # fill values with placeholder where were NaNs originally to map them properly
    # (after removing them in categorical or scheme)
    if categorical:
        for n in np.where(nan_idx)[0]:
            values = np.insert(values, n, values[0])

    mn = values[~np.isnan(values)].min() if vmin is None else vmin
    mx = values[~np.isnan(values)].max() if vmax is None else vmax

    # decompose GeometryCollections
    geoms, multiindex = _sanitize_geoms(df.geometry, prefix="Geom")
    # repeat per-row values/masks for each exploded component
    values = np.take(values, multiindex, axis=0)
    nan_idx = np.take(nan_idx, multiindex, axis=0)
    expl_series = geopandas.GeoSeries(geoms)

    # Split by geometry type; each family is drawn as one collection.
    geom_types = expl_series.geom_type
    poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
    line_idx = np.asarray(
        (geom_types == "LineString")
        | (geom_types == "MultiLineString")
        | (geom_types == "LinearRing")
    )
    point_idx = np.asarray((geom_types == "Point") | (geom_types == "MultiPoint"))

    # plot all Polygons and all MultiPolygon components in the same collection
    polys = expl_series[poly_idx & np.invert(nan_idx)]
    subset = values[poly_idx & np.invert(nan_idx)]
    if not polys.empty:
        _plot_polygon_collection(
            ax, polys, subset, vmin=mn, vmax=mx, cmap=cmap, **style_kwds
        )

    # plot all LineStrings and MultiLineString components in same collection
    lines = expl_series[line_idx & np.invert(nan_idx)]
    subset = values[line_idx & np.invert(nan_idx)]
    if not lines.empty:
        _plot_linestring_collection(
            ax, lines, subset, vmin=mn, vmax=mx, cmap=cmap, **style_kwds
        )

    # plot all Points in the same collection
    points = expl_series[point_idx & np.invert(nan_idx)]
    subset = values[point_idx & np.invert(nan_idx)]
    if not points.empty:
        if isinstance(markersize, np.ndarray):
            markersize = np.take(markersize, multiindex, axis=0)
            markersize = markersize[point_idx & np.invert(nan_idx)]
        _plot_point_collection(
            ax,
            points,
            subset,
            vmin=mn,
            vmax=mx,
            markersize=markersize,
            cmap=cmap,
            **style_kwds,
        )

    # Draw rows with missing values separately, styled by missing_kwds
    # layered over the regular style kwargs.
    missing_data = not expl_series[nan_idx].empty
    if missing_kwds is not None and missing_data:
        if color:
            if "color" not in missing_kwds:
                missing_kwds["color"] = color

        merged_kwds = style_kwds.copy()
        merged_kwds.update(missing_kwds)

        plot_series(expl_series[nan_idx], ax=ax, **merged_kwds)

    # Legend: discrete proxy handles for categorical plots, a colorbar
    # otherwise. Skipped entirely when a uniform `color` was given.
    if legend and not color:
        if legend_kwds is None:
            legend_kwds = {}
        if "fmt" in legend_kwds:
            legend_kwds.pop("fmt")

        from matplotlib.lines import Line2D
        from matplotlib.colors import Normalize
        from matplotlib import cm

        norm = style_kwds.get("norm", None)
        if not norm:
            norm = Normalize(vmin=mn, vmax=mx)
        n_cmap = cm.ScalarMappable(norm=norm, cmap=cmap)
        if categorical:
            if scheme is not None:
                categories = labels
            patches = []
            for value, cat in enumerate(categories):
                patches.append(
                    Line2D(
                        [0],
                        [0],
                        linestyle="none",
                        marker="o",
                        alpha=style_kwds.get("alpha", 1),
                        markersize=10,
                        markerfacecolor=n_cmap.to_rgba(value),
                        markeredgewidth=0,
                    )
                )
            if missing_kwds is not None and missing_data:
                if "color" in merged_kwds:
                    merged_kwds["facecolor"] = merged_kwds["color"]
                patches.append(
                    Line2D(
                        [0],
                        [0],
                        linestyle="none",
                        marker="o",
                        alpha=merged_kwds.get("alpha", 1),
                        markersize=10,
                        markerfacecolor=merged_kwds.get("facecolor", None),
                        markeredgecolor=merged_kwds.get("edgecolor", None),
                        markeredgewidth=merged_kwds.get(
                            "linewidth", 1 if merged_kwds.get("edgecolor", False) else 0
                        ),
                    )
                )
                categories.append(merged_kwds.get("label", "NaN"))
            legend_kwds.setdefault("numpoints", 1)
            legend_kwds.setdefault("loc", "best")
            legend_kwds.setdefault("handles", patches)
            legend_kwds.setdefault("labels", categories)
            ax.legend(**legend_kwds)
        else:
            if cax is not None:
                legend_kwds.setdefault("cax", cax)
            else:
                legend_kwds.setdefault("ax", ax)

            n_cmap.set_array(np.array([]))
            ax.get_figure().colorbar(n_cmap, **legend_kwds)

    plt.draw()
    return ax
|
||||
|
||||
|
||||
@doc(plot_dataframe)
class GeoplotAccessor(PlotAccessor):
    # Plot kinds that are delegated to the pandas PlotAccessor.
    _pandas_kinds = PlotAccessor._all_kinds

    def __call__(self, *args, **kwargs):
        """Dispatch to the geo plotter or to a pandas plot kind."""
        frame = self._parent.copy()
        plot_kind = kwargs.pop("kind", "geo")
        if plot_kind == "geo":
            return plot_dataframe(frame, *args, **kwargs)
        if plot_kind in self._pandas_kinds:
            # Access pandas plots
            return PlotAccessor(frame)(kind=plot_kind, **kwargs)
        # raise error
        raise ValueError(f"{plot_kind} is not a valid plot kind")

    def geo(self, *args, **kwargs):
        """Explicit entry point for the map ("geo") plot kind."""
        return self(kind="geo", *args, **kwargs)  # noqa: B026
|
||||
948
.venv/lib/python3.12/site-packages/geopandas/sindex.py
Normal file
948
.venv/lib/python3.12/site-packages/geopandas/sindex.py
Normal file
@@ -0,0 +1,948 @@
|
||||
import warnings
|
||||
|
||||
from shapely.geometry.base import BaseGeometry
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from . import _compat as compat
|
||||
from ._decorator import doc
|
||||
|
||||
|
||||
def _get_sindex_class():
    """Dynamically chooses a spatial indexing backend.

    Required to comply with _compat.USE_PYGEOS.
    The selection order goes PyGEOS > RTree > Error.
    """
    # Shapely 2.0's built-in STRtree and pygeos share the same wrapper class.
    use_pygeos_tree = compat.USE_SHAPELY_20 or compat.USE_PYGEOS
    if use_pygeos_tree:
        return PyGEOSSTRTreeIndex
    if compat.HAS_RTREE:
        return RTreeIndex
    msg = (
        "Spatial indexes require either `rtree` or `pygeos`. "
        "See installation instructions at https://geopandas.org/install.html"
    )
    raise ImportError(msg)
|
||||
|
||||
|
||||
class BaseSpatialIndex:
|
||||
    @property
    def valid_query_predicates(self):
        """Returns valid predicates for this spatial index.

        Returns
        -------
        set
            Set of valid predicates for this spatial index.

        Examples
        --------
        >>> from shapely.geometry import Point
        >>> s = geopandas.GeoSeries([Point(0, 0), Point(1, 1)])
        >>> s.sindex.valid_query_predicates  # doctest: +SKIP
        {'contains', 'crosses', 'intersects', 'within', 'touches', \
'overlaps', None, 'covers', 'contains_properly'}
        """
        # Abstract placeholder: concrete backend subclasses must override.
        raise NotImplementedError
|
||||
|
||||
def query(self, geometry, predicate=None, sort=False):
|
||||
"""
|
||||
Return the integer indices of all combinations of each input geometry
|
||||
and tree geometries where the bounding box of each input geometry
|
||||
intersects the bounding box of a tree geometry.
|
||||
|
||||
If the input geometry is a scalar, this returns an array of shape (n, ) with
|
||||
the indices of the matching tree geometries. If the input geometry is an
|
||||
array_like, this returns an array with shape (2,n) where the subarrays
|
||||
correspond to the indices of the input geometries and indices of the
|
||||
tree geometries associated with each. To generate an array of pairs of
|
||||
input geometry index and tree geometry index, simply transpose the
|
||||
result.
|
||||
|
||||
If a predicate is provided, the tree geometries are first queried based
|
||||
on the bounding box of the input geometry and then are further filtered
|
||||
to those that meet the predicate when comparing the input geometry to
|
||||
the tree geometry: ``predicate(geometry, tree_geometry)``.
|
||||
|
||||
Bounding boxes are limited to two dimensions and are axis-aligned
|
||||
(equivalent to the ``bounds`` property of a geometry); any Z values
|
||||
present in input geometries are ignored when querying the tree.
|
||||
|
||||
Any input geometry that is None or empty will never match geometries in
|
||||
the tree.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
geometry : shapely.Geometry or array-like of geometries \
|
||||
(numpy.ndarray, GeoSeries, GeometryArray)
|
||||
A single shapely geometry or array of geometries to query against
|
||||
the spatial index. For array-like, accepts both GeoPandas geometry
|
||||
iterables (GeoSeries, GeometryArray) or a numpy array of Shapely
|
||||
or PyGEOS geometries.
|
||||
predicate : {None, "contains", "contains_properly", "covered_by", "covers", \
|
||||
"crosses", "intersects", "overlaps", "touches", "within"}, optional
|
||||
If predicate is provided, the input geometries are tested
|
||||
using the predicate function against each item in the tree
|
||||
whose extent intersects the envelope of the input geometry:
|
||||
``predicate(input_geometry, tree_geometry)``.
|
||||
If possible, prepared geometries are used to help speed up the
|
||||
predicate operation.
|
||||
sort : bool, default False
|
||||
If True, the results will be sorted in ascending order. In case
|
||||
of 2D array, the result is sorted lexicographically using the
|
||||
geometries' indexes as the primary key and the sindex's indexes
|
||||
as the secondary key.
|
||||
If False, no additional sorting is applied (results are often
|
||||
sorted but there is no guarantee).
|
||||
|
||||
Returns
|
||||
-------
|
||||
ndarray with shape (n,) if geometry is a scalar
|
||||
Integer indices for matching geometries from the spatial index
|
||||
tree geometries.
|
||||
|
||||
OR
|
||||
|
||||
ndarray with shape (2, n) if geometry is an array_like
|
||||
The first subarray contains input geometry integer indices.
|
||||
The second subarray contains tree geometry integer indices.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point, box
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
|
||||
>>> s
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
3 POINT (3.00000 3.00000)
|
||||
4 POINT (4.00000 4.00000)
|
||||
5 POINT (5.00000 5.00000)
|
||||
6 POINT (6.00000 6.00000)
|
||||
7 POINT (7.00000 7.00000)
|
||||
8 POINT (8.00000 8.00000)
|
||||
9 POINT (9.00000 9.00000)
|
||||
dtype: geometry
|
||||
|
||||
Querying the tree with a scalar geometry:
|
||||
|
||||
>>> s.sindex.query(box(1, 1, 3, 3))
|
||||
array([1, 2, 3])
|
||||
|
||||
>>> s.sindex.query(box(1, 1, 3, 3), predicate="contains")
|
||||
array([2])
|
||||
|
||||
Querying the tree with an array of geometries:
|
||||
|
||||
>>> s2 = geopandas.GeoSeries([box(2, 2, 4, 4), box(5, 5, 6, 6)])
|
||||
>>> s2
|
||||
0 POLYGON ((4.00000 2.00000, 4.00000 4.00000, 2....
|
||||
1 POLYGON ((6.00000 5.00000, 6.00000 6.00000, 5....
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.query(s2)
|
||||
array([[0, 0, 0, 1, 1],
|
||||
[2, 3, 4, 5, 6]])
|
||||
|
||||
>>> s.sindex.query(s2, predicate="contains")
|
||||
array([[0],
|
||||
[3]])
|
||||
|
||||
Notes
|
||||
-----
|
||||
In the context of a spatial join, input geometries are the "left"
|
||||
geometries that determine the order of the results, and tree geometries
|
||||
are "right" geometries that are joined against the left geometries. This
|
||||
effectively performs an inner join, where only those combinations of
|
||||
geometries that can be joined based on overlapping bounding boxes or
|
||||
optional predicate are returned.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def query_bulk(self, geometry, predicate=None, sort=False):
|
||||
"""
|
||||
DEPRECATED: use `query` instead.
|
||||
|
||||
Returns all combinations of each input geometry and geometries in
|
||||
the tree where the envelope of each input geometry intersects with
|
||||
the envelope of a tree geometry.
|
||||
|
||||
In the context of a spatial join, input geometries are the “left”
|
||||
geometries that determine the order of the results, and tree geometries
|
||||
are “right” geometries that are joined against the left geometries.
|
||||
This effectively performs an inner join, where only those combinations
|
||||
of geometries that can be joined based on envelope overlap or optional
|
||||
predicate are returned.
|
||||
|
||||
When using the ``rtree`` package, this is not a vectorized function
|
||||
and may be slow. If speed is important, please use PyGEOS.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
geometry : {GeoSeries, GeometryArray, numpy.array of PyGEOS geometries}
|
||||
Accepts GeoPandas geometry iterables (GeoSeries, GeometryArray)
|
||||
or a numpy array of PyGEOS geometries.
|
||||
predicate : {None, "contains", "contains_properly", "covered_by", "covers", \
|
||||
"crosses", "intersects", "overlaps", "touches", "within"}, optional
|
||||
If predicate is provided, the input geometries are tested using
|
||||
the predicate function against each item in the tree whose extent
|
||||
intersects the envelope of the each input geometry:
|
||||
predicate(input_geometry, tree_geometry). If possible, prepared
|
||||
geometries are used to help speed up the predicate operation.
|
||||
sort : bool, default False
|
||||
If True, results sorted lexicographically using
|
||||
geometry's indexes as the primary key and the sindex's indexes as the
|
||||
secondary key. If False, no additional sorting is applied.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ndarray with shape (2, n)
|
||||
The first subarray contains input geometry integer indexes.
|
||||
The second subarray contains tree geometry integer indexes.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point, box
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
|
||||
>>> s
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
3 POINT (3.00000 3.00000)
|
||||
4 POINT (4.00000 4.00000)
|
||||
5 POINT (5.00000 5.00000)
|
||||
6 POINT (6.00000 6.00000)
|
||||
7 POINT (7.00000 7.00000)
|
||||
8 POINT (8.00000 8.00000)
|
||||
9 POINT (9.00000 9.00000)
|
||||
dtype: geometry
|
||||
>>> s2 = geopandas.GeoSeries([box(2, 2, 4, 4), box(5, 5, 6, 6)])
|
||||
>>> s2
|
||||
0 POLYGON ((4.00000 2.00000, 4.00000 4.00000, 2....
|
||||
1 POLYGON ((6.00000 5.00000, 6.00000 6.00000, 5....
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.query_bulk(s2)
|
||||
array([[0, 0, 0, 1, 1],
|
||||
[2, 3, 4, 5, 6]])
|
||||
|
||||
>>> s.sindex.query_bulk(s2, predicate="contains")
|
||||
array([[0],
|
||||
[3]])
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def nearest(
|
||||
self,
|
||||
geometry,
|
||||
return_all=True,
|
||||
max_distance=None,
|
||||
return_distance=False,
|
||||
exclusive=False,
|
||||
):
|
||||
"""
|
||||
Return the nearest geometry in the tree for each input geometry in
|
||||
``geometry``.
|
||||
|
||||
.. note::
|
||||
``nearest`` currently only works with PyGEOS >= 0.10.
|
||||
|
||||
Note that if PyGEOS is not available, geopandas will use rtree
|
||||
for the spatial index, where nearest has a different
|
||||
function signature to temporarily preserve existing
|
||||
functionality. See the documentation of
|
||||
:meth:`rtree.index.Index.nearest` for the details on the
|
||||
``rtree``-based implementation.
|
||||
|
||||
If multiple tree geometries have the same distance from an input geometry,
|
||||
multiple results will be returned for that input geometry by default.
|
||||
Specify ``return_all=False`` to only get a single nearest geometry
|
||||
(non-deterministic which nearest is returned).
|
||||
|
||||
In the context of a spatial join, input geometries are the "left"
|
||||
geometries that determine the order of the results, and tree geometries
|
||||
are "right" geometries that are joined against the left geometries.
|
||||
If ``max_distance`` is not set, this will effectively be a left join
|
||||
because every geometry in ``geometry`` will have a nearest geometry in
|
||||
the tree. However, if ``max_distance`` is used, this becomes an
|
||||
inner join, since some geometries in ``geometry`` may not have a match
|
||||
in the tree.
|
||||
|
||||
For performance reasons, it is highly recommended that you set
|
||||
the ``max_distance`` parameter.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
geometry : {shapely.geometry, GeoSeries, GeometryArray, numpy.array of PyGEOS \
|
||||
geometries}
|
||||
A single shapely geometry, one of the GeoPandas geometry iterables
|
||||
(GeoSeries, GeometryArray), or a numpy array of PyGEOS geometries to query
|
||||
against the spatial index.
|
||||
return_all : bool, default True
|
||||
If there are multiple equidistant or intersecting nearest
|
||||
geometries, return all those geometries instead of a single
|
||||
nearest geometry.
|
||||
max_distance : float, optional
|
||||
Maximum distance within which to query for nearest items in tree.
|
||||
Must be greater than 0. By default None, indicating no distance limit.
|
||||
return_distance : bool, optional
|
||||
If True, will return distances in addition to indexes. By default False
|
||||
exclusive : bool, optional
|
||||
if True, the nearest geometries that are equal to the input geometry
|
||||
will not be returned. By default False. Requires Shapely >= 2.0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Indices or tuple of (indices, distances)
|
||||
Indices is an ndarray of shape (2,n) and distances (if present) an
|
||||
ndarray of shape (n).
|
||||
The first subarray of indices contains input geometry indices.
|
||||
The second subarray of indices contains tree geometry indices.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point, box
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
|
||||
>>> s.head()
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
3 POINT (3.00000 3.00000)
|
||||
4 POINT (4.00000 4.00000)
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.nearest(Point(1, 1))
|
||||
array([[0],
|
||||
[1]])
|
||||
|
||||
>>> s.sindex.nearest([box(4.9, 4.9, 5.1, 5.1)])
|
||||
array([[0],
|
||||
[5]])
|
||||
|
||||
>>> s2 = geopandas.GeoSeries(geopandas.points_from_xy([7.6, 10], [7.6, 10]))
|
||||
>>> s2
|
||||
0 POINT (7.60000 7.60000)
|
||||
1 POINT (10.00000 10.00000)
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.nearest(s2)
|
||||
array([[0, 1],
|
||||
[8, 9]])
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def intersection(self, coordinates):
|
||||
"""Compatibility wrapper for rtree.index.Index.intersection,
|
||||
use ``query`` instead.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
coordinates : sequence or array
|
||||
Sequence of the form (min_x, min_y, max_x, max_y)
|
||||
to query a rectangle or (x, y) to query a point.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point, box
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
|
||||
>>> s
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
3 POINT (3.00000 3.00000)
|
||||
4 POINT (4.00000 4.00000)
|
||||
5 POINT (5.00000 5.00000)
|
||||
6 POINT (6.00000 6.00000)
|
||||
7 POINT (7.00000 7.00000)
|
||||
8 POINT (8.00000 8.00000)
|
||||
9 POINT (9.00000 9.00000)
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.intersection(box(1, 1, 3, 3).bounds)
|
||||
array([1, 2, 3])
|
||||
|
||||
Alternatively, you can use ``query``:
|
||||
|
||||
>>> s.sindex.query(box(1, 1, 3, 3))
|
||||
array([1, 2, 3])
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
"""Size of the spatial index
|
||||
|
||||
Number of leaves (input geometries) in the index.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
|
||||
>>> s
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
3 POINT (3.00000 3.00000)
|
||||
4 POINT (4.00000 4.00000)
|
||||
5 POINT (5.00000 5.00000)
|
||||
6 POINT (6.00000 6.00000)
|
||||
7 POINT (7.00000 7.00000)
|
||||
8 POINT (8.00000 8.00000)
|
||||
9 POINT (9.00000 9.00000)
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.size
|
||||
10
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def is_empty(self):
|
||||
"""Check if the spatial index is empty
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(10), range(10)))
|
||||
>>> s
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
3 POINT (3.00000 3.00000)
|
||||
4 POINT (4.00000 4.00000)
|
||||
5 POINT (5.00000 5.00000)
|
||||
6 POINT (6.00000 6.00000)
|
||||
7 POINT (7.00000 7.00000)
|
||||
8 POINT (8.00000 8.00000)
|
||||
9 POINT (9.00000 9.00000)
|
||||
dtype: geometry
|
||||
|
||||
>>> s.sindex.is_empty
|
||||
False
|
||||
|
||||
>>> s2 = geopandas.GeoSeries()
|
||||
>>> s2.sindex.is_empty
|
||||
True
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
if compat.HAS_RTREE:
|
||||
import rtree.index
|
||||
from rtree.core import RTreeError
|
||||
from shapely.prepared import prep
|
||||
|
||||
class SpatialIndex(rtree.index.Index, BaseSpatialIndex):
|
||||
"""Original rtree wrapper, kept for backwards compatibility."""
|
||||
|
||||
def __init__(self, *args):
|
||||
warnings.warn(
|
||||
"Directly using SpatialIndex is deprecated, and the class will be "
|
||||
"removed in a future version. Access the spatial index through the "
|
||||
"`GeoSeries.sindex` attribute, or use `rtree.index.Index` directly.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
super().__init__(*args)
|
||||
|
||||
@doc(BaseSpatialIndex.intersection)
|
||||
def intersection(self, coordinates, *args, **kwargs):
|
||||
return super().intersection(coordinates, *args, **kwargs)
|
||||
|
||||
@doc(BaseSpatialIndex.nearest)
|
||||
def nearest(self, *args, **kwargs):
|
||||
return super().nearest(*args, **kwargs)
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.size)
|
||||
def size(self):
|
||||
return len(self.leaves()[0][1])
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.is_empty)
|
||||
def is_empty(self):
|
||||
if len(self.leaves()) > 1:
|
||||
return False
|
||||
return self.size < 1
|
||||
|
||||
class RTreeIndex(rtree.index.Index):
|
||||
"""A simple wrapper around rtree's RTree Index
|
||||
|
||||
Parameters
|
||||
----------
|
||||
geometry : np.array of Shapely geometries
|
||||
Geometries from which to build the spatial index.
|
||||
"""
|
||||
|
||||
def __init__(self, geometry):
|
||||
stream = (
|
||||
(i, item.bounds, None)
|
||||
for i, item in enumerate(geometry)
|
||||
if pd.notnull(item) and not item.is_empty
|
||||
)
|
||||
try:
|
||||
super().__init__(stream)
|
||||
except RTreeError:
|
||||
# What we really want here is an empty generator error, or
|
||||
# for the bulk loader to log that the generator was empty
|
||||
# and move on.
|
||||
# See https://github.com/Toblerity/rtree/issues/20.
|
||||
super().__init__()
|
||||
|
||||
# store reference to geometries for predicate queries
|
||||
self.geometries = geometry
|
||||
# create a prepared geometry cache
|
||||
self._prepared_geometries = np.array(
|
||||
[None] * self.geometries.size, dtype=object
|
||||
)
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.valid_query_predicates)
|
||||
def valid_query_predicates(self):
|
||||
return {
|
||||
None,
|
||||
"intersects",
|
||||
"within",
|
||||
"contains",
|
||||
"overlaps",
|
||||
"crosses",
|
||||
"touches",
|
||||
"covered_by",
|
||||
"covers",
|
||||
"contains_properly",
|
||||
}
|
||||
|
||||
@doc(BaseSpatialIndex.query)
|
||||
def query(self, geometry, predicate=None, sort=False):
|
||||
# handle invalid predicates
|
||||
if predicate not in self.valid_query_predicates:
|
||||
raise ValueError(
|
||||
"Got `predicate` = `{}`, `predicate` must be one of {}".format(
|
||||
predicate, self.valid_query_predicates
|
||||
)
|
||||
)
|
||||
|
||||
if hasattr(geometry, "__array__") and not isinstance(
|
||||
geometry, BaseGeometry
|
||||
):
|
||||
# Iterates over geometry, applying func.
|
||||
tree_index = []
|
||||
input_geometry_index = []
|
||||
|
||||
for i, geo in enumerate(geometry):
|
||||
res = self.query(geo, predicate=predicate, sort=sort)
|
||||
tree_index.extend(res)
|
||||
input_geometry_index.extend([i] * len(res))
|
||||
return np.vstack([input_geometry_index, tree_index])
|
||||
|
||||
# handle empty / invalid geometries
|
||||
if geometry is None:
|
||||
# return an empty integer array, similar to pygeos.STRtree.query.
|
||||
return np.array([], dtype=np.intp)
|
||||
|
||||
if not isinstance(geometry, BaseGeometry):
|
||||
raise TypeError(
|
||||
"Got `geometry` of type `{}`, `geometry` must be ".format(
|
||||
type(geometry)
|
||||
)
|
||||
+ "a shapely geometry."
|
||||
)
|
||||
|
||||
if geometry.is_empty:
|
||||
return np.array([], dtype=np.intp)
|
||||
|
||||
# query tree
|
||||
bounds = geometry.bounds # rtree operates on bounds
|
||||
tree_idx = list(self.intersection(bounds))
|
||||
|
||||
if not tree_idx:
|
||||
return np.array([], dtype=np.intp)
|
||||
|
||||
# Check predicate
|
||||
# This is checked as input_geometry.predicate(tree_geometry)
|
||||
# When possible, we use prepared geometries.
|
||||
# Prepared geometries only support "intersects" and "contains"
|
||||
# For the special case of "within", we are able to flip the
|
||||
# comparison and check if tree_geometry.contains(input_geometry)
|
||||
# to still take advantage of prepared geometries.
|
||||
if predicate == "within":
|
||||
# To use prepared geometries for within,
|
||||
# we compare tree_geom.contains(input_geom)
|
||||
# Since we are preparing the tree geometries,
|
||||
# we cache them for multiple comparisons.
|
||||
res = []
|
||||
for index_in_tree in tree_idx:
|
||||
if self._prepared_geometries[index_in_tree] is None:
|
||||
# if not already prepared, prepare and cache
|
||||
self._prepared_geometries[index_in_tree] = prep(
|
||||
self.geometries[index_in_tree]
|
||||
)
|
||||
if self._prepared_geometries[index_in_tree].contains(geometry):
|
||||
res.append(index_in_tree)
|
||||
tree_idx = res
|
||||
elif predicate is not None:
|
||||
# For the remaining predicates,
|
||||
# we compare input_geom.predicate(tree_geom)
|
||||
if predicate in (
|
||||
"contains",
|
||||
"intersects",
|
||||
"covered_by",
|
||||
"covers",
|
||||
"contains_properly",
|
||||
):
|
||||
# prepare this input geometry
|
||||
geometry = prep(geometry)
|
||||
tree_idx = [
|
||||
index_in_tree
|
||||
for index_in_tree in tree_idx
|
||||
if getattr(geometry, predicate)(self.geometries[index_in_tree])
|
||||
]
|
||||
|
||||
# sort if requested
|
||||
if sort:
|
||||
# sorted
|
||||
return np.sort(np.array(tree_idx, dtype=np.intp))
|
||||
|
||||
# unsorted
|
||||
return np.array(tree_idx, dtype=np.intp)
|
||||
|
||||
@doc(BaseSpatialIndex.query_bulk)
|
||||
def query_bulk(self, geometry, predicate=None, sort=False):
|
||||
warnings.warn(
|
||||
"The `query_bulk()` method is deprecated and will be removed in "
|
||||
"GeoPandas 1.0. You can use the `query()` method instead.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self.query(geometry, predicate=predicate, sort=sort)
|
||||
|
||||
def nearest(self, coordinates, num_results=1, objects=False):
|
||||
"""
|
||||
Returns the nearest object or objects to the given coordinates.
|
||||
|
||||
Requires rtree, and passes parameters directly to
|
||||
:meth:`rtree.index.Index.nearest`.
|
||||
|
||||
This behaviour is deprecated and will be updated to be consistent
|
||||
with the pygeos PyGEOSSTRTreeIndex in a future release.
|
||||
|
||||
If longer-term compatibility is required, use
|
||||
:meth:`rtree.index.Index.nearest` directly instead.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = geopandas.GeoSeries(geopandas.points_from_xy(range(3), range(3)))
|
||||
>>> s
|
||||
0 POINT (0.00000 0.00000)
|
||||
1 POINT (1.00000 1.00000)
|
||||
2 POINT (2.00000 2.00000)
|
||||
dtype: geometry
|
||||
|
||||
>>> list(s.sindex.nearest((0, 0))) # doctest: +SKIP
|
||||
[0]
|
||||
|
||||
>>> list(s.sindex.nearest((0.5, 0.5))) # doctest: +SKIP
|
||||
[0, 1]
|
||||
|
||||
>>> list(s.sindex.nearest((3, 3), num_results=2)) # doctest: +SKIP
|
||||
[2, 1]
|
||||
|
||||
>>> list(super(type(s.sindex), s.sindex).nearest((0, 0),
|
||||
... num_results=2)) # doctest: +SKIP
|
||||
[0, 1]
|
||||
|
||||
Parameters
|
||||
----------
|
||||
coordinates : sequence or array
|
||||
This may be an object that satisfies the numpy array protocol,
|
||||
providing the index’s dimension * 2 coordinate pairs
|
||||
representing the mink and maxk coordinates in each dimension
|
||||
defining the bounds of the query window.
|
||||
num_results : integer
|
||||
The number of results to return nearest to the given
|
||||
coordinates. If two index entries are equidistant, both are
|
||||
returned. This property means that num_results may return more
|
||||
items than specified
|
||||
objects : True / False / ‘raw’
|
||||
If True, the nearest method will return index objects that were
|
||||
pickled when they were stored with each index entry, as well as
|
||||
the id and bounds of the index entries. If ‘raw’, it will
|
||||
return the object as entered into the database without the
|
||||
rtree.index.Item wrapper.
|
||||
"""
|
||||
warnings.warn(
|
||||
"sindex.nearest using the rtree backend was not previously documented "
|
||||
"and this behavior is deprecated in favor of matching the function "
|
||||
"signature provided by the pygeos backend (see "
|
||||
"PyGEOSSTRTreeIndex.nearest for details). This behavior will be "
|
||||
"updated in a future release.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return super().nearest(
|
||||
coordinates, num_results=num_results, objects=objects
|
||||
)
|
||||
|
||||
@doc(BaseSpatialIndex.intersection)
|
||||
def intersection(self, coordinates):
|
||||
return super().intersection(coordinates, objects=False)
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.size)
|
||||
def size(self):
|
||||
if hasattr(self, "_size"):
|
||||
size = self._size
|
||||
else:
|
||||
# self.leaves are lists of tuples of (int, lists...)
|
||||
# index [0][1] always has an element, even for empty sindex
|
||||
# for an empty index, it will be an empty list
|
||||
size = len(self.leaves()[0][1])
|
||||
self._size = size
|
||||
return size
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.is_empty)
|
||||
def is_empty(self):
|
||||
return self.geometries.size == 0 or self.size == 0
|
||||
|
||||
def __len__(self):
|
||||
return self.size
|
||||
|
||||
|
||||
if compat.SHAPELY_GE_20 or compat.HAS_PYGEOS:
|
||||
from . import geoseries
|
||||
from . import array
|
||||
|
||||
if compat.USE_SHAPELY_20:
|
||||
import shapely as mod
|
||||
|
||||
_PYGEOS_PREDICATES = {p.name for p in mod.strtree.BinaryPredicate} | {None}
|
||||
else:
|
||||
import pygeos as mod
|
||||
|
||||
_PYGEOS_PREDICATES = {p.name for p in mod.strtree.BinaryPredicate} | {None}
|
||||
|
||||
class PyGEOSSTRTreeIndex(BaseSpatialIndex):
|
||||
"""A simple wrapper around pygeos's STRTree.
|
||||
|
||||
|
||||
Parameters
|
||||
----------
|
||||
geometry : np.array of PyGEOS geometries
|
||||
Geometries from which to build the spatial index.
|
||||
"""
|
||||
|
||||
def __init__(self, geometry):
|
||||
# set empty geometries to None to avoid segfault on GEOS <= 3.6
|
||||
# see:
|
||||
# https://github.com/pygeos/pygeos/issues/146
|
||||
# https://github.com/pygeos/pygeos/issues/147
|
||||
non_empty = geometry.copy()
|
||||
non_empty[mod.is_empty(non_empty)] = None
|
||||
# set empty geometries to None to maintain indexing
|
||||
self._tree = mod.STRtree(non_empty)
|
||||
# store geometries, including empty geometries for user access
|
||||
self.geometries = geometry.copy()
|
||||
|
||||
@property
|
||||
def valid_query_predicates(self):
|
||||
"""Returns valid predicates for the used spatial index.
|
||||
|
||||
Returns
|
||||
-------
|
||||
set
|
||||
Set of valid predicates for this spatial index.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from shapely.geometry import Point
|
||||
>>> s = geopandas.GeoSeries([Point(0, 0), Point(1, 1)])
|
||||
>>> s.sindex.valid_query_predicates # doctest: +SKIP
|
||||
{None, "contains", "contains_properly", "covered_by", "covers", \
|
||||
"crosses", "intersects", "overlaps", "touches", "within"}
|
||||
"""
|
||||
return _PYGEOS_PREDICATES
|
||||
|
||||
@doc(BaseSpatialIndex.query)
|
||||
def query(self, geometry, predicate=None, sort=False):
|
||||
if predicate not in self.valid_query_predicates:
|
||||
raise ValueError(
|
||||
"Got `predicate` = `{}`; ".format(predicate)
|
||||
+ "`predicate` must be one of {}".format(
|
||||
self.valid_query_predicates
|
||||
)
|
||||
)
|
||||
|
||||
geometry = self._as_geometry_array(geometry)
|
||||
|
||||
if compat.USE_SHAPELY_20:
|
||||
indices = self._tree.query(geometry, predicate=predicate)
|
||||
else:
|
||||
if isinstance(geometry, np.ndarray):
|
||||
indices = self._tree.query_bulk(geometry, predicate=predicate)
|
||||
else:
|
||||
indices = self._tree.query(geometry, predicate=predicate)
|
||||
|
||||
if sort:
|
||||
if indices.ndim == 1:
|
||||
return np.sort(indices)
|
||||
else:
|
||||
# sort by first array (geometry) and then second (tree)
|
||||
geo_idx, tree_idx = indices
|
||||
sort_indexer = np.lexsort((tree_idx, geo_idx))
|
||||
return np.vstack((geo_idx[sort_indexer], tree_idx[sort_indexer]))
|
||||
|
||||
return indices
|
||||
|
||||
@staticmethod
|
||||
def _as_geometry_array(geometry):
|
||||
"""Convert geometry into a numpy array of PyGEOS geometries.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
geometry
|
||||
An array-like of PyGEOS geometries, a GeoPandas GeoSeries/GeometryArray,
|
||||
shapely.geometry or list of shapely geometries.
|
||||
|
||||
Returns
|
||||
-------
|
||||
np.ndarray
|
||||
A numpy array of pygeos geometries.
|
||||
"""
|
||||
# to ensure pygeos.Geometry as input is treated the same as shapely
|
||||
# geometrie. TODO can be removed when we remove pygeos support
|
||||
if isinstance(geometry, mod.Geometry):
|
||||
geometry = array._geom_to_shapely(geometry)
|
||||
|
||||
if isinstance(geometry, np.ndarray):
|
||||
return array.from_shapely(geometry)._data
|
||||
elif isinstance(geometry, geoseries.GeoSeries):
|
||||
return geometry.values._data
|
||||
elif isinstance(geometry, array.GeometryArray):
|
||||
return geometry._data
|
||||
elif isinstance(geometry, BaseGeometry):
|
||||
return array._shapely_to_geom(geometry)
|
||||
elif geometry is None:
|
||||
return None
|
||||
elif isinstance(geometry, list):
|
||||
return np.asarray(
|
||||
[
|
||||
array._shapely_to_geom(el)
|
||||
if isinstance(el, BaseGeometry)
|
||||
else el
|
||||
for el in geometry
|
||||
]
|
||||
)
|
||||
else:
|
||||
return np.asarray(geometry)
|
||||
|
||||
@doc(BaseSpatialIndex.query_bulk)
|
||||
def query_bulk(self, geometry, predicate=None, sort=False):
|
||||
warnings.warn(
|
||||
"The `query_bulk()` method is deprecated and will be removed in "
|
||||
"GeoPandas 1.0. You can use the `query()` method instead.",
|
||||
FutureWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return self.query(geometry, predicate=predicate, sort=sort)
|
||||
|
||||
@doc(BaseSpatialIndex.nearest)
|
||||
def nearest(
|
||||
self,
|
||||
geometry,
|
||||
return_all=True,
|
||||
max_distance=None,
|
||||
return_distance=False,
|
||||
exclusive=False,
|
||||
):
|
||||
if not (compat.USE_SHAPELY_20 or compat.PYGEOS_GE_010):
|
||||
raise NotImplementedError(
|
||||
"sindex.nearest requires shapely >= 2.0 or pygeos >= 0.10"
|
||||
)
|
||||
|
||||
if exclusive and not compat.USE_SHAPELY_20:
|
||||
raise NotImplementedError(
|
||||
"sindex.nearest exclusive parameter requires shapely >= 2.0"
|
||||
)
|
||||
|
||||
geometry = self._as_geometry_array(geometry)
|
||||
if isinstance(geometry, BaseGeometry) or geometry is None:
|
||||
geometry = [geometry]
|
||||
|
||||
if compat.USE_SHAPELY_20:
|
||||
result = self._tree.query_nearest(
|
||||
geometry,
|
||||
max_distance=max_distance,
|
||||
return_distance=return_distance,
|
||||
all_matches=return_all,
|
||||
exclusive=exclusive,
|
||||
)
|
||||
else:
|
||||
if not return_all and max_distance is None and not return_distance:
|
||||
return self._tree.nearest(geometry)
|
||||
result = self._tree.nearest_all(
|
||||
geometry, max_distance=max_distance, return_distance=return_distance
|
||||
)
|
||||
if return_distance:
|
||||
indices, distances = result
|
||||
else:
|
||||
indices = result
|
||||
|
||||
if not return_all and not compat.USE_SHAPELY_20:
|
||||
# first subarray of geometry indices is sorted, so we can use this
|
||||
# trick to get the first of each index value
|
||||
mask = np.diff(indices[0, :]).astype("bool")
|
||||
# always select the first element
|
||||
mask = np.insert(mask, 0, True)
|
||||
|
||||
indices = indices[:, mask]
|
||||
if return_distance:
|
||||
distances = distances[mask]
|
||||
|
||||
if return_distance:
|
||||
return indices, distances
|
||||
else:
|
||||
return indices
|
||||
|
||||
@doc(BaseSpatialIndex.intersection)
|
||||
def intersection(self, coordinates):
|
||||
# convert bounds to geometry
|
||||
# the old API uses tuples of bound, but pygeos uses geometries
|
||||
try:
|
||||
iter(coordinates)
|
||||
except TypeError:
|
||||
# likely not an iterable
|
||||
# this is a check that rtree does, we mimic it
|
||||
# to ensure a useful failure message
|
||||
raise TypeError(
|
||||
"Invalid coordinates, must be iterable in format "
|
||||
"(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
|
||||
"Got `coordinates` = {}.".format(coordinates)
|
||||
)
|
||||
|
||||
# need to convert tuple of bounds to a geometry object
|
||||
if len(coordinates) == 4:
|
||||
indexes = self._tree.query(mod.box(*coordinates))
|
||||
elif len(coordinates) == 2:
|
||||
indexes = self._tree.query(mod.points(*coordinates))
|
||||
else:
|
||||
raise TypeError(
|
||||
"Invalid coordinates, must be iterable in format "
|
||||
"(minx, miny, maxx, maxy) (for bounds) or (x, y) (for points). "
|
||||
"Got `coordinates` = {}.".format(coordinates)
|
||||
)
|
||||
|
||||
return indexes
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.size)
|
||||
def size(self):
|
||||
return len(self._tree)
|
||||
|
||||
@property
|
||||
@doc(BaseSpatialIndex.is_empty)
|
||||
def is_empty(self):
|
||||
return len(self._tree) == 0
|
||||
|
||||
def __len__(self):
|
||||
return len(self._tree)
|
||||
358
.venv/lib/python3.12/site-packages/geopandas/testing.py
Normal file
358
.venv/lib/python3.12/site-packages/geopandas/testing.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""
|
||||
Testing functionality for geopandas objects.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from geopandas import GeoDataFrame, GeoSeries
|
||||
from geopandas.array import GeometryDtype
|
||||
from geopandas import _vectorized
|
||||
|
||||
|
||||
def _isna(this):
    """Missing-value check that works for both scalars and (Geo)Series."""
    with warnings.catch_warnings():
        # GeoSeries.isna warns about no longer returning True for empty
        # geometries. Call sites always pair this helper with an is_empty
        # check, so the warning is suppressed here rather than bubbling
        # up to the user.
        warnings.filterwarnings(
            "ignore", r"GeoSeries.isna\(\) previously returned", UserWarning
        )
        # Prefer the object's own isna/isnull method when available;
        # fall back to pandas for plain scalars.
        for method_name in ("isna", "isnull"):
            checker = getattr(this, method_name, None)
            if checker is not None:
                return checker()
        return pd.isnull(this)
|
||||
|
||||
|
||||
def _geom_equals_mask(this, that):
    """
    Elementwise test for geometric equality.

    Empty or missing geometries are considered equal to each other.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
        attribute)

    Returns
    -------
    Series
        boolean Series, True if geometries in left equal geometries in right
    """
    # A pair is "equal" if the geometries match, both are empty,
    # or both are missing.
    both_empty = this.is_empty & that.is_empty
    both_missing = _isna(this) & _isna(that)
    return this.geom_equals(that) | both_empty | both_missing
|
||||
|
||||
|
||||
def geom_equals(this, that):
    """
    Test whether all geometries are equal.

    Empty or missing geometries are considered equal.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
        attribute)

    Returns
    -------
    bool
        True if all geometries in left equal geometries in right
    """
    equality_mask = _geom_equals_mask(this, that)
    return equality_mask.all()
|
||||
|
||||
|
||||
def _geom_almost_equals_mask(this, that):
    """
    Elementwise test for 'almost' geometric equality. Empty or missing
    geometries are considered equal.

    This method allows small differences in the coordinates, but it
    requires coordinates be in the same order for all components of a
    geometry.

    Parameters
    ----------
    this, that : arrays of Geo objects

    Returns
    -------
    Series
        boolean Series, True if geometries in left almost equal geometries in right
    """
    # Half of 1e-6: tolerance corresponding to 6 decimal places.
    tolerance = 0.5 * 10 ** (-6)
    both_empty = this.is_empty & that.is_empty
    both_missing = _isna(this) & _isna(that)
    return (
        this.geom_equals_exact(that, tolerance=tolerance)
        | both_empty
        | both_missing
    )
|
||||
|
||||
|
||||
def geom_almost_equals(this, that):
    """
    Test whether all geometries are 'almost' equal. Empty or missing
    geometries are considered equal.

    This method allows small differences in the coordinates, but it
    requires coordinates be in the same order for all components of a
    geometry.

    Parameters
    ----------
    this, that : arrays of Geo objects (or anything that has an `is_empty`
        property)

    Returns
    -------
    bool
        True if all geometries in left almost equal geometries in right
    """
    # For frames, compare the active geometry columns.
    if isinstance(this, GeoDataFrame) and isinstance(that, GeoDataFrame):
        this, that = this.geometry, that.geometry

    return _geom_almost_equals_mask(this, that).all()
|
||||
|
||||
|
||||
def assert_geoseries_equal(
    left,
    right,
    check_dtype=True,
    check_index_type=False,
    check_series_type=True,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
    normalize=False,
):
    """
    Test util for checking that two GeoSeries are equal.

    Parameters
    ----------
    left, right : two GeoSeries
    check_dtype : bool, default False
        If True, check geo dtype [only included so it's a drop-in replacement
        for assert_series_equal].
    check_index_type : bool, default False
        Check that index types are equal.
    check_series_type : bool, default True
        Check that both are same type (*and* are GeoSeries). If False,
        will attempt to convert both into GeoSeries.
    check_less_precise : bool, default False
        If True, use geom_equals_exact with relative error of 0.5e-6.
        If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_series_type` is True, then also check that the
        crs matches.
    normalize: bool, default False
        If True, normalize the geometries before comparing equality.
        Typically useful with ``check_less_precise=True``, which uses
        ``geom_equals_exact`` and requires exact coordinate order.

    Raises
    ------
    AssertionError
        If any of the enabled checks fails.
    """
    assert len(left) == len(right), "%d != %d" % (len(left), len(right))

    if check_dtype:
        msg = "dtype should be a GeometryDtype, got {0}"
        assert isinstance(left.dtype, GeometryDtype), msg.format(left.dtype)
        # BUG FIX: the failure message previously reported left.dtype when
        # the *right* operand had the wrong dtype.
        assert isinstance(right.dtype, GeometryDtype), msg.format(right.dtype)

    if check_index_type:
        assert isinstance(left.index, type(right.index))

    if check_series_type:
        assert isinstance(left, GeoSeries)
        assert isinstance(left, type(right))

        if check_crs:
            assert left.crs == right.crs
    else:
        # Coerce both sides to GeoSeries so the geometry comparisons below
        # are well-defined; reuse left's index for right to keep alignment.
        if not isinstance(left, GeoSeries):
            left = GeoSeries(left)
        if not isinstance(right, GeoSeries):
            right = GeoSeries(right, index=left.index)

    assert left.index.equals(right.index), "index: %s != %s" % (left.index, right.index)

    if check_geom_type:
        assert (left.geom_type == right.geom_type).all(), "type: %s != %s" % (
            left.geom_type,
            right.geom_type,
        )

    if normalize:
        # Normalize coordinate order so geom_equals_exact-style comparisons
        # are insensitive to component ordering.
        left = GeoSeries(_vectorized.normalize(left.array._data))
        right = GeoSeries(_vectorized.normalize(right.array._data))

    if not check_crs:
        # CRS differences would otherwise trigger a warning during the
        # elementwise comparison; silence it since the caller opted out.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", "CRS mismatch", UserWarning)
            _check_equality(left, right, check_less_precise)
    else:
        _check_equality(left, right, check_less_precise)
|
||||
|
||||
|
||||
def _truncated_string(geom):
    """Truncated WKT repr of geom"""
    # Keep assertion messages readable for very large geometries.
    wkt = str(geom)
    return wkt if len(wkt) <= 100 else wkt[:100] + "..."
|
||||
|
||||
|
||||
def _check_equality(left, right, check_less_precise):
    """Raise AssertionError with a detailed report if any geometries differ."""
    assert_error_message = (
        "{0} out of {1} geometries are not {3}equal.\n"
        "Indices where geometries are not {3}equal: {2} \n"
        "The first not {3}equal geometry:\n"
        "Left: {4}\n"
        "Right: {5}\n"
    )
    # Choose the comparison strictness and the wording used in the report.
    if check_less_precise:
        precise = "almost "
        equal = _geom_almost_equals_mask(left, right)
    else:
        precise = ""
        equal = _geom_equals_mask(left, right)

    if equal.all():
        return

    mismatched_left = left[~equal]
    mismatched_right = right[~equal]
    raise AssertionError(
        assert_error_message.format(
            len(mismatched_left),
            len(left),
            mismatched_left.index.to_list(),
            precise,
            _truncated_string(mismatched_left.iloc[0]),
            _truncated_string(mismatched_right.iloc[0]),
        )
    )
|
||||
|
||||
|
||||
def assert_geodataframe_equal(
    left,
    right,
    check_dtype=True,
    check_index_type="equiv",
    check_column_type="equiv",
    check_frame_type=True,
    check_like=False,
    check_less_precise=False,
    check_geom_type=False,
    check_crs=True,
    normalize=False,
):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If true, ignore the order of rows & columns
    check_less_precise : bool, default False
        If True, use geom_equals_exact. if False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_frame_type` is True, then also check that the
        crs matches.
    normalize: bool, default False
        If True, normalize the geometries before comparing equality.
        Typically useful with ``check_less_precise=True``, which uses
        ``geom_equals_exact`` and requires exact coordinate order.

    Raises
    ------
    AssertionError
        If any of the enabled checks fails.
    """
    try:
        # pandas >= 0.20 exposes the testing helpers under pandas.testing
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        # fall back for older pandas versions
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # allow if neither left and right has an active geometry column
            if (
                left._geometry_column_name is None
                and right._geometry_column_name is None
            ):
                pass
            elif (
                left._geometry_column_name not in left.columns
                and right._geometry_column_name not in right.columns
            ):
                pass
            # no crs can be either None or {}
            elif not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        # coerce both sides so the frame-level comparisons below apply
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        "GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n"
        "Left columns: {lcols!r}, right columns: {rcols!r}"
    ).format(
        lshape=left.shape, rshape=right.shape, lcols=left.columns, rcols=right.columns
    )

    if check_like:
        # align left's row/column order to right so order is ignored
        left, right = left.reindex_like(right), right

    # column comparison
    assert_index_equal(
        left.columns, right.columns, exact=check_column_type, obj="GeoDataFrame.columns"
    )

    # geometry comparison: every geometry-dtyped column is compared with
    # the GeoSeries-specific equality semantics
    for col, dtype in left.dtypes.items():
        if isinstance(dtype, GeometryDtype):
            assert_geoseries_equal(
                left[col],
                right[col],
                normalize=normalize,
                check_dtype=check_dtype,
                check_less_precise=check_less_precise,
                check_geom_type=check_geom_type,
                check_crs=check_crs,
            )

    # ensure the active geometry column is the same
    assert left._geometry_column_name == right._geometry_column_name

    # drop geometries and check remaining columns with pandas' own helper
    left2 = left.select_dtypes(exclude="geometry")
    right2 = right.select_dtypes(exclude="geometry")
    assert_frame_equal(
        left2,
        right2,
        check_dtype=check_dtype,
        check_index_type=check_index_type,
        check_column_type=check_column_type,
        obj="GeoDataFrame",
    )
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
|
||||
{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
|
||||
]
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user