This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -5,6 +5,7 @@ from geopandas.geodataframe import GeoDataFrame
from geopandas.array import points_from_xy
from geopandas.io.file import _read_file as read_file
from geopandas.io.file import _list_layers as list_layers
from geopandas.io.arrow import _read_parquet as read_parquet
from geopandas.io.arrow import _read_feather as read_feather
from geopandas.io.sql import _read_postgis as read_postgis

View File

@@ -1,15 +1,11 @@
import contextlib
from packaging.version import Version
import importlib
import os
import warnings
from packaging.version import Version
import numpy as np
import pandas as pd
import shapely
import shapely.geos
# -----------------------------------------------------------------------------
# pandas compat
# -----------------------------------------------------------------------------
@@ -17,182 +13,20 @@ import shapely.geos
PANDAS_GE_14 = Version(pd.__version__) >= Version("1.4.0rc0")
PANDAS_GE_15 = Version(pd.__version__) >= Version("1.5.0")
PANDAS_GE_20 = Version(pd.__version__) >= Version("2.0.0")
PANDAS_GE_202 = Version(pd.__version__) >= Version("2.0.2")
PANDAS_GE_21 = Version(pd.__version__) >= Version("2.1.0")
PANDAS_GE_22 = Version(pd.__version__) >= Version("2.2.0.dev0")
PANDAS_GE_22 = Version(pd.__version__) >= Version("2.2.0")
PANDAS_GE_30 = Version(pd.__version__) >= Version("3.0.0.dev0")
# -----------------------------------------------------------------------------
# Shapely / PyGEOS compat
# Shapely / GEOS compat
# -----------------------------------------------------------------------------
SHAPELY_GE_182 = Version(shapely.__version__) >= Version("1.8.2")
SHAPELY_GE_20 = Version(shapely.__version__) >= Version("2.0.0.dev0")
SHAPELY_G_20a1 = Version(shapely.__version__) > Version("2.0a1")
SHAPELY_GE_204 = Version(shapely.__version__) >= Version("2.0.4")
GEOS_GE_390 = shapely.geos.geos_version >= (3, 9, 0)
HAS_PYGEOS = None
USE_PYGEOS = None
USE_SHAPELY_20 = None
PYGEOS_SHAPELY_COMPAT = None
PYGEOS_GE_09 = None
PYGEOS_GE_010 = None
INSTALL_PYGEOS_ERROR = "To use PyGEOS within GeoPandas, you need to install PyGEOS: \
'conda install pygeos' or 'pip install pygeos'"
try:
import pygeos
# only automatically use pygeos if version is high enough
if Version(pygeos.__version__) >= Version("0.8"):
HAS_PYGEOS = True
PYGEOS_GE_09 = Version(pygeos.__version__) >= Version("0.9")
PYGEOS_GE_010 = Version(pygeos.__version__) >= Version("0.10")
else:
warnings.warn(
"The installed version of PyGEOS is too old ({0} installed, 0.8 required),"
" and thus GeoPandas will not use PyGEOS.".format(pygeos.__version__),
UserWarning,
stacklevel=2,
)
HAS_PYGEOS = False
except ImportError:
HAS_PYGEOS = False
def set_use_pygeos(val=None):
"""
Set the global configuration on whether to use PyGEOS or not.
The default is use PyGEOS if it is installed. This can be overridden
with an environment variable USE_PYGEOS (this is only checked at
first import, cannot be changed during interactive session).
Alternatively, pass a value here to force a True/False value.
"""
global USE_PYGEOS
global USE_SHAPELY_20
global PYGEOS_SHAPELY_COMPAT
env_use_pygeos = os.getenv("USE_PYGEOS", None)
if val is not None:
USE_PYGEOS = bool(val)
else:
if USE_PYGEOS is None:
if SHAPELY_GE_20:
USE_PYGEOS = False
else:
USE_PYGEOS = HAS_PYGEOS
if env_use_pygeos is not None:
USE_PYGEOS = bool(int(env_use_pygeos))
# validate the pygeos version
if USE_PYGEOS:
try:
import pygeos
# validate the pygeos version
if not Version(pygeos.__version__) >= Version("0.8"):
if SHAPELY_GE_20:
USE_PYGEOS = False
warnings.warn(
"The PyGEOS version is too old, and Shapely >= 2 is installed, "
"thus using Shapely by default and not PyGEOS.",
stacklevel=2,
)
else:
raise ImportError(
"PyGEOS >= 0.8 is required, version {0} is installed".format(
pygeos.__version__
)
)
# Check whether Shapely and PyGEOS use the same GEOS version.
# Based on PyGEOS from_shapely implementation.
from shapely.geos import geos_version_string as shapely_geos_version
from pygeos import geos_capi_version_string
# shapely has something like: "3.6.2-CAPI-1.10.2 4d2925d6"
# pygeos has something like: "3.6.2-CAPI-1.10.2"
if not shapely_geos_version.startswith(geos_capi_version_string):
warnings.warn(
"The Shapely GEOS version ({}) is incompatible with the GEOS "
"version PyGEOS was compiled with ({}). Conversions between both "
"will be slow.".format(
shapely_geos_version, geos_capi_version_string
),
stacklevel=2,
)
PYGEOS_SHAPELY_COMPAT = False
else:
PYGEOS_SHAPELY_COMPAT = True
except ImportError:
raise ImportError(INSTALL_PYGEOS_ERROR)
if USE_PYGEOS:
warnings.warn(
"GeoPandas is set to use PyGEOS over Shapely. PyGEOS support is deprecated"
"and will be removed in GeoPandas 1.0, released in the Q1 of 2024. "
"Please migrate to Shapely 2.0 "
"(https://geopandas.org/en/stable/docs/user_guide/pygeos_to_shapely.html).",
DeprecationWarning,
stacklevel=6,
)
USE_SHAPELY_20 = (not USE_PYGEOS) and SHAPELY_GE_20
set_use_pygeos()
# compat related to deprecation warnings introduced in Shapely 1.8
# -> creating a numpy array from a list-like of Multi-part geometries,
# although doing the correct thing (not expanding in its parts), still raises
# the warning about iteration being deprecated
# This adds a context manager to explicitly ignore this warning
try:
from shapely.errors import ShapelyDeprecationWarning as shapely_warning
except ImportError:
shapely_warning = None
if shapely_warning is not None and not SHAPELY_GE_20:
@contextlib.contextmanager
def ignore_shapely2_warnings():
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", "Iteration|The array interface|__len__", shapely_warning
)
yield
elif (Version(np.__version__) >= Version("1.21")) and not SHAPELY_GE_20:
@contextlib.contextmanager
def ignore_shapely2_warnings():
with warnings.catch_warnings():
# warning from numpy for existing Shapely releases (this is fixed
# with Shapely 1.8)
warnings.filterwarnings(
"ignore", "An exception was ignored while fetching", DeprecationWarning
)
yield
else:
@contextlib.contextmanager
def ignore_shapely2_warnings():
yield
GEOS_GE_310 = shapely.geos.geos_version >= (3, 10, 0)
def import_optional_dependency(name: str, extra: str = ""):
@@ -232,20 +66,27 @@ def import_optional_dependency(name: str, extra: str = ""):
return module
# -----------------------------------------------------------------------------
# RTree compat
# -----------------------------------------------------------------------------
HAS_RTREE = None
RTREE_GE_094 = False
try:
import rtree # noqa: F401
HAS_RTREE = True
except ImportError:
HAS_RTREE = False
# -----------------------------------------------------------------------------
# pyproj compat
# -----------------------------------------------------------------------------
try:
import pyproj # noqa: F401
HAS_PYPROJ = True
except ImportError as err:
HAS_PYPROJ = False
pyproj_import_error = str(err)
def requires_pyproj(func):
def wrapper(*args, **kwargs):
if not HAS_PYPROJ:
raise ImportError(
f"The 'pyproj' package is required for {func.__name__} to work. "
"Install it and initialize the object with a CRS before using it."
f"\nImporting pyproj resulted in: {pyproj_import_error}"
)
return func(*args, **kwargs)
return wrapper

View File

@@ -5,9 +5,10 @@ Based on https://github.com/topper-123/optioneer, but simplified (don't deal
with nested options, deprecated options, ..), just the attribute-style dict
like holding the options and giving a nice repr.
"""
from collections import namedtuple
import textwrap
import textwrap
import warnings
from collections import namedtuple
Option = namedtuple("Option", "key default_value doc validator callback")
@@ -86,35 +87,12 @@ display_precision = Option(
)
def _validate_bool(value):
if not isinstance(value, bool):
raise TypeError("Expected bool value, got {0}".format(type(value)))
def _default_use_pygeos():
import geopandas._compat as compat
return compat.USE_PYGEOS
def _callback_use_pygeos(key, value):
assert key == "use_pygeos"
import geopandas._compat as compat
compat.set_use_pygeos(value)
use_pygeos = Option(
key="use_pygeos",
default_value=_default_use_pygeos(),
doc=(
"Whether to use PyGEOS to speed up spatial operations. The default is True "
"if PyGEOS is installed, and follows the USE_PYGEOS environment variable "
"if set."
),
validator=_validate_bool,
callback=_callback_use_pygeos,
)
def _warn_use_pygeos_deprecated(_value):
warnings.warn(
"pygeos support was removed in 1.0. "
"geopandas.use_pygeos is a no-op and will be removed in geopandas 1.1.",
stacklevel=3,
)
def _validate_io_engine(value):
@@ -134,6 +112,17 @@ io_engine = Option(
callback=None,
)
# TODO: deprecate this
use_pygeos = Option(
key="use_pygeos",
default_value=False,
doc=(
"Deprecated option previously used to enable PyGEOS. "
"It will be removed in GeoPandas 1.1."
),
validator=_warn_use_pygeos_deprecated,
callback=None,
)
options = Options(
{

View File

@@ -1,7 +1,6 @@
from textwrap import dedent
from typing import Callable, Union
# doc decorator function ported with modifications from Pandas
# https://github.com/pandas-dev/pandas/blob/master/pandas/util/_decorators.py
@@ -39,9 +38,11 @@ def doc(*docstrings: Union[str, Callable], **params) -> Callable:
# formatting templates and concatenating docstring
decorated.__doc__ = "".join(
component.format(**params)
if isinstance(component, str)
else dedent(component.__doc__ or "")
(
component.format(**params)
if isinstance(component, str)
else dedent(component.__doc__ or "")
)
for component in docstring_components
)

File diff suppressed because it is too large Load Diff

View File

@@ -8,11 +8,11 @@ import json
version_json = '''
{
"date": "2023-11-11T10:29:16+0100",
"date": "2024-07-02T14:23:16+0200",
"dirty": false,
"error": null,
"full-revisionid": "9a9f0974db087ce303b94bfbeabc8ea136be0914",
"version": "0.14.1"
"full-revisionid": "747d66ee6fcf00b819c08f11ecded53736c4652b",
"version": "1.0.1"
}
''' # END VERSION_JSON

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,27 +1,47 @@
import pytest
import os.path
import geopandas
import pytest
from geopandas.tests.util import _NATURALEARTH_CITIES, _NATURALEARTH_LOWRES, _NYBB
@pytest.fixture(autouse=True)
def add_geopandas(doctest_namespace):
doctest_namespace["geopandas"] = geopandas
def pytest_configure(config):
config.addinivalue_line(
"markers",
"skip_no_sindex: skips the tests if there is no spatial index backend",
)
# Datasets used in our tests
try:
geopandas.sindex._get_sindex_class()
has_sindex_backend = True
except ImportError:
has_sindex_backend = False
@pytest.fixture(scope="session")
def naturalearth_lowres() -> str:
# skip if data missing, unless on github actions
if os.path.isfile(_NATURALEARTH_LOWRES) or os.getenv("GITHUB_ACTIONS"):
return _NATURALEARTH_LOWRES
else:
pytest.skip("Naturalearth lowres dataset not found")
def pytest_runtest_setup(item):
skip_no_sindex = any(mark for mark in item.iter_markers(name="skip_no_sindex"))
if skip_no_sindex and not has_sindex_backend:
pytest.skip("Skipped because there is no spatial index backend available")
@pytest.fixture(scope="session")
def naturalearth_cities() -> str:
# skip if data missing, unless on github actions
if os.path.isfile(_NATURALEARTH_CITIES) or os.getenv("GITHUB_ACTIONS"):
return _NATURALEARTH_CITIES
else:
pytest.skip("Naturalearth cities dataset not found")
@pytest.fixture(scope="session")
def nybb_filename() -> str:
# skip if data missing, unless on github actions
if os.path.isfile(_NYBB[len("zip://") :]) or os.getenv("GITHUB_ACTIONS"):
return _NYBB
else:
pytest.skip("NYBB dataset not found")
@pytest.fixture(scope="class")
def _setup_class_nybb_filename(nybb_filename, request):
"""Attach nybb_filename class attribute for unittest style setup_method"""
request.cls.nybb_filename = nybb_filename

View File

@@ -1,59 +1,25 @@
import os
from warnings import warn
__all__ = ["available", "get_path"]
_module_path = os.path.dirname(__file__)
_available_dir = [p for p in next(os.walk(_module_path))[1] if not p.startswith("__")]
_available_zip = {"nybb": "nybb_16a.zip"}
available = _available_dir + list(_available_zip.keys())
__all__ = []
available = [] # previously part of __all__
_prev_available = ["naturalearth_cities", "naturalearth_lowres", "nybb"]
def get_path(dataset):
"""
Get the path to the data file.
Parameters
----------
dataset : str
The name of the dataset. See ``geopandas.datasets.available`` for
all options.
Examples
--------
>>> geopandas.datasets.get_path("naturalearth_lowres") # doctest: +SKIP
'.../python3.8/site-packages/geopandas/datasets/\
naturalearth_lowres/naturalearth_lowres.shp'
"""
ne_message = "https://www.naturalearthdata.com/downloads/110m-cultural-vectors/."
nybb_message = (
"the geodatasets package.\n\nfrom geodatasets import get_path\n"
"path_to_file = get_path('nybb')\n"
)
depr_warning = (
"The geopandas.dataset module is deprecated and will be removed in GeoPandas "
error_msg = (
"The geopandas.dataset has been deprecated and was removed in GeoPandas "
f"1.0. You can get the original '{dataset}' data from "
f"{ne_message if 'natural' in dataset else nybb_message}"
)
if dataset in _available_dir:
warn(
depr_warning,
FutureWarning,
stacklevel=2,
)
return os.path.abspath(os.path.join(_module_path, dataset, dataset + ".shp"))
elif dataset in _available_zip:
warn(
depr_warning,
FutureWarning,
stacklevel=2,
)
fpath = os.path.abspath(os.path.join(_module_path, _available_zip[dataset]))
return "zip://" + fpath
if dataset in _prev_available:
raise AttributeError(error_msg)
else:
msg = "The dataset '{data}' is not available. ".format(data=dataset)
msg += "Available datasets are {}".format(", ".join(available))
raise ValueError(msg)
error_msg = (
"The geopandas.dataset has been deprecated and "
"was removed in GeoPandas 1.0. New sample datasets are now available "
"in the geodatasets package (https://geodatasets.readthedocs.io/en/latest/)"
)
raise AttributeError(error_msg)

View File

@@ -1,336 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" dir="ltr" lang="en-US">
<head profile="http://gmpg.org/xfn/11">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>Populated Places | Natural Earth</title>
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
<link rel="alternate" type="application/rss+xml" title="Natural Earth RSS Feed" href="http://www.naturalearthdata.com/feed/" />
<link rel="pingback" href="http://www.naturalearthdata.com/xmlrpc.php" />
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/themes/NEV/includes/js/suckerfish.js"></script>
<!--[if lt IE 7]>
<script src="http://ie7-js.googlecode.com/svn/version/2.0(beta3)/IE7.js" type="text/javascript"></script>
<script defer="defer" type="text/javascript" src="http://www.naturalearthdata.com/wp-content/themes/NEV/includes/js/pngfix.js"></script>
<![endif]-->
<link rel="stylesheet" href="http://www.naturalearthdata.com/wp-content/themes/NEV/style.css" type="text/css" media="screen" />
<meta name='Admin Management Xtended WordPress plugin' content='2.1.1' />
<link rel="alternate" type="application/rss+xml" title="Natural Earth &raquo; Populated Places Comments Feed" href="http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-populated-places/feed/" />
<link rel='stylesheet' id='sociable-front-css-css' href='http://www.naturalearthdata.com/wp-content/plugins/sociable/sociable.css?ver=2.9.2' type='text/css' media='' />
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://www.naturalearthdata.com/xmlrpc.php?rsd" />
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://www.naturalearthdata.com/wp-includes/wlwmanifest.xml" />
<link rel='index' title='Natural Earth' href='http://www.naturalearthdata.com' />
<link rel='start' title='Welcome to the Natural Earth Blog' href='http://www.naturalearthdata.com/blog/miscellaneous/test/' />
<link rel='prev' title='Antarctic Ice Shelves' href='http://www.naturalearthdata.com/downloads/10m-physical-vectors/10m-antarctic-ice-shelves/' />
<link rel='next' title='Admin 1 &#8211; States, Provinces' href='http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-1-states-provinces/' />
<meta name="generator" content="WordPress 2.9.2" />
<!-- All in One SEO Pack 1.6.10.2 by Michael Torbert of Semper Fi Web Design[309,448] -->
<meta name="description" content="City and town points, from Tokyo to Wasilla, Cairo to Kandahar About Point symbols with name attributes. Includes all admin-0 and many" />
<link rel="canonical" href="http://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-populated-places/" />
<!-- /all in one seo pack -->
<!-- begin gallery scripts -->
<link rel="stylesheet" href="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/css/jd.gallery.css.php" type="text/css" media="screen" charset="utf-8"/>
<link rel="stylesheet" href="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/css/jd.gallery.css" type="text/css" media="screen" charset="utf-8"/>
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/scripts/mootools.v1.11.js"></script>
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/scripts/jd.gallery.js.php"></script>
<script type="text/javascript" src="http://www.naturalearthdata.com/wp-content/plugins/featured-content-gallery/scripts/jd.gallery.transitions.js"></script>
<!-- end gallery scripts -->
<style type="text/css">.broken_link, a.broken_link {
text-decoration: line-through;
}</style><link href="http://www.naturalearthdata.com/wp-content/themes/NEV/css/default.css" rel="stylesheet" type="text/css" />
<style type="text/css">.recentcomments a{display:inline !important;padding:0 !important;margin:0 !important;}</style>
<!--[if lte IE 7]>
<link rel="stylesheet" type="text/css" href="http://www.naturalearthdata.com/wp-content/themes/NEV/ie.css" />
<![endif]-->
<script src="http://www.naturalearthdata.com/wp-content/themes/NEV/js/jquery-1.2.6.min.js" type="text/javascript" charset="utf-8"></script>
<script>
jQuery.noConflict();
</script>
<script type="text/javascript" charset="utf-8">
$(function(){
var tabContainers = $('div#maintabdiv > div');
tabContainers.hide().filter('#comments').show();
$('div#maintabdiv ul#tabnav a').click(function () {
tabContainers.hide();
tabContainers.filter(this.hash).show();
$('div#maintabdiv ul#tabnav a').removeClass('current');
$(this).addClass('current');
return false;
}).filter('#comments').click();
});
</script>
<script type="text/javascript" language="javascript" src="http://www.naturalearthdata.com/dataTables/media/js/jquery.dataTables.js"></script>
<script type="text/javascript" charset="utf-8">
$(document).ready(function() {
$('#ne_table').dataTable();
} );
</script>
</head>
<body>
<div id="page">
<div id="header">
<div id="headerimg">
<h1><a href="http://www.naturalearthdata.com/"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/nev_logo.png" alt="Natural Earth title="Natural Earth" /></a></h1>
<div class="description">Free vector and raster map data at 1:10m, 1:50m, and 1:110m scales</div>
<div class="header_search"><form method="get" id="searchform" action="http://www.naturalearthdata.com/">
<label class="hidden" for="s">Search for:</label>
<div><input type="text" value="" name="s" id="s" />
<input type="submit" id="searchsubmit" value="Search" />
</div>
</form>
</div>
<!--<div class="translate_panel" style="align:top; margin-left:650px; top:50px;">
<div id="google_translate_element" style="float:left;"></div>
<script>
function googleTranslateElementInit() {
new google.translate.TranslateElement({
pageLanguage: 'en'
}, 'google_translate_element');
}
</script>
<script src="http://translate.google.com/translate_a/element.js?cb=googleTranslateElementInit"></script>
</div>-->
</div>
</div>
<div id="pagemenu" style="align:bottom;">
<ul id="page-list" class="clearfix"><li class="page_item page-item-4"><a href="http://www.naturalearthdata.com" title="Home">Home</a></li>
<li class="page_item page-item-10"><a href="http://www.naturalearthdata.com/features/" title="Features">Features</a></li>
<li class="page_item page-item-12"><a href="http://www.naturalearthdata.com/downloads/" title="Downloads">Downloads</a></li>
<li class="page_item page-item-6 current_page_parent"><a href="http://www.naturalearthdata.com/blog/" title="Blog">Blog</a></li>
<li class="page_item page-item-14"><a href="http://www.naturalearthdata.com/forums" title="Forums">Forums</a></li>
<li class="page_item page-item-366"><a href="http://www.naturalearthdata.com/corrections" title="Corrections">Corrections</a></li>
<li class="page_item page-item-16"><a href="http://www.naturalearthdata.com/about/" title="About">About</a></li>
</ul>
</div>
<hr /> <div id="main">
<div id="content" class="narrowcolumn">
&laquo; <a href="http://www.naturalearthdata.com/downloads/10m-cultural-vectors/">1:10m Cultural Vectors</a>&nbsp;
<div class="post" id="post-472">
<h2>Populated Places</h2>
<div class="entry">
<div class="downloadPromoBlock" style="float: left;">
<div style="float: left; width: 170px;"><img class="alignnone size-full wp-image-1918" title="pop_thumb" src="http://www.naturalearthdata.com/wp-content/uploads/2009/09/pop_thumb.png" alt="pop_thumb" width="150" height="97" /></div>
<div style="float: left; width: 410px;"><em>City and town points, from Tokyo to Wasilla, Cairo to Kandahar</em>
<div class="download-link-div">
<a class="download-link" rel="nofollow" title="Downloaded 26754 times (Shapefile, geoDB, or TIFF format)" onclick="if (window.urchinTracker) urchinTracker ('http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip');" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_populated_places.zip');">Download populated places</a> <span class="download-link-span">(1.51 MB) version 2.0.0</span>
</div> <div class="download-link-div">
<a class="download-link" rel="nofollow" title="Downloaded 2515 times (Shapefile, geoDB, or TIFF format)" onclick="if (window.urchinTracker) urchinTracker ('http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip');" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_populated_places_simple.zip');">Download simple (less columns)</a> <span class="download-link-span">(719.87 KB) version 2.0.0</span>
</div>
<span id="more-472"></span></div>
</div>
<div class="downloadMainBlock" style="float: left;">
<p><strong>About</strong></p>
<p>Point symbols with name attributes. Includes all admin-0 and many admin-1 capitals, major cities and towns, plus a sampling of smaller towns in sparsely inhabited regions. We favor regional significance over population census in determining our selection of places. Use the scale rankings to filter the number of towns that appear on your map.</p>
<p><img class="alignnone size-full wp-image-1920" title="pop_banner" src="http://www.naturalearthdata.com/wp-content/uploads/2009/09/pop_banner.png" alt="pop_banner" width="580" height="150" /></p>
<p><a href="http://www.ornl.gov/sci/landscan/" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.ornl.gov/sci/landscan/');">LandScan</a> derived population estimates are provided for 90% of our cities. Those lacking population estimates are often in sparsely inhabited areas. We provide a range of population values that account for the total &#8220;metropolitan&#8221; population rather than it&#8217;s administrative boundary population. Use the PopMax column to size your town labels. Starting in version 1.1, popMax has been throttled down to the UN estimated metro population for the ~500 largest urban areas in the world. This affects towns in China, India, and parts of Africa where our Landscan counting method usually over estimated.</p>
<p>Population estimates were derived from the LANDSCAN dataset maintained and distributed by the Oak Ridge National Laboratory. These data were converted from raster to vector and pixels with fewer than 200 persons per square kilometer were removed from the dataset as they were classified as rural. Once urban pixels were selected, these pixels were aggregated into contiguous units. Concurrently Thiessen polygons were created based on the selected city points. The Thiessen polygons were used to intersect the contiguous city boundaries to produce bounded areas for the cities. As a result, our estimates capture a metropolitan and micropolitan populations per city regardless of administrative units.</p>
<p>Once intersected, the contiguous polygons were recalculated, using aerial interpolation assuming uniform population distribution within each pixel, to determine the population total. This process was conducted multiple times, for each scale level, to produce population estimates for each city at nested scales of 1:300 million, 1:110 million, 1:50 million, 1:20 million, and 1:10 million. </p>
<div class="download-link-div">
<a class="download-link" rel="nofollow" title="Downloaded 481 times (Shapefile, geoDB, or TIFF format)" onclick="if (window.urchinTracker) urchinTracker ('http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_urban_areas_landscan.zip');" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_urban_areas_landscan.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_urban_areas_landscan.zip');">Download landscan urban areas</a> <span class="download-link-span">(13.01 MB) version 2.0.0</span>
</div>
<p><strong>Population ranks</strong></p>
<p>Are calculated as rank_max and rank_min using this general VB formula that can be pasted into ArcMap Field Calculator advanced area (set your output to x):</p>
<blockquote><p>
a = [pop_max]</p>
<p>if( a > 10000000 ) then
x = 14
elseif( a > 5000000 ) then
x = 13
elseif( a > 1000000 ) then
x = 12
elseif( a > 500000 ) then
x = 11
elseif( a > 200000 ) then
x = 10
elseif( a > 100000 ) then
x = 9
elseif( a > 50000 ) then
x = 8
elseif( a > 20000 ) then
x = 7
elseif( a > 10000 ) then
x = 6
elseif( a > 5000 ) then
x = 5
elseif( a > 2000 ) then
x = 4
elseif( a > 1000 ) then
x = 3
elseif( a > 200 ) then
x = 2
elseif( a > 0 ) then
x = 1
else
x = 0
end if</p></blockquote>
<p><strong>Issues</strong></p>
<p>While we don&#8217;t want to show every admin-1 capital, for those countries where we show most admin-1 capitals, we should have a complete set. If you find we are missing one, please log it in the Cx tool at right.</p>
<p><strong>Version History</strong></p>
<ul>
<li>
<a rel="nofollow" title="Download version 2.0.0 of ne_10m_populated_places.zip" href="http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip" onclick="javascript:pageTracker._trackPageview('/downloads/http///download/10m/cultural/ne_10m_populated_places.zip');">2.0.0</a>
</li>
<li>
1.4.0
</li>
<li>
1.3.0
</li>
<li>
1.1.0
</li>
<li>
0.9.0
</li>
</ul>
<p><a href="https://github.com/nvkelso/natural-earth-vector/blob/master/CHANGELOG" onclick="javascript:pageTracker._trackPageview('/outbound/article/https://github.com/nvkelso/natural-earth-vector/blob/master/CHANGELOG');">The master changelog is available on Github »</a>
</div>
<div class="sociable">
<div class="sociable_tagline">
<strong>Share and Enjoy:</strong>
</div>
<ul>
<li class="sociablefirst"><a rel="nofollow" target="_blank" href="http://twitter.com/home?status=Populated%20Places%20-%20http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://twitter.com/home?status=Populated%20Places%20-%20http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F');" title="Twitter"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Twitter" alt="Twitter" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-343px -55px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://www.facebook.com/share.php?u=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;t=Populated%20Places" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.facebook.com/share.php?u=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;t=Populated%20Places');" title="Facebook"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Facebook" alt="Facebook" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-343px -1px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;bodytext=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;bodytext=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="Digg"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Digg" alt="Digg" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-235px -1px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://delicious.com/post?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;notes=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://delicious.com/post?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;notes=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="del.icio.us"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="del.icio.us" alt="del.icio.us" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-199px -1px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://www.google.com/bookmarks/mark?op=edit&amp;bkmk=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;annotation=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.google.com/bookmarks/mark?op=edit&amp;bkmk=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;annotation=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="Google Bookmarks"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Google Bookmarks" alt="Google Bookmarks" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-91px -19px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://slashdot.org/bookmark.pl?title=Populated%20Places&amp;url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://slashdot.org/bookmark.pl?title=Populated%20Places&amp;url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F');" title="Slashdot"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Slashdot" alt="Slashdot" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-145px -55px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.stumbleupon.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places');" title="StumbleUpon"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="StumbleUpon" alt="StumbleUpon" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-217px -55px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="mailto:?subject=Populated%20Places&amp;body=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F" title="email"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="email" alt="email" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-325px -1px" class="sociable-hovers" /></a></li>
<li><a rel="nofollow" target="_blank" href="http://www.linkedin.com/shareArticle?mini=true&amp;url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;source=Natural+Earth+Free+vector+and+raster+map+data+at+1%3A10m%2C+1%3A50m%2C+and+1%3A110m+scales&amp;summary=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://www.linkedin.com/shareArticle?mini=true&amp;url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places&amp;source=Natural+Earth+Free+vector+and+raster+map+data+at+1%3A10m%2C+1%3A50m%2C+and+1%3A110m+scales&amp;summary=%0D%0A%0D%0ACity%20and%20town%20points%2C%20from%20Tokyo%20to%20Wasilla%2C%20Cairo%20to%20Kandahar%0D%0A%5Bdrain%20file%2039%20show%20nev_download%5D%20%5Bdrain%20file%20224%20show%20nev_download%5D%0D%0A%0D%0A%0D%0A%0D%0A%0D%0AAbout%0D%0A%0D%0APoint%20symbols%20with%20name%20attributes.%20Includes%20all%20admin-0%20and%20many%20admin-1%20capitals%2C%20major%20citie');" title="LinkedIn"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="LinkedIn" alt="LinkedIn" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-1px -37px" class="sociable-hovers" /></a></li>
<li class="sociablelast"><a rel="nofollow" target="_blank" href="http://reddit.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places" onclick="javascript:pageTracker._trackPageview('/outbound/article/http://reddit.com/submit?url=http%3A%2F%2Fwww.naturalearthdata.com%2Fdownloads%2F10m-cultural-vectors%2F10m-populated-places%2F&amp;title=Populated%20Places');" title="Reddit"><img src="http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.gif" title="Reddit" alt="Reddit" style="width: 16px; height: 16px; background: transparent url(http://www.naturalearthdata.com/wp-content/plugins/sociable/images/services-sprite.png) no-repeat; background-position:-55px -55px" class="sociable-hovers" /></a></li>
</ul>
</div>
</div>
</div>
</div>
<div id="sidebar">
<ul><li id='text-5' class='widget widget_text'><h2 class="widgettitle">Stay up to Date</h2>
<div class="textwidget"> Know when a new version of Natural Earth is released by subscribing to our <a href="http://www.naturalearthdata.com/updates/" class="up-to-date-link" >announcement list</a>.</div>
</li></ul><ul><li id='text-2' class='widget widget_text'><h2 class="widgettitle">Find a Problem?</h2>
<div class="textwidget"><div>
<div style="float:left; width:65px;"><a href="/corrections/index.php?a=add"><img class="alignleft" title="New Ticket" src="http://www.naturalearthdata.com/corrections/img/newticket.png" alt="" width="60" height="60" /></a></div><div class="textwidget" style="float:left;width:120px; font-size:1.2em; font-size-adjust:none; font-style:normal;
font-variant:normal; font-weight:normal; line-height:normal;">Submit suggestions and bug reports via our <a href="/corrections/index.php?a=add">correction system</a> and track the progress of your edits.</div>
</div></div>
</li></ul><ul><li id='text-3' class='widget widget_text'><h2 class="widgettitle">Join Our Community</h2>
<div class="textwidget"><div>
<div style="float:left; width:65px;"><a href="/forums/"><img src="http://www.naturalearthdata.com/wp-content/uploads/2009/08/green_globe_chat_bubble_562e.png" alt="forums" title="Chat in the forum!" width="50" height="50" /></a></div><div class="textwidget" style="float:left;width:120px; font-size:1.2em; font-size-adjust:none; font-style:normal;
font-variant:normal; font-weight:normal; line-height:normal;">Talk back and discuss Natural Earth in the <a href="/forums/">Forums</a>.</div>
</div></div>
</li></ul><ul><li id='text-4' class='widget widget_text'><h2 class="widgettitle">Thank You</h2>
<div class="textwidget">Our data downloads are generously hosted by Florida State University.</div>
</li></ul> </div>
</div>
<hr />
<div id="footer">
<div id="footerarea">
<div id="footerlogos">
<p>Supported by:</p>
<div class="footer-ad-box">
<a href="http://www.nacis.org" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/nacis.png" alt="NACIS" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.cartotalk.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/cartotalk_ad.png" alt="Cartotalk" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.mapgiving.org" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/mapgiving.png" alt="Mapgiving" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.geography.wisc.edu/cartography/" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/wisconsin.png" alt="University of Wisconsin Madison - Cartography Dept." /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.shadedrelief.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/shaded_relief.png" alt="Shaded Relief" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.xnrproductions.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/xnr.png" alt="XNR Productions" /></a>
</div>
<p style="clear:both;"></p>
<div class="footer-ad-box">
<a href="http://www.freac.fsu.edu" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/fsu.png" alt="Florida State University - FREAC" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.springercartographics.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/scllc.png" alt="Springer Cartographics LLC" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.washingtonpost.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/wpost.png" alt="Washington Post" /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.redgeographics.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/redgeo.png" alt="Red Geographics" /></a>
</div>
<div class="footer-ad-box">
<a href="http://kelsocartography.com/blog" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/kelso.png" alt="Kelso Cartography" /></a>
</div>
<p style="clear:both;"></p>
<div class="footer-ad-box">
<a href="http://www.avenza.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/avenza.png" alt="Avenza Systems Inc." /></a>
</div>
<div class="footer-ad-box">
<a href="http://www.stamen.com" target="_blank"><img src="http://www.naturalearthdata.com/wp-content/themes/NEV/images/stamen_ne_logo.png" alt="Stamen Design" /></a>
</div>
</div>
<p style="clear:both;"></p>
<span id="footerleft">
&copy; 2012. Natural Earth. All rights reserved.
</span>
<span id="footerright">
<!-- Please help promote WordPress and simpleX. Do not remove -->
<div>Powered by <a href="http://wordpress.org/">WordPress</a></div>
<div><a href="http://www.naturalearthdata.com/wp-admin">Staff Login &raquo;</a></div>
</span>
</div>
</div>
<!-- Google Analytics for WordPress | http://yoast.com/wordpress/google-analytics/ -->
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
</script>
<script type="text/javascript">
try {
var pageTracker = _gat._getTracker("UA-10168306-1");
} catch(err) {}
</script>
<script src="http://www.naturalearthdata.com/wp-content/plugins/google-analytics-for-wordpress/custom_se.js" type="text/javascript"></script>
<script type="text/javascript">
try {
// Cookied already:
pageTracker._trackPageview();
} catch(err) {}
</script>
<!-- End of Google Analytics code -->
</body>
</html>

View File

@@ -1 +0,0 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]

View File

@@ -1,128 +0,0 @@
"""
Script that generates the included dataset 'naturalearth_lowres.shp'
and 'naturalearth_cities.shp'.
Raw data: https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip
Current version used: see code
"""
import geopandas as gpd
import requests
from pathlib import Path
from zipfile import ZipFile
import tempfile
from shapely.geometry import box
# Dataset version to fetch; "latest" omits the version query parameter below.
version = "latest"
# NOTE: the repeated host with "http//" (no colon) is intentional — Natural
# Earth serves downloads from this unusual redirect-style path.
urlbase = "https://www.naturalearthdata.com/"
urlbase += "http//www.naturalearthdata.com/download/110m/cultural/"
def countries_override(world_raw):
    """Patch known issues in the raw Natural Earth countries table.

    Some sovereign countries ship with the placeholder ISO code ``-99``;
    for those rows the ADM0_A3 code is copied into ISO_A3 (mutating
    *world_raw* in place).  The GDP_MD column is then renamed back to the
    historical GDP_MD_EST name for backwards compatibility.

    Returns the patched (and renamed) DataFrame.
    """
    placeholder_iso = world_raw["ISO_A3"] == "-99"
    country_like = world_raw["TYPE"].isin(["Sovereign country", "Country"])
    fix_rows = placeholder_iso & country_like
    world_raw.loc[fix_rows, "ISO_A3"] = world_raw.loc[fix_rows, "ADM0_A3"]
    # backwards compatibility with previously distributed column name
    return world_raw.rename(columns={"GDP_MD": "GDP_MD_EST"})
# any change between versions?
def df_same(new, old, dataset, log):
    """Return True when *new* and *old* hold identical data.

    Any differences are appended to *log* as markdown tables (keyed by
    *dataset*) suitable for pasting into a pull-request description.
    Both frames must share the same columns.
    """
    assert (new.columns == old.columns).all(), "columns should be the same"
    if new.shape != old.shape:
        # row counts differ: surface rows present on only one side
        merged = old.merge(new, on="name", how="outer", suffixes=("_old", "_new"))
        one_sided = merged.loc[merged.isna().any(axis=1)]
        log.append(f"### {dataset} row count changed ###\n{one_sided.to_markdown()}")
        return False
    diff = new.compare(old)
    if len(diff) > 0:
        log.append(f"### {dataset} data changed ###\n{diff.to_markdown()}")
    return len(diff) == 0
# Datasets to refresh: the source zip on naturalearthdata.com, the subset of
# columns geopandas distributes, an optional per-dataset fixup callable, and
# the path of the currently shipped file to diff against.
config = [
    {
        "file": "ne_110m_populated_places.zip",
        "cols": ["NAME", "geometry"],
        "current": gpd.datasets.get_path("naturalearth_cities"),
    },
    {
        "file": "ne_110m_admin_0_countries.zip",
        "cols": ["POP_EST", "CONTINENT", "NAME", "ISO_A3", "GDP_MD_EST", "geometry"],
        "override": countries_override,
        "current": gpd.datasets.get_path("naturalearth_lowres"),
    },
]

downloads = {}  # dataset file name -> new GeoDataFrame (only when data changed)
log = []  # markdown change-log fragments collected for the PR description

for dl in config:
    with tempfile.TemporaryDirectory() as tmpdirname:
        url = urlbase + dl["file"]
        # NOTE(review): the custom User-Agent is presumably required because the
        # server rejects default requests agents — confirm before removing.
        r = requests.get(
            url,
            stream=True,
            headers={"User-Agent": "XY"},
            params=None if version == "latest" else {"version": version},
        )
        assert (
            r.status_code == 200
        ), f"version: {version} does not exist. status: {r.status_code}"
        # stream the zip to a temp file in small chunks
        f = Path(tmpdirname).joinpath(dl["file"])
        with open(f, "wb") as fd:
            for chunk in r.iter_content(chunk_size=128):
                fd.write(chunk)
        # extract the natural earth version
        z = ZipFile(f)
        version_f = [i for i in z.infolist() if "VERSION" in i.filename]
        assert len(version_f) == 1, "failed to find VERSION file"
        with open(z.extract(version_f[0], Path(tmpdirname).joinpath("v.txt"))) as f_:
            dl_version = f_.read().strip()
        # extract geodataframe from zip
        gdf = gpd.read_file(f)
        # maintain structure that geopandas distributes: apply fixups, keep
        # only the distributed columns, lowercase the column names
        if "override" in dl.keys():
            gdf = dl["override"](gdf)
        gdf = gdf.loc[:, dl["cols"]]
        gdf = gdf.rename(columns={c: c.lower() for c in gdf.columns})
        # override Crimea #2382: clip the Crimean peninsula out of Russia's
        # geometry and union it into Ukraine's, using a fixed bounding box
        if dl["file"] == "ne_110m_admin_0_countries.zip":
            crimean_bbox = box(32.274, 44.139, 36.65, 46.704)
            crimea_only = (
                gdf.loc[gdf.name == "Russia", "geometry"]
                .iloc[0]
                .intersection(crimean_bbox)
            )
            complete_ukraine = (
                gdf.loc[gdf.name == "Ukraine", "geometry"].iloc[0].union(crimea_only)
            )
            correct_russia = (
                gdf.loc[gdf.name == "Russia", "geometry"]
                .iloc[0]
                .difference(crimean_bbox)
            )
            r_ix = gdf.loc[gdf.name == "Russia"].index[0]
            gdf.at[r_ix, "geometry"] = correct_russia
            u_ix = gdf.loc[gdf.name == "Ukraine"].index[0]
            gdf.at[u_ix, "geometry"] = complete_ukraine
        # get changes between current version and new version; only changed
        # datasets are written back to disk below
        if not df_same(gdf, gpd.read_file(dl["current"]), dl["file"], log):
            downloads[dl["file"]] = gdf

# create change log that can be pasted into PR
with open(f"CHANGE_{dl_version}.md", "w") as f:
    f.write("\n\n".join(log))

# save downloaded geodataframe to appropriate place (the path of the
# currently shipped file for the matching config entry)
for k, gdf_ in downloads.items():
    f = [Path(c["current"]) for c in config if c["file"] == k][0]
    gdf_.to_file(driver="ESRI Shapefile", filename=Path(f.parent.name).joinpath(f.name))

View File

@@ -1 +0,0 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]

View File

@@ -1,11 +1,14 @@
import warnings
from packaging.version import Version
from statistics import mean
import geopandas
from shapely.geometry import LineString
import numpy as np
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype
from packaging.version import Version
from shapely.geometry import LineString
import geopandas
_MAP_KWARGS = [
"location",
@@ -277,13 +280,14 @@ def _explore(
return cm.get_cmap(_cmap, n_resample)(idx)
try:
import re
import branca as bc
import folium
import re
import matplotlib
from matplotlib import colors
import matplotlib.pyplot as plt
from mapclassify import classify
from matplotlib import colors
# isolate MPL version - GH#2596
MPL_361 = Version(matplotlib.__version__) >= Version("3.6.1")
@@ -316,6 +320,8 @@ def _explore(
gdf.geometry[rings_mask] = gdf.geometry[rings_mask].apply(
lambda g: LineString(g)
)
if isinstance(gdf, geopandas.GeoSeries):
gdf = gdf.to_frame()
if gdf.crs is None:
kwargs["crs"] = "Simple"
@@ -323,12 +329,25 @@ def _explore(
elif not gdf.crs.equals(4326):
gdf = gdf.to_crs(4326)
# Fields which are not JSON serializable are coerced to strings
json_not_supported_cols = gdf.columns[
[is_datetime64_any_dtype(gdf[c]) for c in gdf.columns]
].union(gdf.columns[gdf.dtypes == "object"])
if len(json_not_supported_cols) > 0:
gdf = gdf.astype({c: "string" for c in json_not_supported_cols})
if not isinstance(gdf.index, pd.MultiIndex) and (
is_datetime64_any_dtype(gdf.index) or (gdf.index.dtype == "object")
):
gdf.index = gdf.index.astype("string")
# create folium.Map object
if m is None:
# Get bounds to specify location and map extent
bounds = gdf.total_bounds
location = kwargs.pop("location", None)
if location is None:
if location is None and not np.isnan(bounds).all():
x = mean([bounds[0], bounds[2]])
y = mean([bounds[1], bounds[3]])
location = (y, x)
@@ -381,6 +400,15 @@ def _explore(
if fit:
m.fit_bounds([[bounds[1], bounds[0]], [bounds[3], bounds[2]]])
if gdf.is_empty.all():
warnings.warn(
"The GeoSeries you are attempting to plot is "
"composed of empty geometries. Nothing has been displayed.",
UserWarning,
stacklevel=3,
)
return m
for map_kwd in _MAP_KWARGS:
kwargs.pop(map_kwd, None)
@@ -618,11 +646,13 @@ def _explore(
tooltip = None
popup = None
# escape the curly braces {{}} for jinja2 templates
feature_collection = gdf.__geo_interface__
feature_collection = gdf[
~(gdf.geometry.isna() | gdf.geometry.is_empty) # drop missing or empty geoms
].__geo_interface__
for feature in feature_collection["features"]:
for k in feature["properties"]:
# escape the curly braces in values
if type(feature["properties"][k]) == str:
if isinstance(feature["properties"][k], str):
feature["properties"][k] = re.sub(
r"\{{2,}",
lambda x: "{% raw %}" + x.group(0) + "{% endraw %}",

File diff suppressed because it is too large Load Diff

View File

@@ -1,24 +1,23 @@
from __future__ import annotations
import json
import typing
from typing import Optional, Any, Callable, Dict
import warnings
from packaging.version import Version
from typing import Any, Callable, Dict, Optional
import numpy as np
import pandas as pd
from pandas import Series, MultiIndex
from pandas import Series
from pandas.core.internals import SingleBlockManager
from pyproj import CRS
import shapely
from shapely.geometry.base import BaseGeometry
from shapely.geometry import GeometryCollection
from shapely.geometry.base import BaseGeometry
from geopandas.base import GeoPandasBase, _delegate_property
from geopandas.plotting import plot_series
from geopandas.explore import _explore_geoseries
import geopandas
from geopandas.base import GeoPandasBase, _delegate_property
from geopandas.explore import _explore_geoseries
from geopandas.plotting import plot_series
from . import _compat as compat
from ._decorator import doc
@@ -51,20 +50,16 @@ def _geoseries_constructor_with_fallback(
return Series(data=data, index=index, **kwargs)
def _geoseries_expanddim(data=None, *args, **kwargs):
def _expanddim_logic(df):
"""Shared logic for _constructor_expanddim and _constructor_from_mgr_expanddim."""
from geopandas import GeoDataFrame
# pd.Series._constructor_expanddim == pd.DataFrame
df = pd.DataFrame(data, *args, **kwargs)
geo_col_name = None
if isinstance(data, GeoSeries):
# pandas default column name is 0, keep convention
geo_col_name = data.name if data.name is not None else 0
if df.shape[1] == 1:
geo_col_name = df.columns[0]
if (df.dtypes == "geometry").sum() > 0:
if df.shape[1] == 1:
geo_col_name = df.columns[0]
else:
geo_col_name = None
if geo_col_name is None or not is_geometry_type(df[geo_col_name]):
df = GeoDataFrame(df)
df._geometry_column_name = None
@@ -74,6 +69,13 @@ def _geoseries_expanddim(data=None, *args, **kwargs):
return df
def _geoseries_expanddim(data=None, *args, **kwargs):
# pd.Series._constructor_expanddim == pd.DataFrame, we start
# with that then specialize.
df = pd.DataFrame(data, *args, **kwargs)
return _expanddim_logic(df)
class GeoSeries(GeoPandasBase, Series):
"""
A Series object designed to store shapely geometry objects.
@@ -99,9 +101,9 @@ class GeoSeries(GeoPandasBase, Series):
>>> from shapely.geometry import Point
>>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s
0 POINT (1.00000 1.00000)
1 POINT (2.00000 2.00000)
2 POINT (3.00000 3.00000)
0 POINT (1 1)
1 POINT (2 2)
2 POINT (3 3)
dtype: geometry
>>> s = geopandas.GeoSeries(
@@ -127,9 +129,9 @@ class GeoSeries(GeoPandasBase, Series):
... [Point(1, 1), Point(2, 2), Point(3, 3)], index=["a", "b", "c"], crs=4326
... )
>>> s
a POINT (1.00000 1.00000)
b POINT (2.00000 2.00000)
c POINT (3.00000 3.00000)
a POINT (1 1)
b POINT (2 2)
c POINT (3 3)
dtype: geometry
>>> s.crs
@@ -152,11 +154,13 @@ class GeoSeries(GeoPandasBase, Series):
"""
_metadata = ["name"]
def __init__(self, data=None, index=None, crs: Optional[Any] = None, **kwargs):
if hasattr(data, "crs") and crs:
if not data.crs:
if (
hasattr(data, "crs")
or (isinstance(data, pd.Series) and hasattr(data.array, "crs"))
) and crs:
data_crs = data.crs if hasattr(data, "crs") else data.array.crs
if not data_crs:
# make a copy to avoid setting CRS to passed GeometryArray
data = data.copy()
else:
@@ -189,7 +193,7 @@ class GeoSeries(GeoPandasBase, Series):
# https://github.com/pandas-dev/pandas/issues/26469
kwargs.pop("dtype", None)
# Use Series constructor to handle input data
with compat.ignore_shapely2_warnings():
with warnings.catch_warnings():
# suppress additional warning from pandas for empty data
# (will always give object dtype instead of float dtype in the future,
# making the `if s.empty: s = s.astype(object)` below unnecessary)
@@ -207,9 +211,16 @@ class GeoSeries(GeoPandasBase, Series):
"Non geometry data passed to GeoSeries constructor, "
f"received data of dtype '{s.dtype}'"
)
# try to convert to GeometryArray, if fails return plain Series
# extract object-dtype numpy array from pandas Series; with CoW this
# gives a read-only array, so we try to set the flag back to writeable
data = s.to_numpy()
try:
data = from_shapely(s.values, crs)
data.flags.writeable = True
except ValueError:
pass
# try to convert to GeometryArray
try:
data = from_shapely(data, crs)
except TypeError:
raise TypeError(
"Non geometry data passed to GeoSeries constructor, "
@@ -225,6 +236,18 @@ class GeoSeries(GeoPandasBase, Series):
def append(self, *args, **kwargs) -> GeoSeries:
return self._wrapped_pandas_method("append", *args, **kwargs)
@GeoPandasBase.crs.setter
def crs(self, value):
if self.crs is not None:
warnings.warn(
"Overriding the CRS of a GeoSeries that already has CRS. "
"This unsafe behavior will be deprecated in future versions. "
"Use GeoSeries.set_crs method instead.",
stacklevel=2,
category=DeprecationWarning,
)
self.geometry.values.crs = value
@property
def geometry(self) -> GeoSeries:
return self
@@ -318,7 +341,7 @@ class GeoSeries(GeoPandasBase, Series):
"""Alternate constructor to create a ``GeoSeries`` from a file.
Can load a ``GeoSeries`` from a file from any format recognized by
`fiona`. See http://fiona.readthedocs.io/en/latest/manual.html for details.
`pyogrio`. See http://pyogrio.readthedocs.io/ for details.
From a file with attributes loads only geometry column. Note that to do
that, GeoPandas first loads the whole GeoDataFrame.
@@ -327,10 +350,10 @@ class GeoSeries(GeoPandasBase, Series):
filename : str
File path or file handle to read from. Depending on which kwargs
are included, the content of filename may vary. See
http://fiona.readthedocs.io/en/latest/README.html#usage for usage details.
:func:`pyogrio.read_dataframe` for usage details.
kwargs : key-word arguments
These arguments are passed to fiona.open, and can be used to
access multi-layer data, data stored within archives (zip files),
These arguments are passed to :func:`pyogrio.read_dataframe`, and can be
used to access multi-layer data, data stored within archives (zip files),
etc.
Examples
@@ -358,7 +381,7 @@ class GeoSeries(GeoPandasBase, Series):
@classmethod
def from_wkb(
cls, data, index=None, crs: Optional[Any] = None, **kwargs
cls, data, index=None, crs: Optional[Any] = None, on_invalid="raise", **kwargs
) -> GeoSeries:
"""
Alternate constructor to create a ``GeoSeries``
@@ -375,6 +398,12 @@ class GeoSeries(GeoPandasBase, Series):
accepted by
:meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
such as an authority string (eg "EPSG:4326") or a WKT string.
on_invalid: {"raise", "warn", "ignore"}, default "raise"
- raise: an exception will be raised if a WKB input geometry is invalid.
- warn: a warning will be raised and invalid WKB geometries will be returned
as None.
- ignore: invalid WKB geometries will be returned as None without a warning.
kwargs
Additional arguments passed to the Series constructor,
e.g. ``name``.
@@ -388,11 +417,13 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.from_wkt
"""
return cls._from_wkb_or_wkb(from_wkb, data, index=index, crs=crs, **kwargs)
return cls._from_wkb_or_wkt(
from_wkb, data, index=index, crs=crs, on_invalid=on_invalid, **kwargs
)
@classmethod
def from_wkt(
cls, data, index=None, crs: Optional[Any] = None, **kwargs
cls, data, index=None, crs: Optional[Any] = None, on_invalid="raise", **kwargs
) -> GeoSeries:
"""
Alternate constructor to create a ``GeoSeries``
@@ -409,6 +440,13 @@ class GeoSeries(GeoPandasBase, Series):
accepted by
:meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
such as an authority string (eg "EPSG:4326") or a WKT string.
on_invalid : {"raise", "warn", "ignore"}, default "raise"
- raise: an exception will be raised if a WKT input geometry is invalid.
- warn: a warning will be raised and invalid WKT geometries will be
returned as ``None``.
- ignore: invalid WKT geometries will be returned as ``None`` without a
warning.
kwargs
Additional arguments passed to the Series constructor,
e.g. ``name``.
@@ -431,12 +469,14 @@ class GeoSeries(GeoPandasBase, Series):
... ]
>>> s = geopandas.GeoSeries.from_wkt(wkts)
>>> s
0 POINT (1.00000 1.00000)
1 POINT (2.00000 2.00000)
2 POINT (3.00000 3.00000)
0 POINT (1 1)
1 POINT (2 2)
2 POINT (3 3)
dtype: geometry
"""
return cls._from_wkb_or_wkb(from_wkt, data, index=index, crs=crs, **kwargs)
return cls._from_wkb_or_wkt(
from_wkt, data, index=index, crs=crs, on_invalid=on_invalid, **kwargs
)
@classmethod
def from_xy(cls, x, y, z=None, index=None, crs=None, **kwargs) -> GeoSeries:
@@ -478,9 +518,9 @@ class GeoSeries(GeoPandasBase, Series):
>>> y = [0.5, 1, 1.5]
>>> s = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326")
>>> s
0 POINT (2.50000 0.50000)
1 POINT (5.00000 1.00000)
2 POINT (-3.00000 1.50000)
0 POINT (2.5 0.5)
1 POINT (5 1)
2 POINT (-3 1.5)
dtype: geometry
"""
if index is None:
@@ -494,12 +534,13 @@ class GeoSeries(GeoPandasBase, Series):
return cls(points_from_xy(x, y, z, crs=crs), index=index, crs=crs, **kwargs)
@classmethod
def _from_wkb_or_wkb(
def _from_wkb_or_wkt(
cls,
from_wkb_or_wkt_function: Callable,
data,
index=None,
crs: Optional[Any] = None,
on_invalid: str = "raise",
**kwargs,
) -> GeoSeries:
"""Create a GeoSeries from either WKT or WKB values"""
@@ -509,7 +550,46 @@ class GeoSeries(GeoPandasBase, Series):
else:
index = data.index
data = data.values
return cls(from_wkb_or_wkt_function(data, crs=crs), index=index, **kwargs)
return cls(
from_wkb_or_wkt_function(data, crs=crs, on_invalid=on_invalid),
index=index,
**kwargs,
)
@classmethod
def from_arrow(cls, arr, **kwargs) -> GeoSeries:
"""
Construct a GeoSeries from a Arrow array object with a GeoArrow
extension type.
See https://geoarrow.org/ for details on the GeoArrow specification.
This functions accepts any Arrow array object implementing
the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
method).
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
.. versionadded:: 1.0
Parameters
----------
arr : pyarrow.Array, Arrow array
Any array object implementing the Arrow PyCapsule Protocol
(i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
method). The type of the array should be one of the
geoarrow geometry types.
**kwargs
Other parameters passed to the GeoSeries constructor.
Returns
-------
GeoSeries
"""
from geopandas.io._geoarrow import arrow_to_geometry_array
return cls(arrow_to_geometry_array(arr), **kwargs)
@property
def __geo_interface__(self) -> Dict:
@@ -548,7 +628,7 @@ class GeoSeries(GeoPandasBase, Series):
"""Write the ``GeoSeries`` to a file.
By default, an ESRI shapefile is written, but any OGR data source
supported by Fiona can be written.
supported by Pyogrio or Fiona can be written.
Parameters
----------
@@ -578,18 +658,19 @@ class GeoSeries(GeoPandasBase, Series):
will determine the crs based on crs df attribute.
The value can be anything accepted
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
such as an authority string (eg "EPSG:4326") or a WKT string.
engine : str, "fiona" or "pyogrio"
such as an authority string (eg "EPSG:4326") or a WKT string. The keyword
is not supported for the "pyogrio" engine.
engine : str, "pyogrio" or "fiona"
The underlying library that is used to write the file. Currently, the
supported options are "fiona" and "pyogrio". Defaults to "fiona" if
installed, otherwise tries "pyogrio".
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
installed, otherwise tries "fiona".
**kwargs :
Keyword args to be passed to the engine, and can be used to write
to multi-layer data, store data within archives (zip files), etc.
In case of the "fiona" engine, the keyword arguments are passed to
fiona.open`. For more information on possible keywords, type:
``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
the keyword arguments are passed to `pyogrio.write_dataframe`.
In case of the "pyogrio" engine, the keyword arguments are passed to
`pyogrio.write_dataframe`. In case of the "fiona" engine, the keyword
arguments are passed to fiona.open`. For more information on possible
keywords, type: ``import pyogrio; help(pyogrio.write_dataframe)``.
See Also
--------
@@ -608,7 +689,6 @@ class GeoSeries(GeoPandasBase, Series):
from geopandas import GeoDataFrame
data = GeoDataFrame({"geometry": self}, index=self.index)
data.crs = self.crs
data.to_file(filename, driver, index=index, **kwargs)
#
@@ -619,10 +699,22 @@ class GeoSeries(GeoPandasBase, Series):
def _constructor(self):
return _geoseries_constructor_with_fallback
def _constructor_from_mgr(self, mgr, axes):
assert isinstance(mgr, SingleBlockManager)
if not isinstance(mgr.blocks[0].dtype, GeometryDtype):
return Series._from_mgr(mgr, axes)
return GeoSeries._from_mgr(mgr, axes)
@property
def _constructor_expanddim(self):
return _geoseries_expanddim
def _constructor_expanddim_from_mgr(self, mgr, axes):
df = pd.DataFrame._from_mgr(mgr, axes)
return _expanddim_logic(df)
def _wrapped_pandas_method(self, mtd, *args, **kwargs):
"""Wrap a generic pandas method to ensure it returns a GeoSeries"""
val = getattr(super(), mtd)(*args, **kwargs)
@@ -647,7 +739,7 @@ class GeoSeries(GeoPandasBase, Series):
return self._wrapped_pandas_method("select", *args, **kwargs)
@doc(pd.Series)
def apply(self, func, convert_dtype: bool = None, args=(), **kwargs):
def apply(self, func, convert_dtype: Optional[bool] = None, args=(), **kwargs):
if convert_dtype is not None:
kwargs["convert_dtype"] = convert_dtype
else:
@@ -686,10 +778,11 @@ class GeoSeries(GeoPandasBase, Series):
... [Polygon([(0, 0), (1, 1), (0, 1)]), None, Polygon([])]
... )
>>> s
0 POLYGON ((0.00000 0.00000, 1.00000 1.00000, 0....
1 None
2 POLYGON EMPTY
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
1 None
2 POLYGON EMPTY
dtype: geometry
>>> s.isna()
0 False
1 True
@@ -730,10 +823,11 @@ class GeoSeries(GeoPandasBase, Series):
... [Polygon([(0, 0), (1, 1), (0, 1)]), None, Polygon([])]
... )
>>> s
0 POLYGON ((0.00000 0.00000, 1.00000 1.00000, 0....
1 None
2 POLYGON EMPTY
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
1 None
2 POLYGON EMPTY
dtype: geometry
>>> s.notna()
0 True
1 False
@@ -765,12 +859,10 @@ class GeoSeries(GeoPandasBase, Series):
"""Alias for `notna` method. See `notna` for more detail."""
return self.notna()
def fillna(self, value=None, method=None, inplace: bool = False, **kwargs):
def fillna(self, value=None, inplace: bool = False, limit=None, **kwargs):
"""
Fill NA values with geometry (or geometries).
``method`` is currently not implemented.
Parameters
----------
value : shapely geometry or GeoSeries, default None
@@ -780,6 +872,9 @@ class GeoSeries(GeoPandasBase, Series):
are passed, missing values will be filled based on the corresponding index
locations. If pd.NA or np.nan are passed, values will be filled with
``None`` (not GEOMETRYCOLLECTION EMPTY).
limit : int, default None
This is the maximum number of entries along the entire axis
where NaNs will be filled. Must be greater than 0 if not None.
Returns
-------
@@ -796,25 +891,25 @@ class GeoSeries(GeoPandasBase, Series):
... ]
... )
>>> s
0 POLYGON ((0.00000 0.00000, 1.00000 1.00000, 0....
1 None
2 POLYGON ((0.00000 0.00000, -1.00000 1.00000, 0...
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
1 None
2 POLYGON ((0 0, -1 1, 0 -1, 0 0))
dtype: geometry
Filled with an empty polygon.
>>> s.fillna()
0 POLYGON ((0.00000 0.00000, 1.00000 1.00000, 0....
1 GEOMETRYCOLLECTION EMPTY
2 POLYGON ((0.00000 0.00000, -1.00000 1.00000, 0...
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
1 GEOMETRYCOLLECTION EMPTY
2 POLYGON ((0 0, -1 1, 0 -1, 0 0))
dtype: geometry
Filled with a specific polygon.
>>> s.fillna(Polygon([(0, 1), (2, 1), (1, 2)]))
0 POLYGON ((0.00000 0.00000, 1.00000 1.00000, 0....
1 POLYGON ((0.00000 1.00000, 2.00000 1.00000, 1....
2 POLYGON ((0.00000 0.00000, -1.00000 1.00000, 0...
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
1 POLYGON ((0 1, 2 1, 1 2, 0 1))
2 POLYGON ((0 0, -1 1, 0 -1, 0 0))
dtype: geometry
Filled with another GeoSeries.
@@ -828,9 +923,9 @@ class GeoSeries(GeoPandasBase, Series):
... ]
... )
>>> s.fillna(s_fill)
0 POLYGON ((0.00000 0.00000, 1.00000 1.00000, 0....
1 POINT (1.00000 1.00000)
2 POLYGON ((0.00000 0.00000, -1.00000 1.00000, 0...
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
1 POINT (1 1)
2 POLYGON ((0 0, -1 1, 0 -1, 0 0))
dtype: geometry
See Also
@@ -838,8 +933,8 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.isna : detect missing values
"""
if value is None:
value = GeometryCollection() if compat.SHAPELY_GE_20 else BaseGeometry()
return super().fillna(value=value, method=method, inplace=inplace, **kwargs)
value = GeometryCollection()
return super().fillna(value=value, limit=limit, inplace=inplace, **kwargs)
def __contains__(self, other) -> bool:
"""Allow tests of the form "geom in s"
@@ -862,7 +957,7 @@ class GeoSeries(GeoPandasBase, Series):
"""Interactive map based on folium/leaflet.js"""
return _explore_geoseries(self, *args, **kwargs)
def explode(self, ignore_index=False, index_parts=None) -> GeoSeries:
def explode(self, ignore_index=False, index_parts=False) -> GeoSeries:
"""
Explode multi-part geometries into multiple single geometries.
@@ -875,7 +970,7 @@ class GeoSeries(GeoPandasBase, Series):
ignore_index : bool, default False
If True, the resulting index will be labelled 0, 1, …, n - 1,
ignoring `index_parts`.
index_parts : boolean, default True
index_parts : boolean, default False
If True, the resulting index will be a multi-index (original
index with an additional level indicating the multiple
geometries: a new zero-based index for each single part geometry
@@ -894,16 +989,16 @@ class GeoSeries(GeoPandasBase, Series):
... [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])]
... )
>>> s
0 MULTIPOINT (0.00000 0.00000, 1.00000 1.00000)
1 MULTIPOINT (2.00000 2.00000, 3.00000 3.00000, ...
0 MULTIPOINT ((0 0), (1 1))
1 MULTIPOINT ((2 2), (3 3), (4 4))
dtype: geometry
>>> s.explode(index_parts=True)
0 0 POINT (0.00000 0.00000)
1 POINT (1.00000 1.00000)
1 0 POINT (2.00000 2.00000)
1 POINT (3.00000 3.00000)
2 POINT (4.00000 4.00000)
0 0 POINT (0 0)
1 POINT (1 1)
1 0 POINT (2 2)
1 POINT (3 3)
2 POINT (4 4)
dtype: geometry
See also
@@ -913,70 +1008,21 @@ class GeoSeries(GeoPandasBase, Series):
"""
from .base import _get_index_for_parts
if index_parts is None and not ignore_index:
warnings.warn(
"Currently, index_parts defaults to True, but in the future, "
"it will default to False to be consistent with Pandas. "
"Use `index_parts=True` to keep the current behavior and True/False "
"to silence the warning.",
FutureWarning,
stacklevel=2,
)
index_parts = True
geometries, outer_idx = shapely.get_parts(self.values._data, return_index=True)
if compat.USE_SHAPELY_20 or (compat.USE_PYGEOS and compat.PYGEOS_GE_09):
if compat.USE_SHAPELY_20:
geometries, outer_idx = shapely.get_parts(
self.values._data, return_index=True
)
else:
import pygeos
geometries, outer_idx = pygeos.get_parts(
self.values._data, return_index=True
)
index = _get_index_for_parts(
self.index,
outer_idx,
ignore_index=ignore_index,
index_parts=index_parts,
)
return GeoSeries(geometries, index=index, crs=self.crs).__finalize__(self)
# else PyGEOS is not available or version <= 0.8
index = []
geometries = []
for idx, s in self.geometry.items():
if s.geom_type.startswith("Multi") or s.geom_type == "GeometryCollection":
geoms = s.geoms
idxs = [(idx, i) for i in range(len(geoms))]
else:
geoms = [s]
idxs = [(idx, 0)]
index.extend(idxs)
geometries.extend(geoms)
if ignore_index:
index = range(len(geometries))
elif index_parts:
# if self.index is a MultiIndex then index is a list of nested tuples
if isinstance(self.index, MultiIndex):
index = [tuple(outer) + (inner,) for outer, inner in index]
index = MultiIndex.from_tuples(index, names=self.index.names + [None])
else:
index = [idx for idx, _ in index]
index = _get_index_for_parts(
self.index,
outer_idx,
ignore_index=ignore_index,
index_parts=index_parts,
)
return GeoSeries(geometries, index=index, crs=self.crs).__finalize__(self)
#
# Additional methods
#
@compat.requires_pyproj
def set_crs(
self,
crs: Optional[Any] = None,
@@ -987,12 +1033,16 @@ class GeoSeries(GeoPandasBase, Series):
"""
Set the Coordinate Reference System (CRS) of a ``GeoSeries``.
NOTE: The underlying geometries are not transformed to this CRS. To
Pass ``None`` to remove CRS from the ``GeoSeries``.
Notes
-----
The underlying geometries are not transformed to this CRS. To
transform the geometries to a new CRS, use the ``to_crs`` method.
Parameters
----------
crs : pyproj.CRS, optional if `epsg` is specified
crs : pyproj.CRS | None, optional
The value can be anything accepted
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
such as an authority string (eg "EPSG:4326") or a WKT string.
@@ -1015,9 +1065,9 @@ class GeoSeries(GeoPandasBase, Series):
>>> from shapely.geometry import Point
>>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s
0 POINT (1.00000 1.00000)
1 POINT (2.00000 2.00000)
2 POINT (3.00000 3.00000)
0 POINT (1 1)
1 POINT (2 2)
2 POINT (3 3)
dtype: geometry
Setting CRS to a GeoSeries without one:
@@ -1054,12 +1104,12 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.to_crs : re-project to another CRS
"""
from pyproj import CRS
if crs is not None:
crs = CRS.from_user_input(crs)
elif epsg is not None:
crs = CRS.from_epsg(epsg)
else:
raise ValueError("Must pass either crs or epsg.")
if not allow_override and self.crs is not None and not self.crs == crs:
raise ValueError(
@@ -1072,7 +1122,7 @@ class GeoSeries(GeoPandasBase, Series):
result = self.copy()
else:
result = self
result.crs = crs
result.array.crs = crs
return result
def to_crs(
@@ -1109,9 +1159,9 @@ class GeoSeries(GeoPandasBase, Series):
>>> from shapely.geometry import Point
>>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)], crs=4326)
>>> s
0 POINT (1.00000 1.00000)
1 POINT (2.00000 2.00000)
2 POINT (3.00000 3.00000)
0 POINT (1 1)
1 POINT (2 2)
2 POINT (3 3)
dtype: geometry
>>> s.crs # doctest: +SKIP
<Geographic 2D CRS: EPSG:4326>
@@ -1157,7 +1207,7 @@ class GeoSeries(GeoPandasBase, Series):
self.values.to_crs(crs=crs, epsg=epsg), index=self.index, name=self.name
)
def estimate_utm_crs(self, datum_name: str = "WGS 84") -> CRS:
def estimate_utm_crs(self, datum_name: str = "WGS 84"):
"""Returns the estimated UTM CRS based on the bounds of the dataset.
.. versionadded:: 0.9
@@ -1195,12 +1245,31 @@ class GeoSeries(GeoPandasBase, Series):
"""
return self.values.estimate_utm_crs(datum_name)
def to_json(self, **kwargs) -> str:
def to_json(
self,
show_bbox: bool = True,
drop_id: bool = False,
to_wgs84: bool = False,
**kwargs,
) -> str:
"""
Returns a GeoJSON string representation of the GeoSeries.
Parameters
----------
show_bbox : bool, optional, default: True
Include bbox (bounds) in the geojson
drop_id : bool, default: False
Whether to retain the index of the GeoSeries as the id property
in the generated GeoJSON. Default is False, but may want True
if the index is just arbitrary row numbers.
to_wgs84: bool, optional, default: False
If the CRS is set on the active geometry column it is exported as
WGS84 (EPSG:4326) to meet the `2016 GeoJSON specification
<https://tools.ietf.org/html/rfc7946>`_.
Set to True to force re-projection and set to False to ignore CRS. False by
default.
*kwargs* that will be passed to json.dumps().
Returns
@@ -1212,9 +1281,9 @@ class GeoSeries(GeoPandasBase, Series):
>>> from shapely.geometry import Point
>>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s
0 POINT (1.00000 1.00000)
1 POINT (2.00000 2.00000)
2 POINT (3.00000 3.00000)
0 POINT (1 1)
1 POINT (2 2)
2 POINT (3 3)
dtype: geometry
>>> s.to_json()
@@ -1229,7 +1298,9 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
--------
GeoSeries.to_file : write GeoSeries to file
"""
return json.dumps(self.__geo_interface__, **kwargs)
return self.to_frame("geometry").to_json(
na="null", show_bbox=show_bbox, drop_id=drop_id, to_wgs84=to_wgs84, **kwargs
)
def to_wkb(self, hex: bool = False, **kwargs) -> Series:
"""
@@ -1242,8 +1313,7 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
The default is to return a binary bytes object.
kwargs
Additional keyword args will be passed to
:func:`shapely.to_wkb` if shapely >= 2 is installed or
:func:`pygeos.to_wkb` if pygeos is installed.
:func:`shapely.to_wkb`.
Returns
-------
@@ -1263,8 +1333,7 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
Parameters
----------
kwargs
Keyword args will be passed to :func:`pygeos.to_wkt`
if pygeos is installed.
Keyword args will be passed to :func:`shapely.to_wkt`.
Returns
-------
@@ -1276,9 +1345,9 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
>>> from shapely.geometry import Point
>>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s
0 POINT (1.00000 1.00000)
1 POINT (2.00000 2.00000)
2 POINT (3.00000 3.00000)
0 POINT (1 1)
1 POINT (2 2)
2 POINT (3 3)
dtype: geometry
>>> s.to_wkt()
@@ -1293,48 +1362,105 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
"""
return Series(to_wkt(self.array, **kwargs), index=self.index)
#
# Implement standard operators for GeoSeries
#
def to_arrow(self, geometry_encoding="WKB", interleaved=True, include_z=None):
"""Encode a GeoSeries to GeoArrow format.
def __xor__(self, other):
"""Implement ^ operator as for builtin set type"""
warnings.warn(
"'^' operator will be deprecated. Use the 'symmetric_difference' "
"method instead.",
FutureWarning,
stacklevel=2,
See https://geoarrow.org/ for details on the GeoArrow specification.
This functions returns a generic Arrow array object implementing
the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
method). This object can then be consumed by your Arrow implementation
of choice that supports this protocol.
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
.. versionadded:: 1.0
Parameters
----------
geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
The GeoArrow encoding to use for the data conversion.
interleaved : bool, default True
Only relevant for 'geoarrow' encoding. If True, the geometries'
coordinates are interleaved in a single fixed size list array.
If False, the coordinates are stored as separate arrays in a
struct type.
include_z : bool, default None
Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
of the individial geometries is preserved).
If False, return 2D geometries. If True, include the third dimension
in the output (if a geometry has no third dimension, the z-coordinates
will be NaN). By default, will infer the dimensionality from the
input geometries. Note that this inference can be unreliable with
empty geometries (for a guaranteed result, it is recommended to
specify the keyword).
Returns
-------
GeoArrowArray
A generic Arrow array object with geometry data encoded to GeoArrow.
Examples
--------
>>> from shapely.geometry import Point
>>> gser = geopandas.GeoSeries([Point(1, 2), Point(2, 1)])
>>> gser
0 POINT (1 2)
1 POINT (2 1)
dtype: geometry
>>> arrow_array = gser.to_arrow()
>>> arrow_array
<geopandas.io._geoarrow.GeoArrowArray object at ...>
The returned array object needs to be consumed by a library implementing
the Arrow PyCapsule Protocol. For example, wrapping the data as a
pyarrow.Array (requires pyarrow >= 14.0):
>>> import pyarrow as pa
>>> array = pa.array(arrow_array)
>>> array
<pyarrow.lib.BinaryArray object at ...>
[
0101000000000000000000F03F0000000000000040,
01010000000000000000000040000000000000F03F
]
"""
import pyarrow as pa
from geopandas.io._geoarrow import (
GeoArrowArray,
construct_geometry_array,
construct_wkb_array,
)
return self.symmetric_difference(other)
def __or__(self, other):
"""Implement | operator as for builtin set type"""
warnings.warn(
"'|' operator will be deprecated. Use the 'union' method instead.",
FutureWarning,
stacklevel=2,
)
return self.union(other)
field_name = self.name if self.name is not None else ""
def __and__(self, other):
"""Implement & operator as for builtin set type"""
warnings.warn(
"'&' operator will be deprecated. Use the 'intersection' method instead.",
FutureWarning,
stacklevel=2,
)
return self.intersection(other)
if geometry_encoding.lower() == "geoarrow":
if Version(pa.__version__) < Version("10.0.0"):
raise ValueError("Converting to 'geoarrow' requires pyarrow >= 10.0.")
def __sub__(self, other):
"""Implement - operator as for builtin set type"""
warnings.warn(
"'-' operator will be deprecated. Use the 'difference' method instead.",
FutureWarning,
stacklevel=2,
)
return self.difference(other)
field, geom_arr = construct_geometry_array(
np.array(self.array),
include_z=include_z,
field_name=field_name,
crs=self.crs,
interleaved=interleaved,
)
elif geometry_encoding.lower() == "wkb":
field, geom_arr = construct_wkb_array(
np.asarray(self.array), field_name=field_name, crs=self.crs
)
else:
raise ValueError(
"Expected geometry encoding 'WKB' or 'geoarrow' "
f"got {geometry_encoding}"
)
def clip(self, mask, keep_geom_type: bool = False) -> GeoSeries:
return GeoArrowArray(field, geom_arr)
def clip(self, mask, keep_geom_type: bool = False, sort=False) -> GeoSeries:
"""Clip points, lines, or polygon geometries to the mask extent.
Both layers must be in the same Coordinate Reference System (CRS).
@@ -1357,6 +1483,10 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
If True, return only geometries of original type in case of intersection
resulting in multiple geometry types or GeometryCollections.
If False, return all resulting geometries (potentially mixed-types).
sort : boolean, default False
If True, the order of rows in the clipped GeoSeries will be preserved
at small performance cost.
If False the order of rows in the clipped GeoSeries will be random.
Returns
-------
@@ -1387,4 +1517,4 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
>>> nws_groceries.shape
(7,)
"""
return geopandas.clip(self, mask=mask, keep_geom_type=keep_geom_type)
return geopandas.clip(self, mask=mask, keep_geom_type=keep_geom_type, sort=sort)

View File

@@ -2,7 +2,6 @@ from packaging.version import Version
import pyarrow
_ERROR_MSG = """\
Disallowed deserialization of 'arrow.py_extension_type':
storage_type = {storage_type}

View File

@@ -1,19 +1,31 @@
from packaging.version import Version
import json
import warnings
from packaging.version import Version
import numpy as np
from pandas import DataFrame, Series
import geopandas._compat as compat
from geopandas._compat import import_optional_dependency
from geopandas.array import from_wkb
from geopandas import GeoDataFrame
import shapely
import geopandas
from geopandas import GeoDataFrame
from geopandas._compat import import_optional_dependency
from geopandas.array import from_shapely, from_wkb
from .file import _expand_user
METADATA_VERSION = "1.0.0"
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0"]
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
GEOARROW_ENCODINGS = [
"point",
"linestring",
"polygon",
"multipoint",
"multilinestring",
"multipolygon",
]
SUPPORTED_ENCODINGS = ["WKB"] + GEOARROW_ENCODINGS
# reference: https://github.com/opengeospatial/geoparquet
# Metadata structure:
@@ -68,7 +80,40 @@ def _remove_id_from_member_of_ensembles(json_dict):
member.pop("id", None)
def _create_metadata(df, schema_version=None):
# type ids 0 to 7
_geometry_type_names = [
"Point",
"LineString",
"LineString",
"Polygon",
"MultiPoint",
"MultiLineString",
"MultiPolygon",
"GeometryCollection",
]
_geometry_type_names += [geom_type + " Z" for geom_type in _geometry_type_names]
def _get_geometry_types(series):
"""
Get unique geometry types from a GeoSeries.
"""
arr_geometry_types = shapely.get_type_id(series.array._data)
# ensure to include "... Z" for 3D geometries
has_z = shapely.has_z(series.array._data)
arr_geometry_types[has_z] += 8
geometry_types = Series(arr_geometry_types).unique().tolist()
# drop missing values (shapely.get_type_id returns -1 for those)
if -1 in geometry_types:
geometry_types.remove(-1)
return sorted([_geometry_type_names[idx] for idx in geometry_types])
def _create_metadata(
df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
):
"""Create and encode geo metadata dict.
Parameters
@@ -77,13 +122,22 @@ def _create_metadata(df, schema_version=None):
schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', None}
GeoParquet specification version; if not provided will default to
latest supported version.
write_covering_bbox : bool, default False
Writes the bounding box column for each row entry with column
name 'bbox'. Writing a bbox column can be computationally
expensive, hence is default setting is False.
Returns
-------
dict
"""
schema_version = schema_version or METADATA_VERSION
if schema_version is None:
if geometry_encoding and any(
encoding != "WKB" for encoding in geometry_encoding.values()
):
schema_version = "1.1.0"
else:
schema_version = METADATA_VERSION
if schema_version not in SUPPORTED_VERSIONS:
raise ValueError(
@@ -94,7 +148,8 @@ def _create_metadata(df, schema_version=None):
column_metadata = {}
for col in df.columns[df.dtypes == "geometry"]:
series = df[col]
geometry_types = sorted(Series(series.geom_type.unique()).dropna())
geometry_types = _get_geometry_types(series)
if schema_version[0] == "0":
geometry_types_name = "geometry_type"
if len(geometry_types) == 1:
@@ -111,7 +166,7 @@ def _create_metadata(df, schema_version=None):
_remove_id_from_member_of_ensembles(crs)
column_metadata[col] = {
"encoding": "WKB",
"encoding": geometry_encoding[col],
"crs": crs,
geometry_types_name: geometry_types,
}
@@ -121,10 +176,20 @@ def _create_metadata(df, schema_version=None):
# don't add bbox with NaNs for empty / all-NA geometry column
column_metadata[col]["bbox"] = bbox
if write_covering_bbox:
column_metadata[col]["covering"] = {
"bbox": {
"xmin": ["bbox", "xmin"],
"ymin": ["bbox", "ymin"],
"xmax": ["bbox", "xmax"],
"ymax": ["bbox", "ymax"],
},
}
return {
"primary_column": df._geometry_column_name,
"columns": column_metadata,
"version": schema_version or METADATA_VERSION,
"version": schema_version,
"creator": {"library": "geopandas", "version": geopandas.__version__},
}
@@ -188,7 +253,7 @@ def _validate_dataframe(df):
raise ValueError("Index level names must be strings")
def _validate_metadata(metadata):
def _validate_geo_metadata(metadata):
"""Validate geo metadata.
Must not be empty, and must contain the structure specified above.
@@ -232,8 +297,12 @@ def _validate_metadata(metadata):
"'{key}' for column '{col}'".format(key=key, col=col)
)
if column_metadata["encoding"] != "WKB":
raise ValueError("Only WKB geometry encoding is supported")
if column_metadata["encoding"] not in SUPPORTED_ENCODINGS:
raise ValueError(
"Only WKB geometry encoding or one of the native encodings "
f"({GEOARROW_ENCODINGS!r}) are supported, "
f"got: {column_metadata['encoding']}"
)
if column_metadata.get("edges", "planar") == "spherical":
warnings.warn(
@@ -245,37 +314,59 @@ def _validate_metadata(metadata):
stacklevel=4,
)
if "covering" in column_metadata:
covering = column_metadata["covering"]
if "bbox" in covering:
bbox = covering["bbox"]
for var in ["xmin", "ymin", "xmax", "ymax"]:
if var not in bbox.keys():
raise ValueError("Metadata for bbox column is malformed.")
def _geopandas_to_arrow(df, index=None, schema_version=None):
def _geopandas_to_arrow(
df,
index=None,
geometry_encoding="WKB",
schema_version=None,
write_covering_bbox=None,
):
"""
Helper function with main, shared logic for to_parquet/to_feather.
"""
from pyarrow import Table
from pyarrow import StructArray
from geopandas.io._geoarrow import geopandas_to_arrow
_validate_dataframe(df)
# create geo metadata before altering incoming data frame
geo_metadata = _create_metadata(df, schema_version=schema_version)
if schema_version is not None:
if geometry_encoding != "WKB" and schema_version != "1.1.0":
raise ValueError(
"'geoarrow' encoding is only supported with schema version >= 1.1.0"
)
kwargs = {}
if compat.USE_SHAPELY_20:
kwargs = {"flavor": "iso"}
else:
for col in df.columns[df.dtypes == "geometry"]:
series = df[col]
if series.has_z.any():
warnings.warn(
"The GeoDataFrame contains 3D geometries, and when using "
"shapely < 2.0, such geometries will be written not exactly "
"following to the GeoParquet spec (not using ISO WKB). For "
"most use cases this should not be a problem (GeoPandas can "
"read such files fine).",
stacklevel=2,
)
break
df = df.to_wkb(**kwargs)
table, geometry_encoding_dict = geopandas_to_arrow(
df, geometry_encoding=geometry_encoding, index=index, interleaved=False
)
geo_metadata = _create_metadata(
df,
schema_version=schema_version,
geometry_encoding=geometry_encoding_dict,
write_covering_bbox=write_covering_bbox,
)
table = Table.from_pandas(df, preserve_index=index)
if write_covering_bbox:
if "bbox" in df.columns:
raise ValueError(
"An existing column 'bbox' already exists in the dataframe. "
"Please rename to write covering bbox."
)
bounds = df.bounds
bbox_array = StructArray.from_arrays(
[bounds["minx"], bounds["miny"], bounds["maxx"], bounds["maxy"]],
names=["xmin", "ymin", "xmax", "ymax"],
)
table = table.append_column("bbox", bbox_array)
# Store geopandas specific file-level metadata
# This must be done AFTER creating the table or it is not persisted
@@ -286,7 +377,14 @@ def _geopandas_to_arrow(df, index=None, schema_version=None):
def _to_parquet(
df, path, index=None, compression="snappy", schema_version=None, **kwargs
df,
path,
index=None,
compression="snappy",
geometry_encoding="WKB",
schema_version=None,
write_covering_bbox=False,
**kwargs,
):
"""
Write a GeoDataFrame to the Parquet format.
@@ -312,9 +410,17 @@ def _to_parquet(
output except `RangeIndex` which is stored as metadata only.
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
Name of the compression to use. Use ``None`` for no compression.
geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
The encoding to use for the geometry columns. Defaults to "WKB"
for maximum interoperability. Specify "geoarrow" to use one of the
native GeoArrow-based single-geometry type encodings.
schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
GeoParquet specification version; if not provided will default to
latest supported version.
write_covering_bbox : bool, default False
Writes the bounding box column for each row entry with column
name 'bbox'. Writing a bbox column can be computationally
expensive, hence is default setting is False.
**kwargs
Additional keyword arguments passed to pyarrow.parquet.write_table().
"""
@@ -322,19 +428,14 @@ def _to_parquet(
"pyarrow.parquet", extra="pyarrow is required for Parquet support."
)
if kwargs and "version" in kwargs and kwargs["version"] is not None:
if schema_version is None and kwargs["version"] in SUPPORTED_VERSIONS:
warnings.warn(
"the `version` parameter has been replaced with `schema_version`. "
"`version` will instead be passed directly to the underlying "
"parquet writer unless `version` is 0.1.0 or 0.4.0.",
FutureWarning,
stacklevel=2,
)
schema_version = kwargs.pop("version")
path = _expand_user(path)
table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
table = _geopandas_to_arrow(
df,
index=index,
geometry_encoding=geometry_encoding,
schema_version=schema_version,
write_covering_bbox=write_covering_bbox,
)
parquet.write_table(table, path, compression=compression, **kwargs)
@@ -379,47 +480,26 @@ def _to_feather(df, path, index=None, compression=None, schema_version=None, **k
if Version(pyarrow.__version__) < Version("0.17.0"):
raise ImportError("pyarrow >= 0.17 required for Feather support")
if kwargs and "version" in kwargs and kwargs["version"] is not None:
if schema_version is None and kwargs["version"] in SUPPORTED_VERSIONS:
warnings.warn(
"the `version` parameter has been replaced with `schema_version`. "
"`version` will instead be passed directly to the underlying "
"feather writer unless `version` is 0.1.0 or 0.4.0.",
FutureWarning,
stacklevel=2,
)
schema_version = kwargs.pop("version")
path = _expand_user(path)
table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
feather.write_feather(table, path, compression=compression, **kwargs)
def _arrow_to_geopandas(table, metadata=None):
def _arrow_to_geopandas(table, geo_metadata=None):
"""
Helper function with main, shared logic for read_parquet/read_feather.
"""
df = table.to_pandas()
metadata = metadata or table.schema.metadata
if metadata is None or b"geo" not in metadata:
raise ValueError(
"""Missing geo metadata in Parquet/Feather file.
Use pandas.read_parquet/read_feather() instead."""
)
try:
metadata = _decode_metadata(metadata.get(b"geo", b""))
except (TypeError, json.decoder.JSONDecodeError):
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
_validate_metadata(metadata)
if geo_metadata is None:
# Note: this path of not passing metadata is also used by dask-geopandas
geo_metadata = _validate_and_decode_metadata(table.schema.metadata)
# Find all geometry columns that were read from the file. May
# be a subset if 'columns' parameter is used.
geometry_columns = df.columns.intersection(metadata["columns"])
geometry_columns = [
col for col in geo_metadata["columns"] if col in table.column_names
]
result_column_names = list(table.slice(0, 0).to_pandas().columns)
geometry_columns.sort(key=result_column_names.index)
if not len(geometry_columns):
raise ValueError(
@@ -428,7 +508,7 @@ def _arrow_to_geopandas(table, metadata=None):
use pandas.read_parquet/read_feather() instead."""
)
geometry = metadata["primary_column"]
geometry = geo_metadata["primary_column"]
# Missing geometry likely indicates a subset of columns was read;
# promote the first available geometry to the primary geometry.
@@ -443,9 +523,12 @@ def _arrow_to_geopandas(table, metadata=None):
stacklevel=3,
)
table_attr = table.drop(geometry_columns)
df = table_attr.to_pandas()
# Convert the WKB columns that are present back to geometry.
for col in geometry_columns:
col_metadata = metadata["columns"][col]
col_metadata = geo_metadata["columns"][col]
if "crs" in col_metadata:
crs = col_metadata["crs"]
if isinstance(crs, dict):
@@ -455,7 +538,19 @@ def _arrow_to_geopandas(table, metadata=None):
# OGC:CRS84
crs = "OGC:CRS84"
df[col] = from_wkb(df[col].values, crs=crs)
if col_metadata["encoding"] == "WKB":
geom_arr = from_wkb(np.array(table[col]), crs=crs)
else:
from geopandas.io._geoarrow import construct_shapely_array
geom_arr = from_shapely(
construct_shapely_array(
table[col].combine_chunks(), "geoarrow." + col_metadata["encoding"]
),
crs=crs,
)
df.insert(result_column_names.index(col), col, geom_arr)
return GeoDataFrame(df, geometry=geometry)
@@ -521,7 +616,59 @@ def _ensure_arrow_fs(filesystem):
return filesystem
def _read_parquet(path, columns=None, storage_options=None, **kwargs):
def _validate_and_decode_metadata(metadata):
if metadata is None or b"geo" not in metadata:
raise ValueError(
"""Missing geo metadata in Parquet/Feather file.
Use pandas.read_parquet/read_feather() instead."""
)
# check for malformed metadata
try:
decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
except (TypeError, json.decoder.JSONDecodeError):
raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")
_validate_geo_metadata(decoded_geo_metadata)
return decoded_geo_metadata
def _read_parquet_schema_and_metadata(path, filesystem):
"""
Opening the Parquet file/dataset a first time to get the schema and metadata.
TODO: we should look into how we can reuse opened dataset for reading the
actual data, to avoid discovering the dataset twice (problem right now is
that the ParquetDataset interface doesn't allow passing the filters on read)
"""
import pyarrow
from pyarrow import parquet
kwargs = {}
if Version(pyarrow.__version__) < Version("15.0.0"):
kwargs = dict(use_legacy_dataset=False)
try:
schema = parquet.ParquetDataset(path, filesystem=filesystem, **kwargs).schema
except Exception:
schema = parquet.read_schema(path, filesystem=filesystem)
metadata = schema.metadata
# read metadata separately to get the raw Parquet FileMetaData metadata
# (pyarrow doesn't properly exposes those in schema.metadata for files
# created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
if metadata is None or b"geo" not in metadata:
try:
metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
except Exception:
pass
return schema, metadata
def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
"""
Load a Parquet object from the file path, returning a GeoDataFrame.
@@ -565,8 +712,13 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
filesystem is preferred. Provide the instantiated fsspec filesystem using
the ``filesystem`` keyword if you wish to use its implementation.
bbox : tuple, optional
Bounding box to be used to filter selection from geoparquet data. This
is only usable if the data was saved with the bbox covering metadata.
Input is of the tuple format (xmin, ymin, xmax, ymax).
**kwargs
Any additional kwargs passed to pyarrow.parquet.read_table().
Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.
Returns
-------
@@ -595,29 +747,36 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
filesystem, path = _get_filesystem_path(
path, filesystem=filesystem, storage_options=storage_options
)
path = _expand_user(path)
schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)
geo_metadata = _validate_and_decode_metadata(metadata)
bbox_filter = (
_get_parquet_bbox_filter(geo_metadata, bbox) if bbox is not None else None
)
if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)
# by default, bbox column is not read in, so must specify which
# columns are read in if it exists.
if not columns and if_bbox_column_exists:
columns = _get_non_bbox_columns(schema, geo_metadata)
# if both bbox and filters kwargs are used, must splice together.
if "filters" in kwargs:
filters_kwarg = kwargs.pop("filters")
filters = _splice_bbox_and_filters(filters_kwarg, bbox_filter)
else:
filters = bbox_filter
kwargs["use_pandas_metadata"] = True
table = parquet.read_table(path, columns=columns, filesystem=filesystem, **kwargs)
# read metadata separately to get the raw Parquet FileMetaData metadata
# (pyarrow doesn't properly exposes those in schema.metadata for files
# created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
metadata = None
if table.schema.metadata is None or b"geo" not in table.schema.metadata:
try:
# read_metadata does not accept a filesystem keyword, so need to
# handle this manually (https://issues.apache.org/jira/browse/ARROW-16719)
if filesystem is not None:
pa_filesystem = _ensure_arrow_fs(filesystem)
with pa_filesystem.open_input_file(path) as source:
metadata = parquet.read_metadata(source).metadata
else:
metadata = parquet.read_metadata(path).metadata
except Exception:
pass
table = parquet.read_table(
path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
)
return _arrow_to_geopandas(table, metadata)
return _arrow_to_geopandas(table, geo_metadata)
def _read_feather(path, columns=None, **kwargs):
@@ -677,11 +836,78 @@ def _read_feather(path, columns=None, **kwargs):
)
# TODO move this into `import_optional_dependency`
import pyarrow
import geopandas.io._pyarrow_hotfix # noqa: F401
if Version(pyarrow.__version__) < Version("0.17.0"):
raise ImportError("pyarrow >= 0.17 required for Feather support")
path = _expand_user(path)
table = feather.read_table(path, columns=columns, **kwargs)
return _arrow_to_geopandas(table)
def _get_parquet_bbox_filter(geo_metadata, bbox):
    """Build a pyarrow filter expression selecting rows intersecting ``bbox``.

    Uses the bbox covering column when the file provides one, and falls back
    to filtering directly on the x/y struct fields for "point"-encoded
    geometry columns. Raises ValueError for any other layout.
    """
    primary_column = geo_metadata["primary_column"]

    if _check_if_covering_in_geo_metadata(geo_metadata):
        bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
        return _convert_bbox_to_parquet_filter(bbox, bbox_column_name)

    if geo_metadata["columns"][primary_column]["encoding"] == "point":
        import pyarrow.compute as pc

        x_field = pc.field((primary_column, "x"))
        y_field = pc.field((primary_column, "y"))
        return (
            (x_field >= bbox[0])
            & (x_field <= bbox[2])
            & (y_field >= bbox[1])
            & (y_field <= bbox[3])
        )

    raise ValueError(
        "Specifying 'bbox' not supported for this Parquet file (it should either "
        "have a bbox covering column or use 'point' encoding)."
    )
def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
    """Return a pyarrow expression keeping rows whose covering bbox overlaps ``bbox``.

    Expressed as the negation of the "no overlap" condition so that rows with
    any overlap (including mere edge contact) are retained.
    """
    import pyarrow.compute as pc

    query_xmin, query_ymin, query_xmax, query_ymax = (
        bbox[0],
        bbox[1],
        bbox[2],
        bbox[3],
    )
    no_overlap = (
        (pc.field((bbox_column_name, "xmin")) > query_xmax)
        | (pc.field((bbox_column_name, "ymin")) > query_ymax)
        | (pc.field((bbox_column_name, "xmax")) < query_xmin)
        | (pc.field((bbox_column_name, "ymax")) < query_ymin)
    )
    return ~no_overlap
def _check_if_covering_in_geo_metadata(geo_metadata):
primary_column = geo_metadata["primary_column"]
return "covering" in geo_metadata["columns"][primary_column].keys()
def _get_bbox_encoding_column_name(geo_metadata):
primary_column = geo_metadata["primary_column"]
return geo_metadata["columns"][primary_column]["covering"]["bbox"]["xmin"][0]
def _get_non_bbox_columns(schema, geo_metadata):
    """Return all column names from ``schema`` except the bbox covering column."""
    bbox_col = _get_bbox_encoding_column_name(geo_metadata)
    names = list(schema.names)
    if bbox_col in names:
        names.remove(bbox_col)
    return names
def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
    """Combine a user-supplied ``filters`` kwarg with the bbox filter expression.

    The pyarrow dependency is resolved first (even when no bbox filter is
    given) so a missing pyarrow raises consistently.
    """
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    if bbox_filter is None:
        return kwarg_filters
    # normalize list-style filters to an expression before AND-ing with bbox
    return bbox_filter & parquet.filters_to_expression(kwarg_filters)

View File

@@ -1,30 +1,33 @@
from __future__ import annotations
import os
import urllib.request
import warnings
from io import IOBase
from packaging.version import Version
from pathlib import Path
import warnings
# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative
import numpy as np
import pandas as pd
from pandas.api.types import is_integer_dtype
import pyproj
import shapely
from shapely.geometry import mapping
from shapely.geometry.base import BaseGeometry
from geopandas import GeoDataFrame, GeoSeries
# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative
import urllib.request
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
from geopandas.io.util import vsi_path
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")
# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
_VALID_URLS.discard("file")
fiona = None
fiona_env = None
fiona_import_error = None
@@ -55,6 +58,7 @@ def _import_fiona():
FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
"1.9.0"
)
except ImportError as err:
fiona = False
fiona_import_error = str(err)
@@ -71,13 +75,14 @@ def _import_pyogrio():
if pyogrio is None:
try:
import pyogrio
except ImportError as err:
pyogrio = False
pyogrio_import_error = str(err)
def _check_fiona(func):
if fiona is None:
if not fiona:
raise ImportError(
f"the {func} requires the 'fiona' package, but it is not installed or does "
f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
@@ -85,7 +90,7 @@ def _check_fiona(func):
def _check_pyogrio(func):
if pyogrio is None:
if not pyogrio:
raise ImportError(
f"the {func} requires the 'pyogrio' package, but it is not installed "
"or does not import correctly."
@@ -93,35 +98,49 @@ def _check_pyogrio(func):
)
def _check_metadata_supported(metadata: str | None, engine: str, driver: str) -> None:
if metadata is None:
return
if driver != "GPKG":
raise NotImplementedError(
"The 'metadata' keyword is only supported for the GPKG driver."
)
if engine == "fiona" and not FIONA_GE_19:
raise NotImplementedError(
"The 'metadata' keyword is only supported for Fiona >= 1.9."
)
def _check_engine(engine, func):
# if not specified through keyword or option, then default to "fiona" if
# installed, otherwise try pyogrio
# if not specified through keyword or option, then default to "pyogrio" if
# installed, otherwise try fiona
if engine is None:
import geopandas
engine = geopandas.options.io_engine
if engine is None:
_import_fiona()
if fiona:
engine = "fiona"
_import_pyogrio()
if pyogrio:
engine = "pyogrio"
else:
_import_pyogrio()
if pyogrio:
engine = "pyogrio"
_import_fiona()
if fiona:
engine = "fiona"
if engine == "fiona":
_import_fiona()
_check_fiona(func)
elif engine == "pyogrio":
if engine == "pyogrio":
_import_pyogrio()
_check_pyogrio(func)
elif engine == "fiona":
_import_fiona()
_check_fiona(func)
elif engine is None:
raise ImportError(
f"The {func} requires the 'pyogrio' or 'fiona' package, "
"but neither is installed or imports correctly."
f"\nImporting fiona resulted in: {fiona_import_error}"
f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
f"\nImporting fiona resulted in: {fiona_import_error}"
)
return engine
@@ -168,31 +187,12 @@ def _is_url(url):
return False
def _is_zip(path):
    """Check if a given path is a zipfile"""
    parsed = fiona.path.ParsedPath.from_uri(path)
    # For archive-style paths ("zip://...!layer") test the archive component;
    # otherwise test the plain path.
    target = parsed.archive if parsed.archive else parsed.path
    return target.endswith(".zip")
def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs):
def _read_file(
filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
):
"""
Returns a GeoDataFrame from a file or URL.
.. note::
GeoPandas currently defaults to use Fiona as the engine in ``read_file``.
However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
pyogrio can provide a significant speedup compared to Fiona. We recommend to
already install pyogrio and specify the engine by using the ``engine`` keyword
(``geopandas.read_file(..., engine="pyogrio")``), or by setting the default for
the ``engine`` keyword globally with::
geopandas.options.io_engine = "pyogrio"
Parameters
----------
filename : str, path object or file-like object
@@ -209,21 +209,28 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
Filter for features that intersect with the given dict-like geojson
geometry, GeoSeries, GeoDataFrame or shapely geometry.
CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
Cannot be used with bbox.
Cannot be used with bbox. If multiple geometries are passed, this will
first union all geometries, which may be computationally expensive.
columns : list, optional
List of column names to import from the data source. Column names
must exactly match the names in the data source. To avoid reading
any columns (besides the geometry column), pass an empty list-like.
By default reads all columns.
rows : int or slice, default None
Load in specific rows by passing an integer (first `n` rows) or a
slice() object.
engine : str, "fiona" or "pyogrio"
engine : str, "pyogrio" or "fiona"
The underlying library that is used to read the file. Currently, the
supported options are "fiona" and "pyogrio". Defaults to "fiona" if
installed, otherwise tries "pyogrio".
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
installed, otherwise tries "fiona". Engine can also be set globally
with the ``geopandas.options.io_engine`` option.
**kwargs :
Keyword args to be passed to the engine. In case of the "fiona" engine,
the keyword arguments are passed to :func:`fiona.open` or
:class:`fiona.collection.BytesCollection` when opening the file.
For more information on possible keywords, type:
``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
the keyword arguments are passed to :func:`pyogrio.read_dataframe`.
Keyword args to be passed to the engine, and can be used to write
to multi-layer data, store data within archives (zip files), etc.
In case of the "pyogrio" engine, the keyword arguments are passed to
`pyogrio.write_dataframe`. In case of the "fiona" engine, the keyword
arguments are passed to fiona.open`. For more information on possible
keywords, type: ``import pyogrio; help(pyogrio.write_dataframe)``.
Examples
@@ -284,7 +291,9 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
from_bytes = True
if engine == "pyogrio":
return _read_file_pyogrio(filename, bbox=bbox, mask=mask, rows=rows, **kwargs)
return _read_file_pyogrio(
filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
)
elif engine == "fiona":
if pd.api.types.is_file_like(filename):
@@ -295,7 +304,13 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
path_or_bytes = filename
return _read_file_fiona(
path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
path_or_bytes,
from_bytes,
bbox=bbox,
mask=mask,
columns=columns,
rows=rows,
**kwargs,
)
else:
@@ -303,31 +318,36 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
def _read_file_fiona(
path_or_bytes, from_bytes, bbox=None, mask=None, rows=None, where=None, **kwargs
path_or_bytes,
from_bytes,
bbox=None,
mask=None,
columns=None,
rows=None,
where=None,
**kwargs,
):
if where is not None and not FIONA_GE_19:
raise NotImplementedError("where requires fiona 1.9+")
if columns is not None:
if "include_fields" in kwargs:
raise ValueError(
"Cannot specify both 'include_fields' and 'columns' keywords"
)
if not FIONA_GE_19:
raise NotImplementedError("'columns' keyword requires fiona 1.9+")
kwargs["include_fields"] = columns
elif "include_fields" in kwargs:
# alias to columns, as this variable is used below to specify column order
# in the dataframe creation
columns = kwargs["include_fields"]
if not from_bytes:
# Opening a file via URL or file-like-object above automatically detects a
# zipped file. In order to match that behavior, attempt to add a zip scheme
# if missing.
if _is_zip(str(path_or_bytes)):
parsed = fiona.parse_path(str(path_or_bytes))
if isinstance(parsed, fiona.path.ParsedPath):
# If fiona is able to parse the path, we can safely look at the scheme
# and update it to have a zip scheme if necessary.
schemes = (parsed.scheme or "").split("+")
if "zip" not in schemes:
parsed.scheme = "+".join(["zip"] + schemes)
path_or_bytes = parsed.name
elif isinstance(parsed, fiona.path.UnparsedPath) and not str(
path_or_bytes
).startswith("/vsi"):
# If fiona is unable to parse the path, it might have a Windows drive
# scheme. Try adding zip:// to the front. If the path starts with "/vsi"
# it is a legacy GDAL path type, so let it pass unmodified.
path_or_bytes = "zip://" + parsed.name
path_or_bytes = vsi_path(str(path_or_bytes))
if from_bytes:
reader = fiona.BytesCollection
@@ -359,7 +379,7 @@ def _read_file_fiona(
assert len(bbox) == 4
# handle loading the mask
elif isinstance(mask, (GeoDataFrame, GeoSeries)):
mask = mapping(mask.to_crs(crs).unary_union)
mask = mapping(mask.to_crs(crs).union_all())
elif isinstance(mask, BaseGeometry):
mask = mapping(mask)
@@ -383,11 +403,14 @@ def _read_file_fiona(
else:
f_filt = features
# get list of columns
columns = list(features.schema["properties"])
columns = columns or list(features.schema["properties"])
datetime_fields = [
k for (k, v) in features.schema["properties"].items() if v == "datetime"
]
if kwargs.get("ignore_geometry", False):
if (
kwargs.get("ignore_geometry", False)
or features.schema["geometry"] == "None"
):
df = pd.DataFrame(
[record["properties"] for record in f_filt], columns=columns
)
@@ -396,16 +419,39 @@ def _read_file_fiona(
f_filt, crs=crs, columns=columns + ["geometry"]
)
for k in datetime_fields:
as_dt = pd.to_datetime(df[k], errors="ignore")
# if to_datetime failed, try again for mixed timezone offsets
if as_dt.dtype == "object":
as_dt = None
# plain try catch for when pandas will raise in the future
# TODO we can tighten the exception type in future when it does
try:
with warnings.catch_warnings():
# pandas 2.x does not yet enforce this behaviour but raises a
# warning -> we want to to suppress this warning for our users,
# and do this by turning it into an error so we take the
# `except` code path to try again with utc=True
warnings.filterwarnings(
"error",
"In a future version of pandas, parsing datetimes with "
"mixed time zones will raise an error",
FutureWarning,
)
as_dt = pd.to_datetime(df[k])
except Exception:
pass
if as_dt is None or as_dt.dtype == "object":
# if to_datetime failed, try again for mixed timezone offsets
# This can still fail if there are invalid datetimes
as_dt = pd.to_datetime(df[k], errors="ignore", utc=True)
try:
as_dt = pd.to_datetime(df[k], utc=True)
except Exception:
pass
# if to_datetime succeeded, round datetimes as
# fiona only supports up to ms precision (any microseconds are
# floating point rounding error)
if not (as_dt.dtype == "object"):
df[k] = as_dt.dt.round(freq="ms")
if as_dt is not None and not (as_dt.dtype == "object"):
if PANDAS_GE_20:
df[k] = as_dt.dt.as_unit("ms")
else:
df[k] = as_dt.dt.round(freq="ms")
return df
@@ -428,48 +474,79 @@ def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs)
raise ValueError("slice with step is not supported")
else:
raise TypeError("'rows' must be an integer or a slice.")
if bbox is not None and mask is not None:
# match error message from Fiona
raise ValueError("mask and bbox can not be set together")
if bbox is not None:
if isinstance(bbox, (GeoDataFrame, GeoSeries)):
bbox = tuple(bbox.total_bounds)
crs = pyogrio.read_info(path_or_bytes).get("crs")
if isinstance(path_or_bytes, IOBase):
path_or_bytes.seek(0)
bbox = tuple(bbox.to_crs(crs).total_bounds)
elif isinstance(bbox, BaseGeometry):
bbox = bbox.bounds
if len(bbox) != 4:
raise ValueError("'bbox' should be a length-4 tuple.")
if mask is not None:
raise ValueError(
"The 'mask' keyword is not supported with the 'pyogrio' engine. "
"You can use 'bbox' instead."
)
# NOTE: mask cannot be used at same time as bbox keyword
if isinstance(mask, (GeoDataFrame, GeoSeries)):
crs = pyogrio.read_info(path_or_bytes).get("crs")
if isinstance(path_or_bytes, IOBase):
path_or_bytes.seek(0)
mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
elif isinstance(mask, BaseGeometry):
mask = shapely.unary_union(mask)
elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
# convert GeoJSON to shapely geometry
mask = shapely.geometry.shape(mask)
kwargs["mask"] = mask
if kwargs.pop("ignore_geometry", False):
kwargs["read_geometry"] = False
# TODO: if bbox is not None, check its CRS vs the CRS of the file
# translate `ignore_fields`/`include_fields` keyword for back compat with fiona
if "ignore_fields" in kwargs and "include_fields" in kwargs:
raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
elif "ignore_fields" in kwargs:
if kwargs.get("columns", None) is not None:
raise ValueError(
"Cannot specify both 'columns' and 'ignore_fields' keywords"
)
warnings.warn(
"The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
"will be removed in a future release. You can use the 'columns' keyword "
"instead to select which columns to read.",
DeprecationWarning,
stacklevel=3,
)
ignore_fields = kwargs.pop("ignore_fields")
fields = pyogrio.read_info(path_or_bytes)["fields"]
include_fields = [col for col in fields if col not in ignore_fields]
kwargs["columns"] = include_fields
elif "include_fields" in kwargs:
# translate `include_fields` keyword for back compat with fiona engine
if kwargs.get("columns", None) is not None:
raise ValueError(
"Cannot specify both 'columns' and 'include_fields' keywords"
)
warnings.warn(
"The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
"will be removed in a future release. You can use the 'columns' keyword "
"instead to select which columns to read.",
DeprecationWarning,
stacklevel=3,
)
kwargs["columns"] = kwargs.pop("include_fields")
return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)
def read_file(*args, **kwargs):
    """Deprecated internal alias; use :func:`geopandas.read_file` instead."""
    deprecation_msg = (
        "geopandas.io.file.read_file() is intended for internal "
        "use only, and will be deprecated. Use geopandas.read_file() instead."
    )
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=2)
    return _read_file(*args, **kwargs)
def to_file(*args, **kwargs):
    """Deprecated internal alias; use ``GeoDataFrame.to_file`` instead."""
    deprecation_msg = (
        "geopandas.io.file.to_file() is intended for internal "
        "use only, and will be deprecated. Use GeoDataFrame.to_file() "
        "or GeoSeries.to_file() instead."
    )
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=2)
    return _to_file(*args, **kwargs)
def _detect_driver(path):
"""
Attempt to auto-detect driver based on the extension
@@ -497,25 +574,16 @@ def _to_file(
mode="w",
crs=None,
engine=None,
metadata=None,
**kwargs,
):
"""
Write this GeoDataFrame to an OGR data source
A dictionary of supported OGR providers is available via:
>>> import fiona
>>> fiona.supported_drivers # doctest: +SKIP
.. note::
GeoPandas currently defaults to use Fiona as the engine in ``to_file``.
However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
pyogrio can provide a significant speedup compared to Fiona. We recommend to
already install pyogrio and specify the engine by using the ``engine`` keyword
(``df.to_file(..., engine="pyogrio")``), or by setting the default for
the ``engine`` keyword globally with::
geopandas.options.io_engine = "pyogrio"
>>> import pyogrio
>>> pyogrio.list_drivers() # doctest: +SKIP
Parameters
----------
@@ -557,10 +625,15 @@ def _to_file(
The value can be anything accepted
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
such as an authority string (eg "EPSG:4326") or a WKT string.
engine : str, "fiona" or "pyogrio"
The underlying library that is used to write the file. Currently, the
supported options are "fiona" and "pyogrio". Defaults to "fiona" if
installed, otherwise tries "pyogrio".
engine : str, "pyogrio" or "fiona"
The underlying library that is used to read the file. Currently, the
supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
installed, otherwise tries "fiona". Engine can also be set globally
with the ``geopandas.options.io_engine`` option.
metadata : dict[str, str], default None
Optional metadata to be stored in the file. Keys and values must be
strings. Only supported for the "GPKG" driver
(requires Fiona >= 1.9 or pyogrio >= 0.6).
**kwargs :
Keyword args to be passed to the engine, and can be used to write
to multi-layer data, store data within archives (zip files), etc.
@@ -604,44 +677,57 @@ def _to_file(
"to a supported format like a well-known text (WKT) using "
"`GeoSeries.to_wkt()`.",
)
_check_metadata_supported(metadata, engine, driver)
if mode not in ("w", "a"):
raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")
if engine == "fiona":
_to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs)
elif engine == "pyogrio":
_to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs)
if engine == "pyogrio":
_to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
elif engine == "fiona":
_to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
else:
raise ValueError(f"unknown engine '{engine}'")
def _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs):
def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
if not HAS_PYPROJ and crs:
raise ImportError(
"The 'pyproj' package is required to write a file with a CRS, but it is not"
" installed or does not import correctly."
)
if schema is None:
schema = infer_schema(df)
if crs:
crs = pyproj.CRS.from_user_input(crs)
from pyproj import CRS
crs = CRS.from_user_input(crs)
else:
crs = df.crs
with fiona_env():
crs_wkt = None
try:
gdal_version = fiona.env.get_gdal_release_name()
except AttributeError:
gdal_version = "2.0.0" # just assume it is not the latest
if Version(gdal_version) >= Version("3.0.0") and crs:
gdal_version = Version(
fiona.env.get_gdal_release_name().strip("e")
) # GH3147
except (AttributeError, ValueError):
gdal_version = Version("2.0.0") # just assume it is not the latest
if gdal_version >= Version("3.0.0") and crs:
crs_wkt = crs.to_wkt()
elif crs:
crs_wkt = crs.to_wkt("WKT1_GDAL")
with fiona.open(
filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
) as colxn:
if metadata is not None:
colxn.update_tags(metadata)
colxn.writerecords(df.iterfeatures())
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
import pyogrio
if schema is not None:
@@ -653,13 +739,13 @@ def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
kwargs["append"] = True
if crs is not None:
raise ValueError("Passing 'crs' it not supported with the 'pyogrio' engine.")
raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")
# for the fiona engine, this check is done in gdf.iterfeatures()
if not df.columns.is_unique:
raise ValueError("GeoDataFrame cannot contain duplicated column names.")
pyogrio.write_dataframe(df, filename, driver=driver, **kwargs)
pyogrio.write_dataframe(df, filename, driver=driver, metadata=metadata, **kwargs)
def infer_schema(df):
@@ -732,3 +818,34 @@ def _geometry_types(df):
geom_types = geom_types[0]
return geom_types
def _list_layers(filename) -> pd.DataFrame:
    """List layers available in a file.

    Provides an overview of layers available in a file or URL together with their
    geometry types. When supported by the data source, this includes both spatial
    and non-spatial layers. Non-spatial layers are indicated by the
    ``"geometry_type"`` column being ``None``. GeoPandas will not read such layers
    but they can be read into a pd.DataFrame using :func:`pyogrio.read_dataframe`.

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)

    Returns
    -------
    pandas.DataFrame
        A DataFrame with columns "name" and "geometry_type" and one row per layer.
    """
    # listing layers is only implemented for the pyogrio engine
    _import_pyogrio()
    _check_pyogrio("list_layers")
    import pyogrio

    layers = pyogrio.list_layers(filename)
    return pd.DataFrame(layers, columns=["name", "geometry_type"])

View File

@@ -1,5 +1,6 @@
import warnings
from contextlib import contextmanager
from functools import lru_cache
import pandas as pd
@@ -8,8 +9,6 @@ import shapely.wkb
from geopandas import GeoDataFrame
from geopandas import _compat as compat
@contextmanager
def _get_conn(conn_or_engine):
@@ -28,7 +27,7 @@ def _get_conn(conn_or_engine):
-------
Connection
"""
from sqlalchemy.engine.base import Engine, Connection
from sqlalchemy.engine.base import Connection, Engine
if isinstance(conn_or_engine, Connection):
if not conn_or_engine.in_transaction():
@@ -43,7 +42,7 @@ def _get_conn(conn_or_engine):
raise ValueError(f"Unknown Connectable: {conn_or_engine}")
def _df_to_geodf(df, geom_col="geom", crs=None):
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
"""
Transforms a pandas DataFrame into a GeoDataFrame.
The column 'geom_col' must be a geometry column in WKB representation.
@@ -60,6 +59,8 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
such as an authority string (eg "EPSG:4326") or a WKT string.
If not set, tries to determine CRS from the SRID associated with the
first geometry in the database, and assigns that to all geometries.
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
Active connection to the database to query.
Returns
-------
GeoDataFrame
@@ -80,10 +81,6 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
load_geom_bytes = shapely.wkb.loads
"""Load from Python 3 binary."""
def load_geom_buffer(x):
"""Load from Python 2 binary."""
return shapely.wkb.loads(str(x))
def load_geom_text(x):
"""Load from binary encoded as text."""
return shapely.wkb.loads(str(x), hex=True)
@@ -95,13 +92,31 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
df[geom_col] = geoms = geoms.apply(load_geom)
if crs is None:
if compat.SHAPELY_GE_20:
srid = shapely.get_srid(geoms.iat[0])
else:
srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom)
srid = shapely.get_srid(geoms.iat[0])
# if no defined SRID in geodatabase, returns SRID of 0
if srid != 0:
crs = "epsg:{}".format(srid)
try:
spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
except pd.errors.DatabaseError:
warning_msg = (
f"Could not find the spatial reference system table "
f"(spatial_ref_sys) in PostGIS."
f"Trying epsg:{srid} as a fallback."
)
warnings.warn(warning_msg, UserWarning, stacklevel=3)
crs = "epsg:{}".format(srid)
else:
if not spatial_ref_sys_df.empty:
auth_name = spatial_ref_sys_df["auth_name"].item()
crs = f"{auth_name}:{srid}"
else:
warning_msg = (
f"Could not find srid {srid} in the "
f"spatial_ref_sys table. "
f"Trying epsg:{srid} as a fallback."
)
warnings.warn(warning_msg, UserWarning, stacklevel=3)
crs = "epsg:{}".format(srid)
return GeoDataFrame(df, crs=crs, geometry=geom_col)
@@ -176,7 +191,7 @@ def _read_postgis(
params=params,
chunksize=chunksize,
)
return _df_to_geodf(df, geom_col=geom_col, crs=crs)
return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)
else:
# read data in chunks and return a generator
@@ -189,20 +204,9 @@ def _read_postgis(
params=params,
chunksize=chunksize,
)
return (_df_to_geodf(df, geom_col=geom_col, crs=crs) for df in df_generator)
def read_postgis(*args, **kwargs):
import warnings
warnings.warn(
"geopandas.io.sql.read_postgis() is intended for internal "
"use only, and will be deprecated. Use geopandas.read_postgis() instead.",
FutureWarning,
stacklevel=2,
)
return _read_postgis(*args, **kwargs)
return (
_df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
)
def _get_geometry_type(gdf):
@@ -253,7 +257,7 @@ def _get_geometry_type(gdf):
def _get_srid_from_crs(gdf):
"""
Get EPSG code from CRS if available. If not, return -1.
Get EPSG code from CRS if available. If not, return 0.
"""
# Use geoalchemy2 default for srid
@@ -279,7 +283,7 @@ def _get_srid_from_crs(gdf):
warnings.warn(warning_msg, UserWarning, stacklevel=2)
if srid is None:
srid = -1
srid = 0
warnings.warn(warning_msg, UserWarning, stacklevel=2)
return srid
@@ -288,8 +292,8 @@ def _get_srid_from_crs(gdf):
def _convert_linearring_to_linestring(gdf, geom_name):
from shapely.geometry import LineString
# Todo: Use Pygeos function once it's implemented:
# https://github.com/pygeos/pygeos/issues/76
# Todo: Use shapely function once it's implemented:
# https://github.com/shapely/shapely/issues/1617
mask = gdf.geom_type == "LinearRing"
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
@@ -300,26 +304,11 @@ def _convert_linearring_to_linestring(gdf, geom_name):
def _convert_to_ewkb(gdf, geom_name, srid):
"""Convert geometries to ewkb."""
if compat.USE_SHAPELY_20:
geoms = shapely.to_wkb(
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
elif compat.USE_PYGEOS:
from pygeos import set_srid, to_wkb
geoms = to_wkb(
set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
else:
from shapely.wkb import dumps
geoms = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]
geoms = shapely.to_wkb(
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
# The gdf will warn that the geometry column doesn't hold in-memory geometries
# now that they are EWKB, so convert back to a regular dataframe to avoid warning
@@ -330,8 +319,8 @@ def _convert_to_ewkb(gdf, geom_name, srid):
def _psql_insert_copy(tbl, conn, keys, data_iter):
import io
import csv
import io
s_buf = io.StringIO()
writer = csv.writer(s_buf)
@@ -341,11 +330,16 @@ def _psql_insert_copy(tbl, conn, keys, data_iter):
columns = ", ".join('"{}"'.format(k) for k in keys)
dbapi_conn = conn.connection
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
tbl.table.schema, tbl.table.name, columns
)
with dbapi_conn.cursor() as cur:
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
tbl.table.schema, tbl.table.name, columns
)
cur.copy_expert(sql=sql, file=s_buf)
# Use psycopg method if it's available
if hasattr(cur, "copy") and callable(cur.copy):
with cur.copy(sql) as copy:
copy.write(s_buf.read())
else: # otherwise use psycopg2 method
cur.copy_expert(sql, s_buf)
def _write_postgis(
@@ -469,3 +463,11 @@ def _write_postgis(
dtype=dtype,
method=_psql_insert_copy,
)
@lru_cache
def _get_spatial_ref_sys_df(con, srid):
spatial_ref_sys_sql = (
f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
)
return pd.read_sql(spatial_ref_sys_sql, con)

View File

@@ -19,6 +19,7 @@ pickles and test versus the current data that is generated
(with master). These are then compared.
"""
import os
import pickle
import platform
@@ -26,9 +27,10 @@ import sys
import pandas as pd
import geopandas
from shapely.geometry import Point
import geopandas
def create_pickle_data():
"""create the pickle data"""

View File

@@ -1,33 +1,41 @@
import datetime
import io
import json
import os
import pathlib
import shutil
import tempfile
from collections import OrderedDict
from packaging.version import Version
import numpy as np
import pandas as pd
import pytest
import pytz
from packaging.version import Version
from pandas.api.types import is_datetime64_any_dtype
from pandas.testing import assert_series_equal
from shapely.geometry import Point, Polygon, box
from shapely.geometry import Point, Polygon, box, mapping
import geopandas
from geopandas import GeoDataFrame, read_file
from geopandas._compat import PANDAS_GE_20
from geopandas.io.file import _detect_driver, _EXTENSION_TO_DRIVER
from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20, PANDAS_GE_30
from geopandas.io.file import _EXTENSION_TO_DRIVER, _detect_driver
import pytest
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
from geopandas.tests.util import PACKAGE_DIR, validate_boro_df
from pandas.testing import assert_frame_equal, assert_series_equal
try:
import pyogrio
PYOGRIO_GE_07 = Version(pyogrio.__version__) > Version("0.6.0")
# those version checks have to be defined here instead of imported from
# geopandas.io.file (those are only initialized lazily on first usage)
PYOGRIO_GE_090 = Version(Version(pyogrio.__version__).base_version) >= Version(
"0.9.0"
)
except ImportError:
pyogrio = False
PYOGRIO_GE_07 = False
PYOGRIO_GE_090 = False
try:
@@ -46,6 +54,9 @@ FIONA_MARK = pytest.mark.skipif(not fiona, reason="fiona not installed")
_CRS = "epsg:4326"
pytestmark = pytest.mark.filterwarnings("ignore:Value:RuntimeWarning:pyogrio")
@pytest.fixture(
params=[
pytest.param("fiona", marks=FIONA_MARK),
@@ -62,9 +73,8 @@ def skip_pyogrio_not_supported(engine):
@pytest.fixture
def df_nybb(engine):
nybb_path = geopandas.datasets.get_path("nybb")
df = read_file(nybb_path, engine=engine)
def df_nybb(engine, nybb_filename):
df = read_file(nybb_filename, engine=engine)
return df
@@ -130,7 +140,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert "geometry" in df
assert len(df) == 5
assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
assert np.all(df["BoroName"].values == df_nybb["BoroName"])
# Write layer with null geometry out to file
tempfilename = os.path.join(str(tmpdir), "null_geom" + ext)
@@ -139,7 +149,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert "geometry" in df
assert len(df) == 2
assert np.alltrue(df["Name"].values == df_null["Name"])
assert np.all(df["Name"].values == df_null["Name"])
# check the expected driver
assert_correct_driver(tempfilename, ext, engine)
@@ -153,7 +163,7 @@ def test_to_file_pathlib(tmpdir, df_nybb, driver, ext, engine):
df = GeoDataFrame.from_file(temppath, engine=engine)
assert "geometry" in df
assert len(df) == 5
assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
assert np.all(df["BoroName"].values == df_nybb["BoroName"])
# check the expected driver
assert_correct_driver(temppath, ext, engine)
@@ -174,9 +184,10 @@ def test_to_file_bool(tmpdir, driver, ext, engine):
result = read_file(tempfilename, engine=engine)
if ext in (".shp", ""):
# Shapefile does not support boolean, so is read back as int
if engine == "fiona":
# but since GDAL 3.9 supports boolean fields in SHP
if engine == "fiona" and fiona.gdal_version.minor < 9:
df["col"] = df["col"].astype("int64")
else:
elif engine == "pyogrio" and pyogrio.__gdal_version__ < (3, 9):
df["col"] = df["col"].astype("int32")
assert_geodataframe_equal(result, df)
# check the expected driver
@@ -189,15 +200,15 @@ eastern = pytz.timezone("America/New_York")
datetime_type_tests = (TEST_DATE, eastern.localize(TEST_DATE))
@pytest.mark.filterwarnings(
"ignore:Non-conformant content for record 1 in column b:RuntimeWarning"
) # for GPKG, GDAL writes the tz data but warns on reading (see DATETIME_FORMAT option)
@pytest.mark.parametrize(
"time", datetime_type_tests, ids=("naive_datetime", "datetime_with_timezone")
)
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
def test_to_file_datetime(tmpdir, driver, ext, time, engine):
"""Test writing a data file with the datetime column type"""
if engine == "pyogrio" and time.tzinfo is not None:
# TODO
pytest.skip("pyogrio doesn't yet support timezones")
if ext in (".shp", ""):
pytest.skip(f"Driver corresponding to ext {ext} doesn't support dt fields")
@@ -207,23 +218,25 @@ def test_to_file_datetime(tmpdir, driver, ext, time, engine):
df = GeoDataFrame(
{"a": [1.0, 2.0], "b": [time, time]}, geometry=[point, point], crs=4326
)
fiona_precision_limit = "ms"
df["b"] = df["b"].dt.round(freq=fiona_precision_limit)
df["b"] = df["b"].dt.round(freq="ms")
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = read_file(tempfilename, engine=engine)
assert_geodataframe_equal(df.drop(columns=["b"]), df_read.drop(columns=["b"]))
# Check datetime column
expected = df["b"]
if PANDAS_GE_20:
expected = df["b"].dt.as_unit("ms")
actual = df_read["b"]
if df["b"].dt.tz is not None:
# US/Eastern becomes pytz.FixedOffset(-300) when read from file
# so compare fairly in terms of UTC
assert_series_equal(
df["b"].dt.tz_convert(pytz.utc), df_read["b"].dt.tz_convert(pytz.utc)
)
else:
if engine == "pyogrio" and PANDAS_GE_20:
df["b"] = df["b"].astype("datetime64[ms]")
assert_series_equal(df["b"], df_read["b"])
# as GDAL only models offsets, not timezones.
# Compare fair result in terms of UTC instead
expected = expected.dt.tz_convert(pytz.utc)
actual = actual.dt.tz_convert(pytz.utc)
assert_series_equal(expected, actual)
dt_exts = ["gpkg", "geojson"]
@@ -239,7 +252,7 @@ def write_invalid_date_file(date_str, tmpdir, ext, engine):
)
# Schema not required for GeoJSON since not typed, but needed for GPKG
if ext == "geojson":
df.to_file(tempfilename)
df.to_file(tempfilename, engine=engine)
else:
schema = {"geometry": "Point", "properties": {"date": "datetime"}}
if engine == "pyogrio" and not fiona:
@@ -254,7 +267,7 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
# https://github.com/geopandas/geopandas/issues/2502
date_str = "9999-99-99T00:00:00" # invalid date handled by GDAL
tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
res = read_file(tempfilename)
res = read_file(tempfilename, engine=engine)
if ext == "gpkg":
assert is_datetime64_any_dtype(res["date"])
assert pd.isna(res["date"].iloc[-1])
@@ -265,16 +278,19 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
@pytest.mark.parametrize("ext", dt_exts)
def test_read_file_datetime_out_of_bounds_ns(tmpdir, ext, engine):
if engine == "pyogrio" and not PANDAS_GE_20:
pytest.skip("with pyogrio requires pandas >= 2.0 to pass")
# https://github.com/geopandas/geopandas/issues/2502
if ext == "geojson":
skip_pyogrio_not_supported(engine)
date_str = "9999-12-31T00:00:00" # valid to GDAL, not to [ns] format
tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
res = read_file(tempfilename)
# Pandas invalid datetimes are read in as object dtype (strings)
assert res["date"].dtype == "object"
assert isinstance(res["date"].iloc[0], str)
res = read_file(tempfilename, engine=engine)
if PANDAS_GE_30:
assert res["date"].dtype == "datetime64[ms]"
assert res["date"].iloc[-1] == pd.Timestamp("9999-12-31 00:00:00")
else:
# Pandas invalid datetimes are read in as object dtype (strings)
assert res["date"].dtype == "object"
assert isinstance(res["date"].iloc[0], str)
def test_read_file_datetime_mixed_offsets(tmpdir):
@@ -292,17 +308,13 @@ def test_read_file_datetime_mixed_offsets(tmpdir):
df.to_file(tempfilename)
# check mixed tz don't crash GH2478
res = read_file(tempfilename)
if engine == "fiona":
# Convert mixed timezones to UTC equivalent
assert is_datetime64_any_dtype(res["date"])
if not PANDAS_GE_20:
utc = pytz.utc
else:
utc = datetime.timezone.utc
assert res["date"].dt.tz == utc
# Convert mixed timezones to UTC equivalent
assert is_datetime64_any_dtype(res["date"])
if not PANDAS_GE_20:
utc = pytz.utc
else:
# old fiona and pyogrio ignore timezones and read as datetimes successfully
assert is_datetime64_any_dtype(res["date"])
utc = datetime.timezone.utc
assert res["date"].dt.tz == utc
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -365,14 +377,21 @@ def test_to_file_int32(tmpdir, df_points, engine, driver, ext):
df = GeoDataFrame(geometry=geometry)
df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int32Dtype())
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
# the int column with missing values comes back as float
expected = df.copy()
expected["data"] = expected["data"].astype("float64")
assert_geodataframe_equal(df_read, expected, check_like=True)
tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
df2 = df.dropna()
df2.to_file(tempfilename2, driver=driver, engine=engine)
df2_read = GeoDataFrame.from_file(tempfilename2, engine=engine)
if engine == "pyogrio":
tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
df2 = df.dropna()
df2.to_file(tempfilename2, driver=driver, engine=engine)
df2_read = GeoDataFrame.from_file(tempfilename2, driver=driver, engine=engine)
assert df2_read["data"].dtype == "int32"
else:
# with the fiona engine the 32 bitwidth is not preserved
assert df2_read["data"].dtype == "int64"
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -382,8 +401,11 @@ def test_to_file_int64(tmpdir, df_points, engine, driver, ext):
df = GeoDataFrame(geometry=geometry)
df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int64Dtype())
df.to_file(tempfilename, driver=driver, engine=engine)
df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
# the int column with missing values comes back as float
expected = df.copy()
expected["data"] = expected["data"].astype("float64")
assert_geodataframe_equal(df_read, expected, check_like=True)
def test_to_file_empty(tmpdir, engine):
@@ -393,12 +415,6 @@ def test_to_file_empty(tmpdir, engine):
input_empty_df.to_file(tempfilename, engine=engine)
def test_to_file_privacy(tmpdir, df_nybb):
tempfilename = os.path.join(str(tmpdir), "test.shp")
with pytest.warns(FutureWarning):
geopandas.io.file.to_file(df_nybb, tempfilename)
def test_to_file_schema(tmpdir, df_nybb, engine):
"""
Ensure that the file is written according to the schema
@@ -431,12 +447,13 @@ def test_to_file_schema(tmpdir, df_nybb, engine):
assert result_schema == schema
def test_to_file_crs(tmpdir, engine):
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_to_file_crs(tmpdir, engine, nybb_filename):
"""
Ensure that the file is written according to the crs
if it is specified
"""
df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
df = read_file(nybb_filename, engine=engine)
tempfilename = os.path.join(str(tmpdir), "crs.shp")
# save correct CRS
@@ -445,7 +462,7 @@ def test_to_file_crs(tmpdir, engine):
assert result.crs == df.crs
if engine == "pyogrio":
with pytest.raises(ValueError, match="Passing 'crs' it not supported"):
with pytest.raises(ValueError, match="Passing 'crs' is not supported"):
df.to_file(tempfilename, crs=3857, engine=engine)
return
@@ -455,8 +472,7 @@ def test_to_file_crs(tmpdir, engine):
assert result.crs == "epsg:3857"
# specify CRS for gdf without one
df2 = df.copy()
df2.crs = None
df2 = df.set_crs(None, allow_override=True)
df2.to_file(tempfilename, crs=2263, engine=engine)
df = GeoDataFrame.from_file(tempfilename, engine=engine)
assert df.crs == "epsg:2263"
@@ -529,6 +545,7 @@ def test_mode_unsupported(tmpdir, df_nybb, engine):
df_nybb.to_file(tempfilename, mode="r", engine=engine)
@pytest.mark.filterwarnings("ignore:'crs' was not provided:UserWarning:pyogrio")
@pytest.mark.parametrize("driver,ext", driver_ext_pairs)
def test_empty_crs(tmpdir, driver, ext, engine):
"""Test handling of undefined CRS with GPKG driver (GH #1975)."""
@@ -548,7 +565,7 @@ def test_empty_crs(tmpdir, driver, ext, engine):
if ext == ".geojson":
# geojson by default assumes epsg:4326
df.crs = "EPSG:4326"
df.geometry.array.crs = "EPSG:4326"
assert_geodataframe_equal(result, df)
@@ -561,10 +578,11 @@ def test_empty_crs(tmpdir, driver, ext, engine):
NYBB_CRS = "epsg:2263"
def test_read_file(engine):
df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
def test_read_file(engine, nybb_filename):
df = read_file(nybb_filename, engine=engine)
validate_boro_df(df)
assert df.crs == NYBB_CRS
if HAS_PYPROJ:
assert df.crs == NYBB_CRS
expected_columns = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area"]
assert (df.columns[:-1] == expected_columns).all()
@@ -578,7 +596,7 @@ def test_read_file(engine):
"main/geopandas/tests/data/null_geom.geojson",
# url to zip file
"https://raw.githubusercontent.com/geopandas/geopandas/"
"main/geopandas/datasets/nybb_16a.zip",
"main/geopandas/tests/data/nybb_16a.zip",
# url to zipfile without extension
"https://geonode.goosocean.org/download/480",
# url to web service
@@ -596,6 +614,25 @@ def test_read_file_local_uri(file_path, engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_geojson_string_path(engine):
if engine == "pyogrio" and not PYOGRIO_GE_090:
pytest.skip("fixed in pyogrio 0.9.0")
expected = GeoDataFrame({"val_with_hash": ["row # 0"], "geometry": [Point(0, 1)]})
features = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {"val_with_hash": "row # 0"},
"geometry": {"type": "Point", "coordinates": [0.0, 1.0]},
}
],
}
df_read = read_file(json.dumps(features))
assert_geodataframe_equal(expected.set_crs("EPSG:4326"), df_read)
def test_read_file_textio(file_path, engine):
file_text_stream = open(file_path)
file_stringio = io.StringIO(open(file_path).read())
@@ -648,11 +685,11 @@ def test_read_file_tempfile(engine):
temp.close()
def test_read_binary_file_fsspec(engine):
def test_read_binary_file_fsspec(engine, nybb_filename):
fsspec = pytest.importorskip("fsspec")
# Remove the zip scheme so fsspec doesn't open as a zipped file,
# instead we want to read as bytes and let fiona decode it.
path = geopandas.datasets.get_path("nybb")[6:]
path = nybb_filename[6:]
with fsspec.open(path, "rb") as f:
gdf = read_file(f, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -665,10 +702,10 @@ def test_read_text_file_fsspec(file_path, engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_infer_zipped_file(engine):
def test_infer_zipped_file(engine, nybb_filename):
# Remove the zip scheme so that the test for a zipped file can
# check it and add it back.
path = geopandas.datasets.get_path("nybb")[6:]
path = nybb_filename[6:]
gdf = read_file(path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -683,15 +720,24 @@ def test_infer_zipped_file(engine):
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_allow_legacy_gdal_path(engine):
def test_allow_legacy_gdal_path(engine, nybb_filename):
# Construct a GDAL-style zip path.
path = "/vsizip/" + geopandas.datasets.get_path("nybb")[6:]
path = "/vsizip/" + nybb_filename[6:]
gdf = read_file(path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_read_file_filtered__bbox(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
@pytest.mark.skipif(not PYOGRIO_GE_090, reason="bug fixed in pyogrio 0.9.0")
def test_read_file_with_hash_in_path(engine, nybb_filename, tmp_path):
folder_with_hash = tmp_path / "path with # present"
folder_with_hash.mkdir(exist_ok=True, parents=True)
read_path = folder_with_hash / "nybb.zip"
shutil.copy(nybb_filename[6:], read_path)
gdf = read_file(read_path, engine=engine)
assert isinstance(gdf, geopandas.GeoDataFrame)
def test_read_file_bbox_tuple(df_nybb, engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
@@ -703,8 +749,7 @@ def test_read_file_filtered__bbox(df_nybb, engine):
assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))
def test_read_file_filtered__bbox__polygon(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_bbox_polygon(df_nybb, engine, nybb_filename):
bbox = box(
1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
)
@@ -713,14 +758,12 @@ def test_read_file_filtered__bbox__polygon(df_nybb, engine):
assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))
def test_read_file_filtered__rows(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows(df_nybb, engine, nybb_filename):
filtered_df = read_file(nybb_filename, rows=1, engine=engine)
assert_geodataframe_equal(filtered_df, df_nybb.iloc[[0], :])
def test_read_file_filtered__rows_slice(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows_slice(df_nybb, engine, nybb_filename):
filtered_df = read_file(nybb_filename, rows=slice(1, 3), engine=engine)
assert_geodataframe_equal(filtered_df, df_nybb.iloc[1:3, :].reset_index(drop=True))
@@ -728,21 +771,14 @@ def test_read_file_filtered__rows_slice(df_nybb, engine):
@pytest.mark.filterwarnings(
"ignore:Layer does not support OLC_FASTFEATURECOUNT:RuntimeWarning"
) # for the slice with -1
def test_read_file_filtered__rows_bbox(df_nybb, engine):
nybb_filename = geopandas.datasets.get_path("nybb")
def test_read_file_filtered__rows_bbox(df_nybb, engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
if engine == "pyogrio" and not PYOGRIO_GE_07:
with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"):
# combination bbox and rows (rows slice applied after bbox filtering!)
filtered_df = read_file(
nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
)
else: # fiona
if engine == "fiona":
# combination bbox and rows (rows slice applied after bbox filtering!)
filtered_df = read_file(
nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
@@ -768,16 +804,14 @@ def test_read_file_filtered__rows_bbox(df_nybb, engine):
)
def test_read_file_filtered_rows_invalid(engine):
def test_read_file_filtered_rows_invalid(engine, nybb_filename):
with pytest.raises(TypeError):
read_file(
geopandas.datasets.get_path("nybb"), rows="not_a_slice", engine=engine
)
read_file(nybb_filename, rows="not_a_slice", engine=engine)
def test_read_file__ignore_geometry(engine):
def test_read_file__ignore_geometry(engine, naturalearth_lowres):
pdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
ignore_geometry=True,
engine=engine,
)
@@ -785,20 +819,73 @@ def test_read_file__ignore_geometry(engine):
assert isinstance(pdf, pd.DataFrame) and not isinstance(pdf, geopandas.GeoDataFrame)
def test_read_file__ignore_all_fields(engine):
skip_pyogrio_not_supported(engine) # pyogrio has "columns" keyword instead
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__ignore_fields(engine, naturalearth_lowres):
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
ignore_fields=["pop_est", "continent", "iso_a3", "gdp_md_est"],
engine=engine,
)
assert gdf.columns.tolist() == ["name", "geometry"]
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__ignore_all_fields(engine, naturalearth_lowres):
gdf = geopandas.read_file(
naturalearth_lowres,
ignore_fields=["pop_est", "continent", "name", "iso_a3", "gdp_md_est"],
engine="fiona",
engine=engine,
)
assert gdf.columns.tolist() == ["geometry"]
def test_read_file__where_filter(engine):
def test_read_file_missing_geometry(tmpdir, engine):
filename = str(tmpdir / "test.csv")
expected = pd.DataFrame(
{"col1": np.array([1, 2, 3], dtype="int64"), "col2": ["a", "b", "c"]}
)
expected.to_csv(filename, index=False)
df = geopandas.read_file(filename, engine=engine)
# both engines read integers as strings; force back to original type
df["col1"] = df["col1"].astype("int64")
assert isinstance(df, pd.DataFrame)
assert not isinstance(df, geopandas.GeoDataFrame)
assert_frame_equal(df, expected)
def test_read_file_None_attribute(tmp_path, engine):
# Test added in context of https://github.com/geopandas/geopandas/issues/2901
test_path = tmp_path / "test.gpkg"
gdf = GeoDataFrame(
{"a": [None, None]}, geometry=[Point(1, 2), Point(3, 4)], crs=4326
)
gdf.to_file(test_path, engine=engine)
read_gdf = read_file(test_path, engine=engine)
assert_geodataframe_equal(gdf, read_gdf)
def test_read_csv_dtype(tmpdir, df_nybb):
filename = str(tmpdir / "test.csv")
df_nybb.to_csv(filename, index=False)
pdf = pd.read_csv(filename, dtype={"geometry": "geometry"})
assert pdf.geometry.dtype == "geometry"
def test_read_file__where_filter(engine, naturalearth_lowres):
if FIONA_GE_19 or engine == "pyogrio":
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
where="continent='Africa'",
engine=engine,
)
@@ -806,26 +893,75 @@ def test_read_file__where_filter(engine):
else:
with pytest.raises(NotImplementedError):
geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
naturalearth_lowres,
where="continent='Africa'",
engine="fiona",
)
@PYOGRIO_MARK
def test_read_file__columns():
# TODO: this is only support for pyogrio, but we could mimic it for fiona as well
def test_read_file__columns(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres"),
columns=["name", "pop_est"],
engine="pyogrio",
naturalearth_lowres, columns=["name", "pop_est"], engine=engine
)
assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
def test_read_file__columns_empty(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(naturalearth_lowres, columns=[], engine=engine)
assert gdf.columns.tolist() == ["geometry"]
@pytest.mark.skipif(FIONA_GE_19 or not fiona, reason="test for fiona < 1.9")
def test_read_file__columns_old_fiona(naturalearth_lowres):
with pytest.raises(NotImplementedError):
geopandas.read_file(
naturalearth_lowres, columns=["name", "pop_est"], engine="fiona"
)
@pytest.mark.filterwarnings(
"ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
)
def test_read_file__include_fields(engine, naturalearth_lowres):
if engine == "fiona" and not FIONA_GE_19:
pytest.skip("columns requires fiona 1.9+")
gdf = geopandas.read_file(
naturalearth_lowres, include_fields=["name", "pop_est"], engine=engine
)
assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
@pytest.mark.skipif(not FIONA_GE_19, reason="columns requires fiona 1.9+")
def test_read_file__columns_conflicting_keywords(engine, naturalearth_lowres):
path = naturalearth_lowres
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, include_fields=["name"], ignore_fields=["pop_est"], engine=engine
)
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, columns=["name"], include_fields=["pop_est"], engine=engine
)
with pytest.raises(ValueError, match="Cannot specify both"):
geopandas.read_file(
path, columns=["name"], ignore_fields=["pop_est"], engine=engine
)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@pytest.mark.parametrize("file_like", [False, True])
def test_read_file_bbox_gdf(df_nybb, engine, nybb_filename, file_like):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
bbox = geopandas.GeoDataFrame(
geometry=[
box(
@@ -837,28 +973,41 @@ def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
],
crs=NYBB_CRS,
)
filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine)
infile = (
open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
)
filtered_df = read_file(infile, bbox=bbox, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary__mask(df_nybb, engine):
skip_pyogrio_not_supported(engine)
gdf_mask = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
gdf = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_cities"),
mask=gdf_mask[gdf_mask.continent == "Africa"],
engine=engine,
)
filtered_df_shape = gdf.shape
assert filtered_df_shape == (57, 2)
def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
skip_pyogrio_not_supported(engine)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@pytest.mark.parametrize("file_like", [False, True])
def test_read_file_mask_gdf(df_nybb, engine, nybb_filename, file_like):
full_df_shape = df_nybb.shape
mask = geopandas.GeoDataFrame(
geometry=[
box(
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
],
crs=NYBB_CRS,
)
infile = (
open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
)
filtered_df = read_file(infile, mask=mask, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
def test_read_file_mask_polygon(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
mask = box(
1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
)
@@ -868,10 +1017,25 @@ def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
skip_pyogrio_not_supported(engine)
def test_read_file_mask_geojson(df_nybb, nybb_filename, engine):
full_df_shape = df_nybb.shape
mask = mapping(
box(
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
)
filtered_df = read_file(nybb_filename, mask=mask, engine=engine)
filtered_df_shape = filtered_df.shape
assert full_df_shape != filtered_df_shape
assert filtered_df_shape == (2, 5)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_bbox_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
bbox = geopandas.GeoDataFrame(
geometry=[
box(
@@ -890,10 +1054,9 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
assert filtered_df_shape == (2, 5)
def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engine):
skip_pyogrio_not_supported(engine)
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_read_file_mask_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
full_df_shape = df_nybb.shape
nybb_filename = geopandas.datasets.get_path("nybb")
mask = geopandas.GeoDataFrame(
geometry=[
box(
@@ -912,6 +1075,20 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engi
assert filtered_df_shape == (2, 5)
def test_read_file_bbox_mask_not_allowed(engine, nybb_filename):
bbox = (
1031051.7879884212,
224272.49231459625,
1047224.3104931959,
244317.30894023244,
)
mask = box(*bbox)
with pytest.raises(ValueError, match="mask and bbox can not be set together"):
read_file(nybb_filename, bbox=bbox, mask=mask)
@pytest.mark.filterwarnings(
"ignore:Layer 'b'test_empty'' does not have any features:UserWarning"
)
@@ -942,11 +1119,6 @@ def test_read_file_empty_shapefile(tmpdir, engine):
assert all(empty.columns == ["A", "Z", "geometry"])
def test_read_file_privacy(tmpdir, df_nybb):
with pytest.warns(FutureWarning):
geopandas.io.file.read_file(geopandas.datasets.get_path("nybb"))
class FileNumber(object):
def __init__(self, tmpdir, base, ext):
self.tmpdir = str(tmpdir)
@@ -1113,7 +1285,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
# index as string
df_p = df_points.copy()
df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
df.index = pd.TimedeltaIndex(range(len(df)), "days")
df.index = pd.to_timedelta(range(len(df)), unit="days")
# TODO: TimedeltaIndex is an invalid field type
df.index = df.index.astype(str)
do_checks(df, index_is_used=True)
@@ -1121,7 +1293,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
# unnamed DatetimeIndex
df_p = df_points.copy()
df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
df.index = pd.TimedeltaIndex(range(len(df)), "days") + pd.DatetimeIndex(
df.index = pd.to_timedelta(range(len(df)), unit="days") + pd.to_datetime(
["1999-12-27"] * len(df)
)
if driver == "ESRI Shapefile":
@@ -1152,6 +1324,54 @@ def test_write_read_file(test_file, engine):
os.remove(os.path.expanduser(test_file))
@pytest.mark.skipif(fiona is False, reason="Fiona not available")
@pytest.mark.skipif(FIONA_GE_19, reason="Fiona >= 1.9 supports metadata")
def test_to_file_metadata_unsupported_fiona_version(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
match = "'metadata' keyword is only supported for Fiona >= 1.9"
with pytest.raises(NotImplementedError, match=match):
df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
@pytest.mark.skipif(not FIONA_GE_19, reason="only Fiona >= 1.9 supports metadata")
def test_to_file_metadata_supported_fiona_version(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
# Check that metadata is written to the file
with fiona.open(tmp_file) as src:
tags = src.tags()
assert tags == metadata
@pytest.mark.skipif(pyogrio is False, reason="Pyogrio not available")
def test_to_file_metadata_pyogrio(tmp_path, df_points):
metadata = {"title": "test"}
tmp_file = tmp_path / "test.gpkg"
df_points.to_file(tmp_file, driver="GPKG", engine="pyogrio", metadata=metadata)
# Check that metadata is written to the file
info = pyogrio.read_info(tmp_file)
layer_metadata = info["layer_metadata"]
assert layer_metadata == metadata
@pytest.mark.parametrize(
"driver, ext", [("ESRI Shapefile", ".shp"), ("GeoJSON", ".geojson")]
)
def test_to_file_metadata_unsupported_driver(driver, ext, tmpdir, df_points, engine):
metadata = {"title": "Test"}
tempfilename = os.path.join(str(tmpdir), "test" + ext)
with pytest.raises(
NotImplementedError, match="'metadata' keyword is only supported for"
):
df_points.to_file(tempfilename, driver=driver, metadata=metadata)
def test_multiple_geom_cols_error(tmpdir, df_nybb):
df_nybb["geom2"] = df_nybb.geometry
with pytest.raises(ValueError, match="GeoDataFrame contains multiple geometry"):
@@ -1160,7 +1380,7 @@ def test_multiple_geom_cols_error(tmpdir, df_nybb):
@PYOGRIO_MARK
@FIONA_MARK
def test_option_io_engine():
def test_option_io_engine(nybb_filename):
try:
geopandas.options.io_engine = "pyogrio"
@@ -1171,8 +1391,48 @@ def test_option_io_engine():
orig = fiona.supported_drivers["ESRI Shapefile"]
fiona.supported_drivers["ESRI Shapefile"] = "w"
nybb_filename = geopandas.datasets.get_path("nybb")
_ = geopandas.read_file(nybb_filename)
finally:
fiona.supported_drivers["ESRI Shapefile"] = orig
geopandas.options.io_engine = None
@pytest.mark.skipif(pyogrio, reason="test for pyogrio not installed")
def test_error_engine_unavailable_pyogrio(tmp_path, df_points, file_path):
with pytest.raises(ImportError, match="the 'read_file' function requires"):
geopandas.read_file(file_path, engine="pyogrio")
with pytest.raises(ImportError, match="the 'to_file' method requires"):
df_points.to_file(tmp_path / "test.gpkg", engine="pyogrio")
@pytest.mark.skipif(fiona, reason="test for fiona not installed")
def test_error_engine_unavailable_fiona(tmp_path, df_points, file_path):
with pytest.raises(ImportError, match="the 'read_file' function requires"):
geopandas.read_file(file_path, engine="fiona")
with pytest.raises(ImportError, match="the 'to_file' method requires"):
df_points.to_file(tmp_path / "test.gpkg", engine="fiona")
@PYOGRIO_MARK
def test_list_layers(df_points, tmpdir):
tempfilename = os.path.join(str(tmpdir), "dataset.gpkg")
df_points.to_file(tempfilename, layer="original")
df_points.set_geometry(df_points.buffer(1)).to_file(tempfilename, layer="buffered")
df_points.set_geometry(df_points.buffer(2).boundary).to_file(
tempfilename, layer="boundary"
)
pyogrio.write_dataframe(
df_points[["value1", "value2"]], tempfilename, layer="non-spatial"
)
layers = geopandas.list_layers(tempfilename)
expected = pd.DataFrame(
{
"name": ["original", "buffered", "boundary", "non-spatial"],
"geometry_type": ["Point", "Polygon", "LineString", None],
}
)
assert_frame_equal(layers, expected)

View File

@@ -12,11 +12,10 @@ from shapely.geometry import (
import geopandas
from geopandas import GeoDataFrame
from geopandas.testing import assert_geodataframe_equal
import pytest
from .test_file import FIONA_MARK, PYOGRIO_MARK
import pytest
from geopandas.testing import assert_geodataframe_equal
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
@@ -244,7 +243,14 @@ def geodataframe(request):
return request.param
@pytest.fixture(params=["GeoJSON", "ESRI Shapefile", "GPKG", "SQLite"])
@pytest.fixture(
params=[
("GeoJSON", ".geojson"),
("ESRI Shapefile", ".shp"),
("GPKG", ".gpkg"),
("SQLite", ".sqlite"),
]
)
def ogr_driver(request):
return request.param
@@ -260,16 +266,18 @@ def engine(request):
def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
output_file = os.path.join(str(tmpdir), "output_file")
driver, ext = ogr_driver
output_file = os.path.join(str(tmpdir), "output_file" + ext)
write_kwargs = {}
if ogr_driver == "SQLite":
if driver == "SQLite":
write_kwargs["spatialite"] = True
# This if statement can be removed once minimal fiona version >= 1.8.20
if engine == "fiona":
import fiona
from packaging.version import Version
import fiona
if Version(fiona.__version__) < Version("1.8.20"):
pytest.skip("SQLite driver only available from version 1.8.20")
@@ -285,22 +293,35 @@ def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
):
write_kwargs["geometry_type"] = "Point Z"
expected_error = _expected_error_on(geodataframe, ogr_driver)
expected_error = _expected_error_on(geodataframe, driver)
if expected_error:
with pytest.raises(
RuntimeError, match="Failed to write record|Could not add feature to layer"
):
geodataframe.to_file(
output_file, driver=ogr_driver, engine=engine, **write_kwargs
output_file, driver=driver, engine=engine, **write_kwargs
)
else:
geodataframe.to_file(
output_file, driver=ogr_driver, engine=engine, **write_kwargs
)
if driver == "SQLite" and engine == "pyogrio":
try:
geodataframe.to_file(
output_file, driver=driver, engine=engine, **write_kwargs
)
except ValueError as e:
if "unrecognized option 'SPATIALITE'" in str(e):
pytest.xfail(
"pyogrio wheels from PyPI do not come with SpatiaLite support. "
f"Error: {e}"
)
raise
else:
geodataframe.to_file(
output_file, driver=driver, engine=engine, **write_kwargs
)
reloaded = geopandas.read_file(output_file, engine=engine)
if ogr_driver == "GeoJSON" and engine == "pyogrio":
if driver == "GeoJSON" and engine == "pyogrio":
# For GeoJSON files, the int64 column comes back as int32
reloaded["a"] = reloaded["a"].astype("int64")

View File

@@ -1,5 +1,8 @@
from collections import OrderedDict
import numpy as np
import pandas as pd
from shapely.geometry import (
LineString,
MultiLineString,
@@ -9,12 +12,11 @@ from shapely.geometry import (
Polygon,
)
import pandas as pd
import pytest
import numpy as np
from geopandas import GeoDataFrame
from geopandas.io.file import infer_schema
import pytest
# Credit: Polygons below come from Montreal city Open Data portal
# http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
city_hall_boundaries = Polygon(

View File

@@ -2,7 +2,7 @@
See generate_legacy_storage_files.py for the creation of the legacy files.
"""
from contextlib import contextmanager
import glob
import os
import pathlib
@@ -11,9 +11,6 @@ import pandas as pd
import pytest
from geopandas.testing import assert_geodataframe_equal
from geopandas import _compat as compat
import geopandas
from shapely.geometry import Point
DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"
@@ -34,18 +31,7 @@ def legacy_pickle(request):
return request.param
@contextmanager
def with_use_pygeos(option):
orig = geopandas.options.use_pygeos
geopandas.options.use_pygeos = option
try:
yield
finally:
geopandas.options.use_pygeos = orig
@pytest.mark.skipif(
compat.USE_SHAPELY_20 or compat.USE_PYGEOS,
@pytest.mark.skip(
reason=(
"shapely 2.0/pygeos-based unpickling currently only works for "
"shapely-2.0/pygeos-written files"
@@ -68,43 +54,3 @@ def test_round_trip_current(tmpdir, current_pickle_data):
result = pd.read_pickle(path)
assert_geodataframe_equal(result, value)
assert isinstance(result.has_sindex, bool)
def _create_gdf():
return geopandas.GeoDataFrame(
{"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
crs="EPSG:4326",
)
@pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745")
def test_pygeos_switch(tmpdir):
# writing and reading with pygeos disabled
with with_use_pygeos(False):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
result = pd.read_pickle(path)
assert_geodataframe_equal(result, gdf)
# writing without pygeos, reading with pygeos
with with_use_pygeos(False):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
with with_use_pygeos(True):
result = pd.read_pickle(path)
gdf = _create_gdf()
assert_geodataframe_equal(result, gdf)
# writing with pygeos, reading without pygeos
with with_use_pygeos(True):
gdf = _create_gdf()
path = str(tmpdir / "gdf_crs1.pickle")
gdf.to_pickle(path)
with with_use_pygeos(False):
result = pd.read_pickle(path)
gdf = _create_gdf()
assert_geodataframe_equal(result, gdf)

View File

@@ -4,18 +4,27 @@ The spatial database tests may not work without additional system
configuration. postGIS tests require a test database to have been setup;
see geopandas.tests.util for more information.
"""
import os
import warnings
from importlib.util import find_spec
import pandas as pd
import geopandas
from geopandas import GeoDataFrame, read_file, read_postgis
import geopandas._compat as compat
from geopandas.io.sql import _get_conn as get_conn, _write_postgis as write_postgis
from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df
from geopandas import GeoDataFrame, read_file, read_postgis
from geopandas._compat import HAS_PYPROJ
from geopandas.io.sql import _get_conn as get_conn
from geopandas.io.sql import _write_postgis as write_postgis
import pytest
from geopandas.tests.util import (
create_postgis,
create_spatialite,
mock,
validate_boro_df,
)
try:
from sqlalchemy import text
@@ -26,31 +35,48 @@ except ImportError:
@pytest.fixture
def df_nybb():
nybb_path = geopandas.datasets.get_path("nybb")
df = read_file(nybb_path)
def df_nybb(nybb_filename):
df = read_file(nybb_filename)
return df
@pytest.fixture()
def connection_postgis():
def check_available_postgis_drivers() -> list[str]:
"""Work out which of psycopg2 and psycopg are available.
This prevents tests running if the relevant package isn't installed
(rather than being skipped, as skips are treated as failures during postgis CI)
"""
Initiates a connection to a postGIS database that must already exist.
See create_postgis for more information.
"""
psycopg2 = pytest.importorskip("psycopg2")
from psycopg2 import OperationalError
drivers = []
if find_spec("psycopg"):
drivers.append("psycopg")
if find_spec("psycopg2"):
drivers.append("psycopg2")
return drivers
POSTGIS_DRIVERS = check_available_postgis_drivers()
def prepare_database_credentials() -> dict:
"""Gather postgres connection credentials from environment variables."""
return {
"dbname": "test_geopandas",
"user": os.environ.get("PGUSER"),
"password": os.environ.get("PGPASSWORD"),
"host": os.environ.get("PGHOST"),
"port": os.environ.get("PGPORT"),
}
@pytest.fixture()
def connection_postgis(request):
"""Create a postgres connection using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
psycopg = pytest.importorskip(request.param)
dbname = "test_geopandas"
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
try:
con = psycopg2.connect(
dbname=dbname, user=user, password=password, host=host, port=port
)
except OperationalError:
con = psycopg.connect(**prepare_database_credentials())
except psycopg.OperationalError:
pytest.skip("Cannot connect with postgresql database")
with warnings.catch_warnings():
warnings.filterwarnings(
@@ -61,28 +87,25 @@ def connection_postgis():
@pytest.fixture()
def engine_postgis():
def engine_postgis(request):
"""
Initiates a connection engine to a postGIS database that must already exist.
Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
"""
sqlalchemy = pytest.importorskip("sqlalchemy")
from sqlalchemy.engine.url import URL
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
dbname = "test_geopandas"
credentials = prepare_database_credentials()
try:
con = sqlalchemy.create_engine(
URL.create(
drivername="postgresql+psycopg2",
username=user,
database=dbname,
password=password,
host=host,
port=port,
drivername=f"postgresql+{request.param}",
username=credentials["user"],
database=credentials["dbname"],
password=credentials["password"],
host=credentials["host"],
port=credentials["port"],
)
)
con.connect()
@@ -140,7 +163,7 @@ def drop_table_if_exists(conn_or_engine, table):
@pytest.fixture
def df_mixed_single_and_multi():
from shapely.geometry import Point, LineString, MultiLineString
from shapely.geometry import LineString, MultiLineString, Point
df = geopandas.GeoDataFrame(
{
@@ -157,7 +180,7 @@ def df_mixed_single_and_multi():
@pytest.fixture
def df_geom_collection():
from shapely.geometry import Point, LineString, Polygon, GeometryCollection
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
df = geopandas.GeoDataFrame(
{
@@ -188,7 +211,7 @@ def df_linear_ring():
@pytest.fixture
def df_3D_geoms():
from shapely.geometry import Point, LineString, Polygon
from shapely.geometry import LineString, Point, Polygon
df = geopandas.GeoDataFrame(
{
@@ -204,6 +227,7 @@ def df_3D_geoms():
class TestIO:
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_get_conn(self, engine_postgis):
Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
@@ -217,6 +241,7 @@ class TestIO:
with get_conn(object()):
pass
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_default(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
@@ -229,6 +254,7 @@ class TestIO:
# by user; should not be set to 0, as from get_srid failure
assert df.crs is None
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
con = connection_postgis
geom_col = "the_geom"
@@ -239,6 +265,7 @@ class TestIO:
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
"""Tests that a SELECT {geom} AS {some_other_geom} works."""
con = connection_postgis
@@ -254,6 +281,7 @@ class TestIO:
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
"""Tests that an SRID can be read from a geodatabase (GH #451)."""
con = connection_postgis
@@ -267,6 +295,7 @@ class TestIO:
validate_boro_df(df)
assert df.crs == crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
"""Tests that a user specified CRS overrides the geodatabase SRID."""
con = connection_postgis
@@ -279,6 +308,7 @@ class TestIO:
validate_boro_df(df)
assert df.crs == orig_crs
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_default(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
@@ -288,6 +318,7 @@ class TestIO:
validate_boro_df(df, case_sensitive=False)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
con = connection_postgis
geom_col = "the_geom"
@@ -323,6 +354,7 @@ class TestIO:
df = read_postgis(sql, con, geom_col=geom_col)
validate_boro_df(df)
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
"""Test chunksize argument"""
chunksize = 2
@@ -337,14 +369,7 @@ class TestIO:
# by user; should not be set to 0, as from get_srid failure
assert df.crs is None
def test_read_postgis_privacy(self, connection_postgis, df_nybb):
con = connection_postgis
create_postgis(con, df_nybb)
sql = "SELECT * FROM nybb;"
with pytest.warns(FutureWarning):
geopandas.io.sql.read_postgis(sql, con)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_default(self, engine_postgis, df_nybb):
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
engine = engine_postgis
@@ -360,6 +385,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
"""Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
engine = engine_postgis
@@ -375,6 +401,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
"""Tests that GeoDataFrame can be written to PostGIS with defaults."""
with engine_postgis.begin() as con:
@@ -390,6 +417,7 @@ class TestIO:
df = read_postgis(sql, con, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that uploading the same table raises error when: if_replace='fail'.
@@ -409,6 +437,7 @@ class TestIO:
else:
raise e
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that replacing a table is possible when: if_replace='replace'.
@@ -426,6 +455,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
"""
Tests that appending to existing table produces correct results when:
@@ -445,15 +475,18 @@ class TestIO:
# There should be twice as many rows in the new table
assert new_rows == orig_rows * 2, (
"There should be {target} rows,"
"found: {current}".format(target=orig_rows * 2, current=new_rows),
"There should be {target} rows,found: {current}".format(
target=orig_rows * 2, current=new_rows
),
)
# Number of columns should stay the same
assert new_cols == orig_cols, (
"There should be {target} columns,"
"found: {current}".format(target=orig_cols, current=new_cols),
"There should be {target} columns,found: {current}".format(
target=orig_cols, current=new_cols
),
)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
"""
Tests that GeoDataFrame can be written to PostGIS without CRS information.
@@ -463,8 +496,7 @@ class TestIO:
table = "nybb"
# Write to db
df_nybb = df_nybb
df_nybb.crs = None
df_nybb.geometry.array.crs = None
with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
# Validate that srid is -1
@@ -477,6 +509,7 @@ class TestIO:
target_srid = conn.execute(sql).fetchone()[0]
assert target_srid == 0, "SRID should be 0, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
"""
Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
@@ -499,6 +532,7 @@ class TestIO:
target_srid = conn.execute(sql).fetchone()[0]
assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_geometry_collection(
self, engine_postgis, df_geom_collection
):
@@ -525,6 +559,7 @@ class TestIO:
assert geom_type.upper() == "GEOMETRYCOLLECTION"
assert df.geom_type.unique()[0] == "GeometryCollection"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_mixed_geometry_types(
self, engine_postgis, df_mixed_single_and_multi
):
@@ -551,6 +586,7 @@ class TestIO:
assert res[1][0].upper() == "MULTILINESTRING"
assert res[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
"""
Tests that writing a LinearRing.
@@ -572,6 +608,7 @@ class TestIO:
assert geom_type.upper() == "LINESTRING"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
"""
Tests writing a LinearRing works.
@@ -605,6 +642,7 @@ class TestIO:
assert res[1][0].upper() == "MULTILINESTRING"
assert res[2][0].upper() == "POINT"
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
"""
Tests writing data to alternative schema.
@@ -628,6 +666,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_to_different_schema_when_table_exists(
self, engine_postgis, df_nybb
):
@@ -672,6 +711,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
"""
Tests writing a geometries with 3 dimensions works.
@@ -687,6 +727,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
assert list(df.geometry.has_z) == [True, True, True]
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_row_order(self, engine_postgis, df_nybb):
"""
Tests that the row order in db table follows the order of the original frame.
@@ -703,6 +744,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
assert df["BoroCode"].tolist() == correct_order
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_before_table_exists(self, engine_postgis, df_nybb):
"""
Tests that insert works with if_exists='append' when table does not exist yet.
@@ -720,6 +762,7 @@ class TestIO:
df = read_postgis(sql, engine, geom_col="geometry")
validate_boro_df(df)
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_with_different_crs(self, engine_postgis, df_nybb):
"""
Tests that the warning is raised if table CRS differs from frame.
@@ -736,9 +779,26 @@ class TestIO:
with pytest.raises(ValueError, match="CRS of the target table"):
write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
def test_append_without_crs(self, engine_postgis, df_nybb):
# This test was included in #3328 when the default value for no
# CRS was changed from an SRID of -1 to 0. This resolves issues
# of appending dataframes to postgis that have no CRS as postgis
# no CRS value is 0.
engine = engine_postgis
df_nybb = df_nybb.set_crs(None, allow_override=True)
table = "nybb"
write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
# append another dataframe with no crs
df_nybb2 = df_nybb
write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
@pytest.mark.xfail(
compat.PANDAS_GE_20 and not compat.PANDAS_GE_21,
reason="Duplicate columns are dropped in read_sql with pandas 2.0.x",
compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
)
def test_duplicate_geometry_column_fails(self, engine_postgis):
"""
@@ -750,3 +810,69 @@ class TestIO:
with pytest.raises(ValueError):
read_postgis(sql, engine, geom_col="geom")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="esri:54052")
create_postgis(con, df_nybb, srid=54052)
sql = "SELECT * FROM nybb;"
df = read_postgis(sql, con)
validate_boro_df(df)
assert df.crs == "ESRI:54052"
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
@mock.patch("shapely.get_srid")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
# mock a non-existent srid for edge case if shapely has an srid
# not present in postgis table.
pyproj = pytest.importorskip("pyproj")
mock_get_srid.return_value = 99999
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="epsg:4326")
create_postgis(con, df_nybb)
sql = "SELECT * FROM nybb;"
with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
with pytest.warns(UserWarning, match="Could not find srid 99999"):
read_postgis(sql, con)
@mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_no_spatial_ref_sys_table_in_postgis(
self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
):
# mock for a non-existent spatial_ref_sys database
mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="epsg:4326")
create_postgis(con, df_nybb, srid=4326)
sql = "SELECT * FROM nybb;"
with pytest.warns(
UserWarning, match="Could not find the spatial reference system table"
):
df = read_postgis(sql, con)
assert df.crs == "EPSG:4326"
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
"""Test chunksize argument with non epsg crs"""
chunksize = 2
con = connection_postgis
df_nybb = df_nybb.to_crs(crs="esri:54052")
create_postgis(con, df_nybb, srid=54052)
sql = "SELECT * FROM nybb;"
df = pd.concat(read_postgis(sql, con, chunksize=chunksize))
validate_boro_df(df)
assert df.crs == "ESRI:54052"

View File

@@ -1,32 +1,16 @@
import warnings
from packaging.version import Version
import numpy as np
import pandas as pd
from pandas.plotting import PlotAccessor
from pandas import CategoricalDtype
from pandas.plotting import PlotAccessor
import geopandas
from packaging.version import Version
from ._decorator import doc
def deprecated(new, warning_type=FutureWarning):
"""Helper to provide deprecation warning."""
def old(*args, **kwargs):
warnings.warn(
"{} is intended for internal ".format(new.__name__[1:])
+ "use only, and will be deprecated.",
warning_type,
stacklevel=2,
)
new(*args, **kwargs)
return old
def _sanitize_geoms(geoms, prefix="Multi"):
"""
Returns Series like geoms and index, except that any Multi geometries
@@ -76,17 +60,11 @@ def _expand_kwargs(kwargs, multiindex):
it (in place) to the correct length/formats with help of 'multiindex', unless
the value appears to already be a valid (single) value for the key.
"""
import matplotlib
from matplotlib.colors import is_color_like
from typing import Iterable
mpl = Version(matplotlib.__version__)
if mpl >= Version("3.4"):
# alpha is supported as array argument with matplotlib 3.4+
scalar_kwargs = ["marker", "path_effects"]
else:
scalar_kwargs = ["marker", "alpha", "path_effects"]
from matplotlib.colors import is_color_like
scalar_kwargs = ["marker", "path_effects"]
for att, value in kwargs.items():
if "color" in att: # color(s), edgecolor(s), facecolor(s)
if is_color_like(value):
@@ -134,7 +112,15 @@ def _PolygonPatch(polygon, **kwargs):
def _plot_polygon_collection(
ax, geoms, values=None, color=None, cmap=None, vmin=None, vmax=None, **kwargs
ax,
geoms,
values=None,
color=None,
cmap=None,
vmin=None,
vmax=None,
autolim=True,
**kwargs,
):
"""
Plots a collection of Polygon and MultiPolygon geometries to `ax`
@@ -155,6 +141,8 @@ def _plot_polygon_collection(
Color to fill the polygons. Cannot be used together with `values`.
color : single color or sequence of `N` colors
Sets both `edgecolor` and `facecolor`
autolim : bool (default True)
Update axes data limits to contain the new geometries.
**kwargs
Additional keyword arguments passed to the collection
@@ -189,16 +177,21 @@ def _plot_polygon_collection(
if "norm" not in kwargs:
collection.set_clim(vmin, vmax)
ax.add_collection(collection, autolim=True)
ax.add_collection(collection, autolim=autolim)
ax.autoscale_view()
return collection
plot_polygon_collection = deprecated(_plot_polygon_collection)
def _plot_linestring_collection(
ax, geoms, values=None, color=None, cmap=None, vmin=None, vmax=None, **kwargs
ax,
geoms,
values=None,
color=None,
cmap=None,
vmin=None,
vmax=None,
autolim=True,
**kwargs,
):
"""
Plots a collection of LineString and MultiLineString geometries to `ax`
@@ -214,6 +207,8 @@ def _plot_linestring_collection(
have 1:1 correspondence with the geometries (not their components).
color : single color or sequence of `N` colors
Cannot be used together with `values`.
autolim : bool (default True)
Update axes data limits to contain the new geometries.
Returns
-------
@@ -247,14 +242,11 @@ def _plot_linestring_collection(
if "norm" not in kwargs:
collection.set_clim(vmin, vmax)
ax.add_collection(collection, autolim=True)
ax.add_collection(collection, autolim=autolim)
ax.autoscale_view()
return collection
plot_linestring_collection = deprecated(_plot_linestring_collection)
def _plot_point_collection(
ax,
geoms,
@@ -318,11 +310,15 @@ def _plot_point_collection(
return collection
plot_point_collection = deprecated(_plot_point_collection)
def plot_series(
s, cmap=None, color=None, ax=None, figsize=None, aspect="auto", **style_kwds
s,
cmap=None,
color=None,
ax=None,
figsize=None,
aspect="auto",
autolim=True,
**style_kwds,
):
"""
Plot a GeoSeries.
@@ -358,6 +354,8 @@ def plot_series(
square appears square in the middle of the plot. This implies an
Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
also be set manually (float) as the ratio of y-unit to x-unit.
autolim : bool (default True)
Update axes data limits to contain the new geometries.
**style_kwds : dict
Color options to be passed on to the actual plot function, such
as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
@@ -367,22 +365,6 @@ def plot_series(
-------
ax : matplotlib axes instance
"""
if "colormap" in style_kwds:
warnings.warn(
"'colormap' is deprecated, please use 'cmap' instead "
"(for consistency with matplotlib)",
FutureWarning,
stacklevel=3,
)
cmap = style_kwds.pop("colormap")
if "axes" in style_kwds:
warnings.warn(
"'axes' is deprecated, please use 'ax' instead "
"(for consistency with pandas)",
FutureWarning,
stacklevel=3,
)
ax = style_kwds.pop("axes")
try:
import matplotlib.pyplot as plt
@@ -468,7 +450,13 @@ def plot_series(
values_ = values[poly_idx] if cmap else None
_plot_polygon_collection(
ax, polys, values_, facecolor=facecolor, cmap=cmap, **style_kwds
ax,
polys,
values_,
facecolor=facecolor,
cmap=cmap,
autolim=autolim,
**style_kwds,
)
# plot all LineStrings and MultiLineString components in same collection
@@ -478,7 +466,7 @@ def plot_series(
color_ = expl_color[line_idx] if color_given else color
_plot_linestring_collection(
ax, lines, values_, color=color_, cmap=cmap, **style_kwds
ax, lines, values_, color=color_, cmap=cmap, autolim=autolim, **style_kwds
)
# plot all Points in the same collection
@@ -491,7 +479,7 @@ def plot_series(
ax, points, values_, color=color_, cmap=cmap, **style_kwds
)
plt.draw()
ax.figure.canvas.draw_idle()
return ax
@@ -515,6 +503,7 @@ def plot_dataframe(
classification_kwds=None,
missing_kwds=None,
aspect="auto",
autolim=True,
**style_kwds,
):
"""
@@ -618,7 +607,8 @@ def plot_dataframe(
square appears square in the middle of the plot. This implies an
Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
also be set manually (float) as the ratio of y-unit to x-unit.
autolim : bool (default True)
Update axes data limits to contain the new geometries.
**style_kwds : dict
Style options to be passed on to the actual plot function, such
as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
@@ -645,22 +635,6 @@ def plot_dataframe(
See the User Guide page :doc:`../../user_guide/mapping` for details.
"""
if "colormap" in style_kwds:
warnings.warn(
"'colormap' is deprecated, please use 'cmap' instead "
"(for consistency with matplotlib)",
FutureWarning,
stacklevel=3,
)
cmap = style_kwds.pop("colormap")
if "axes" in style_kwds:
warnings.warn(
"'axes' is deprecated, please use 'ax' instead "
"(for consistency with pandas)",
FutureWarning,
stacklevel=3,
)
ax = style_kwds.pop("axes")
if column is not None and color is not None:
warnings.warn(
"Only specify one of 'column' or 'color'. Using 'color'.",
@@ -721,6 +695,7 @@ def plot_dataframe(
figsize=figsize,
markersize=markersize,
aspect=aspect,
autolim=autolim,
**style_kwds,
)
@@ -860,7 +835,14 @@ def plot_dataframe(
subset = values[poly_idx & np.invert(nan_idx)]
if not polys.empty:
_plot_polygon_collection(
ax, polys, subset, vmin=mn, vmax=mx, cmap=cmap, **style_kwds
ax,
polys,
subset,
vmin=mn,
vmax=mx,
cmap=cmap,
autolim=autolim,
**style_kwds,
)
# plot all LineStrings and MultiLineString components in same collection
@@ -868,7 +850,14 @@ def plot_dataframe(
subset = values[line_idx & np.invert(nan_idx)]
if not lines.empty:
_plot_linestring_collection(
ax, lines, subset, vmin=mn, vmax=mx, cmap=cmap, **style_kwds
ax,
lines,
subset,
vmin=mn,
vmax=mx,
cmap=cmap,
autolim=autolim,
**style_kwds,
)
# plot all Points in the same collection
@@ -906,9 +895,9 @@ def plot_dataframe(
if "fmt" in legend_kwds:
legend_kwds.pop("fmt")
from matplotlib.lines import Line2D
from matplotlib.colors import Normalize
from matplotlib import cm
from matplotlib.colors import Normalize
from matplotlib.lines import Line2D
norm = style_kwds.get("norm", None)
if not norm:
@@ -918,7 +907,7 @@ def plot_dataframe(
if scheme is not None:
categories = labels
patches = []
for value, cat in enumerate(categories):
for i in range(len(categories)):
patches.append(
Line2D(
[0],
@@ -927,7 +916,7 @@ def plot_dataframe(
marker="o",
alpha=style_kwds.get("alpha", 1),
markersize=10,
markerfacecolor=n_cmap.to_rgba(value),
markerfacecolor=n_cmap.to_rgba(i),
markeredgewidth=0,
)
)
@@ -964,7 +953,7 @@ def plot_dataframe(
n_cmap.set_array(np.array([]))
ax.get_figure().colorbar(n_cmap, **legend_kwds)
plt.draw()
ax.figure.canvas.draw_idle()
return ax

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,13 @@
"""
Testing functionality for geopandas objects.
"""
import warnings
import pandas as pd
from geopandas import GeoDataFrame, GeoSeries
from geopandas.array import GeometryDtype
from geopandas import _vectorized
def _isna(this):
@@ -189,8 +189,8 @@ def assert_geoseries_equal(
)
if normalize:
left = GeoSeries(_vectorized.normalize(left.array._data))
right = GeoSeries(_vectorized.normalize(right.array._data))
left = GeoSeries(left.array.normalize())
right = GeoSeries(right.array.normalize())
if not check_crs:
with warnings.catch_warnings():
@@ -322,7 +322,7 @@ def assert_geodataframe_equal(
)
if check_like:
left, right = left.reindex_like(right), right
left = left.reindex_like(right)
# column comparison
assert_index_equal(

View File

@@ -1,9 +1,3 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "Name": "Null Geometry" }, "geometry": null },
{ "type": "Feature", "properties": { "Name": "SF to NY" }, "geometry": { "type": "LineString", "coordinates": [ [ -122.4051293283311, 37.786780113640894 ], [ -73.859832357849271, 40.487594916296196 ] ] } }
]
}
version https://git-lfs.github.com/spec/v1
oid sha256:5e87f89afda555a1b1d43d9dc12864169d6d0149a4f222be12d40a6a86ad8066
size 506

View File

@@ -13,8 +13,8 @@ def test_no_additional_imports():
# "fiona",
# "matplotlib", # matplotlib gets imported by pandas, see below
"mapclassify",
# 'rtree', # rtree actually gets imported if installed
"sqlalchemy",
"psycopg",
"psycopg2",
"geopy",
"geoalchemy2",
@@ -34,5 +34,5 @@ if mods:
blacklist
)
call = [sys.executable, "-c", code]
returncode = subprocess.run(call).returncode
returncode = subprocess.run(call, check=False).returncode
assert returncode == 0

View File

@@ -1,34 +1,30 @@
import random
import warnings
import numpy as np
import pandas as pd
from pyproj import CRS
import shapely
import shapely.affinity
import shapely.geometry
from shapely.geometry.base import CAP_STYLE, JOIN_STYLE, BaseGeometry
import shapely.wkb
import shapely.wkt
try:
from shapely import geos_version
except ImportError:
from shapely._buildcfg import geos_version
from shapely import geos_version
from shapely.geometry.base import CAP_STYLE, JOIN_STYLE
import geopandas
from geopandas._compat import HAS_PYPROJ
from geopandas.array import (
GeometryArray,
_check_crs,
_crs_mismatch_warn,
from_shapely,
from_wkb,
from_wkt,
points_from_xy,
to_wkb,
to_wkt,
_check_crs,
_crs_mismatch_warn,
)
import geopandas._compat as compat
import pytest
@@ -143,11 +139,8 @@ def test_from_wkb():
assert all(v.equals(t) for v, t in zip(res, points_no_missing))
# missing values
# TODO(pygeos) does not support empty strings, np.nan, or pd.NA
# TODO(shapely) does not support empty strings, np.nan, or pd.NA
missing_values = [None]
if not (compat.USE_SHAPELY_20 or compat.USE_PYGEOS):
missing_values.extend([b"", np.nan])
missing_values.append(pd.NA)
res = from_wkb(missing_values)
np.testing.assert_array_equal(res, np.full(len(missing_values), None))
@@ -170,6 +163,24 @@ def test_from_wkb_hex():
assert isinstance(res, GeometryArray)
def test_from_wkb_on_invalid():
# Single point LineString hex WKB: invalid
invalid_wkb_hex = "01020000000100000000000000000008400000000000000840"
message = "point array must contain 0 or >1 elements"
with pytest.raises(Exception, match=message):
from_wkb([invalid_wkb_hex], on_invalid="raise")
with pytest.warns(Warning, match=message):
res = from_wkb([invalid_wkb_hex], on_invalid="warn")
assert res == [None]
with warnings.catch_warnings():
warnings.simplefilter("error")
res = from_wkb([invalid_wkb_hex], on_invalid="ignore")
assert res == [None]
def test_to_wkb():
P = from_shapely(points_no_missing)
res = to_wkb(P)
@@ -211,13 +222,10 @@ def test_from_wkt(string_type):
assert all(v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing))
# missing values
# TODO(pygeos) does not support empty strings, np.nan, or pd.NA
# TODO(shapely) does not support empty strings, np.nan, or pd.NA
missing_values = [None]
if not (compat.USE_SHAPELY_20 or compat.USE_PYGEOS):
missing_values.extend([f(""), np.nan])
missing_values.append(pd.NA)
res = from_wkb(missing_values)
res = from_wkt(missing_values)
np.testing.assert_array_equal(res, np.full(len(missing_values), None))
# single MultiPolygon
@@ -228,6 +236,24 @@ def test_from_wkt(string_type):
assert res[0] == multi_poly
def test_from_wkt_on_invalid():
# Single point LineString WKT: invalid
invalid_wkt = "LINESTRING(0 0)"
message = "point array must contain 0 or >1 elements"
with pytest.raises(Exception, match=message):
from_wkt([invalid_wkt], on_invalid="raise")
with pytest.warns(Warning, match=message):
res = from_wkt([invalid_wkt], on_invalid="warn")
assert res == [None]
with warnings.catch_warnings():
warnings.simplefilter("error")
res = from_wkt([invalid_wkt], on_invalid="ignore")
assert res == [None]
def test_to_wkt():
P = from_shapely(points_no_missing)
res = to_wkt(P, rounding_precision=-1)
@@ -241,22 +267,6 @@ def test_to_wkt():
assert res[0] is None
def test_data():
arr = from_shapely(points_no_missing)
with pytest.warns(DeprecationWarning):
np_arr = arr.data
assert isinstance(np_arr, np.ndarray)
if compat.USE_PYGEOS:
np_arr2 = arr.to_numpy()
assert isinstance(np_arr2[0], BaseGeometry)
np_arr3 = np.asarray(arr)
assert isinstance(np_arr3[0], BaseGeometry)
else:
assert arr.to_numpy() is np_arr
assert np.asarray(arr) is np_arr
def test_as_array():
arr = from_shapely(points_no_missing)
np_arr1 = np.asarray(arr)
@@ -281,6 +291,9 @@ def test_as_array():
("geom_almost_equals", (3,)),
],
)
# filters required for attr=geom_almost_equals only
@pytest.mark.filterwarnings(r"ignore:The \'geom_almost_equals\(\)\' method is deprecat")
@pytest.mark.filterwarnings(r"ignore:The \'almost_equals\(\)\' method is deprecated")
def test_predicates_vector_scalar(attr, args):
na_value = False
@@ -293,9 +306,11 @@ def test_predicates_vector_scalar(attr, args):
assert result.dtype == bool
expected = [
getattr(tri, attr if "geom" not in attr else attr[5:])(other, *args)
if tri is not None
else na_value
(
getattr(tri, attr if "geom" not in attr else attr[5:])(other, *args)
if tri is not None
else na_value
)
for tri in triangles
]
@@ -320,6 +335,9 @@ def test_predicates_vector_scalar(attr, args):
("geom_almost_equals", (3,)),
],
)
# filters required for attr=geom_almost_equals only
@pytest.mark.filterwarnings(r"ignore:The \'geom_almost_equals\(\)\' method is deprecat")
@pytest.mark.filterwarnings(r"ignore:The \'almost_equals\(\)\' method is deprecated")
def test_predicates_vector_vector(attr, args):
na_value = False
empty_value = True if attr == "disjoint" else False
@@ -449,17 +467,12 @@ def test_binary_geo_scalar(attr):
"is_simple",
"has_z",
# for is_ring we raise a warning about the value for Polygon changing
pytest.param(
"is_ring",
marks=[
pytest.mark.filterwarnings("ignore:is_ring:FutureWarning"),
],
),
"is_ring",
],
)
def test_unary_predicates(attr):
na_value = False
if attr == "is_simple" and geos_version < (3, 8) and not compat.USE_PYGEOS:
if attr == "is_simple" and geos_version < (3, 8):
# poly.is_simple raises an error for empty polygon for GEOS < 3.8
with pytest.raises(Exception): # noqa: B017
T.is_simple
@@ -471,40 +484,17 @@ def test_unary_predicates(attr):
result = getattr(V, attr)
if attr == "is_simple" and geos_version < (3, 8):
# poly.is_simple raises an error for empty polygon for GEOS < 3.8
# with shapely, pygeos always returns False for all GEOS versions
if attr == "is_ring":
expected = [
getattr(t, attr) if t is not None and not t.is_empty else na_value
getattr(t, attr) if t is not None and t.exterior is not None else na_value
for t in vals
]
elif attr == "is_ring":
expected = [
getattr(t.exterior, attr)
if t is not None and t.exterior is not None
else na_value
for t in vals
]
# empty Linearring.is_ring gives False with Shapely < 2.0
if compat.USE_PYGEOS and not compat.SHAPELY_GE_20:
expected[-2] = True
elif (
attr == "is_closed"
and compat.USE_PYGEOS
and compat.SHAPELY_GE_182
and not compat.SHAPELY_GE_20
):
# In shapely 1.8.2, is_closed was changed to return always True for
# Polygon/MultiPolygon, while PyGEOS returns always False
expected = [False] * len(vals)
else:
expected = [getattr(t, attr) if t is not None else na_value for t in vals]
assert result.tolist() == expected
# for is_ring we raise a warning about the value for Polygon changing
@pytest.mark.filterwarnings("ignore:is_ring:FutureWarning")
def test_is_ring():
g = [
shapely.geometry.LinearRing([(0, 0), (1, 1), (1, -1)]),
@@ -514,11 +504,7 @@ def test_is_ring():
shapely.wkt.loads("POLYGON EMPTY"),
None,
]
expected = [True, False, True, True, True, False]
if not compat.USE_PYGEOS and not compat.SHAPELY_GE_20:
# empty polygon is_ring gives False with Shapely < 2.0
expected[-2] = False
expected = [True, False, True, False, False, False]
result = from_shapely(g).is_ring
assert result.tolist() == expected
@@ -561,9 +547,11 @@ def test_binary_distance():
# vector - vector
result = P[: len(T)].distance(T[::-1])
expected = [
getattr(p, attr)(t)
if not ((t is None or t.is_empty) or (p is None or p.is_empty))
else na_value
(
getattr(p, attr)(t)
if not ((t is None or t.is_empty) or (p is None or p.is_empty))
else na_value
)
for t, p in zip(triangles[::-1], points)
]
np.testing.assert_allclose(result, expected)
@@ -620,9 +608,11 @@ def test_binary_project(normalized):
result = L.project(P, normalized=normalized)
expected = [
line.project(p, normalized=normalized)
if line is not None and p is not None
else na_value
(
line.project(p, normalized=normalized)
if line is not None and p is not None
else na_value
)
for p, line in zip(points, lines)
]
np.testing.assert_allclose(result, expected)
@@ -632,16 +622,15 @@ def test_binary_project(normalized):
@pytest.mark.parametrize("join_style", [JOIN_STYLE.round, JOIN_STYLE.bevel])
@pytest.mark.parametrize("resolution", [16, 25])
def test_buffer(resolution, cap_style, join_style):
if compat.USE_PYGEOS:
# TODO(pygeos) need to further investigate why this test fails
if cap_style == 1 and join_style == 3:
pytest.skip("failing TODO")
na_value = None
expected = [
p.buffer(0.1, resolution=resolution, cap_style=cap_style, join_style=join_style)
if p is not None
else na_value
(
p.buffer(
0.1, resolution=resolution, cap_style=cap_style, join_style=join_style
)
if p is not None
else na_value
)
for p in points
]
result = P.buffer(
@@ -676,10 +665,32 @@ def test_unary_union():
shapely.geometry.Polygon([(0, 0), (1, 0), (1, 1)]),
]
G = from_shapely(geoms)
u = G.unary_union()
with pytest.warns(
DeprecationWarning, match="The 'unary_union' attribute is deprecated"
):
u = G.unary_union()
expected = shapely.geometry.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
assert u.equals(expected)
assert u.equals(G.union_all())
def test_union_all():
geoms = [
shapely.geometry.Polygon([(0, 0), (0, 1), (1, 1)]),
shapely.geometry.Polygon([(0, 0), (1, 0), (1, 1)]),
]
G = from_shapely(geoms)
u = G.union_all()
expected = shapely.geometry.Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
assert u.equals(expected)
u_cov = G.union_all(method="coverage")
assert u_cov.equals(expected)
with pytest.raises(ValueError, match="Method 'invalid' not recognized."):
G.union_all(method="invalid")
@pytest.mark.parametrize(
@@ -810,7 +821,7 @@ def test_setitem(item):
def test_equality_ops():
with pytest.raises(ValueError):
P[:5] == P[:7]
_ = P[:5] == P[:7]
a1 = from_shapely([points[1], points[2], points[3]])
a2 = from_shapely([points[1], points[0], points[3]])
@@ -833,7 +844,7 @@ def test_equality_ops():
def test_dir():
assert "contains" in dir(P)
assert "data" in dir(P)
assert "to_numpy" in dir(P)
def test_chaining():
@@ -894,6 +905,7 @@ def test_astype_multipolygon():
assert result[0] == multi_poly.wkt[:10]
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_check_crs():
t1 = T.copy()
t1.crs = 4326
@@ -902,6 +914,7 @@ def test_check_crs():
assert _check_crs(t1, T, allow_none=True) is True
@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
def test_crs_mismatch_warn():
t1 = T.copy()
t2 = T.copy()
@@ -921,6 +934,14 @@ def test_crs_mismatch_warn():
_crs_mismatch_warn(t1, T)
@pytest.mark.skipif(HAS_PYPROJ, reason="pyproj installed")
def test_missing_pyproj():
with pytest.warns(UserWarning, match="Cannot set the CRS, falling back to None"):
t = T.copy()
t.crs = 4326
assert t.crs is None
@pytest.mark.parametrize("NA", [None, np.nan])
def test_isna(NA):
t1 = T.copy()
@@ -948,6 +969,24 @@ def test_unique_has_crs():
assert t.unique().crs == t.crs
@pytest.mark.skipif(HAS_PYPROJ, reason="pyproj installed")
def test_to_crs_pyproj_error():
t = T.copy()
t.crs = 4326
with pytest.raises(
ImportError, match="The 'pyproj' package is required for to_crs"
):
t.to_crs(3857)
@pytest.mark.skipif(HAS_PYPROJ, reason="pyproj installed")
def test_estimate_utm_crs_pyproj_error():
with pytest.raises(
ImportError, match="The 'pyproj' package is required for estimate_utm_crs"
):
T.estimate_utm_crs()
class TestEstimateUtmCrs:
def setup_method(self):
self.esb = shapely.geometry.Point(-73.9847, 40.7484)
@@ -955,15 +994,21 @@ class TestEstimateUtmCrs:
self.landmarks = from_shapely([self.esb, self.sol], crs="epsg:4326")
def test_estimate_utm_crs__geographic(self):
assert self.landmarks.estimate_utm_crs() == CRS("EPSG:32618")
assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918")
pyproj = pytest.importorskip("pyproj")
assert self.landmarks.estimate_utm_crs() == pyproj.CRS("EPSG:32618")
assert self.landmarks.estimate_utm_crs("NAD83") == pyproj.CRS("EPSG:26918")
def test_estimate_utm_crs__projected(self):
assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == CRS(
pyproj = pytest.importorskip("pyproj")
assert self.landmarks.to_crs("EPSG:3857").estimate_utm_crs() == pyproj.CRS(
"EPSG:32618"
)
def test_estimate_utm_crs__antimeridian(self):
pyproj = pytest.importorskip("pyproj")
antimeridian = from_shapely(
[
shapely.geometry.Point(1722483.900174921, 5228058.6143420935),
@@ -971,15 +1016,19 @@ class TestEstimateUtmCrs:
],
crs="EPSG:3851",
)
assert antimeridian.estimate_utm_crs() == CRS("EPSG:32760")
assert antimeridian.estimate_utm_crs() == pyproj.CRS("EPSG:32760")
def test_estimate_utm_crs__out_of_bounds(self):
pytest.importorskip("pyproj")
with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
from_shapely(
[shapely.geometry.Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326"
).estimate_utm_crs()
def test_estimate_utm_crs__missing_crs(self):
pytest.importorskip("pyproj")
with pytest.raises(RuntimeError, match="crs must be set"):
from_shapely(
[shapely.geometry.Polygon([(0, 90), (1, 90), (2, 90)])]

View File

@@ -1,7 +1,7 @@
import pytest
from geopandas._compat import import_optional_dependency
import pytest
def test_import_optional_dependency_present():
# pandas is not optional, but we know it is present

Some files were not shown because too many files have changed in this diff Show More