venv
@@ -2,7 +2,6 @@ from packaging.version import Version

import pyarrow

_ERROR_MSG = """\
Disallowed deserialization of 'arrow.py_extension_type':
storage_type = {storage_type}
@@ -1,19 +1,31 @@
from packaging.version import Version
import json
import warnings

from packaging.version import Version

import numpy as np
from pandas import DataFrame, Series

import geopandas._compat as compat
from geopandas._compat import import_optional_dependency
from geopandas.array import from_wkb
from geopandas import GeoDataFrame
import shapely

import geopandas
from geopandas import GeoDataFrame
from geopandas._compat import import_optional_dependency
from geopandas.array import from_shapely, from_wkb

from .file import _expand_user

METADATA_VERSION = "1.0.0"
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0"]
SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0", "1.1.0"]
GEOARROW_ENCODINGS = [
    "point",
    "linestring",
    "polygon",
    "multipoint",
    "multilinestring",
    "multipolygon",
]
SUPPORTED_ENCODINGS = ["WKB"] + GEOARROW_ENCODINGS

# reference: https://github.com/opengeospatial/geoparquet

# Metadata structure:

@@ -68,7 +80,40 @@ def _remove_id_from_member_of_ensembles(json_dict):
                member.pop("id", None)


def _create_metadata(df, schema_version=None):
# type ids 0 to 7
_geometry_type_names = [
    "Point",
    "LineString",
    "LineString",
    "Polygon",
    "MultiPoint",
    "MultiLineString",
    "MultiPolygon",
    "GeometryCollection",
]
_geometry_type_names += [geom_type + " Z" for geom_type in _geometry_type_names]


def _get_geometry_types(series):
    """
    Get unique geometry types from a GeoSeries.
    """
    arr_geometry_types = shapely.get_type_id(series.array._data)
    # ensure to include "... Z" for 3D geometries
    has_z = shapely.has_z(series.array._data)
    arr_geometry_types[has_z] += 8

    geometry_types = Series(arr_geometry_types).unique().tolist()
    # drop missing values (shapely.get_type_id returns -1 for those)
    if -1 in geometry_types:
        geometry_types.remove(-1)

    return sorted([_geometry_type_names[idx] for idx in geometry_types])
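The helper leans on shapely's integer type ids (0 to 7), shifting 3D geometries up by 8 so they index into the " Z"-suffixed half of _geometry_type_names. A self-contained sketch of the same trick (toy WKT input, not from the commit):

import shapely

geoms = shapely.from_wkt(["POINT (0 0)", "POINT Z (0 0 1)", "LINESTRING (0 0, 1 1)"])
type_ids = shapely.get_type_id(geoms)  # array([0, 0, 1])
type_ids[shapely.has_z(geoms)] += 8    # the 3D point now maps to "Point Z"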
def _create_metadata(
    df, schema_version=None, geometry_encoding=None, write_covering_bbox=False
):
    """Create and encode geo metadata dict.

    Parameters
@@ -77,13 +122,22 @@ def _create_metadata(df, schema_version=None):
    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', None}
        GeoParquet specification version; if not provided will default to
        latest supported version.
    write_covering_bbox : bool, default False
        Writes the bounding box column for each row entry with column
        name 'bbox'. Writing a bbox column can be computationally
        expensive, hence the default setting is False.

    Returns
    -------
    dict
    """
    schema_version = schema_version or METADATA_VERSION
    if schema_version is None:
        if geometry_encoding and any(
            encoding != "WKB" for encoding in geometry_encoding.values()
        ):
            schema_version = "1.1.0"
        else:
            schema_version = METADATA_VERSION

    if schema_version not in SUPPORTED_VERSIONS:
        raise ValueError(
@@ -94,7 +148,8 @@ def _create_metadata(df, schema_version=None):
    column_metadata = {}
    for col in df.columns[df.dtypes == "geometry"]:
        series = df[col]
        geometry_types = sorted(Series(series.geom_type.unique()).dropna())
        geometry_types = _get_geometry_types(series)
        if schema_version[0] == "0":
            geometry_types_name = "geometry_type"
            if len(geometry_types) == 1:
@@ -111,7 +166,7 @@ def _create_metadata(df, schema_version=None):
            _remove_id_from_member_of_ensembles(crs)

        column_metadata[col] = {
            "encoding": "WKB",
            "encoding": geometry_encoding[col],
            "crs": crs,
            geometry_types_name: geometry_types,
        }
@@ -121,10 +176,20 @@ def _create_metadata(df, schema_version=None):
            # don't add bbox with NaNs for empty / all-NA geometry column
            column_metadata[col]["bbox"] = bbox

        if write_covering_bbox:
            column_metadata[col]["covering"] = {
                "bbox": {
                    "xmin": ["bbox", "xmin"],
                    "ymin": ["bbox", "ymin"],
                    "xmax": ["bbox", "xmax"],
                    "ymax": ["bbox", "ymax"],
                },
            }

    return {
        "primary_column": df._geometry_column_name,
        "columns": column_metadata,
        "version": schema_version or METADATA_VERSION,
        "version": schema_version,
        "creator": {"library": "geopandas", "version": geopandas.__version__},
    }
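With write_covering_bbox=True, the per-column metadata gains a "covering" entry whose paths point at the struct fields of the bbox column. A sketch of what the encoded "geo" metadata then looks like for a single WKB column (values illustrative, not from the commit):

geo_metadata = {
    "primary_column": "geometry",
    "columns": {
        "geometry": {
            "encoding": "WKB",
            "crs": None,  # or a PROJJSON dict
            "geometry_types": ["Polygon"],
            "covering": {
                "bbox": {
                    "xmin": ["bbox", "xmin"],
                    "ymin": ["bbox", "ymin"],
                    "xmax": ["bbox", "xmax"],
                    "ymax": ["bbox", "ymax"],
                }
            },
        }
    },
    "version": "1.1.0",
    "creator": {"library": "geopandas", "version": "1.0.0"},
}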
@@ -188,7 +253,7 @@ def _validate_dataframe(df):
        raise ValueError("Index level names must be strings")


def _validate_metadata(metadata):
def _validate_geo_metadata(metadata):
    """Validate geo metadata.
    Must not be empty, and must contain the structure specified above.

@@ -232,8 +297,12 @@ def _validate_metadata(metadata):
                "'{key}' for column '{col}'".format(key=key, col=col)
            )

        if column_metadata["encoding"] != "WKB":
            raise ValueError("Only WKB geometry encoding is supported")
        if column_metadata["encoding"] not in SUPPORTED_ENCODINGS:
            raise ValueError(
                "Only WKB geometry encoding or one of the native encodings "
                f"({GEOARROW_ENCODINGS!r}) are supported, "
                f"got: {column_metadata['encoding']}"
            )

        if column_metadata.get("edges", "planar") == "spherical":
            warnings.warn(
@@ -245,37 +314,59 @@ def _validate_metadata(metadata):
                stacklevel=4,
            )

        if "covering" in column_metadata:
            covering = column_metadata["covering"]
            if "bbox" in covering:
                bbox = covering["bbox"]
                for var in ["xmin", "ymin", "xmax", "ymax"]:
                    if var not in bbox.keys():
                        raise ValueError("Metadata for bbox column is malformed.")
def _geopandas_to_arrow(df, index=None, schema_version=None):


def _geopandas_to_arrow(
    df,
    index=None,
    geometry_encoding="WKB",
    schema_version=None,
    write_covering_bbox=None,
):
    """
    Helper function with main, shared logic for to_parquet/to_feather.
    """
    from pyarrow import Table
    from pyarrow import StructArray

    from geopandas.io._geoarrow import geopandas_to_arrow

    _validate_dataframe(df)

    # create geo metadata before altering incoming data frame
    geo_metadata = _create_metadata(df, schema_version=schema_version)
    if schema_version is not None:
        if geometry_encoding != "WKB" and schema_version != "1.1.0":
            raise ValueError(
                "'geoarrow' encoding is only supported with schema version >= 1.1.0"
            )

    kwargs = {}
    if compat.USE_SHAPELY_20:
        kwargs = {"flavor": "iso"}
    else:
        for col in df.columns[df.dtypes == "geometry"]:
            series = df[col]
            if series.has_z.any():
                warnings.warn(
                    "The GeoDataFrame contains 3D geometries, and when using "
                    "shapely < 2.0, such geometries will be written not exactly "
                    "following the GeoParquet spec (not using ISO WKB). For "
                    "most use cases this should not be a problem (GeoPandas can "
                    "read such files fine).",
                    stacklevel=2,
                )
                break
    df = df.to_wkb(**kwargs)
    table, geometry_encoding_dict = geopandas_to_arrow(
        df, geometry_encoding=geometry_encoding, index=index, interleaved=False
    )
    geo_metadata = _create_metadata(
        df,
        schema_version=schema_version,
        geometry_encoding=geometry_encoding_dict,
        write_covering_bbox=write_covering_bbox,
    )

    table = Table.from_pandas(df, preserve_index=index)
    if write_covering_bbox:
        if "bbox" in df.columns:
            raise ValueError(
                "An existing column 'bbox' already exists in the dataframe. "
                "Please rename to write covering bbox."
            )
        bounds = df.bounds
        bbox_array = StructArray.from_arrays(
            [bounds["minx"], bounds["miny"], bounds["maxx"], bounds["maxy"]],
            names=["xmin", "ymin", "xmax", "ymax"],
        )
        table = table.append_column("bbox", bbox_array)

    # Store geopandas specific file-level metadata
    # This must be done AFTER creating the table or it is not persisted
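The covering bbox is stored as a single struct column built from the per-row bounds. A self-contained sketch of that construction (toy data, not from the commit):

import pyarrow as pa
import geopandas as gpd
from shapely.geometry import box

df = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1), box(2, 2, 3, 3)])
bounds = df.bounds  # DataFrame with minx/miny/maxx/maxy columns
bbox_array = pa.StructArray.from_arrays(
    [bounds["minx"], bounds["miny"], bounds["maxx"], bounds["maxy"]],
    names=["xmin", "ymin", "xmax", "ymax"],
)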
@@ -286,7 +377,14 @@ def _geopandas_to_arrow(df, index=None, schema_version=None):


def _to_parquet(
    df, path, index=None, compression="snappy", schema_version=None, **kwargs
    df,
    path,
    index=None,
    compression="snappy",
    geometry_encoding="WKB",
    schema_version=None,
    write_covering_bbox=False,
    **kwargs,
):
    """
    Write a GeoDataFrame to the Parquet format.

@@ -312,9 +410,17 @@ def _to_parquet(
        output except `RangeIndex` which is stored as metadata only.
    compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
        Name of the compression to use. Use ``None`` for no compression.
    geometry_encoding : {'WKB', 'geoarrow'}, default 'WKB'
        The encoding to use for the geometry columns. Defaults to "WKB"
        for maximum interoperability. Specify "geoarrow" to use one of the
        native GeoArrow-based single-geometry type encodings.
    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
        GeoParquet specification version; if not provided will default to
        latest supported version.
    write_covering_bbox : bool, default False
        Writes the bounding box column for each row entry with column
        name 'bbox'. Writing a bbox column can be computationally
        expensive, hence the default setting is False.
    **kwargs
        Additional keyword arguments passed to pyarrow.parquet.write_table().
    """
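Taken together, the new keywords let a caller opt in to the GeoParquet 1.1.0 features at write time. A hedged usage sketch (file name illustrative):

import geopandas as gpd

gdf = gpd.GeoDataFrame(geometry=gpd.points_from_xy([0, 1], [0, 1]))
# native GeoArrow point encoding plus a covering bbox column;
# a non-WKB encoding implies schema version 1.1.0
gdf.to_parquet("points.parquet", geometry_encoding="geoarrow", write_covering_bbox=True)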
@@ -322,19 +428,14 @@ def _to_parquet(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )

    if kwargs and "version" in kwargs and kwargs["version"] is not None:
        if schema_version is None and kwargs["version"] in SUPPORTED_VERSIONS:
            warnings.warn(
                "the `version` parameter has been replaced with `schema_version`. "
                "`version` will instead be passed directly to the underlying "
                "parquet writer unless `version` is 0.1.0 or 0.4.0.",
                FutureWarning,
                stacklevel=2,
            )
            schema_version = kwargs.pop("version")

    path = _expand_user(path)
    table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
    table = _geopandas_to_arrow(
        df,
        index=index,
        geometry_encoding=geometry_encoding,
        schema_version=schema_version,
        write_covering_bbox=write_covering_bbox,
    )
    parquet.write_table(table, path, compression=compression, **kwargs)


@@ -379,47 +480,26 @@ def _to_feather(df, path, index=None, compression=None, schema_version=None, **k
    if Version(pyarrow.__version__) < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    if kwargs and "version" in kwargs and kwargs["version"] is not None:
        if schema_version is None and kwargs["version"] in SUPPORTED_VERSIONS:
            warnings.warn(
                "the `version` parameter has been replaced with `schema_version`. "
                "`version` will instead be passed directly to the underlying "
                "feather writer unless `version` is 0.1.0 or 0.4.0.",
                FutureWarning,
                stacklevel=2,
            )
            schema_version = kwargs.pop("version")

    path = _expand_user(path)
    table = _geopandas_to_arrow(df, index=index, schema_version=schema_version)
    feather.write_feather(table, path, compression=compression, **kwargs)


def _arrow_to_geopandas(table, metadata=None):
def _arrow_to_geopandas(table, geo_metadata=None):
    """
    Helper function with main, shared logic for read_parquet/read_feather.
    """
    df = table.to_pandas()

    metadata = metadata or table.schema.metadata

    if metadata is None or b"geo" not in metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead."""
        )

    try:
        metadata = _decode_metadata(metadata.get(b"geo", b""))

    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    _validate_metadata(metadata)
    if geo_metadata is None:
        # Note: this path of not passing metadata is also used by dask-geopandas
        geo_metadata = _validate_and_decode_metadata(table.schema.metadata)

    # Find all geometry columns that were read from the file. May
    # be a subset if 'columns' parameter is used.
    geometry_columns = df.columns.intersection(metadata["columns"])
    geometry_columns = [
        col for col in geo_metadata["columns"] if col in table.column_names
    ]
    result_column_names = list(table.slice(0, 0).to_pandas().columns)
    geometry_columns.sort(key=result_column_names.index)

    if not len(geometry_columns):
        raise ValueError(
@@ -428,7 +508,7 @@ def _arrow_to_geopandas(table, metadata=None):
            use pandas.read_parquet/read_feather() instead."""
        )

    geometry = metadata["primary_column"]
    geometry = geo_metadata["primary_column"]

    # Missing geometry likely indicates a subset of columns was read;
    # promote the first available geometry to the primary geometry.
@@ -443,9 +523,12 @@ def _arrow_to_geopandas(table, metadata=None):
            stacklevel=3,
        )

    table_attr = table.drop(geometry_columns)
    df = table_attr.to_pandas()

    # Convert the WKB columns that are present back to geometry.
    for col in geometry_columns:
        col_metadata = metadata["columns"][col]
        col_metadata = geo_metadata["columns"][col]
        if "crs" in col_metadata:
            crs = col_metadata["crs"]
            if isinstance(crs, dict):
@@ -455,7 +538,19 @@ def _arrow_to_geopandas(table, metadata=None):
            # OGC:CRS84
            crs = "OGC:CRS84"

        df[col] = from_wkb(df[col].values, crs=crs)
        if col_metadata["encoding"] == "WKB":
            geom_arr = from_wkb(np.array(table[col]), crs=crs)
        else:
            from geopandas.io._geoarrow import construct_shapely_array

            geom_arr = from_shapely(
                construct_shapely_array(
                    table[col].combine_chunks(), "geoarrow." + col_metadata["encoding"]
                ),
                crs=crs,
            )

        df.insert(result_column_names.index(col), col, geom_arr)

    return GeoDataFrame(df, geometry=geometry)

@@ -521,7 +616,59 @@ def _ensure_arrow_fs(filesystem):
    return filesystem


def _read_parquet(path, columns=None, storage_options=None, **kwargs):
def _validate_and_decode_metadata(metadata):
    if metadata is None or b"geo" not in metadata:
        raise ValueError(
            """Missing geo metadata in Parquet/Feather file.
            Use pandas.read_parquet/read_feather() instead."""
        )

    # check for malformed metadata
    try:
        decoded_geo_metadata = _decode_metadata(metadata.get(b"geo", b""))
    except (TypeError, json.decoder.JSONDecodeError):
        raise ValueError("Missing or malformed geo metadata in Parquet/Feather file")

    _validate_geo_metadata(decoded_geo_metadata)
    return decoded_geo_metadata


def _read_parquet_schema_and_metadata(path, filesystem):
    """
    Opening the Parquet file/dataset a first time to get the schema and metadata.

    TODO: we should look into how we can reuse opened dataset for reading the
    actual data, to avoid discovering the dataset twice (problem right now is
    that the ParquetDataset interface doesn't allow passing the filters on read)

    """
    import pyarrow
    from pyarrow import parquet

    kwargs = {}
    if Version(pyarrow.__version__) < Version("15.0.0"):
        kwargs = dict(use_legacy_dataset=False)

    try:
        schema = parquet.ParquetDataset(path, filesystem=filesystem, **kwargs).schema
    except Exception:
        schema = parquet.read_schema(path, filesystem=filesystem)

    metadata = schema.metadata

    # read metadata separately to get the raw Parquet FileMetaData metadata
    # (pyarrow doesn't properly expose those in schema.metadata for files
    # created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
    if metadata is None or b"geo" not in metadata:
        try:
            metadata = parquet.read_metadata(path, filesystem=filesystem).metadata
        except Exception:
            pass

    return schema, metadata


def _read_parquet(path, columns=None, storage_options=None, bbox=None, **kwargs):
    """
    Load a Parquet object from the file path, returning a GeoDataFrame.

@@ -565,8 +712,13 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
        both ``pyarrow.fs`` and ``fsspec`` (e.g. "s3://") then the ``pyarrow.fs``
        filesystem is preferred. Provide the instantiated fsspec filesystem using
        the ``filesystem`` keyword if you wish to use its implementation.
    bbox : tuple, optional
        Bounding box to be used to filter selection from geoparquet data. This
        is only usable if the data was saved with the bbox covering metadata.
        Input is of the tuple format (xmin, ymin, xmax, ymax).

    **kwargs
        Any additional kwargs passed to pyarrow.parquet.read_table().
        Any additional kwargs passed to :func:`pyarrow.parquet.read_table`.

    Returns
    -------
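The new bbox keyword turns the covering metadata into a pyarrow row-group filter at read time. A hedged usage sketch (path and coordinates illustrative):

import geopandas as gpd

# only rows whose covering bbox intersects this window are materialized
gdf = gpd.read_parquet("points.parquet", bbox=(0.0, 0.0, 10.0, 10.0))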
@@ -595,29 +747,36 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
    filesystem, path = _get_filesystem_path(
        path, filesystem=filesystem, storage_options=storage_options
    )

    path = _expand_user(path)
    schema, metadata = _read_parquet_schema_and_metadata(path, filesystem)

    geo_metadata = _validate_and_decode_metadata(metadata)

    bbox_filter = (
        _get_parquet_bbox_filter(geo_metadata, bbox) if bbox is not None else None
    )

    if_bbox_column_exists = _check_if_covering_in_geo_metadata(geo_metadata)

    # by default, bbox column is not read in, so must specify which
    # columns are read in if it exists.
    if not columns and if_bbox_column_exists:
        columns = _get_non_bbox_columns(schema, geo_metadata)

    # if both bbox and filters kwargs are used, must splice together.
    if "filters" in kwargs:
        filters_kwarg = kwargs.pop("filters")
        filters = _splice_bbox_and_filters(filters_kwarg, bbox_filter)
    else:
        filters = bbox_filter

    kwargs["use_pandas_metadata"] = True
    table = parquet.read_table(path, columns=columns, filesystem=filesystem, **kwargs)

    # read metadata separately to get the raw Parquet FileMetaData metadata
    # (pyarrow doesn't properly expose those in schema.metadata for files
    # created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688)
    metadata = None
    if table.schema.metadata is None or b"geo" not in table.schema.metadata:
        try:
            # read_metadata does not accept a filesystem keyword, so need to
            # handle this manually (https://issues.apache.org/jira/browse/ARROW-16719)
            if filesystem is not None:
                pa_filesystem = _ensure_arrow_fs(filesystem)
                with pa_filesystem.open_input_file(path) as source:
                    metadata = parquet.read_metadata(source).metadata
            else:
                metadata = parquet.read_metadata(path).metadata
        except Exception:
            pass
    table = parquet.read_table(
        path, columns=columns, filesystem=filesystem, filters=filters, **kwargs
    )

    return _arrow_to_geopandas(table, metadata)
    return _arrow_to_geopandas(table, geo_metadata)


def _read_feather(path, columns=None, **kwargs):
@@ -677,11 +836,78 @@ def _read_feather(path, columns=None, **kwargs):
    )
    # TODO move this into `import_optional_dependency`
    import pyarrow

    import geopandas.io._pyarrow_hotfix  # noqa: F401

    if Version(pyarrow.__version__) < Version("0.17.0"):
        raise ImportError("pyarrow >= 0.17 required for Feather support")

    path = _expand_user(path)

    table = feather.read_table(path, columns=columns, **kwargs)
    return _arrow_to_geopandas(table)


def _get_parquet_bbox_filter(geo_metadata, bbox):
    primary_column = geo_metadata["primary_column"]

    if _check_if_covering_in_geo_metadata(geo_metadata):
        bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
        return _convert_bbox_to_parquet_filter(bbox, bbox_column_name)

    elif geo_metadata["columns"][primary_column]["encoding"] == "point":
        import pyarrow.compute as pc

        return (
            (pc.field((primary_column, "x")) >= bbox[0])
            & (pc.field((primary_column, "x")) <= bbox[2])
            & (pc.field((primary_column, "y")) >= bbox[1])
            & (pc.field((primary_column, "y")) <= bbox[3])
        )

    else:
        raise ValueError(
            "Specifying 'bbox' not supported for this Parquet file (it should either "
            "have a bbox covering column or use 'point' encoding)."
        )


def _convert_bbox_to_parquet_filter(bbox, bbox_column_name):
    import pyarrow.compute as pc

    return ~(
        (pc.field((bbox_column_name, "xmin")) > bbox[2])
        | (pc.field((bbox_column_name, "ymin")) > bbox[3])
        | (pc.field((bbox_column_name, "xmax")) < bbox[0])
        | (pc.field((bbox_column_name, "ymax")) < bbox[1])
    )
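_convert_bbox_to_parquet_filter expresses "intersects" as the negation of the four ways two boxes can be disjoint, which only needs the struct's min/max fields. The same predicate in plain Python, for intuition (a sketch, not from the commit):

def boxes_intersect(a, b):
    # a, b are (xmin, ymin, xmax, ymax); intersection = not disjoint
    return not (a[0] > b[2] or a[1] > b[3] or a[2] < b[0] or a[3] < b[1])

assert boxes_intersect((0, 0, 2, 2), (1, 1, 3, 3))
assert not boxes_intersect((0, 0, 1, 1), (2, 2, 3, 3))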
def _check_if_covering_in_geo_metadata(geo_metadata):
    primary_column = geo_metadata["primary_column"]
    return "covering" in geo_metadata["columns"][primary_column].keys()


def _get_bbox_encoding_column_name(geo_metadata):
    primary_column = geo_metadata["primary_column"]
    return geo_metadata["columns"][primary_column]["covering"]["bbox"]["xmin"][0]


def _get_non_bbox_columns(schema, geo_metadata):

    bbox_column_name = _get_bbox_encoding_column_name(geo_metadata)
    columns = schema.names
    if bbox_column_name in columns:
        columns.remove(bbox_column_name)
    return columns


def _splice_bbox_and_filters(kwarg_filters, bbox_filter):
    parquet = import_optional_dependency(
        "pyarrow.parquet", extra="pyarrow is required for Parquet support."
    )
    if bbox_filter is None:
        return kwarg_filters

    filters_expression = parquet.filters_to_expression(kwarg_filters)
    return bbox_filter & filters_expression
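_splice_bbox_and_filters relies on pyarrow's ability to convert list-style filters into an Expression, so a user-supplied filters kwarg and the bbox predicate can be AND-ed together. A hedged sketch of the combination (column names illustrative):

import pyarrow.compute as pc
from pyarrow import parquet

user_filters = [("population", ">", 1000)]          # DNF-style filters kwarg
bbox_filter = ~(pc.field(("bbox", "xmin")) > 10.0)  # simplified bbox predicate
combined = bbox_filter & parquet.filters_to_expression(user_filters)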
@@ -1,30 +1,33 @@
from __future__ import annotations

import os
import urllib.request
import warnings
from io import IOBase
from packaging.version import Version
from pathlib import Path
import warnings

# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative

import numpy as np
import pandas as pd
from pandas.api.types import is_integer_dtype

import pyproj
import shapely
from shapely.geometry import mapping
from shapely.geometry.base import BaseGeometry

from geopandas import GeoDataFrame, GeoSeries

# Adapted from pandas.io.common
from urllib.parse import urlparse as parse_url
from urllib.parse import uses_netloc, uses_params, uses_relative
import urllib.request

from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20
from geopandas.io.util import vsi_path

_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")
# file:// URIs are supported by fiona/pyogrio -> don't already open + read the file here
_VALID_URLS.discard("file")


fiona = None
fiona_env = None
fiona_import_error = None
@@ -55,6 +58,7 @@ def _import_fiona():
            FIONA_GE_19 = Version(Version(fiona.__version__).base_version) >= Version(
                "1.9.0"
            )

        except ImportError as err:
            fiona = False
            fiona_import_error = str(err)
@@ -71,13 +75,14 @@ def _import_pyogrio():
    if pyogrio is None:
        try:
            import pyogrio

        except ImportError as err:
            pyogrio = False
            pyogrio_import_error = str(err)


def _check_fiona(func):
    if fiona is None:
    if not fiona:
        raise ImportError(
            f"the {func} requires the 'fiona' package, but it is not installed or does "
            f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}"
@@ -85,7 +90,7 @@ def _check_fiona(func):


def _check_pyogrio(func):
    if pyogrio is None:
    if not pyogrio:
        raise ImportError(
            f"the {func} requires the 'pyogrio' package, but it is not installed "
            "or does not import correctly."
@@ -93,35 +98,49 @@ def _check_pyogrio(func):
        )


def _check_metadata_supported(metadata: str | None, engine: str, driver: str) -> None:
    if metadata is None:
        return
    if driver != "GPKG":
        raise NotImplementedError(
            "The 'metadata' keyword is only supported for the GPKG driver."
        )

    if engine == "fiona" and not FIONA_GE_19:
        raise NotImplementedError(
            "The 'metadata' keyword is only supported for Fiona >= 1.9."
        )


def _check_engine(engine, func):
    # if not specified through keyword or option, then default to "fiona" if
    # installed, otherwise try pyogrio
    # if not specified through keyword or option, then default to "pyogrio" if
    # installed, otherwise try fiona
    if engine is None:
        import geopandas

        engine = geopandas.options.io_engine

        if engine is None:
            _import_fiona()
            if fiona:
                engine = "fiona"
            _import_pyogrio()
            if pyogrio:
                engine = "pyogrio"
            else:
                _import_pyogrio()
                if pyogrio:
                    engine = "pyogrio"
                _import_fiona()
                if fiona:
                    engine = "fiona"

    if engine == "fiona":
        _import_fiona()
        _check_fiona(func)
    elif engine == "pyogrio":
    if engine == "pyogrio":
        _import_pyogrio()
        _check_pyogrio(func)
    elif engine == "fiona":
        _import_fiona()
        _check_fiona(func)
    elif engine is None:
        raise ImportError(
            f"The {func} requires the 'pyogrio' or 'fiona' package, "
            "but neither is installed or imports correctly."
            f"\nImporting fiona resulted in: {fiona_import_error}"
            f"\nImporting pyogrio resulted in: {pyogrio_import_error}"
            f"\nImporting fiona resulted in: {fiona_import_error}"
        )

    return engine
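The default engine resolution now prefers pyogrio over fiona, and still honors the global option first. A hedged sketch of how a caller pins the engine (file name illustrative):

import geopandas

# pin globally...
geopandas.options.io_engine = "pyogrio"

# ...or per call
gdf = geopandas.read_file("data.gpkg", engine="pyogrio")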
@@ -168,31 +187,12 @@ def _is_url(url):
        return False


def _is_zip(path):
    """Check if a given path is a zipfile"""
    parsed = fiona.path.ParsedPath.from_uri(path)
    return (
        parsed.archive.endswith(".zip")
        if parsed.archive
        else parsed.path.endswith(".zip")
    )


def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs):
def _read_file(
    filename, bbox=None, mask=None, columns=None, rows=None, engine=None, **kwargs
):
    """
    Returns a GeoDataFrame from a file or URL.

    .. note::

        GeoPandas currently defaults to use Fiona as the engine in ``read_file``.
        However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
        pyogrio can provide a significant speedup compared to Fiona. We recommend to
        already install pyogrio and specify the engine by using the ``engine`` keyword
        (``geopandas.read_file(..., engine="pyogrio")``), or by setting the default for
        the ``engine`` keyword globally with::

            geopandas.options.io_engine = "pyogrio"

    Parameters
    ----------
    filename : str, path object or file-like object
@@ -209,21 +209,28 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
        Filter for features that intersect with the given dict-like geojson
        geometry, GeoSeries, GeoDataFrame or shapely geometry.
        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        Cannot be used with bbox.
        Cannot be used with bbox. If multiple geometries are passed, this will
        first union all geometries, which may be computationally expensive.
    columns : list, optional
        List of column names to import from the data source. Column names
        must exactly match the names in the data source. To avoid reading
        any columns (besides the geometry column), pass an empty list-like.
        By default reads all columns.
    rows : int or slice, default None
        Load in specific rows by passing an integer (first `n` rows) or a
        slice() object.
    engine : str, "fiona" or "pyogrio"
    engine : str, "pyogrio" or "fiona"
        The underlying library that is used to read the file. Currently, the
        supported options are "fiona" and "pyogrio". Defaults to "fiona" if
        installed, otherwise tries "pyogrio".
        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
        installed, otherwise tries "fiona". Engine can also be set globally
        with the ``geopandas.options.io_engine`` option.
    **kwargs :
        Keyword args to be passed to the engine. In case of the "fiona" engine,
        the keyword arguments are passed to :func:`fiona.open` or
        :class:`fiona.collection.BytesCollection` when opening the file.
        For more information on possible keywords, type:
        ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine,
        the keyword arguments are passed to :func:`pyogrio.read_dataframe`.
        Keyword args to be passed to the engine. In case of the "pyogrio" engine,
        the keyword arguments are passed to :func:`pyogrio.read_dataframe`. In case
        of the "fiona" engine, the keyword arguments are passed to :func:`fiona.open`.
        For more information on possible keywords, type:
        ``import pyogrio; help(pyogrio.read_dataframe)``.


    Examples
@@ -284,7 +291,9 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
        from_bytes = True

    if engine == "pyogrio":
        return _read_file_pyogrio(filename, bbox=bbox, mask=mask, rows=rows, **kwargs)
        return _read_file_pyogrio(
            filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs
        )

    elif engine == "fiona":
        if pd.api.types.is_file_like(filename):
@@ -295,7 +304,13 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)
            path_or_bytes = filename

        return _read_file_fiona(
            path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs
            path_or_bytes,
            from_bytes,
            bbox=bbox,
            mask=mask,
            columns=columns,
            rows=rows,
            **kwargs,
        )

    else:
@@ -303,31 +318,36 @@ def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs)


def _read_file_fiona(
    path_or_bytes, from_bytes, bbox=None, mask=None, rows=None, where=None, **kwargs
    path_or_bytes,
    from_bytes,
    bbox=None,
    mask=None,
    columns=None,
    rows=None,
    where=None,
    **kwargs,
):
    if where is not None and not FIONA_GE_19:
        raise NotImplementedError("where requires fiona 1.9+")

    if columns is not None:
        if "include_fields" in kwargs:
            raise ValueError(
                "Cannot specify both 'include_fields' and 'columns' keywords"
            )
        if not FIONA_GE_19:
            raise NotImplementedError("'columns' keyword requires fiona 1.9+")
        kwargs["include_fields"] = columns
    elif "include_fields" in kwargs:
        # alias to columns, as this variable is used below to specify column order
        # in the dataframe creation
        columns = kwargs["include_fields"]

    if not from_bytes:
        # Opening a file via URL or file-like-object above automatically detects a
        # zipped file. In order to match that behavior, attempt to add a zip scheme
        # if missing.
        if _is_zip(str(path_or_bytes)):
            parsed = fiona.parse_path(str(path_or_bytes))
            if isinstance(parsed, fiona.path.ParsedPath):
                # If fiona is able to parse the path, we can safely look at the scheme
                # and update it to have a zip scheme if necessary.
                schemes = (parsed.scheme or "").split("+")
                if "zip" not in schemes:
                    parsed.scheme = "+".join(["zip"] + schemes)
                path_or_bytes = parsed.name
            elif isinstance(parsed, fiona.path.UnparsedPath) and not str(
                path_or_bytes
            ).startswith("/vsi"):
                # If fiona is unable to parse the path, it might have a Windows drive
                # scheme. Try adding zip:// to the front. If the path starts with "/vsi"
                # it is a legacy GDAL path type, so let it pass unmodified.
                path_or_bytes = "zip://" + parsed.name
        path_or_bytes = vsi_path(str(path_or_bytes))

    if from_bytes:
        reader = fiona.BytesCollection
@@ -359,7 +379,7 @@ def _read_file_fiona(
            assert len(bbox) == 4
        # handle loading the mask
        elif isinstance(mask, (GeoDataFrame, GeoSeries)):
            mask = mapping(mask.to_crs(crs).unary_union)
            mask = mapping(mask.to_crs(crs).union_all())
        elif isinstance(mask, BaseGeometry):
            mask = mapping(mask)

@@ -383,11 +403,14 @@ def _read_file_fiona(
            else:
                f_filt = features
            # get list of columns
            columns = list(features.schema["properties"])
            columns = columns or list(features.schema["properties"])
            datetime_fields = [
                k for (k, v) in features.schema["properties"].items() if v == "datetime"
            ]
            if kwargs.get("ignore_geometry", False):
            if (
                kwargs.get("ignore_geometry", False)
                or features.schema["geometry"] == "None"
            ):
                df = pd.DataFrame(
                    [record["properties"] for record in f_filt], columns=columns
                )
@@ -396,16 +419,39 @@ def _read_file_fiona(
                    f_filt, crs=crs, columns=columns + ["geometry"]
                )
            for k in datetime_fields:
                as_dt = pd.to_datetime(df[k], errors="ignore")
                # if to_datetime failed, try again for mixed timezone offsets
                if as_dt.dtype == "object":
                as_dt = None
                # plain try catch for when pandas will raise in the future
                # TODO we can tighten the exception type in future when it does
                try:
                    with warnings.catch_warnings():
                        # pandas 2.x does not yet enforce this behaviour but raises a
                        # warning -> we want to suppress this warning for our users,
                        # and do this by turning it into an error so we take the
                        # `except` code path to try again with utc=True
                        warnings.filterwarnings(
                            "error",
                            "In a future version of pandas, parsing datetimes with "
                            "mixed time zones will raise an error",
                            FutureWarning,
                        )
                        as_dt = pd.to_datetime(df[k])
                except Exception:
                    pass
                if as_dt is None or as_dt.dtype == "object":
                    # if to_datetime failed, try again for mixed timezone offsets
                    # This can still fail if there are invalid datetimes
                    as_dt = pd.to_datetime(df[k], errors="ignore", utc=True)
                    try:
                        as_dt = pd.to_datetime(df[k], utc=True)
                    except Exception:
                        pass
                # if to_datetime succeeded, round datetimes as
                # fiona only supports up to ms precision (any microseconds are
                # floating point rounding error)
                if not (as_dt.dtype == "object"):
                    df[k] = as_dt.dt.round(freq="ms")
                if as_dt is not None and not (as_dt.dtype == "object"):
                    if PANDAS_GE_20:
                        df[k] = as_dt.dt.as_unit("ms")
                    else:
                        df[k] = as_dt.dt.round(freq="ms")
            return df
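The reworked datetime handling first tries a plain pd.to_datetime (with the pandas mixed-timezone FutureWarning escalated to an error), then falls back to utc=True. The core pattern in isolation (a sketch with toy input, not from the commit):

import warnings
import pandas as pd

values = pd.Series(["2020-01-01T00:00:00+01:00", "2020-01-01T00:00:00+05:00"])
as_dt = None
try:
    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)  # force the except path
        as_dt = pd.to_datetime(values)
except Exception:
    pass
if as_dt is None or as_dt.dtype == "object":
    as_dt = pd.to_datetime(values, utc=True)  # mixed offsets -> single UTC dtype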
@@ -428,48 +474,79 @@ def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs)
            raise ValueError("slice with step is not supported")
        else:
            raise TypeError("'rows' must be an integer or a slice.")

    if bbox is not None and mask is not None:
        # match error message from Fiona
        raise ValueError("mask and bbox can not be set together")

    if bbox is not None:
        if isinstance(bbox, (GeoDataFrame, GeoSeries)):
            bbox = tuple(bbox.total_bounds)
            crs = pyogrio.read_info(path_or_bytes).get("crs")
            if isinstance(path_or_bytes, IOBase):
                path_or_bytes.seek(0)

            bbox = tuple(bbox.to_crs(crs).total_bounds)
        elif isinstance(bbox, BaseGeometry):
            bbox = bbox.bounds
        if len(bbox) != 4:
            raise ValueError("'bbox' should be a length-4 tuple.")

    if mask is not None:
        raise ValueError(
            "The 'mask' keyword is not supported with the 'pyogrio' engine. "
            "You can use 'bbox' instead."
        )
        # NOTE: mask cannot be used at same time as bbox keyword
        if isinstance(mask, (GeoDataFrame, GeoSeries)):
            crs = pyogrio.read_info(path_or_bytes).get("crs")
            if isinstance(path_or_bytes, IOBase):
                path_or_bytes.seek(0)

            mask = shapely.unary_union(mask.to_crs(crs).geometry.values)
        elif isinstance(mask, BaseGeometry):
            mask = shapely.unary_union(mask)
        elif isinstance(mask, dict) or hasattr(mask, "__geo_interface__"):
            # convert GeoJSON to shapely geometry
            mask = shapely.geometry.shape(mask)

        kwargs["mask"] = mask

    if kwargs.pop("ignore_geometry", False):
        kwargs["read_geometry"] = False

    # TODO: if bbox is not None, check its CRS vs the CRS of the file
    # translate `ignore_fields`/`include_fields` keyword for back compat with fiona
    if "ignore_fields" in kwargs and "include_fields" in kwargs:
        raise ValueError("Cannot specify both 'ignore_fields' and 'include_fields'")
    elif "ignore_fields" in kwargs:
        if kwargs.get("columns", None) is not None:
            raise ValueError(
                "Cannot specify both 'columns' and 'ignore_fields' keywords"
            )
        warnings.warn(
            "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
            "will be removed in a future release. You can use the 'columns' keyword "
            "instead to select which columns to read.",
            DeprecationWarning,
            stacklevel=3,
        )
        ignore_fields = kwargs.pop("ignore_fields")
        fields = pyogrio.read_info(path_or_bytes)["fields"]
        include_fields = [col for col in fields if col not in ignore_fields]
        kwargs["columns"] = include_fields
    elif "include_fields" in kwargs:
        # translate `include_fields` keyword for back compat with fiona engine
        if kwargs.get("columns", None) is not None:
            raise ValueError(
                "Cannot specify both 'columns' and 'include_fields' keywords"
            )
        warnings.warn(
            "The 'include_fields' and 'ignore_fields' keywords are deprecated, and "
            "will be removed in a future release. You can use the 'columns' keyword "
            "instead to select which columns to read.",
            DeprecationWarning,
            stacklevel=3,
        )
        kwargs["columns"] = kwargs.pop("include_fields")

    return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)


def read_file(*args, **kwargs):
    warnings.warn(
        "geopandas.io.file.read_file() is intended for internal "
        "use only, and will be deprecated. Use geopandas.read_file() instead.",
        FutureWarning,
        stacklevel=2,
    )

    return _read_file(*args, **kwargs)


def to_file(*args, **kwargs):
    warnings.warn(
        "geopandas.io.file.to_file() is intended for internal "
        "use only, and will be deprecated. Use GeoDataFrame.to_file() "
        "or GeoSeries.to_file() instead.",
        FutureWarning,
        stacklevel=2,
    )

    return _to_file(*args, **kwargs)


def _detect_driver(path):
    """
    Attempt to auto-detect driver based on the extension
@@ -497,25 +574,16 @@ def _to_file(
    mode="w",
    crs=None,
    engine=None,
    metadata=None,
    **kwargs,
):
    """
    Write this GeoDataFrame to an OGR data source

    A dictionary of supported OGR providers is available via:
    >>> import fiona
    >>> fiona.supported_drivers  # doctest: +SKIP

    .. note::

        GeoPandas currently defaults to use Fiona as the engine in ``to_file``.
        However, GeoPandas 1.0 will switch to use pyogrio as the default engine, since
        pyogrio can provide a significant speedup compared to Fiona. We recommend to
        already install pyogrio and specify the engine by using the ``engine`` keyword
        (``df.to_file(..., engine="pyogrio")``), or by setting the default for
        the ``engine`` keyword globally with::

            geopandas.options.io_engine = "pyogrio"
    >>> import pyogrio
    >>> pyogrio.list_drivers()  # doctest: +SKIP

    Parameters
    ----------
@@ -557,10 +625,15 @@ def _to_file(
        The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
    engine : str, "fiona" or "pyogrio"
        The underlying library that is used to write the file. Currently, the
        supported options are "fiona" and "pyogrio". Defaults to "fiona" if
        installed, otherwise tries "pyogrio".
    engine : str, "pyogrio" or "fiona"
        The underlying library that is used to write the file. Currently, the
        supported options are "pyogrio" and "fiona". Defaults to "pyogrio" if
        installed, otherwise tries "fiona". Engine can also be set globally
        with the ``geopandas.options.io_engine`` option.
    metadata : dict[str, str], default None
        Optional metadata to be stored in the file. Keys and values must be
        strings. Only supported for the "GPKG" driver
        (requires Fiona >= 1.9 or pyogrio >= 0.6).
    **kwargs :
        Keyword args to be passed to the engine, and can be used to write
        to multi-layer data, store data within archives (zip files), etc.
@@ -604,44 +677,57 @@ def _to_file(
        "to a supported format like a well-known text (WKT) using "
        "`GeoSeries.to_wkt()`.",
    )
    _check_metadata_supported(metadata, engine, driver)

    if mode not in ("w", "a"):
        raise ValueError(f"'mode' should be one of 'w' or 'a', got '{mode}' instead")

    if engine == "fiona":
        _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs)
    elif engine == "pyogrio":
        _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs)
    if engine == "pyogrio":
        _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs)
    elif engine == "fiona":
        _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs)
    else:
        raise ValueError(f"unknown engine '{engine}'")


def _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs):
def _to_file_fiona(df, filename, driver, schema, crs, mode, metadata, **kwargs):
    if not HAS_PYPROJ and crs:
        raise ImportError(
            "The 'pyproj' package is required to write a file with a CRS, but it is not"
            " installed or does not import correctly."
        )

    if schema is None:
        schema = infer_schema(df)

    if crs:
        crs = pyproj.CRS.from_user_input(crs)
        from pyproj import CRS

        crs = CRS.from_user_input(crs)
    else:
        crs = df.crs

    with fiona_env():
        crs_wkt = None
        try:
            gdal_version = fiona.env.get_gdal_release_name()
        except AttributeError:
            gdal_version = "2.0.0"  # just assume it is not the latest
        if Version(gdal_version) >= Version("3.0.0") and crs:
            gdal_version = Version(
                fiona.env.get_gdal_release_name().strip("e")
            )  # GH3147
        except (AttributeError, ValueError):
            gdal_version = Version("2.0.0")  # just assume it is not the latest
        if gdal_version >= Version("3.0.0") and crs:
            crs_wkt = crs.to_wkt()
        elif crs:
            crs_wkt = crs.to_wkt("WKT1_GDAL")
        with fiona.open(
            filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs
        ) as colxn:
            if metadata is not None:
                colxn.update_tags(metadata)
            colxn.writerecords(df.iterfeatures())


def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
def _to_file_pyogrio(df, filename, driver, schema, crs, mode, metadata, **kwargs):
    import pyogrio

    if schema is not None:
@@ -653,13 +739,13 @@ def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs):
        kwargs["append"] = True

    if crs is not None:
        raise ValueError("Passing 'crs' it not supported with the 'pyogrio' engine.")
        raise ValueError("Passing 'crs' is not supported with the 'pyogrio' engine.")

    # for the fiona engine, this check is done in gdf.iterfeatures()
    if not df.columns.is_unique:
        raise ValueError("GeoDataFrame cannot contain duplicated column names.")

    pyogrio.write_dataframe(df, filename, driver=driver, **kwargs)
    pyogrio.write_dataframe(df, filename, driver=driver, metadata=metadata, **kwargs)


def infer_schema(df):
@@ -732,3 +818,34 @@ def _geometry_types(df):
        geom_types = geom_types[0]

    return geom_types


def _list_layers(filename) -> pd.DataFrame:
    """List layers available in a file.

    Provides an overview of layers available in a file or URL together with their
    geometry types. When supported by the data source, this includes both spatial and
    non-spatial layers. Non-spatial layers are indicated by the ``"geometry_type"``
    column being ``None``. GeoPandas will not read such layers but they can be read into
    a pd.DataFrame using :func:`pyogrio.read_dataframe`.

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open file
        or StringIO)

    Returns
    -------
    pandas.DataFrame
        A DataFrame with columns "name" and "geometry_type" and one row per layer.
    """
    _import_pyogrio()
    _check_pyogrio("list_layers")

    import pyogrio

    return pd.DataFrame(
        pyogrio.list_layers(filename), columns=["name", "geometry_type"]
    )
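The new metadata keyword and the layer listing pair naturally: write GPKG-level tags, then inspect the layers. A hedged usage sketch, assuming the public geopandas.list_layers wrapper that this private helper backs (file name illustrative; requires pyogrio >= 0.6):

import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame({"name": ["a"]}, geometry=[Point(0, 0)], crs="EPSG:4326")
gdf.to_file("example.gpkg", driver="GPKG", metadata={"source": "demo"})
print(gpd.list_layers("example.gpkg"))  # columns: name, geometry_type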
@@ -1,5 +1,6 @@
import warnings
from contextlib import contextmanager
from functools import lru_cache

import pandas as pd

@@ -8,8 +9,6 @@ import shapely.wkb

from geopandas import GeoDataFrame

from geopandas import _compat as compat


@contextmanager
def _get_conn(conn_or_engine):
@@ -28,7 +27,7 @@ def _get_conn(conn_or_engine):
    -------
    Connection
    """
    from sqlalchemy.engine.base import Engine, Connection
    from sqlalchemy.engine.base import Connection, Engine

    if isinstance(conn_or_engine, Connection):
        if not conn_or_engine.in_transaction():
@@ -43,7 +42,7 @@ def _get_conn(conn_or_engine):
    raise ValueError(f"Unknown Connectable: {conn_or_engine}")


def _df_to_geodf(df, geom_col="geom", crs=None):
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
    """
    Transforms a pandas DataFrame into a GeoDataFrame.
    The column 'geom_col' must be a geometry column in WKB representation.
@@ -60,6 +59,8 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
        such as an authority string (eg "EPSG:4326") or a WKT string.
        If not set, tries to determine CRS from the SRID associated with the
        first geometry in the database, and assigns that to all geometries.
    con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
        Active connection to the database to query.
    Returns
    -------
    GeoDataFrame
@@ -80,10 +81,6 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
        load_geom_bytes = shapely.wkb.loads
        """Load from Python 3 binary."""

        def load_geom_buffer(x):
            """Load from Python 2 binary."""
            return shapely.wkb.loads(str(x))

        def load_geom_text(x):
            """Load from binary encoded as text."""
            return shapely.wkb.loads(str(x), hex=True)
@@ -95,13 +92,31 @@ def _df_to_geodf(df, geom_col="geom", crs=None):

    df[geom_col] = geoms = geoms.apply(load_geom)
    if crs is None:
        if compat.SHAPELY_GE_20:
            srid = shapely.get_srid(geoms.iat[0])
        else:
            srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom)
        srid = shapely.get_srid(geoms.iat[0])
        # if no defined SRID in geodatabase, returns SRID of 0
        if srid != 0:
            crs = "epsg:{}".format(srid)
            try:
                spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
            except pd.errors.DatabaseError:
                warning_msg = (
                    f"Could not find the spatial reference system table "
                    f"(spatial_ref_sys) in PostGIS. "
                    f"Trying epsg:{srid} as a fallback."
                )
                warnings.warn(warning_msg, UserWarning, stacklevel=3)
                crs = "epsg:{}".format(srid)
            else:
                if not spatial_ref_sys_df.empty:
                    auth_name = spatial_ref_sys_df["auth_name"].item()
                    crs = f"{auth_name}:{srid}"
                else:
                    warning_msg = (
                        f"Could not find srid {srid} in the "
                        f"spatial_ref_sys table. "
                        f"Trying epsg:{srid} as a fallback."
                    )
                    warnings.warn(warning_msg, UserWarning, stacklevel=3)
                    crs = "epsg:{}".format(srid)

    return GeoDataFrame(df, crs=crs, geometry=geom_col)
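Instead of assuming epsg:<srid>, the reader now resolves the authority from PostGIS's spatial_ref_sys table (so e.g. an ESRI code round-trips correctly). The lookup boiled down to its essence (a sketch; the connection string is a placeholder):

import pandas as pd
from sqlalchemy import create_engine

con = create_engine("postgresql://user:pass@localhost/db")  # placeholder DSN
srid = 4326
ref = pd.read_sql(
    f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}", con
)
crs = f"{ref['auth_name'].item()}:{srid}" if not ref.empty else f"epsg:{srid}"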
@@ -176,7 +191,7 @@ def _read_postgis(
            params=params,
            chunksize=chunksize,
        )
        return _df_to_geodf(df, geom_col=geom_col, crs=crs)
        return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)

    else:
        # read data in chunks and return a generator
@@ -189,20 +204,9 @@ def _read_postgis(
            params=params,
            chunksize=chunksize,
        )
        return (_df_to_geodf(df, geom_col=geom_col, crs=crs) for df in df_generator)


def read_postgis(*args, **kwargs):
    import warnings

    warnings.warn(
        "geopandas.io.sql.read_postgis() is intended for internal "
        "use only, and will be deprecated. Use geopandas.read_postgis() instead.",
        FutureWarning,
        stacklevel=2,
    )

    return _read_postgis(*args, **kwargs)
        return (
            _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
        )


def _get_geometry_type(gdf):
@@ -253,7 +257,7 @@ def _get_geometry_type(gdf):

def _get_srid_from_crs(gdf):
    """
    Get EPSG code from CRS if available. If not, return -1.
    Get EPSG code from CRS if available. If not, return 0.
    """

    # Use geoalchemy2 default for srid
@@ -279,7 +283,7 @@ def _get_srid_from_crs(gdf):
        warnings.warn(warning_msg, UserWarning, stacklevel=2)

    if srid is None:
        srid = -1
        srid = 0
        warnings.warn(warning_msg, UserWarning, stacklevel=2)

    return srid
@@ -288,8 +292,8 @@ def _get_srid_from_crs(gdf):
def _convert_linearring_to_linestring(gdf, geom_name):
    from shapely.geometry import LineString

    # Todo: Use Pygeos function once it's implemented:
    # https://github.com/pygeos/pygeos/issues/76
    # Todo: Use shapely function once it's implemented:
    # https://github.com/shapely/shapely/issues/1617

    mask = gdf.geom_type == "LinearRing"
    gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
@@ -300,26 +304,11 @@ def _convert_linearring_to_linestring(gdf, geom_name):

def _convert_to_ewkb(gdf, geom_name, srid):
    """Convert geometries to ewkb."""
    if compat.USE_SHAPELY_20:
        geoms = shapely.to_wkb(
            shapely.set_srid(gdf[geom_name].values._data, srid=srid),
            hex=True,
            include_srid=True,
        )

    elif compat.USE_PYGEOS:
        from pygeos import set_srid, to_wkb

        geoms = to_wkb(
            set_srid(gdf[geom_name].values._data, srid=srid),
            hex=True,
            include_srid=True,
        )

    else:
        from shapely.wkb import dumps

        geoms = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]
    geoms = shapely.to_wkb(
        shapely.set_srid(gdf[geom_name].values._data, srid=srid),
        hex=True,
        include_srid=True,
    )

    # The gdf will warn that the geometry column doesn't hold in-memory geometries
    # now that they are EWKB, so convert back to a regular dataframe to avoid warning
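With the pygeos and pre-2.0 shapely branches gone, _convert_to_ewkb reduces to shapely 2.0's ability to embed the SRID directly during serialization. A standalone sketch of that mechanism:

import shapely
from shapely.geometry import Point

geom = shapely.set_srid(Point(1, 2), 4326)
ewkb_hex = shapely.to_wkb(geom, hex=True, include_srid=True)
# starts with '0101000020E6100000...': the 0x20 flag plus E6100000
# (little-endian 4326) mark the embedded SRID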
@@ -330,8 +319,8 @@ def _convert_to_ewkb(gdf, geom_name, srid):


def _psql_insert_copy(tbl, conn, keys, data_iter):
    import io
    import csv
    import io

    s_buf = io.StringIO()
    writer = csv.writer(s_buf)
@@ -341,11 +330,16 @@ def _psql_insert_copy(tbl, conn, keys, data_iter):
    columns = ", ".join('"{}"'.format(k) for k in keys)

    dbapi_conn = conn.connection
    sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
        tbl.table.schema, tbl.table.name, columns
    )
    with dbapi_conn.cursor() as cur:
        sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
            tbl.table.schema, tbl.table.name, columns
        )
        cur.copy_expert(sql=sql, file=s_buf)
        # Use psycopg method if it's available
        if hasattr(cur, "copy") and callable(cur.copy):
            with cur.copy(sql) as copy:
                copy.write(s_buf.read())
        else:  # otherwise use psycopg2 method
            cur.copy_expert(sql, s_buf)
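The duck-typed cursor check is what lets the same COPY path work under both drivers: psycopg (3.x) cursors expose copy() as a context manager, while psycopg2 only has copy_expert(). A minimal dispatch sketch with placeholder objects:

import io

def copy_csv(dbapi_conn, sql, s_buf: io.StringIO):
    # dbapi_conn: an open psycopg (3.x) or psycopg2 connection (placeholder)
    with dbapi_conn.cursor() as cur:
        if hasattr(cur, "copy") and callable(cur.copy):
            with cur.copy(sql) as copy:  # psycopg 3 COPY protocol
                copy.write(s_buf.read())
        else:
            cur.copy_expert(sql, s_buf)  # psycopg2 fallback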
def _write_postgis(
|
||||
@@ -469,3 +463,11 @@ def _write_postgis(
|
||||
dtype=dtype,
|
||||
method=_psql_insert_copy,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def _get_spatial_ref_sys_df(con, srid):
|
||||
spatial_ref_sys_sql = (
|
||||
f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
|
||||
)
|
||||
return pd.read_sql(spatial_ref_sys_sql, con)
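
For context (not part of the diff): @lru_cache memoizes the spatial_ref_sys lookup per (connection, srid) pair, so repeated reads against the same SRID only query the database once. A toy illustration of the caching behavior:

    from functools import lru_cache

    @lru_cache
    def lookup(srid: int) -> str:
        print(f"querying srid {srid}")  # runs only on a cache miss
        return f"auth name for {srid}"

    lookup(4326)  # executes the body
    lookup(4326)  # returned straight from the cache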
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -19,6 +19,7 @@ pickles and test versus the current data that is generated
 (with master). These are then compared.

 """
+
 import os
 import pickle
 import platform
@@ -26,9 +27,10 @@ import sys

 import pandas as pd

-import geopandas
 from shapely.geometry import Point

+import geopandas
+

 def create_pickle_data():
     """create the pickle data"""
File diff suppressed because it is too large
@@ -1,33 +1,41 @@
 import datetime
 import io
+import json
 import os
 import pathlib
+import shutil
 import tempfile
 from collections import OrderedDict
-from packaging.version import Version

 import numpy as np
 import pandas as pd
+import pytest
 import pytz
+from packaging.version import Version
 from pandas.api.types import is_datetime64_any_dtype
+from pandas.testing import assert_frame_equal, assert_series_equal
-from shapely.geometry import Point, Polygon, box
+from shapely.geometry import Point, Polygon, box, mapping

 import geopandas
 from geopandas import GeoDataFrame, read_file
-from geopandas._compat import PANDAS_GE_20
-from geopandas.io.file import _detect_driver, _EXTENSION_TO_DRIVER
+from geopandas._compat import HAS_PYPROJ, PANDAS_GE_20, PANDAS_GE_30
+from geopandas.io.file import _EXTENSION_TO_DRIVER, _detect_driver
-
-import pytest
 from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
 from geopandas.tests.util import PACKAGE_DIR, validate_boro_df

 try:
     import pyogrio

     PYOGRIO_GE_07 = Version(pyogrio.__version__) > Version("0.6.0")
+    # those version checks have to be defined here instead of imported from
+    # geopandas.io.file (those are only initialized lazily on first usage)
+    PYOGRIO_GE_090 = Version(Version(pyogrio.__version__).base_version) >= Version(
+        "0.9.0"
+    )
 except ImportError:
     pyogrio = False
     PYOGRIO_GE_07 = False
+    PYOGRIO_GE_090 = False
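
For context (not part of the diff): comparing against Version(...).base_version strips pre-release and dev suffixes, so a local "0.9.0.dev0" build still passes the >= 0.9.0 gate that a plain comparison would fail. A small illustration:

    from packaging.version import Version

    v = Version("0.9.0.dev0")
    print(v >= Version("0.9.0"))                        # False: dev sorts before final
    print(Version(v.base_version) >= Version("0.9.0"))  # True: base_version is "0.9.0"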

 try:
@@ -46,6 +54,9 @@ FIONA_MARK = pytest.mark.skipif(not fiona, reason="fiona not installed")
 _CRS = "epsg:4326"


+pytestmark = pytest.mark.filterwarnings("ignore:Value:RuntimeWarning:pyogrio")
+
+
 @pytest.fixture(
     params=[
         pytest.param("fiona", marks=FIONA_MARK),
@@ -62,9 +73,8 @@ def skip_pyogrio_not_supported(engine):

 @pytest.fixture
-def df_nybb(engine):
-    nybb_path = geopandas.datasets.get_path("nybb")
-    df = read_file(nybb_path, engine=engine)
+def df_nybb(engine, nybb_filename):
+    df = read_file(nybb_filename, engine=engine)
     return df

@@ -130,7 +140,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
     df = GeoDataFrame.from_file(tempfilename, engine=engine)
     assert "geometry" in df
     assert len(df) == 5
-    assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
+    assert np.all(df["BoroName"].values == df_nybb["BoroName"])

     # Write layer with null geometry out to file
     tempfilename = os.path.join(str(tmpdir), "null_geom" + ext)
@@ -139,7 +149,7 @@ def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine):
     df = GeoDataFrame.from_file(tempfilename, engine=engine)
     assert "geometry" in df
     assert len(df) == 2
-    assert np.alltrue(df["Name"].values == df_null["Name"])
+    assert np.all(df["Name"].values == df_null["Name"])
     # check the expected driver
     assert_correct_driver(tempfilename, ext, engine)

@@ -153,7 +163,7 @@ def test_to_file_pathlib(tmpdir, df_nybb, driver, ext, engine):
     df = GeoDataFrame.from_file(temppath, engine=engine)
     assert "geometry" in df
     assert len(df) == 5
-    assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"])
+    assert np.all(df["BoroName"].values == df_nybb["BoroName"])
     # check the expected driver
     assert_correct_driver(temppath, ext, engine)

@@ -174,9 +184,10 @@ def test_to_file_bool(tmpdir, driver, ext, engine):
     result = read_file(tempfilename, engine=engine)
     if ext in (".shp", ""):
         # Shapefile does not support boolean, so is read back as int
-        if engine == "fiona":
+        # but since GDAL 3.9 supports boolean fields in SHP
+        if engine == "fiona" and fiona.gdal_version.minor < 9:
             df["col"] = df["col"].astype("int64")
-        else:
+        elif engine == "pyogrio" and pyogrio.__gdal_version__ < (3, 9):
             df["col"] = df["col"].astype("int32")
     assert_geodataframe_equal(result, df)
     # check the expected driver

@@ -189,15 +200,15 @@ eastern = pytz.timezone("America/New_York")
 datetime_type_tests = (TEST_DATE, eastern.localize(TEST_DATE))


+@pytest.mark.filterwarnings(
+    "ignore:Non-conformant content for record 1 in column b:RuntimeWarning"
+)  # for GPKG, GDAL writes the tz data but warns on reading (see DATETIME_FORMAT option)
 @pytest.mark.parametrize(
     "time", datetime_type_tests, ids=("naive_datetime", "datetime_with_timezone")
 )
 @pytest.mark.parametrize("driver,ext", driver_ext_pairs)
 def test_to_file_datetime(tmpdir, driver, ext, time, engine):
     """Test writing a data file with the datetime column type"""
-    if engine == "pyogrio" and time.tzinfo is not None:
-        # TODO
-        pytest.skip("pyogrio doesn't yet support timezones")
     if ext in (".shp", ""):
         pytest.skip(f"Driver corresponding to ext {ext} doesn't support dt fields")

@@ -207,23 +218,25 @@ def test_to_file_datetime(tmpdir, driver, ext, time, engine):
     df = GeoDataFrame(
         {"a": [1.0, 2.0], "b": [time, time]}, geometry=[point, point], crs=4326
     )
-    fiona_precision_limit = "ms"
-    df["b"] = df["b"].dt.round(freq=fiona_precision_limit)
+    df["b"] = df["b"].dt.round(freq="ms")

     df.to_file(tempfilename, driver=driver, engine=engine)
     df_read = read_file(tempfilename, engine=engine)

     assert_geodataframe_equal(df.drop(columns=["b"]), df_read.drop(columns=["b"]))
+    # Check datetime column
+    expected = df["b"]
+    if PANDAS_GE_20:
+        expected = df["b"].dt.as_unit("ms")
+    actual = df_read["b"]
     if df["b"].dt.tz is not None:
         # US/Eastern becomes pytz.FixedOffset(-300) when read from file
-        # so compare fairly in terms of UTC
-        assert_series_equal(
-            df["b"].dt.tz_convert(pytz.utc), df_read["b"].dt.tz_convert(pytz.utc)
-        )
-    else:
-        if engine == "pyogrio" and PANDAS_GE_20:
-            df["b"] = df["b"].astype("datetime64[ms]")
-        assert_series_equal(df["b"], df_read["b"])
+        # as GDAL only models offsets, not timezones.
+        # Compare fair result in terms of UTC instead
+        expected = expected.dt.tz_convert(pytz.utc)
+        actual = actual.dt.tz_convert(pytz.utc)

+    assert_series_equal(expected, actual)
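
For context (not part of the diff): the rewritten assertion builds one expected series, cast to a millisecond unit on pandas >= 2.0 (GDAL stores millisecond precision) and, for aware values, converted to UTC, since GDAL only models fixed offsets rather than named timezones. A minimal sketch of that normalization, assuming pandas >= 2.0:

    import pandas as pd

    s = pd.Series(pd.to_datetime(["2023-01-01 12:00"])).dt.tz_localize("America/New_York")
    expected = s.dt.as_unit("ms").dt.tz_convert("UTC")
    print(expected.dtype)  # datetime64[ms, UTC]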


 dt_exts = ["gpkg", "geojson"]
@@ -239,7 +252,7 @@ def write_invalid_date_file(date_str, tmpdir, ext, engine):
     )
     # Schema not required for GeoJSON since not typed, but needed for GPKG
     if ext == "geojson":
-        df.to_file(tempfilename)
+        df.to_file(tempfilename, engine=engine)
     else:
         schema = {"geometry": "Point", "properties": {"date": "datetime"}}
         if engine == "pyogrio" and not fiona:
@@ -254,7 +267,7 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):
     # https://github.com/geopandas/geopandas/issues/2502
     date_str = "9999-99-99T00:00:00"  # invalid date handled by GDAL
     tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
-    res = read_file(tempfilename)
+    res = read_file(tempfilename, engine=engine)
     if ext == "gpkg":
         assert is_datetime64_any_dtype(res["date"])
         assert pd.isna(res["date"].iloc[-1])
@@ -265,16 +278,19 @@ def test_read_file_datetime_invalid(tmpdir, ext, engine):

 @pytest.mark.parametrize("ext", dt_exts)
 def test_read_file_datetime_out_of_bounds_ns(tmpdir, ext, engine):
-    if engine == "pyogrio" and not PANDAS_GE_20:
-        pytest.skip("with pyogrio requires pandas >= 2.0 to pass")
     # https://github.com/geopandas/geopandas/issues/2502
     if ext == "geojson":
         skip_pyogrio_not_supported(engine)

     date_str = "9999-12-31T00:00:00"  # valid to GDAL, not to [ns] format
     tempfilename = write_invalid_date_file(date_str, tmpdir, ext, engine)
-    res = read_file(tempfilename)
-    # Pandas invalid datetimes are read in as object dtype (strings)
-    assert res["date"].dtype == "object"
-    assert isinstance(res["date"].iloc[0], str)
+    res = read_file(tempfilename, engine=engine)
+    if PANDAS_GE_30:
+        assert res["date"].dtype == "datetime64[ms]"
+        assert res["date"].iloc[-1] == pd.Timestamp("9999-12-31 00:00:00")
+    else:
+        # Pandas invalid datetimes are read in as object dtype (strings)
+        assert res["date"].dtype == "object"
+        assert isinstance(res["date"].iloc[0], str)
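
For context (not part of the diff): datetime64[ns] only covers roughly the years 1677-2262, which is why year 9999 used to fall back to strings; pandas 3.x reads such values into a millisecond unit instead, whose range easily includes 9999. A small illustration, assuming a pandas version with non-nanosecond support (>= 2.0):

    import pandas as pd

    s = pd.Series(["9999-12-31"], dtype="datetime64[ms]")  # overflows [ns], fits [ms]
    print(s.iloc[0])  # 9999-12-31 00:00:00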


 def test_read_file_datetime_mixed_offsets(tmpdir):
@@ -292,17 +308,13 @@ def test_read_file_datetime_mixed_offsets(tmpdir):
     df.to_file(tempfilename)
     # check mixed tz don't crash GH2478
     res = read_file(tempfilename)
-    if engine == "fiona":
-        # Convert mixed timezones to UTC equivalent
-        assert is_datetime64_any_dtype(res["date"])
-        if not PANDAS_GE_20:
-            utc = pytz.utc
-        else:
-            utc = datetime.timezone.utc
-        assert res["date"].dt.tz == utc
-    else:
-        # old fiona and pyogrio ignore timezones and read as datetimes successfully
-        assert is_datetime64_any_dtype(res["date"])
+    # Convert mixed timezones to UTC equivalent
+    assert is_datetime64_any_dtype(res["date"])
+    if not PANDAS_GE_20:
+        utc = pytz.utc
+    else:
+        utc = datetime.timezone.utc
+    assert res["date"].dt.tz == utc


 @pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -365,14 +377,21 @@ def test_to_file_int32(tmpdir, df_points, engine, driver, ext):
     df = GeoDataFrame(geometry=geometry)
     df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int32Dtype())
     df.to_file(tempfilename, driver=driver, engine=engine)
-    df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
-    assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
+    df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
+    # the int column with missing values comes back as float
+    expected = df.copy()
+    expected["data"] = expected["data"].astype("float64")
+    assert_geodataframe_equal(df_read, expected, check_like=True)

+    tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
+    df2 = df.dropna()
+    df2.to_file(tempfilename2, driver=driver, engine=engine)
+    df2_read = GeoDataFrame.from_file(tempfilename2, engine=engine)
     if engine == "pyogrio":
-        tempfilename2 = os.path.join(str(tmpdir), f"int32_2.{ext}")
-        df2 = df.dropna()
-        df2.to_file(tempfilename2, driver=driver, engine=engine)
-        df2_read = GeoDataFrame.from_file(tempfilename2, driver=driver, engine=engine)
         assert df2_read["data"].dtype == "int32"
+    else:
+        # with the fiona engine the 32 bitwidth is not preserved
+        assert df2_read["data"].dtype == "int64"
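
For context (not part of the diff): OGR field types have no missing-integer representation, so a nullable Int32 column containing NA comes back as float64 after the round trip; only the all-present frame keeps an integer dtype (int32 with pyogrio, int64 with fiona). The expected dtype in isolation:

    import numpy as np
    import pandas as pd

    data = pd.array([1, np.nan] * 5, dtype=pd.Int32Dtype())
    # what the file round trip is expected to hand back for a column with NAs:
    roundtripped = pd.Series(data).astype("float64")
    print(roundtripped.dtype)  # float64, NaN standing in for the missing values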


 @pytest.mark.parametrize("driver,ext", driver_ext_pairs)
@@ -382,8 +401,11 @@ def test_to_file_int64(tmpdir, df_points, engine, driver, ext):
     df = GeoDataFrame(geometry=geometry)
     df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int64Dtype())
     df.to_file(tempfilename, driver=driver, engine=engine)
-    df_read = GeoDataFrame.from_file(tempfilename, driver=driver, engine=engine)
-    assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True)
+    df_read = GeoDataFrame.from_file(tempfilename, engine=engine)
+    # the int column with missing values comes back as float
+    expected = df.copy()
+    expected["data"] = expected["data"].astype("float64")
+    assert_geodataframe_equal(df_read, expected, check_like=True)


 def test_to_file_empty(tmpdir, engine):
@@ -393,12 +415,6 @@ def test_to_file_empty(tmpdir, engine):
     input_empty_df.to_file(tempfilename, engine=engine)


-def test_to_file_privacy(tmpdir, df_nybb):
-    tempfilename = os.path.join(str(tmpdir), "test.shp")
-    with pytest.warns(FutureWarning):
-        geopandas.io.file.to_file(df_nybb, tempfilename)


 def test_to_file_schema(tmpdir, df_nybb, engine):
     """
     Ensure that the file is written according to the schema
@@ -431,12 +447,13 @@ def test_to_file_schema(tmpdir, df_nybb, engine):
     assert result_schema == schema


-def test_to_file_crs(tmpdir, engine):
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+def test_to_file_crs(tmpdir, engine, nybb_filename):
     """
     Ensure that the file is written according to the crs
     if it is specified
     """
-    df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
+    df = read_file(nybb_filename, engine=engine)
     tempfilename = os.path.join(str(tmpdir), "crs.shp")

     # save correct CRS
@@ -445,7 +462,7 @@ def test_to_file_crs(tmpdir, engine):
     assert result.crs == df.crs

     if engine == "pyogrio":
-        with pytest.raises(ValueError, match="Passing 'crs' it not supported"):
+        with pytest.raises(ValueError, match="Passing 'crs' is not supported"):
             df.to_file(tempfilename, crs=3857, engine=engine)
         return

@@ -455,8 +472,7 @@ def test_to_file_crs(tmpdir, engine):
     assert result.crs == "epsg:3857"

     # specify CRS for gdf without one
-    df2 = df.copy()
-    df2.crs = None
+    df2 = df.set_crs(None, allow_override=True)
     df2.to_file(tempfilename, crs=2263, engine=engine)
     df = GeoDataFrame.from_file(tempfilename, engine=engine)
     assert df.crs == "epsg:2263"
@@ -529,6 +545,7 @@ def test_mode_unsupported(tmpdir, df_nybb, engine):
     df_nybb.to_file(tempfilename, mode="r", engine=engine)


+@pytest.mark.filterwarnings("ignore:'crs' was not provided:UserWarning:pyogrio")
 @pytest.mark.parametrize("driver,ext", driver_ext_pairs)
 def test_empty_crs(tmpdir, driver, ext, engine):
     """Test handling of undefined CRS with GPKG driver (GH #1975)."""
@@ -548,7 +565,7 @@ def test_empty_crs(tmpdir, driver, ext, engine):

     if ext == ".geojson":
         # geojson by default assumes epsg:4326
-        df.crs = "EPSG:4326"
+        df.geometry.array.crs = "EPSG:4326"

     assert_geodataframe_equal(result, df)

@@ -561,10 +578,11 @@ def test_empty_crs(tmpdir, driver, ext, engine):
 NYBB_CRS = "epsg:2263"


-def test_read_file(engine):
-    df = read_file(geopandas.datasets.get_path("nybb"), engine=engine)
+def test_read_file(engine, nybb_filename):
+    df = read_file(nybb_filename, engine=engine)
     validate_boro_df(df)
-    assert df.crs == NYBB_CRS
+    if HAS_PYPROJ:
+        assert df.crs == NYBB_CRS
     expected_columns = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area"]
     assert (df.columns[:-1] == expected_columns).all()

@@ -578,7 +596,7 @@ def test_read_file(engine):
     "main/geopandas/tests/data/null_geom.geojson",
     # url to zip file
     "https://raw.githubusercontent.com/geopandas/geopandas/"
-    "main/geopandas/datasets/nybb_16a.zip",
+    "main/geopandas/tests/data/nybb_16a.zip",
     # url to zipfile without extension
     "https://geonode.goosocean.org/download/480",
     # url to web service
@@ -596,6 +614,25 @@ def test_read_file_local_uri(file_path, engine):
     assert isinstance(gdf, geopandas.GeoDataFrame)


+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+def test_read_file_geojson_string_path(engine):
+    if engine == "pyogrio" and not PYOGRIO_GE_090:
+        pytest.skip("fixed in pyogrio 0.9.0")
+    expected = GeoDataFrame({"val_with_hash": ["row # 0"], "geometry": [Point(0, 1)]})
+    features = {
+        "type": "FeatureCollection",
+        "features": [
+            {
+                "type": "Feature",
+                "properties": {"val_with_hash": "row # 0"},
+                "geometry": {"type": "Point", "coordinates": [0.0, 1.0]},
+            }
+        ],
+    }
+    df_read = read_file(json.dumps(features))
+    assert_geodataframe_equal(expected.set_crs("EPSG:4326"), df_read)
+
+
 def test_read_file_textio(file_path, engine):
     file_text_stream = open(file_path)
     file_stringio = io.StringIO(open(file_path).read())
@@ -648,11 +685,11 @@ def test_read_file_tempfile(engine):
     temp.close()


-def test_read_binary_file_fsspec(engine):
+def test_read_binary_file_fsspec(engine, nybb_filename):
     fsspec = pytest.importorskip("fsspec")
     # Remove the zip scheme so fsspec doesn't open as a zipped file,
     # instead we want to read as bytes and let fiona decode it.
-    path = geopandas.datasets.get_path("nybb")[6:]
+    path = nybb_filename[6:]
     with fsspec.open(path, "rb") as f:
         gdf = read_file(f, engine=engine)
         assert isinstance(gdf, geopandas.GeoDataFrame)
@@ -665,10 +702,10 @@ def test_read_text_file_fsspec(file_path, engine):
     assert isinstance(gdf, geopandas.GeoDataFrame)


-def test_infer_zipped_file(engine):
+def test_infer_zipped_file(engine, nybb_filename):
     # Remove the zip scheme so that the test for a zipped file can
     # check it and add it back.
-    path = geopandas.datasets.get_path("nybb")[6:]
+    path = nybb_filename[6:]
     gdf = read_file(path, engine=engine)
     assert isinstance(gdf, geopandas.GeoDataFrame)

@@ -683,15 +720,24 @@ def test_infer_zipped_file(engine):
     assert isinstance(gdf, geopandas.GeoDataFrame)


-def test_allow_legacy_gdal_path(engine):
+def test_allow_legacy_gdal_path(engine, nybb_filename):
     # Construct a GDAL-style zip path.
-    path = "/vsizip/" + geopandas.datasets.get_path("nybb")[6:]
+    path = "/vsizip/" + nybb_filename[6:]
     gdf = read_file(path, engine=engine)
     assert isinstance(gdf, geopandas.GeoDataFrame)


-def test_read_file_filtered__bbox(df_nybb, engine):
-    nybb_filename = geopandas.datasets.get_path("nybb")
+@pytest.mark.skipif(not PYOGRIO_GE_090, reason="bug fixed in pyogrio 0.9.0")
+def test_read_file_with_hash_in_path(engine, nybb_filename, tmp_path):
+    folder_with_hash = tmp_path / "path with # present"
+    folder_with_hash.mkdir(exist_ok=True, parents=True)
+    read_path = folder_with_hash / "nybb.zip"
+    shutil.copy(nybb_filename[6:], read_path)
+    gdf = read_file(read_path, engine=engine)
+    assert isinstance(gdf, geopandas.GeoDataFrame)
+
+
+def test_read_file_bbox_tuple(df_nybb, engine, nybb_filename):
     bbox = (
         1031051.7879884212,
         224272.49231459625,
@@ -703,8 +749,7 @@ def test_read_file_filtered__bbox(df_nybb, engine):
     assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))


-def test_read_file_filtered__bbox__polygon(df_nybb, engine):
-    nybb_filename = geopandas.datasets.get_path("nybb")
+def test_read_file_bbox_polygon(df_nybb, engine, nybb_filename):
     bbox = box(
         1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
     )
@@ -713,14 +758,12 @@ def test_read_file_filtered__bbox__polygon(df_nybb, engine):
     assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True))


-def test_read_file_filtered__rows(df_nybb, engine):
-    nybb_filename = geopandas.datasets.get_path("nybb")
+def test_read_file_filtered__rows(df_nybb, engine, nybb_filename):
     filtered_df = read_file(nybb_filename, rows=1, engine=engine)
     assert_geodataframe_equal(filtered_df, df_nybb.iloc[[0], :])


-def test_read_file_filtered__rows_slice(df_nybb, engine):
-    nybb_filename = geopandas.datasets.get_path("nybb")
+def test_read_file_filtered__rows_slice(df_nybb, engine, nybb_filename):
     filtered_df = read_file(nybb_filename, rows=slice(1, 3), engine=engine)
     assert_geodataframe_equal(filtered_df, df_nybb.iloc[1:3, :].reset_index(drop=True))

@@ -728,21 +771,14 @@ def test_read_file_filtered__rows_slice(df_nybb, engine):
 @pytest.mark.filterwarnings(
     "ignore:Layer does not support OLC_FASTFEATURECOUNT:RuntimeWarning"
 )  # for the slice with -1
-def test_read_file_filtered__rows_bbox(df_nybb, engine):
-    nybb_filename = geopandas.datasets.get_path("nybb")
+def test_read_file_filtered__rows_bbox(df_nybb, engine, nybb_filename):
     bbox = (
         1031051.7879884212,
         224272.49231459625,
         1047224.3104931959,
         244317.30894023244,
     )
-    if engine == "pyogrio" and not PYOGRIO_GE_07:
-        with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"):
-            # combination bbox and rows (rows slice applied after bbox filtering!)
-            filtered_df = read_file(
-                nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
-            )
-    else:  # fiona
+    if engine == "fiona":
         # combination bbox and rows (rows slice applied after bbox filtering!)
         filtered_df = read_file(
             nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine
@@ -768,16 +804,14 @@ def test_read_file_filtered__rows_bbox(df_nybb, engine):
     )


-def test_read_file_filtered_rows_invalid(engine):
+def test_read_file_filtered_rows_invalid(engine, nybb_filename):
     with pytest.raises(TypeError):
-        read_file(
-            geopandas.datasets.get_path("nybb"), rows="not_a_slice", engine=engine
-        )
+        read_file(nybb_filename, rows="not_a_slice", engine=engine)


-def test_read_file__ignore_geometry(engine):
+def test_read_file__ignore_geometry(engine, naturalearth_lowres):
     pdf = geopandas.read_file(
-        geopandas.datasets.get_path("naturalearth_lowres"),
+        naturalearth_lowres,
         ignore_geometry=True,
         engine=engine,
     )
@@ -785,20 +819,73 @@ def test_read_file__ignore_geometry(engine):
     assert isinstance(pdf, pd.DataFrame) and not isinstance(pdf, geopandas.GeoDataFrame)


-def test_read_file__ignore_all_fields(engine):
-    skip_pyogrio_not_supported(engine)  # pyogrio has "columns" keyword instead
+@pytest.mark.filterwarnings(
+    "ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
+)
+def test_read_file__ignore_fields(engine, naturalearth_lowres):
+    gdf = geopandas.read_file(
+        naturalearth_lowres,
+        ignore_fields=["pop_est", "continent", "iso_a3", "gdp_md_est"],
+        engine=engine,
+    )
+    assert gdf.columns.tolist() == ["name", "geometry"]
+
+
+@pytest.mark.filterwarnings(
+    "ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
+)
+def test_read_file__ignore_all_fields(engine, naturalearth_lowres):
     gdf = geopandas.read_file(
-        geopandas.datasets.get_path("naturalearth_lowres"),
+        naturalearth_lowres,
         ignore_fields=["pop_est", "continent", "name", "iso_a3", "gdp_md_est"],
-        engine="fiona",
+        engine=engine,
     )
     assert gdf.columns.tolist() == ["geometry"]


-def test_read_file__where_filter(engine):
+def test_read_file_missing_geometry(tmpdir, engine):
+    filename = str(tmpdir / "test.csv")
+
+    expected = pd.DataFrame(
+        {"col1": np.array([1, 2, 3], dtype="int64"), "col2": ["a", "b", "c"]}
+    )
+    expected.to_csv(filename, index=False)
+
+    df = geopandas.read_file(filename, engine=engine)
+    # both engines read integers as strings; force back to original type
+    df["col1"] = df["col1"].astype("int64")
+
+    assert isinstance(df, pd.DataFrame)
+    assert not isinstance(df, geopandas.GeoDataFrame)
+
+    assert_frame_equal(df, expected)
+
+
+def test_read_file_None_attribute(tmp_path, engine):
+    # Test added in context of https://github.com/geopandas/geopandas/issues/2901
+    test_path = tmp_path / "test.gpkg"
+    gdf = GeoDataFrame(
+        {"a": [None, None]}, geometry=[Point(1, 2), Point(3, 4)], crs=4326
+    )
+
+    gdf.to_file(test_path, engine=engine)
+    read_gdf = read_file(test_path, engine=engine)
+    assert_geodataframe_equal(gdf, read_gdf)
+
+
+def test_read_csv_dtype(tmpdir, df_nybb):
+    filename = str(tmpdir / "test.csv")
+
+    df_nybb.to_csv(filename, index=False)
+    pdf = pd.read_csv(filename, dtype={"geometry": "geometry"})
+
+    assert pdf.geometry.dtype == "geometry"
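
For context (not part of the diff): this works because importing geopandas registers "geometry" as a pandas extension dtype, letting pandas parse the WKT strings in the CSV directly into a geometry column. A minimal sketch, assuming the WKT round-trips through to_csv as in the test above:

    import io
    import pandas as pd
    import geopandas  # noqa: F401 - importing registers the "geometry" dtype

    buf = io.StringIO("name,geometry\na,POINT (1 2)\n")
    df = pd.read_csv(buf, dtype={"geometry": "geometry"})
    print(df["geometry"].dtype)  # geometry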


+def test_read_file__where_filter(engine, naturalearth_lowres):
     if FIONA_GE_19 or engine == "pyogrio":
         gdf = geopandas.read_file(
-            geopandas.datasets.get_path("naturalearth_lowres"),
+            naturalearth_lowres,
             where="continent='Africa'",
             engine=engine,
         )
@@ -806,26 +893,75 @@ def test_read_file__where_filter(engine):
     else:
         with pytest.raises(NotImplementedError):
             geopandas.read_file(
-                geopandas.datasets.get_path("naturalearth_lowres"),
+                naturalearth_lowres,
                 where="continent='Africa'",
                 engine="fiona",
             )


-@PYOGRIO_MARK
-def test_read_file__columns():
-    # TODO: this is only support for pyogrio, but we could mimic it for fiona as well
+def test_read_file__columns(engine, naturalearth_lowres):
+    if engine == "fiona" and not FIONA_GE_19:
+        pytest.skip("columns requires fiona 1.9+")
+
     gdf = geopandas.read_file(
-        geopandas.datasets.get_path("naturalearth_lowres"),
-        columns=["name", "pop_est"],
-        engine="pyogrio",
+        naturalearth_lowres, columns=["name", "pop_est"], engine=engine
     )
     assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]


-def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
+def test_read_file__columns_empty(engine, naturalearth_lowres):
+    if engine == "fiona" and not FIONA_GE_19:
+        pytest.skip("columns requires fiona 1.9+")
+
+    gdf = geopandas.read_file(naturalearth_lowres, columns=[], engine=engine)
+    assert gdf.columns.tolist() == ["geometry"]
+
+
+@pytest.mark.skipif(FIONA_GE_19 or not fiona, reason="test for fiona < 1.9")
+def test_read_file__columns_old_fiona(naturalearth_lowres):
+    with pytest.raises(NotImplementedError):
+        geopandas.read_file(
+            naturalearth_lowres, columns=["name", "pop_est"], engine="fiona"
+        )
+
+
+@pytest.mark.filterwarnings(
+    "ignore:The 'include_fields' and 'ignore_fields' keywords:DeprecationWarning"
+)
+def test_read_file__include_fields(engine, naturalearth_lowres):
+    if engine == "fiona" and not FIONA_GE_19:
+        pytest.skip("columns requires fiona 1.9+")
+
+    gdf = geopandas.read_file(
+        naturalearth_lowres, include_fields=["name", "pop_est"], engine=engine
+    )
+    assert gdf.columns.tolist() == ["name", "pop_est", "geometry"]
+
+
+@pytest.mark.skipif(not FIONA_GE_19, reason="columns requires fiona 1.9+")
+def test_read_file__columns_conflicting_keywords(engine, naturalearth_lowres):
+    path = naturalearth_lowres
+
+    with pytest.raises(ValueError, match="Cannot specify both"):
+        geopandas.read_file(
+            path, include_fields=["name"], ignore_fields=["pop_est"], engine=engine
+        )
+
+    with pytest.raises(ValueError, match="Cannot specify both"):
+        geopandas.read_file(
+            path, columns=["name"], include_fields=["pop_est"], engine=engine
+        )
+
+    with pytest.raises(ValueError, match="Cannot specify both"):
+        geopandas.read_file(
+            path, columns=["name"], ignore_fields=["pop_est"], engine=engine
+        )
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+@pytest.mark.parametrize("file_like", [False, True])
+def test_read_file_bbox_gdf(df_nybb, engine, nybb_filename, file_like):
     full_df_shape = df_nybb.shape
-    nybb_filename = geopandas.datasets.get_path("nybb")
     bbox = geopandas.GeoDataFrame(
         geometry=[
             box(
@@ -837,28 +973,41 @@ def test_read_file_filtered_with_gdf_boundary(df_nybb, engine):
         ],
         crs=NYBB_CRS,
     )
-    filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine)
+    infile = (
+        open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
+    )
+    filtered_df = read_file(infile, bbox=bbox, engine=engine)
     filtered_df_shape = filtered_df.shape
     assert full_df_shape != filtered_df_shape
     assert filtered_df_shape == (2, 5)


-def test_read_file_filtered_with_gdf_boundary__mask(df_nybb, engine):
-    skip_pyogrio_not_supported(engine)
-    gdf_mask = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
-    gdf = geopandas.read_file(
-        geopandas.datasets.get_path("naturalearth_cities"),
-        mask=gdf_mask[gdf_mask.continent == "Africa"],
-        engine=engine,
-    )
-    filtered_df_shape = gdf.shape
-    assert filtered_df_shape == (57, 2)


-def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
-    skip_pyogrio_not_supported(engine)
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+@pytest.mark.parametrize("file_like", [False, True])
+def test_read_file_mask_gdf(df_nybb, engine, nybb_filename, file_like):
     full_df_shape = df_nybb.shape
+    mask = geopandas.GeoDataFrame(
+        geometry=[
+            box(
+                1031051.7879884212,
+                224272.49231459625,
+                1047224.3104931959,
+                244317.30894023244,
+            )
+        ],
+        crs=NYBB_CRS,
+    )
+    infile = (
+        open(nybb_filename.replace("zip://", ""), "rb") if file_like else nybb_filename
+    )
+    filtered_df = read_file(infile, mask=mask, engine=engine)
+    filtered_df_shape = filtered_df.shape
+    assert full_df_shape != filtered_df_shape
+    assert filtered_df_shape == (2, 5)
+
+
+def test_read_file_mask_polygon(df_nybb, engine, nybb_filename):
+    full_df_shape = df_nybb.shape
-    nybb_filename = geopandas.datasets.get_path("nybb")
     mask = box(
         1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244
     )
@@ -868,10 +1017,25 @@ def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine):
     assert filtered_df_shape == (2, 5)


-def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
-    skip_pyogrio_not_supported(engine)
+def test_read_file_mask_geojson(df_nybb, nybb_filename, engine):
     full_df_shape = df_nybb.shape
+    mask = mapping(
+        box(
+            1031051.7879884212,
+            224272.49231459625,
+            1047224.3104931959,
+            244317.30894023244,
+        )
+    )
+    filtered_df = read_file(nybb_filename, mask=mask, engine=engine)
+    filtered_df_shape = filtered_df.shape
+    assert full_df_shape != filtered_df_shape
+    assert filtered_df_shape == (2, 5)
+
+
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+def test_read_file_bbox_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
+    full_df_shape = df_nybb.shape
-    nybb_filename = geopandas.datasets.get_path("nybb")
     bbox = geopandas.GeoDataFrame(
         geometry=[
             box(
@@ -890,10 +1054,9 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine):
     assert filtered_df_shape == (2, 5)


-def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engine):
-    skip_pyogrio_not_supported(engine)
+@pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
+def test_read_file_mask_gdf_mismatched_crs(df_nybb, engine, nybb_filename):
     full_df_shape = df_nybb.shape
-    nybb_filename = geopandas.datasets.get_path("nybb")
     mask = geopandas.GeoDataFrame(
         geometry=[
             box(
@@ -912,6 +1075,20 @@ def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engine):
     assert filtered_df_shape == (2, 5)


+def test_read_file_bbox_mask_not_allowed(engine, nybb_filename):
+    bbox = (
+        1031051.7879884212,
+        224272.49231459625,
+        1047224.3104931959,
+        244317.30894023244,
+    )
+
+    mask = box(*bbox)
+
+    with pytest.raises(ValueError, match="mask and bbox can not be set together"):
+        read_file(nybb_filename, bbox=bbox, mask=mask)
+
+
+@pytest.mark.filterwarnings(
+    "ignore:Layer 'b'test_empty'' does not have any features:UserWarning"
+)
@@ -942,11 +1119,6 @@ def test_read_file_empty_shapefile(tmpdir, engine):
     assert all(empty.columns == ["A", "Z", "geometry"])


-def test_read_file_privacy(tmpdir, df_nybb):
-    with pytest.warns(FutureWarning):
-        geopandas.io.file.read_file(geopandas.datasets.get_path("nybb"))


 class FileNumber(object):
     def __init__(self, tmpdir, base, ext):
         self.tmpdir = str(tmpdir)
@@ -1113,7 +1285,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
     # index as string
     df_p = df_points.copy()
     df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
-    df.index = pd.TimedeltaIndex(range(len(df)), "days")
+    df.index = pd.to_timedelta(range(len(df)), unit="days")
     # TODO: TimedeltaIndex is an invalid field type
     df.index = df.index.astype(str)
     do_checks(df, index_is_used=True)
@@ -1121,7 +1293,7 @@ def test_write_index_to_file(tmpdir, df_points, driver, ext, engine):
     # unnamed DatetimeIndex
     df_p = df_points.copy()
     df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
-    df.index = pd.TimedeltaIndex(range(len(df)), "days") + pd.DatetimeIndex(
+    df.index = pd.to_timedelta(range(len(df)), unit="days") + pd.to_datetime(
         ["1999-12-27"] * len(df)
     )
     if driver == "ESRI Shapefile":
@@ -1152,6 +1324,54 @@ def test_write_read_file(test_file, engine):
     os.remove(os.path.expanduser(test_file))


+@pytest.mark.skipif(fiona is False, reason="Fiona not available")
+@pytest.mark.skipif(FIONA_GE_19, reason="Fiona >= 1.9 supports metadata")
+def test_to_file_metadata_unsupported_fiona_version(tmp_path, df_points):
+    metadata = {"title": "test"}
+    tmp_file = tmp_path / "test.gpkg"
+    match = "'metadata' keyword is only supported for Fiona >= 1.9"
+    with pytest.raises(NotImplementedError, match=match):
+        df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
+
+
+@pytest.mark.skipif(not FIONA_GE_19, reason="only Fiona >= 1.9 supports metadata")
+def test_to_file_metadata_supported_fiona_version(tmp_path, df_points):
+    metadata = {"title": "test"}
+    tmp_file = tmp_path / "test.gpkg"
+
+    df_points.to_file(tmp_file, driver="GPKG", engine="fiona", metadata=metadata)
+
+    # Check that metadata is written to the file
+    with fiona.open(tmp_file) as src:
+        tags = src.tags()
+        assert tags == metadata
+
+
+@pytest.mark.skipif(pyogrio is False, reason="Pyogrio not available")
+def test_to_file_metadata_pyogrio(tmp_path, df_points):
+    metadata = {"title": "test"}
+    tmp_file = tmp_path / "test.gpkg"
+
+    df_points.to_file(tmp_file, driver="GPKG", engine="pyogrio", metadata=metadata)
+
+    # Check that metadata is written to the file
+    info = pyogrio.read_info(tmp_file)
+    layer_metadata = info["layer_metadata"]
+    assert layer_metadata == metadata
+
+
+@pytest.mark.parametrize(
+    "driver, ext", [("ESRI Shapefile", ".shp"), ("GeoJSON", ".geojson")]
+)
+def test_to_file_metadata_unsupported_driver(driver, ext, tmpdir, df_points, engine):
+    metadata = {"title": "Test"}
+    tempfilename = os.path.join(str(tmpdir), "test" + ext)
+    with pytest.raises(
+        NotImplementedError, match="'metadata' keyword is only supported for"
+    ):
+        df_points.to_file(tempfilename, driver=driver, metadata=metadata)
+
+
 def test_multiple_geom_cols_error(tmpdir, df_nybb):
     df_nybb["geom2"] = df_nybb.geometry
     with pytest.raises(ValueError, match="GeoDataFrame contains multiple geometry"):
@@ -1160,7 +1380,7 @@ def test_multiple_geom_cols_error(tmpdir, df_nybb):

 @PYOGRIO_MARK
 @FIONA_MARK
-def test_option_io_engine():
+def test_option_io_engine(nybb_filename):
     try:
         geopandas.options.io_engine = "pyogrio"

@@ -1171,8 +1391,48 @@ def test_option_io_engine():
         orig = fiona.supported_drivers["ESRI Shapefile"]
         fiona.supported_drivers["ESRI Shapefile"] = "w"

-        nybb_filename = geopandas.datasets.get_path("nybb")
         _ = geopandas.read_file(nybb_filename)
     finally:
         fiona.supported_drivers["ESRI Shapefile"] = orig
         geopandas.options.io_engine = None
+
+
+@pytest.mark.skipif(pyogrio, reason="test for pyogrio not installed")
+def test_error_engine_unavailable_pyogrio(tmp_path, df_points, file_path):
+    with pytest.raises(ImportError, match="the 'read_file' function requires"):
+        geopandas.read_file(file_path, engine="pyogrio")
+
+    with pytest.raises(ImportError, match="the 'to_file' method requires"):
+        df_points.to_file(tmp_path / "test.gpkg", engine="pyogrio")
+
+
+@pytest.mark.skipif(fiona, reason="test for fiona not installed")
+def test_error_engine_unavailable_fiona(tmp_path, df_points, file_path):
+    with pytest.raises(ImportError, match="the 'read_file' function requires"):
+        geopandas.read_file(file_path, engine="fiona")
+
+    with pytest.raises(ImportError, match="the 'to_file' method requires"):
+        df_points.to_file(tmp_path / "test.gpkg", engine="fiona")
+
+
+@PYOGRIO_MARK
+def test_list_layers(df_points, tmpdir):
+    tempfilename = os.path.join(str(tmpdir), "dataset.gpkg")
+    df_points.to_file(tempfilename, layer="original")
+    df_points.set_geometry(df_points.buffer(1)).to_file(tempfilename, layer="buffered")
+    df_points.set_geometry(df_points.buffer(2).boundary).to_file(
+        tempfilename, layer="boundary"
+    )
+    pyogrio.write_dataframe(
+        df_points[["value1", "value2"]], tempfilename, layer="non-spatial"
+    )
+    layers = geopandas.list_layers(tempfilename)
+    expected = pd.DataFrame(
+        {
+            "name": ["original", "buffered", "boundary", "non-spatial"],
+            "geometry_type": ["Point", "Polygon", "LineString", None],
+        }
+    )
+    assert_frame_equal(layers, expected)
@@ -12,11 +12,10 @@ from shapely.geometry import (
 import geopandas
 from geopandas import GeoDataFrame
-
-from geopandas.testing import assert_geodataframe_equal
-import pytest

 from .test_file import FIONA_MARK, PYOGRIO_MARK

+import pytest
+from geopandas.testing import assert_geodataframe_equal

 # Credit: Polygons below come from Montreal city Open Data portal
 # http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
@@ -244,7 +243,14 @@ def geodataframe(request):
     return request.param


-@pytest.fixture(params=["GeoJSON", "ESRI Shapefile", "GPKG", "SQLite"])
+@pytest.fixture(
+    params=[
+        ("GeoJSON", ".geojson"),
+        ("ESRI Shapefile", ".shp"),
+        ("GPKG", ".gpkg"),
+        ("SQLite", ".sqlite"),
+    ]
+)
 def ogr_driver(request):
     return request.param
@@ -260,16 +266,18 @@ def engine(request):


 def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
-    output_file = os.path.join(str(tmpdir), "output_file")
+    driver, ext = ogr_driver
+    output_file = os.path.join(str(tmpdir), "output_file" + ext)
     write_kwargs = {}
-    if ogr_driver == "SQLite":
+    if driver == "SQLite":
         write_kwargs["spatialite"] = True

     # This if statement can be removed once minimal fiona version >= 1.8.20
     if engine == "fiona":
-        import fiona
         from packaging.version import Version

+        import fiona
+
         if Version(fiona.__version__) < Version("1.8.20"):
             pytest.skip("SQLite driver only available from version 1.8.20")

@@ -285,22 +293,35 @@ def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine):
     ):
         write_kwargs["geometry_type"] = "Point Z"

-    expected_error = _expected_error_on(geodataframe, ogr_driver)
+    expected_error = _expected_error_on(geodataframe, driver)
     if expected_error:
         with pytest.raises(
             RuntimeError, match="Failed to write record|Could not add feature to layer"
         ):
             geodataframe.to_file(
-                output_file, driver=ogr_driver, engine=engine, **write_kwargs
+                output_file, driver=driver, engine=engine, **write_kwargs
             )
     else:
-        geodataframe.to_file(
-            output_file, driver=ogr_driver, engine=engine, **write_kwargs
-        )
+        if driver == "SQLite" and engine == "pyogrio":
+            try:
+                geodataframe.to_file(
+                    output_file, driver=driver, engine=engine, **write_kwargs
+                )
+            except ValueError as e:
+                if "unrecognized option 'SPATIALITE'" in str(e):
+                    pytest.xfail(
+                        "pyogrio wheels from PyPI do not come with SpatiaLite support. "
+                        f"Error: {e}"
+                    )
+                raise
+        else:
+            geodataframe.to_file(
+                output_file, driver=driver, engine=engine, **write_kwargs
+            )

     reloaded = geopandas.read_file(output_file, engine=engine)

-    if ogr_driver == "GeoJSON" and engine == "pyogrio":
+    if driver == "GeoJSON" and engine == "pyogrio":
         # For GeoJSON files, the int64 column comes back as int32
         reloaded["a"] = reloaded["a"].astype("int64")
@@ -1,5 +1,8 @@
 from collections import OrderedDict

+import numpy as np
+import pandas as pd
+
 from shapely.geometry import (
     LineString,
     MultiLineString,
@@ -9,12 +12,11 @@ from shapely.geometry import (
     Polygon,
 )

-import pandas as pd
-import pytest
-import numpy as np
 from geopandas import GeoDataFrame
 from geopandas.io.file import infer_schema

+import pytest

 # Credit: Polygons below come from Montreal city Open Data portal
 # http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere
 city_hall_boundaries = Polygon(
@@ -2,7 +2,7 @@
 See generate_legacy_storage_files.py for the creation of the legacy files.

 """
-from contextlib import contextmanager
+
 import glob
 import os
 import pathlib
@@ -11,9 +11,6 @@ import pandas as pd

 import pytest
 from geopandas.testing import assert_geodataframe_equal
-from geopandas import _compat as compat
-import geopandas
-from shapely.geometry import Point

 DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data"

@@ -34,18 +31,7 @@ def legacy_pickle(request):
     return request.param


-@contextmanager
-def with_use_pygeos(option):
-    orig = geopandas.options.use_pygeos
-    geopandas.options.use_pygeos = option
-    try:
-        yield
-    finally:
-        geopandas.options.use_pygeos = orig
-
-
-@pytest.mark.skipif(
-    compat.USE_SHAPELY_20 or compat.USE_PYGEOS,
+@pytest.mark.skip(
     reason=(
         "shapely 2.0/pygeos-based unpickling currently only works for "
         "shapely-2.0/pygeos-written files"
@@ -68,43 +54,3 @@ def test_round_trip_current(tmpdir, current_pickle_data):
     result = pd.read_pickle(path)
     assert_geodataframe_equal(result, value)
     assert isinstance(result.has_sindex, bool)
-
-
-def _create_gdf():
-    return geopandas.GeoDataFrame(
-        {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]},
-        crs="EPSG:4326",
-    )
-
-
-@pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745")
-def test_pygeos_switch(tmpdir):
-    # writing and reading with pygeos disabled
-    with with_use_pygeos(False):
-        gdf = _create_gdf()
-        path = str(tmpdir / "gdf_crs1.pickle")
-        gdf.to_pickle(path)
-        result = pd.read_pickle(path)
-        assert_geodataframe_equal(result, gdf)
-
-    # writing without pygeos, reading with pygeos
-    with with_use_pygeos(False):
-        gdf = _create_gdf()
-        path = str(tmpdir / "gdf_crs1.pickle")
-        gdf.to_pickle(path)
-
-    with with_use_pygeos(True):
-        result = pd.read_pickle(path)
-        gdf = _create_gdf()
-        assert_geodataframe_equal(result, gdf)
-
-    # writing with pygeos, reading without pygeos
-    with with_use_pygeos(True):
-        gdf = _create_gdf()
-        path = str(tmpdir / "gdf_crs1.pickle")
-        gdf.to_pickle(path)
-
-    with with_use_pygeos(False):
-        result = pd.read_pickle(path)
-        gdf = _create_gdf()
-        assert_geodataframe_equal(result, gdf)
@@ -4,18 +4,27 @@ The spatial database tests may not work without additional system
 configuration. postGIS tests require a test database to have been setup;
 see geopandas.tests.util for more information.
 """

 import os
 import warnings
+from importlib.util import find_spec

 import pandas as pd

 import geopandas
-from geopandas import GeoDataFrame, read_file, read_postgis
-import geopandas._compat as compat
-from geopandas.io.sql import _get_conn as get_conn, _write_postgis as write_postgis
-from geopandas.tests.util import create_postgis, create_spatialite, validate_boro_df
+from geopandas import GeoDataFrame, read_file, read_postgis
+from geopandas._compat import HAS_PYPROJ
+from geopandas.io.sql import _get_conn as get_conn
+from geopandas.io.sql import _write_postgis as write_postgis
+
+import pytest
+from geopandas.tests.util import (
+    create_postgis,
+    create_spatialite,
+    mock,
+    validate_boro_df,
+)

 try:
     from sqlalchemy import text
@@ -26,31 +35,48 @@ except ImportError:


 @pytest.fixture
-def df_nybb():
-    nybb_path = geopandas.datasets.get_path("nybb")
-    df = read_file(nybb_path)
+def df_nybb(nybb_filename):
+    df = read_file(nybb_filename)
     return df


-@pytest.fixture()
-def connection_postgis():
-    """
-    Initiates a connection to a postGIS database that must already exist.
-    See create_postgis for more information.
-    """
-    psycopg2 = pytest.importorskip("psycopg2")
-    from psycopg2 import OperationalError
+def check_available_postgis_drivers() -> list[str]:
+    """Work out which of psycopg2 and psycopg are available.
+
+    This prevents tests running if the relevant package isn't installed
+    (rather than being skipped, as skips are treated as failures during postgis CI)
+    """
+    drivers = []
+    if find_spec("psycopg"):
+        drivers.append("psycopg")
+    if find_spec("psycopg2"):
+        drivers.append("psycopg2")
+    return drivers
+
+
+POSTGIS_DRIVERS = check_available_postgis_drivers()
+
+
+def prepare_database_credentials() -> dict:
+    """Gather postgres connection credentials from environment variables."""
+    return {
+        "dbname": "test_geopandas",
+        "user": os.environ.get("PGUSER"),
+        "password": os.environ.get("PGPASSWORD"),
+        "host": os.environ.get("PGHOST"),
+        "port": os.environ.get("PGPORT"),
+    }
+
+
+@pytest.fixture()
+def connection_postgis(request):
+    """Create a postgres connection using either psycopg2 or psycopg.
+
+    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
+    psycopg = pytest.importorskip(request.param)

-    dbname = "test_geopandas"
-    user = os.environ.get("PGUSER")
-    password = os.environ.get("PGPASSWORD")
-    host = os.environ.get("PGHOST")
-    port = os.environ.get("PGPORT")
     try:
-        con = psycopg2.connect(
-            dbname=dbname, user=user, password=password, host=host, port=port
-        )
-    except OperationalError:
+        con = psycopg.connect(**prepare_database_credentials())
+    except psycopg.OperationalError:
         pytest.skip("Cannot connect with postgresql database")
     with warnings.catch_warnings():
         warnings.filterwarnings(
@@ -61,28 +87,25 @@ def connection_postgis():


 @pytest.fixture()
-def engine_postgis():
+def engine_postgis(request):
     """
-    Initiates a connection engine to a postGIS database that must already exist.
+    Initiate a sqlalchemy connection engine using either psycopg2 or psycopg.
+
+    Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
     """
     sqlalchemy = pytest.importorskip("sqlalchemy")
     from sqlalchemy.engine.url import URL

-    user = os.environ.get("PGUSER")
-    password = os.environ.get("PGPASSWORD")
-    host = os.environ.get("PGHOST")
-    port = os.environ.get("PGPORT")
-    dbname = "test_geopandas"
-
+    credentials = prepare_database_credentials()
     try:
         con = sqlalchemy.create_engine(
             URL.create(
-                drivername="postgresql+psycopg2",
-                username=user,
-                database=dbname,
-                password=password,
-                host=host,
-                port=port,
+                drivername=f"postgresql+{request.param}",
+                username=credentials["user"],
+                database=credentials["dbname"],
+                password=credentials["password"],
+                host=credentials["host"],
+                port=credentials["port"],
             )
         )
         con.connect()
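
For context (not part of the diff): both fixtures are designed for pytest's indirect parametrization, so every postGIS test runs once per driver that is actually importable. A minimal self-contained sketch of the pattern (the fixture body is a stand-in, not the real connection logic):

    import pytest

    POSTGIS_DRIVERS = ["psycopg", "psycopg2"]  # normally detected via find_spec

    @pytest.fixture()
    def connection_postgis(request):
        return f"connection via {request.param}"  # stand-in for a real DB connection

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_roundtrip(connection_postgis):
        assert "psycopg" in connection_postgis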
|
||||
@@ -140,7 +163,7 @@ def drop_table_if_exists(conn_or_engine, table):
|
||||
|
||||
@pytest.fixture
|
||||
def df_mixed_single_and_multi():
|
||||
from shapely.geometry import Point, LineString, MultiLineString
|
||||
from shapely.geometry import LineString, MultiLineString, Point
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
@@ -157,7 +180,7 @@ def df_mixed_single_and_multi():
|
||||
|
||||
@pytest.fixture
|
||||
def df_geom_collection():
|
||||
from shapely.geometry import Point, LineString, Polygon, GeometryCollection
|
||||
from shapely.geometry import GeometryCollection, LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
@@ -188,7 +211,7 @@ def df_linear_ring():
|
||||
|
||||
@pytest.fixture
|
||||
def df_3D_geoms():
|
||||
from shapely.geometry import Point, LineString, Polygon
|
||||
from shapely.geometry import LineString, Point, Polygon
|
||||
|
||||
df = geopandas.GeoDataFrame(
|
||||
{
|
||||
@@ -204,6 +227,7 @@ def df_3D_geoms():
|
||||
|
||||
|
||||
class TestIO:
|
||||
@pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_get_conn(self, engine_postgis):
|
||||
Connection = pytest.importorskip("sqlalchemy.engine.base").Connection
|
||||
|
||||
@@ -217,6 +241,7 @@ class TestIO:
|
||||
with get_conn(object()):
|
||||
pass
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_default(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
@@ -229,6 +254,7 @@ class TestIO:
|
||||
# by user; should not be set to 0, as from get_srid failure
|
||||
assert df.crs is None
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_custom_geom_col(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
geom_col = "the_geom"
|
||||
@@ -239,6 +265,7 @@ class TestIO:
|
||||
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_select_geom_as(self, connection_postgis, df_nybb):
|
||||
"""Tests that a SELECT {geom} AS {some_other_geom} works."""
|
||||
con = connection_postgis
|
||||
@@ -254,6 +281,7 @@ class TestIO:
|
||||
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_get_srid(self, connection_postgis, df_nybb):
|
||||
"""Tests that an SRID can be read from a geodatabase (GH #451)."""
|
||||
con = connection_postgis
|
||||
@@ -267,6 +295,7 @@ class TestIO:
|
||||
validate_boro_df(df)
|
||||
assert df.crs == crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_override_srid(self, connection_postgis, df_nybb):
|
||||
"""Tests that a user specified CRS overrides the geodatabase SRID."""
|
||||
con = connection_postgis
|
||||
@@ -279,6 +308,7 @@ class TestIO:
|
||||
validate_boro_df(df)
|
||||
assert df.crs == orig_crs
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_from_postgis_default(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
@@ -288,6 +318,7 @@ class TestIO:
|
||||
|
||||
validate_boro_df(df, case_sensitive=False)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_from_postgis_custom_geom_col(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
geom_col = "the_geom"
|
||||
@@ -323,6 +354,7 @@ class TestIO:
|
||||
df = read_postgis(sql, con, geom_col=geom_col)
|
||||
validate_boro_df(df)
|
||||
|
||||
@pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
|
||||
def test_read_postgis_chunksize(self, connection_postgis, df_nybb):
|
||||
"""Test chunksize argument"""
|
||||
chunksize = 2
|
||||
@@ -337,14 +369,7 @@ class TestIO:
|
||||
# by user; should not be set to 0, as from get_srid failure
|
||||
assert df.crs is None
|
||||
|
||||
def test_read_postgis_privacy(self, connection_postgis, df_nybb):
|
||||
con = connection_postgis
|
||||
create_postgis(con, df_nybb)
|
||||
|
||||
sql = "SELECT * FROM nybb;"
|
||||
with pytest.warns(FutureWarning):
|
||||
geopandas.io.sql.read_postgis(sql, con)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_default(self, engine_postgis, df_nybb):
        """Tests that GeoDataFrame can be written to PostGIS with defaults."""
        engine = engine_postgis
@@ -360,6 +385,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)
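The public counterpart of the `write_postgis`/`read_postgis` pair used throughout these tests is `GeoDataFrame.to_postgis`. A minimal round-trip sketch; the connection URL and table name are placeholders:

```python
import geopandas
from shapely.geometry import Point
from sqlalchemy import create_engine

# placeholder URL; assumes a PostGIS-enabled database is reachable
engine = create_engine("postgresql://user:pass@localhost:5432/gis")

gdf = geopandas.GeoDataFrame(
    {"name": ["a", "b"]}, geometry=[Point(0, 0), Point(1, 1)], crs="EPSG:4326"
)
gdf.to_postgis("demo", engine, if_exists="replace")

# read back with geometry decoded from PostGIS
back = geopandas.read_postgis("SELECT * FROM demo;", engine, geom_col="geometry")
```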

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_uppercase_tablename(self, engine_postgis, df_nybb):
        """Tests writing GeoDataFrame to PostGIS with uppercase tablename."""
        engine = engine_postgis
@@ -375,6 +401,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_sqlalchemy_connection(self, engine_postgis, df_nybb):
        """Tests writing a GeoDataFrame with a SQLAlchemy connection."""
        with engine_postgis.begin() as con:
@@ -390,6 +417,7 @@ class TestIO:
        df = read_postgis(sql, con, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_fail_when_table_exists(self, engine_postgis, df_nybb):
"""
|
||||
Tests that uploading the same table raises error when: if_replace='fail'.
@@ -409,6 +437,7 @@ class TestIO:
        else:
            raise e

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_replace_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that replacing a table is possible when if_exists='replace'.
@@ -426,6 +455,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_append_when_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that appending to existing table produces correct results when:
@@ -445,15 +475,18 @@ class TestIO:

        # There should be twice as many rows in the new table
        assert new_rows == orig_rows * 2, (
            "There should be {target} rows,"
            "found: {current}".format(target=orig_rows * 2, current=new_rows),
            "There should be {target} rows,found: {current}".format(
                target=orig_rows * 2, current=new_rows
            ),
        )
        # Number of columns should stay the same
        assert new_cols == orig_cols, (
            "There should be {target} columns,"
            "found: {current}".format(target=orig_cols, current=new_cols),
            "There should be {target} columns,found: {current}".format(
                target=orig_cols, current=new_cols
            ),
        )
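A sketch of the append behaviour asserted above, using the public API and plain pandas to count rows; `gdf` and `engine` are assumed to be set up as in the earlier round-trip sketch:

```python
import pandas as pd

# writing the same frame twice with if_exists="append" doubles the rows
gdf.to_postgis("demo", engine, if_exists="replace")
orig_rows = pd.read_sql("SELECT COUNT(*) AS n FROM demo;", engine)["n"][0]

gdf.to_postgis("demo", engine, if_exists="append")
new_rows = pd.read_sql("SELECT COUNT(*) AS n FROM demo;", engine)["n"][0]
assert new_rows == orig_rows * 2
```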

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_without_crs(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS without CRS information.
@@ -463,8 +496,7 @@ class TestIO:
        table = "nybb"

        # Write to db
        df_nybb = df_nybb
        df_nybb.crs = None
        df_nybb.geometry.array.crs = None
        with pytest.warns(UserWarning, match="Could not parse CRS from the GeoDataF"):
            write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # Validate that srid is 0
@@ -477,6 +509,7 @@ class TestIO:
        target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 0, "SRID should be 0, found %s" % target_srid
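The SRID assertion above can also be reproduced directly in SQL; `Find_SRID` is a standard PostGIS function that reads the `geometry_columns` view. Schema, table, and column names here are assumptions, and `engine` reuses the placeholder from the earlier sketch:

```python
from sqlalchemy import text

# 0 is PostGIS's "no CRS" value, hence the assertion in the test
with engine.connect() as conn:
    srid = conn.execute(
        text("SELECT Find_SRID('public', 'nybb', 'geometry');")
    ).fetchone()[0]
assert srid == 0
```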

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb):
        """
        Tests that GeoDataFrame can be written to PostGIS with ESRI Authority
@@ -499,6 +532,7 @@ class TestIO:
        target_srid = conn.execute(sql).fetchone()[0]
        assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_geometry_collection(
        self, engine_postgis, df_geom_collection
    ):
@@ -525,6 +559,7 @@ class TestIO:
        assert geom_type.upper() == "GEOMETRYCOLLECTION"
        assert df.geom_type.unique()[0] == "GeometryCollection"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_mixed_geometry_types(
        self, engine_postgis, df_mixed_single_and_multi
    ):
@@ -551,6 +586,7 @@ class TestIO:
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_linear_ring(self, engine_postgis, df_linear_ring):
"""
|
||||
Tests that writing a LinearRing.
@@ -572,6 +608,7 @@ class TestIO:

        assert geom_type.upper() == "LINESTRING"
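PostGIS has no LinearRing type, hence the LINESTRING assertion above. A short sketch of the shapely side of that conversion:

```python
import geopandas
from shapely.geometry import LinearRing

ring = LinearRing([(0, 0), (1, 0), (1, 1), (0, 0)])
gdf = geopandas.GeoDataFrame({"id": [1]}, geometry=[ring], crs="EPSG:4326")

# locally the ring keeps its own type...
assert gdf.geom_type[0] == "LinearRing"
# ...while on write PostGIS stores it as a closed LINESTRING,
# which is what the test asserts after the round trip.
```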

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_in_chunks(self, engine_postgis, df_mixed_single_and_multi):
"""
|
||||
Tests writing a LinearRing works.
@@ -605,6 +642,7 @@ class TestIO:
        assert res[1][0].upper() == "MULTILINESTRING"
        assert res[2][0].upper() == "POINT"

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema(self, engine_postgis, df_nybb):
        """
        Tests writing data to an alternative schema.
@@ -628,6 +666,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_to_different_schema_when_table_exists(
        self, engine_postgis, df_nybb
    ):
@@ -672,6 +711,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_write_postgis_3D_geometries(self, engine_postgis, df_3D_geoms):
"""
|
||||
Tests writing a geometries with 3 dimensions works.
@@ -687,6 +727,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        assert list(df.geometry.has_z) == [True, True, True]
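A self-contained sketch of the `has_z` property the 3D test relies on:

```python
import geopandas
from shapely.geometry import Point

# has_z is True for geometries carrying a Z coordinate
gdf = geopandas.GeoDataFrame(
    geometry=[Point(0, 0, 0), Point(1, 1, 1), Point(2, 2, 2)], crs="EPSG:4326"
)
assert list(gdf.geometry.has_z) == [True, True, True]
```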

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_row_order(self, engine_postgis, df_nybb):
        """
        Tests that the row order in db table follows the order of the original frame.
@@ -703,6 +744,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        assert df["BoroCode"].tolist() == correct_order

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_before_table_exists(self, engine_postgis, df_nybb):
        """
        Tests that insert works with if_exists='append' when table does not exist yet.
@@ -720,6 +762,7 @@ class TestIO:
        df = read_postgis(sql, engine, geom_col="geometry")
        validate_boro_df(df)

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_with_different_crs(self, engine_postgis, df_nybb):
"""
|
||||
Tests that the warning is raised if table CRS differs from frame.
@@ -736,9 +779,26 @@ class TestIO:
        with pytest.raises(ValueError, match="CRS of the target table"):
            write_postgis(df_nybb2, con=engine, name=table, if_exists="append")

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_append_without_crs(self, engine_postgis, df_nybb):
        # This test was included in #3328 when the default SRID for a
        # missing CRS was changed from -1 to 0. It covers appending
        # dataframes without a CRS to postgis, whose no-CRS value is 0.
        engine = engine_postgis
        df_nybb = df_nybb.set_crs(None, allow_override=True)
        table = "nybb"

        write_postgis(df_nybb, con=engine, name=table, if_exists="replace")
        # append another dataframe with no crs

        df_nybb2 = df_nybb
        write_postgis(df_nybb2, con=engine, name=table, if_exists="append")
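The `set_crs(None, allow_override=True)` idiom above is how a frame's CRS is dropped without touching the coordinates; a small sketch:

```python
import geopandas
from shapely.geometry import Point

gdf = geopandas.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")

# allow_override is required because the frame already has a CRS;
# the geometries themselves are left unchanged.
gdf = gdf.set_crs(None, allow_override=True)
assert gdf.crs is None
```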

    @pytest.mark.parametrize("engine_postgis", POSTGIS_DRIVERS, indirect=True)
    @pytest.mark.xfail(
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_21,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.x",
        compat.PANDAS_GE_20 and not compat.PANDAS_GE_202,
        reason="Duplicate columns are dropped in read_sql with pandas 2.0.0 and 2.0.1",
    )
    def test_duplicate_geometry_column_fails(self, engine_postgis):
        """
@@ -750,3 +810,69 @@ class TestIO:

        with pytest.raises(ValueError):
            read_postgis(sql, engine, geom_col="geom")

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs(self, connection_postgis, df_nybb):
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")
        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = read_postgis(sql, con)
        validate_boro_df(df)
        assert df.crs == "ESRI:54052"

    @pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj not installed")
    @mock.patch("shapely.get_srid")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_srid_not_in_table(self, mock_get_srid, connection_postgis, df_nybb):
        # mock a non-existent srid for the edge case where shapely reports
        # an srid that is not present in the postgis table.
        pyproj = pytest.importorskip("pyproj")

        mock_get_srid.return_value = 99999

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb)

        sql = "SELECT * FROM nybb;"
        with pytest.raises(pyproj.exceptions.CRSError, match="crs not found"):
            with pytest.warns(UserWarning, match="Could not find srid 99999"):
                read_postgis(sql, con)
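The failure mode being mocked above is the CRS lookup itself: an SRID no authority knows raises `CRSError` in pyproj. A standalone sketch with an arbitrary unknown code:

```python
import pyproj

try:
    pyproj.CRS.from_user_input("EPSG:99999")  # no such code
except pyproj.exceptions.CRSError as err:
    print(err)
```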

    @mock.patch("geopandas.io.sql._get_spatial_ref_sys_df")
    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_no_spatial_ref_sys_table_in_postgis(
        self, mock_get_spatial_ref_sys_df, connection_postgis, df_nybb
    ):
        # mock a missing spatial_ref_sys table in the database

        mock_get_spatial_ref_sys_df.side_effect = pd.errors.DatabaseError

        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="epsg:4326")
        create_postgis(con, df_nybb, srid=4326)

        sql = "SELECT * FROM nybb;"
        with pytest.warns(
            UserWarning, match="Could not find the spatial reference system table"
        ):
            df = read_postgis(sql, con)

        assert df.crs == "EPSG:4326"

    @pytest.mark.parametrize("connection_postgis", POSTGIS_DRIVERS, indirect=True)
    def test_read_non_epsg_crs_chunksize(self, connection_postgis, df_nybb):
        """Test chunksize argument with a non-EPSG CRS"""
        chunksize = 2
        con = connection_postgis
        df_nybb = df_nybb.to_crs(crs="esri:54052")

        create_postgis(con, df_nybb, srid=54052)

        sql = "SELECT * FROM nybb;"
        df = pd.concat(read_postgis(sql, con, chunksize=chunksize))

        validate_boro_df(df)
        assert df.crs == "ESRI:54052"