This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -1,16 +1,26 @@
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile, ZIP_DEFLATED
from zipfile import ZIP_DEFLATED, ZipFile
import pytest
import numpy as np
from pyogrio import (
__gdal_version_string__,
__version__,
list_drivers,
)
from pyogrio._compat import HAS_ARROW_API, HAS_GDAL_GEOS, HAS_SHAPELY
from pyogrio._compat import (
HAS_ARROW_API,
HAS_ARROW_WRITE_API,
HAS_GDAL_GEOS,
HAS_PYARROW,
HAS_PYPROJ,
HAS_SHAPELY,
)
from pyogrio.core import vsi_rmtree
from pyogrio.raw import read, write
import pytest
_data_dir = Path(__file__).parent.resolve() / "fixtures"
@@ -29,6 +39,15 @@ DRIVER_EXT = {driver: ext for ext, driver in DRIVERS.items()}
ALL_EXTS = [".fgb", ".geojson", ".geojsonl", ".gpkg", ".shp"]
START_FID = {
".fgb": 0,
".geojson": 0,
".geojsonl": 0,
".geojsons": 0,
".gpkg": 1,
".shp": 0,
}
def pytest_report_header(config):
drivers = ", ".join(
@@ -43,8 +62,16 @@ def pytest_report_header(config):
# marks to skip tests if optional dependencies are not present
requires_arrow_api = pytest.mark.skipif(
not HAS_ARROW_API, reason="GDAL>=3.6 and pyarrow required"
requires_arrow_api = pytest.mark.skipif(not HAS_ARROW_API, reason="GDAL>=3.6 required")
requires_pyarrow_api = pytest.mark.skipif(
not HAS_ARROW_API or not HAS_PYARROW, reason="GDAL>=3.6 and pyarrow required"
)
requires_pyproj = pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj required")
requires_arrow_write_api = pytest.mark.skipif(
not HAS_ARROW_WRITE_API or not HAS_PYARROW,
reason="GDAL>=3.8 required for Arrow write API",
)
requires_gdal_geos = pytest.mark.skipif(
@@ -99,20 +126,51 @@ def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request):
@pytest.fixture(scope="function")
def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres):
"""Wrap naturalearth_lowres as a zip file for vsi tests"""
"""Wrap naturalearth_lowres as a zip file for VSI tests"""
path = tmp_path / f"{naturalearth_lowres.name}.zip"
with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
for ext in ["dbf", "prj", "shp", "shx"]:
for ext in ["dbf", "prj", "shp", "shx", "cpg"]:
filename = f"{naturalearth_lowres.stem}.{ext}"
out.write(naturalearth_lowres.parent / filename, filename)
return path, f"/vsizip/{path}/{naturalearth_lowres.name}"
@pytest.fixture(scope="function")
def naturalearth_lowres_vsimem(naturalearth_lowres):
    """Write naturalearth_lowres to an in-memory /vsimem GPKG for VSI tests.

    Yields the /vsimem path to the written GPKG; on teardown removes the
    whole in-memory directory that contains it.
    """
    meta, _, geometry, field_data = read(naturalearth_lowres)
    name = f"pyogrio_fixture_{naturalearth_lowres.stem}"
    dst_path = Path(f"/vsimem/{name}/{name}.gpkg")
    # NOTE(review): spatial index is disabled, presumably to speed up fixture
    # setup — confirm. geometry_type is widened to MultiPolygon, presumably
    # because shapefile "Polygon" layers may contain multipart geometries.
    meta["spatial_index"] = False
    meta["geometry_type"] = "MultiPolygon"
    write(dst_path, geometry, field_data, layer="naturalearth_lowres", **meta)
    yield dst_path
    # teardown: drop the containing in-memory directory and its contents
    vsi_rmtree(dst_path.parent)
@pytest.fixture(scope="session")
def test_fgdb_vsi():
return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip"
def line_zm_file():
return _data_dir / "line_zm.gpkg"
@pytest.fixture(scope="session")
def curve_file():
return _data_dir / "curve.gpkg"
@pytest.fixture(scope="session")
def curve_polygon_file():
return _data_dir / "curvepolygon.gpkg"
@pytest.fixture(scope="session")
def multisurface_file():
return _data_dir / "multisurface.gpkg"
@pytest.fixture(scope="session")
@@ -120,16 +178,221 @@ def test_gpkg_nulls():
return _data_dir / "test_gpkg_nulls.gpkg"
@pytest.fixture(scope="session")
def test_ogr_types_list():
return _data_dir / "test_ogr_types_list.geojson"
@pytest.fixture(scope="function")
def no_geometry_file(tmp_path):
    """Create a GPKG file with a single attribute-only (nonspatial) layer."""
    out_path = tmp_path / "test_no_geometry.gpkg"
    # one string column, three rows, no geometry column at all
    attribute_values = np.array(["a", "b", "c"])
    write(
        out_path,
        geometry=None,
        field_data=[attribute_values],
        fields=["col"],
        layer="no_geometry",
    )
    return out_path
@pytest.fixture(scope="session")
def test_datetime():
return _data_dir / "test_datetime.geojson"
@pytest.fixture(scope="function")
def list_field_values_file(tmp_path):
    """Write a GeoJSON file whose ``list_int64`` property holds list values.

    Returns the path to the written file.
    """
    # JSON arrays in feature properties are what this fixture exercises.
    content = """{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": { "int64": 1, "list_int64": [0, 1] },
"geometry": { "type": "Point", "coordinates": [0, 2] }
},
{
"type": "Feature",
"properties": { "int64": 2, "list_int64": [2, 3] },
"geometry": { "type": "Point", "coordinates": [1, 2] }
},
{
"type": "Feature",
"properties": { "int64": 3, "list_int64": [4, 5] },
"geometry": { "type": "Point", "coordinates": [2, 2] }
},
{
"type": "Feature",
"properties": { "int64": 4, "list_int64": [6, 7] },
"geometry": { "type": "Point", "coordinates": [3, 2] }
},
{
"type": "Feature",
"properties": { "int64": 5, "list_int64": [8, 9] },
"geometry": { "type": "Point", "coordinates": [4, 2] }
}
]
}"""
    out = tmp_path / "test_ogr_types_list.geojson"
    out.write_text(content)
    return out
@pytest.fixture(scope="session")
def test_datetime_tz():
return _data_dir / "test_datetime_tz.geojson"
@pytest.fixture(scope="function")
def nested_geojson_file(tmp_path):
    """Write a GeoJSON file with nested (object-valued) properties.

    Returns the path to the written file.
    """
    content = """{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [0, 0]
},
"properties": {
"top_level": "A",
"intermediate_level": {
"bottom_level": "B"
}
}
}
]
}"""
    out = tmp_path / "test_nested.geojson"
    out.write_text(content)
    return out
@pytest.fixture(scope="function")
def datetime_file(tmp_path):
    """Write a GeoJSON file whose ``col`` property mixes datetime values with
    and without millisecond precision; return the path.
    """
    content = """{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": { "col": "2020-01-01T09:00:00.123" },
"geometry": { "type": "Point", "coordinates": [1, 1] }
},
{
"type": "Feature",
"properties": { "col": "2020-01-01T10:00:00" },
"geometry": { "type": "Point", "coordinates": [2, 2] }
}
]
}"""
    out = tmp_path / "test_datetime.geojson"
    out.write_text(content)
    return out
@pytest.fixture(scope="function")
def datetime_tz_file(tmp_path):
    """Create a GeoJSON file with timezone-aware datetime property values.

    Returns the path to the written file.
    """
    datetime_tz_geojson = """{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" },
"geometry": { "type": "Point", "coordinates": [1, 1] }
},
{
"type": "Feature",
"properties": { "datetime_col": "2020-01-01T10:00:00-05:00" },
"geometry": { "type": "Point", "coordinates": [2, 2] }
}
]
}"""
    filename = tmp_path / "test_datetime_tz.geojson"
    with open(filename, "w") as f:
        # discard the write() return value explicitly, for consistency with
        # the sibling fixtures (datetime_file, nested_geojson_file, ...)
        _ = f.write(datetime_tz_geojson)
    return filename
@pytest.fixture(scope="function")
def geojson_bytes(tmp_path):
    """Return the first 3 records of naturalearth_lowres as GeoJSON bytes."""
    src = _data_dir / "naturalearth_lowres" / "naturalearth_lowres.shp"
    meta, _, geometry, field_data = read(src, max_features=3)
    out = tmp_path / "test.geojson"
    write(out, geometry, field_data, **meta)
    # hand back raw bytes rather than a path, for buffer-based read tests
    return out.read_bytes()
@pytest.fixture(scope="function")
def geojson_filelike(tmp_path):
    """Yield an open binary file handle for a 3-record GeoJSON test file."""
    src = _data_dir / "naturalearth_lowres" / "naturalearth_lowres.shp"
    meta, _, geometry, field_data = read(src, max_features=3)
    out = tmp_path / "test.geojson"
    write(out, geometry, field_data, layer="test", **meta)
    # keep the handle open for the duration of the test, then auto-close
    with open(out, "rb") as handle:
        yield handle
@pytest.fixture(scope="function")
def nonseekable_bytes(tmp_path):
    """Return a BytesIO of a one-point GeoJSON that refuses to seek.

    Mimics non-seekable byte streams such as zstandard handles. ``tmp_path``
    is unused but kept for fixture-signature compatibility.
    """

    class _NonSeekable(BytesIO):
        # advertise the lack of random access...
        def seekable(self):
            return False

        # ...and enforce it
        def seek(self, *args, **kwargs):
            raise OSError("cannot seek")

    payload = """{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": { },
"geometry": { "type": "Point", "coordinates": [1, 1] }
}
]
}"""
    return _NonSeekable(payload.encode("UTF-8"))
@pytest.fixture(
scope="session",
params=[
# Japanese
("CP932", ""),
# Chinese
("CP936", "中文"),
# Central European
("CP1250", "Đ"),
# Latin 1 / Western European
("CP1252", "ÿ"),
# Greek
("CP1253", "Φ"),
# Arabic
("CP1256", "ش"),
],
)
def encoded_text(request):
    """Return a (codepage name, short sample text) pair for encoding tests.

    NOTE: it was determined through testing that code pages for MS-DOS do not
    consistently work across all Python installations (in particular, fail
    with conda), but ANSI code pages appear to work properly.
    """
    return request.param

View File

@@ -1,13 +1,28 @@
# Test datasets
## Natural Earth lowres
## Obtaining / creating test datasets
If a test dataset can be created in code, do that instead. If it is used in a
single test, create the test dataset as part of that test. If it is used in
more than a single test, add it to `pyogrio/tests/conftest.py` instead, as a
function-scoped test fixture.
If you need to obtain 3rd party test files:
- add a section below that describes the source location and processing steps
to derive that dataset
- make sure the license is compatible with including in Pyogrio (public domain or open-source)
and record that license below
Please keep the test files no larger than necessary to use in tests.
## Included test datasets
### Natural Earth lowres
`naturalearth_lowres.shp` was copied from GeoPandas.
## FGDB test dataset
`test_fgdb.gdb.zip`
Downloaded from http://trac.osgeo.org/gdal/raw-attachment/wiki/FileGDB/test_fgdb.gdb.zip
License: public domain
### GPKG test dataset with null values
@@ -75,15 +90,19 @@ NOTE: Reading boolean values into GeoPandas using Fiona backend treats those
values as `None` and column dtype as `object`; Pyogrio treats those values as
`np.nan` and column dtype as `float64`.
### GPKG test with MultiSurface
This was extracted from https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_0308_HU4_GDB.zip
`NHDWaterbody` layer using ogr2ogr:
```bash
ogr2ogr test_mixed_surface.gpkg NHDPLUS_H_0308_HU4_GDB.gdb NHDWaterbody -where '"NHDPlusID" = 15000300070477' -select "NHDPlusID"
```
License: same as Pyogrio
### OSM PBF test
This was downloaded from https://github.com/openstreetmap/OSM-binary/blob/master/resources/sample.pbf
License: [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/)
### Test files for geometry types that are downgraded on read
`line_zm.gpkg` was created using QGIS to digitize a LineString GPKG layer with Z and M enabled. Downgraded to LineString Z on read.
`curve.gpkg` was created using QGIS to digitize a Curve GPKG layer. Downgraded to LineString on read.
`curvepolygon.gpkg` was created using QGIS to digitize a CurvePolygon GPKG layer. Downgraded to Polygon on read.
`multisurface.gpkg` was created using QGIS to digitize a MultiSurface GPKG layer. Downgraded to MultiPolygon on read.
License: same as Pyogrio

View File

@@ -1,7 +0,0 @@
{
"type": "FeatureCollection",
"features": [
{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
]
}

View File

@@ -1,8 +0,0 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
]
}

View File

@@ -1,18 +0,0 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [0, 0]
},
"properties": {
"top_level": "A",
"intermediate_level": {
"bottom_level": "B"
}
}
}
]
}

View File

@@ -1,12 +0,0 @@
{
"type": "FeatureCollection",
"name": "test",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "int64": 1, "list_int64": [ 0, 1 ] }, "geometry": { "type": "Point", "coordinates": [ 0.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 2, "list_int64": [ 2, 3 ] }, "geometry": { "type": "Point", "coordinates": [ 1.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 3, "list_int64": [ 4, 5 ] }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 4, "list_int64": [ 6, 7 ] }, "geometry": { "type": "Point", "coordinates": [ 3.0, 2.0 ] } },
{ "type": "Feature", "properties": { "int64": 5, "list_int64": [ 8, 9 ] }, "geometry": { "type": "Point", "coordinates": [ 4.0, 2.0 ] } }
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,28 +1,35 @@
from pathlib import Path
import numpy as np
from numpy import array_equal, allclose
import pytest
from numpy import allclose, array_equal
from pyogrio import (
__gdal_version__,
__gdal_geos_version__,
__gdal_version__,
detect_write_driver,
get_gdal_config_option,
get_gdal_data_path,
list_drivers,
list_layers,
read_bounds,
read_info,
set_gdal_config_options,
get_gdal_config_option,
get_gdal_data_path,
vsi_listtree,
vsi_rmtree,
vsi_unlink,
)
from pyogrio.core import detect_write_driver
from pyogrio.errors import DataSourceError, DataLayerError
from pyogrio.tests.conftest import HAS_SHAPELY, prepare_testfile
from pyogrio._compat import GDAL_GE_38
from pyogrio._env import GDALEnv
from pyogrio.errors import DataLayerError, DataSourceError
from pyogrio.raw import read, write
from pyogrio.tests.conftest import START_FID, prepare_testfile, requires_shapely
import pytest
with GDALEnv():
# NOTE: this must be AFTER above imports, which init the GDAL and PROJ data
# search paths
from pyogrio._ogr import ogr_driver_supports_write, has_gdal_data, has_proj_data
from pyogrio._ogr import has_gdal_data, has_proj_data, ogr_driver_supports_write
try:
@@ -150,7 +157,16 @@ def test_list_drivers():
assert len(drivers) == len(expected)
def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi):
def test_list_layers(
naturalearth_lowres,
naturalearth_lowres_vsi,
naturalearth_lowres_vsimem,
line_zm_file,
curve_file,
curve_polygon_file,
multisurface_file,
no_geometry_file,
):
assert array_equal(
list_layers(naturalearth_lowres), [["naturalearth_lowres", "Polygon"]]
)
@@ -159,38 +175,98 @@ def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi
list_layers(naturalearth_lowres_vsi[1]), [["naturalearth_lowres", "Polygon"]]
)
assert array_equal(
list_layers(naturalearth_lowres_vsimem),
[["naturalearth_lowres", "MultiPolygon"]],
)
# Measured 3D is downgraded to plain 3D during read
# Make sure this warning is raised
with pytest.warns(
UserWarning, match=r"Measured \(M\) geometry types are not supported"
):
fgdb_layers = list_layers(test_fgdb_vsi)
# GDAL >= 3.4.0 includes 'another_relationship' layer
assert len(fgdb_layers) >= 7
assert array_equal(list_layers(line_zm_file), [["line_zm", "LineString Z"]])
# Make sure that nonspatial layer has None for geometry
assert array_equal(fgdb_layers[0], ["basetable_2", None])
# Curve / surface types are downgraded to plain types
assert array_equal(list_layers(curve_file), [["curve", "LineString"]])
assert array_equal(list_layers(curve_polygon_file), [["curvepolygon", "Polygon"]])
assert array_equal(
list_layers(multisurface_file), [["multisurface", "MultiPolygon"]]
)
# Confirm that measured 3D is downgraded to plain 3D during read
assert array_equal(fgdb_layers[3], ["test_lines", "MultiLineString Z"])
assert array_equal(fgdb_layers[6], ["test_areas", "MultiPolygon Z"])
# Make sure that nonspatial layer has None for geometry
assert array_equal(list_layers(no_geometry_file), [["no_geometry", None]])
def test_read_bounds(naturalearth_lowres):
fids, bounds = read_bounds(naturalearth_lowres)
def test_list_layers_bytes(geojson_bytes):
    """list_layers accepts raw bytes containing a dataset."""
    layers = list_layers(geojson_bytes)
    # one layer, reported as (name, geometry type)
    assert layers.shape == (1, 2)
    assert layers[0, 0] == "test"
def test_list_layers_nonseekable_bytes(nonseekable_bytes):
    """list_layers works on a byte stream that cannot seek."""
    layers = list_layers(nonseekable_bytes)
    assert layers.shape == (1, 2)
    assert layers[0, 1] == "Point"
def test_list_layers_filelike(geojson_filelike):
    """list_layers accepts an open binary file handle."""
    layers = list_layers(geojson_filelike)
    assert layers.shape == (1, 2)
    assert layers[0, 0] == "test"
@pytest.mark.parametrize(
"testfile",
["naturalearth_lowres", "naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"],
)
def test_read_bounds(testfile, request):
path = request.getfixturevalue(testfile)
path = path if not isinstance(path, tuple) else path[1]
fids, bounds = read_bounds(path)
assert fids.shape == (177,)
assert bounds.shape == (4, 177)
assert fids[0] == 0
assert fids[0] == START_FID[Path(path).suffix]
# Fiji; wraps antimeridian
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
def test_read_bounds_bytes(geojson_bytes):
    """read_bounds accepts in-memory GeoJSON bytes."""
    fids, bounds = read_bounds(geojson_bytes)
    # the fixture holds the first 3 naturalearth_lowres records
    assert fids.shape == (3,)
    assert bounds.shape == (4, 3)
    # first record is Fiji, whose bbox wraps the antimeridian
    assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
def test_read_bounds_nonseekable_bytes(nonseekable_bytes):
fids, bounds = read_bounds(nonseekable_bytes)
assert fids.shape == (1,)
assert bounds.shape == (4, 1)
assert allclose(bounds[:, 0], [1, 1, 1, 1])
def test_read_bounds_filelike(geojson_filelike):
fids, bounds = read_bounds(geojson_filelike)
assert fids.shape == (3,)
assert bounds.shape == (4, 3)
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
def test_read_bounds_max_features(naturalearth_lowres):
bounds = read_bounds(naturalearth_lowres, max_features=2)[1]
assert bounds.shape == (4, 2)
def test_read_bounds_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read_bounds(data_dir / "sample.osm.pbf")
def test_read_bounds_negative_max_features(naturalearth_lowres):
    """A negative max_features value is rejected with ValueError."""
    with pytest.raises(ValueError, match="'max_features' must be >= 0"):
        read_bounds(naturalearth_lowres, max_features=-1)
@@ -240,12 +316,9 @@ def test_read_bounds_bbox(naturalearth_lowres_all_ext):
fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))
assert fids.shape == (2,)
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [34, 35]) # PAN, CRI
else:
# fid in other formats is 0-based
assert array_equal(fids, [33, 34]) # PAN, CRI
fids_expected = np.array([33, 34]) # PAN, CRI
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
assert array_equal(fids, fids_expected)
assert bounds.shape == (4, 2)
assert allclose(
@@ -257,9 +330,7 @@ def test_read_bounds_bbox(naturalearth_lowres_all_ext):
)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
@pytest.mark.parametrize(
"mask",
[
@@ -273,9 +344,7 @@ def test_read_bounds_mask_invalid(naturalearth_lowres, mask):
read_bounds(naturalearth_lowres, mask=mask)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
def test_read_bounds_bbox_mask_invalid(naturalearth_lowres):
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
read_bounds(
@@ -283,9 +352,7 @@ def test_read_bounds_bbox_mask_invalid(naturalearth_lowres):
)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
@pytest.mark.parametrize(
"mask,expected",
[
@@ -316,12 +383,8 @@ def test_read_bounds_mask(naturalearth_lowres_all_ext, mask, expected):
fids = read_bounds(naturalearth_lowres_all_ext, mask=mask)[0]
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, np.array(expected) + 1)
else:
# fid in other formats is 0-based
assert array_equal(fids, expected)
fids_expected = np.array(expected) + START_FID[naturalearth_lowres_all_ext.suffix]
assert array_equal(fids, fids_expected)
@pytest.mark.skipif(
@@ -337,40 +400,87 @@ def test_read_bounds_bbox_intersects_vs_envelope_overlaps(naturalearth_lowres_al
if __gdal_geos_version__ is None:
# bboxes for CAN, RUS overlap but do not intersect geometries
assert fids.shape == (4,)
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [4, 5, 19, 28]) # CAN, USA, RUS, MEX
else:
# fid in other formats is 0-based
assert array_equal(fids, [3, 4, 18, 27]) # CAN, USA, RUS, MEX
fids_expected = np.array([3, 4, 18, 27]) # CAN, USA, RUS, MEX
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
assert array_equal(fids, fids_expected)
else:
assert fids.shape == (2,)
if naturalearth_lowres_all_ext.suffix == ".gpkg":
# fid in gpkg is 1-based
assert array_equal(fids, [5, 28]) # USA, MEX
else:
# fid in other formats is 0-based
assert array_equal(fids, [4, 27]) # USA, MEX
fids_expected = np.array([4, 27]) # USA, MEX
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
assert array_equal(fids, fids_expected)
@pytest.mark.parametrize("naturalearth_lowres", [".shp", ".gpkg"], indirect=True)
def test_read_info(naturalearth_lowres):
meta = read_info(naturalearth_lowres)
assert meta["layer_name"] == "naturalearth_lowres"
assert meta["crs"] == "EPSG:4326"
assert meta["geometry_type"] == "Polygon"
assert meta["encoding"] == "UTF-8"
assert meta["fields"].shape == (5,)
assert meta["dtypes"].tolist() == ["int64", "object", "object", "object", "float64"]
assert meta["features"] == 177
assert allclose(meta["total_bounds"], (-180, -90, 180, 83.64513))
assert meta["driver"] == "ESRI Shapefile"
assert meta["capabilities"]["random_read"] is True
assert meta["capabilities"]["fast_set_next_by_index"] is True
assert meta["capabilities"]["fast_spatial_filter"] is False
assert meta["capabilities"]["fast_feature_count"] is True
assert meta["capabilities"]["fast_total_bounds"] is True
if naturalearth_lowres.suffix == ".gpkg":
assert meta["fid_column"] == "fid"
assert meta["geometry_name"] == "geom"
assert meta["geometry_type"] == "MultiPolygon"
assert meta["driver"] == "GPKG"
if GDAL_GE_38:
# this capability is only True for GPKG if GDAL >= 3.8
assert meta["capabilities"]["fast_set_next_by_index"] is True
elif naturalearth_lowres.suffix == ".shp":
# fid_column == "" for formats where fid is not physically stored
assert meta["fid_column"] == ""
# geometry_name == "" for formats where geometry column name cannot be
# customized
assert meta["geometry_name"] == ""
assert meta["geometry_type"] == "Polygon"
assert meta["driver"] == "ESRI Shapefile"
assert meta["capabilities"]["fast_set_next_by_index"] is True
else:
raise ValueError(f"test not implemented for ext {naturalearth_lowres.suffix}")
@pytest.mark.parametrize(
"testfile", ["naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"]
)
def test_read_info_vsi(testfile, request):
path = request.getfixturevalue(testfile)
path = path if not isinstance(path, tuple) else path[1]
meta = read_info(path)
assert meta["fields"].shape == (5,)
assert meta["features"] == 177
def test_read_info_bytes(geojson_bytes):
meta = read_info(geojson_bytes)
assert meta["fields"].shape == (5,)
assert meta["features"] == 3
def test_read_info_nonseekable_bytes(nonseekable_bytes):
meta = read_info(nonseekable_bytes)
assert meta["fields"].shape == (0,)
assert meta["features"] == 1
def test_read_info_filelike(geojson_filelike):
meta = read_info(geojson_filelike)
assert meta["fields"].shape == (5,)
assert meta["features"] == 3
@pytest.mark.parametrize(
"dataset_kwargs,fields",
@@ -399,8 +509,8 @@ def test_read_info(naturalearth_lowres):
),
],
)
def test_read_info_dataset_kwargs(data_dir, dataset_kwargs, fields):
meta = read_info(data_dir / "test_nested.geojson", **dataset_kwargs)
def test_read_info_dataset_kwargs(nested_geojson_file, dataset_kwargs, fields):
meta = read_info(nested_geojson_file, **dataset_kwargs)
assert meta["fields"].tolist() == fields
@@ -440,10 +550,12 @@ def test_read_info_force_feature_count(data_dir, layer, force, expected):
[(True, (-180.0, -90.0, 180.0, 83.64513)), (False, None)],
)
def test_read_info_force_total_bounds(
tmpdir, naturalearth_lowres, force_total_bounds, expected_total_bounds
tmp_path, naturalearth_lowres, force_total_bounds, expected_total_bounds
):
# GeoJSON files don't have a fast way to determine total_bounds
geojson_path = prepare_testfile(naturalearth_lowres, dst_dir=tmpdir, ext=".geojson")
geojson_path = prepare_testfile(
naturalearth_lowres, dst_dir=tmp_path, ext=".geojsonl"
)
info = read_info(geojson_path, force_total_bounds=force_total_bounds)
if expected_total_bounds is not None:
assert allclose(info["total_bounds"], expected_total_bounds)
@@ -451,8 +563,14 @@ def test_read_info_force_total_bounds(
assert info["total_bounds"] is None
def test_read_info_without_geometry(test_fgdb_vsi):
assert read_info(test_fgdb_vsi)["total_bounds"] is None
def test_read_info_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read_info(data_dir / "sample.osm.pbf")
def test_read_info_without_geometry(no_geometry_file):
assert read_info(no_geometry_file)["total_bounds"] is None
@pytest.mark.parametrize(
@@ -494,3 +612,67 @@ def test_error_handling_warning(capfd, naturalearth_lowres):
read_info(naturalearth_lowres, INVALID="YES")
assert capfd.readouterr().err == ""
def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres):
    """Test all basic functionalities of file handling in /vsimem/."""
    # Prepare test data in /vsimem: one file at the root, one in a subdirectory
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta["spatial_index"] = False
    meta["geometry_type"] = "MultiPolygon"
    test_file_path = Path("/vsimem/pyogrio_test_naturalearth_lowres.gpkg")
    test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg")
    write(test_file_path, geometry, field_data, **meta)
    write(test_dir_path, geometry, field_data, **meta)
    # Check if everything was created properly with listtree
    files = vsi_listtree("/vsimem/")
    assert test_file_path.as_posix() in files
    assert test_dir_path.as_posix() in files
    # Check listtree with pattern: each glob should match exactly one of the two
    files = vsi_listtree("/vsimem/", pattern="pyogrio_dir_test*.gpkg")
    assert test_file_path.as_posix() not in files
    assert test_dir_path.as_posix() in files
    files = vsi_listtree("/vsimem/", pattern="pyogrio_test*.gpkg")
    assert test_file_path.as_posix() in files
    assert test_dir_path.as_posix() not in files
    # Remove test_dir and its contents; the root file must survive
    vsi_rmtree(test_dir_path.parent)
    files = vsi_listtree("/vsimem/")
    assert test_file_path.as_posix() in files
    assert test_dir_path.as_posix() not in files
    # Remove test_file
    vsi_unlink(test_file_path)
def test_vsimem_rmtree_error(naturalearth_lowres_vsimem):
    """vsi_rmtree raises specific, pathlib-style errors for invalid targets."""
    # target is a file, not a directory
    with pytest.raises(NotADirectoryError, match="Path is not a directory"):
        vsi_rmtree(naturalearth_lowres_vsimem)
    with pytest.raises(FileNotFoundError, match="Path does not exist"):
        vsi_rmtree("/vsimem/non-existent")
    # the /vsimem root itself (with or without trailing slash) is refused
    with pytest.raises(
        OSError, match="path to in-memory file or directory is required"
    ):
        vsi_rmtree("/vsimem")
    with pytest.raises(
        OSError, match="path to in-memory file or directory is required"
    ):
        vsi_rmtree("/vsimem/")
    # Verify that naturalearth_lowres_vsimem still exists.
    assert naturalearth_lowres_vsimem.as_posix() in vsi_listtree("/vsimem")
def test_vsimem_unlink_error(naturalearth_lowres_vsimem):
    """vsi_unlink raises specific errors for directories and missing files."""
    with pytest.raises(IsADirectoryError, match="Path is a directory"):
        vsi_unlink(naturalearth_lowres_vsimem.parent)
    with pytest.raises(FileNotFoundError, match="Path does not exist"):
        vsi_unlink("/vsimem/non-existent.gpkg")

View File

@@ -1,15 +1,17 @@
import os
import contextlib
from zipfile import ZipFile, ZIP_DEFLATED
import pytest
import os
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile
import pyogrio
import pyogrio.raw
from pyogrio.util import vsi_path
from pyogrio._compat import HAS_PYPROJ
from pyogrio.util import get_vsi_path_or_buffer, vsi_path
import pytest
try:
import geopandas # NOQA
import geopandas # noqa: F401
has_geopandas = True
except ImportError:
@@ -31,9 +33,11 @@ def change_cwd(path):
[
# local file paths that should be passed through as is
("data.gpkg", "data.gpkg"),
(Path("data.gpkg"), "data.gpkg"),
("/home/user/data.gpkg", "/home/user/data.gpkg"),
(r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
# cloud URIs
("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
("s3://testing/data.gpkg", "/vsis3/testing/data.gpkg"),
@@ -82,6 +86,8 @@ def change_cwd(path):
"s3://testing/test.zip!a/b/item.shp",
"/vsizip/vsis3/testing/test.zip/a/b/item.shp",
),
("/vsimem/data.gpkg", "/vsimem/data.gpkg"),
(Path("/vsimem/data.gpkg"), "/vsimem/data.gpkg"),
],
)
def test_vsi_path(path, expected):
@@ -236,6 +242,9 @@ def test_detect_zip_path(tmp_path, naturalearth_lowres):
path = tmp_path / "test.zip"
with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
for ext in ["dbf", "prj", "shp", "shx"]:
if not HAS_PYPROJ and ext == "prj":
continue
filename = f"test1.{ext}"
out.write(tmp_path / filename, filename)
@@ -265,7 +274,7 @@ def test_detect_zip_path(tmp_path, naturalearth_lowres):
@pytest.mark.network
def test_url():
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp" # NOQA
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"
result = pyogrio.raw.read(url)
assert len(result[2]) == 177
@@ -277,9 +286,10 @@ def test_url():
assert len(result[0]) == 177
@pytest.mark.network
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
def test_url_dataframe():
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp" # NOQA
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"
assert len(pyogrio.read_dataframe(url)) == 177
@@ -330,3 +340,25 @@ def test_uri_s3(aws_env_setup):
def test_uri_s3_dataframe(aws_env_setup):
df = pyogrio.read_dataframe("zip+s3://fiona-testing/coutwildrnp.zip")
assert len(df) == 67
@pytest.mark.parametrize(
"path, expected",
[
(Path("/tmp/test.gpkg"), str(Path("/tmp/test.gpkg"))),
(Path("/vsimem/test.gpkg"), "/vsimem/test.gpkg"),
],
)
def test_get_vsi_path_or_buffer_obj_to_string(path, expected):
"""Verify that get_vsi_path_or_buffer retains forward slashes in /vsimem paths.
The /vsimem paths should keep forward slashes for GDAL to recognize them as such.
However, on Windows systems, forward slashes are by default replaced by backslashes,
so this test verifies that this doesn't happen for /vsimem paths.
"""
assert get_vsi_path_or_buffer(path) == expected
def test_get_vsi_path_or_buffer_fixtures_to_string(tmp_path):
    """A regular filesystem Path is converted to its plain string form."""
    path = tmp_path / "test.gpkg"
    assert get_vsi_path_or_buffer(path) == str(path)

View File

@@ -1,29 +1,36 @@
import contextlib
import ctypes
import json
import os
import sys
from io import BytesIO
from zipfile import ZipFile
import numpy as np
from numpy import array_equal
import pytest
import pyogrio
from pyogrio import (
list_layers,
__gdal_version__,
get_gdal_config_option,
list_drivers,
list_layers,
read_info,
set_gdal_config_options,
__gdal_version__,
)
from pyogrio._compat import HAS_SHAPELY
from pyogrio.raw import read, write
from pyogrio.errors import DataSourceError, DataLayerError, FeatureError
from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
from pyogrio.raw import open_arrow, read, write
from pyogrio.tests.conftest import (
DRIVERS,
DRIVER_EXT,
DRIVERS,
prepare_testfile,
requires_arrow_api,
requires_pyarrow_api,
requires_shapely,
)
import pytest
try:
import shapely
except ImportError:
@@ -79,6 +86,12 @@ def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
assert len(geometry) == len(fields[0])
def test_read_arrow_unspecified_layer_warning(data_dir):
"""Reading a multi-layer file without specifying a layer gives a warning."""
with pytest.warns(UserWarning, match="More than one layer found "):
read(data_dir / "sample.osm.pbf")
def test_read_invalid_layer(naturalearth_lowres):
with pytest.raises(DataLayerError, match="Layer 'invalid' could not be opened"):
read(naturalearth_lowres, layer="invalid")
@@ -106,6 +119,29 @@ def test_read_no_geometry(naturalearth_lowres):
assert geometry is None
@requires_shapely
def test_read_no_geometry__mask(naturalearth_lowres):
geometry, fields = read(
naturalearth_lowres,
read_geometry=False,
mask=shapely.Point(-105, 55),
)[2:]
assert np.array_equal(fields[3], ["CAN"])
assert geometry is None
def test_read_no_geometry__bbox(naturalearth_lowres):
geometry, fields = read(
naturalearth_lowres,
read_geometry=False,
bbox=(-109.0, 55.0, -109.0, 55.0),
)[2:]
assert np.array_equal(fields[3], ["CAN"])
assert geometry is None
def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres):
with pytest.raises(
ValueError,
@@ -245,9 +281,7 @@ def test_read_bbox_where(naturalearth_lowres_all_ext):
assert np.array_equal(fields[3], ["CAN"])
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
@pytest.mark.parametrize(
"mask",
[
@@ -261,17 +295,13 @@ def test_read_mask_invalid(naturalearth_lowres, mask):
read(naturalearth_lowres, mask=mask)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
def test_read_bbox_mask_invalid(naturalearth_lowres):
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
read(naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=shapely.Point(-105, 55))
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
@pytest.mark.parametrize(
"mask,expected",
[
@@ -306,9 +336,7 @@ def test_read_mask(naturalearth_lowres_all_ext, mask, expected):
assert len(geometry) == len(expected)
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
def test_read_mask_sql(naturalearth_lowres_all_ext):
fields = read(
naturalearth_lowres_all_ext,
@@ -319,9 +347,7 @@ def test_read_mask_sql(naturalearth_lowres_all_ext):
assert np.array_equal(fields[3], ["CAN"])
@pytest.mark.skipif(
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@requires_shapely
def test_read_mask_where(naturalearth_lowres_all_ext):
fields = read(
naturalearth_lowres_all_ext,
@@ -414,35 +440,43 @@ def test_read_return_only_fids(naturalearth_lowres):
assert len(field_data) == 0
def test_write(tmpdir, naturalearth_lowres):
@pytest.mark.parametrize("encoding", [None, "ISO-8859-1"])
def test_write_shp(tmp_path, naturalearth_lowres, encoding):
meta, _, geometry, field_data = read(naturalearth_lowres)
filename = os.path.join(str(tmpdir), "test.shp")
filename = tmp_path / "test.shp"
meta["encoding"] = encoding
write(filename, geometry, field_data, **meta)
assert os.path.exists(filename)
assert filename.exists()
for ext in (".dbf", ".prj"):
assert os.path.exists(filename.replace(".shp", ext))
assert filename.with_suffix(ext).exists()
# We write shapefiles in UTF-8 by default on all platforms
expected_encoding = encoding if encoding is not None else "UTF-8"
with open(filename.with_suffix(".cpg")) as cpg_file:
result_encoding = cpg_file.read()
assert result_encoding == expected_encoding
def test_write_gpkg(tmpdir, naturalearth_lowres):
def test_write_gpkg(tmp_path, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "MultiPolygon"})
filename = os.path.join(str(tmpdir), "test.gpkg")
filename = tmp_path / "test.gpkg"
write(filename, geometry, field_data, driver="GPKG", **meta)
assert os.path.exists(filename)
assert filename.exists()
def test_write_gpkg_multiple_layers(tmpdir, naturalearth_lowres):
def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
meta["geometry_type"] = "MultiPolygon"
filename = os.path.join(str(tmpdir), "test.gpkg")
filename = tmp_path / "test.gpkg"
write(filename, geometry, field_data, driver="GPKG", layer="first", **meta)
assert os.path.exists(filename)
assert filename.exists()
assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
@@ -453,13 +487,13 @@ def test_write_gpkg_multiple_layers(tmpdir, naturalearth_lowres):
)
def test_write_geojson(tmpdir, naturalearth_lowres):
def test_write_geojson(tmp_path, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
filename = os.path.join(str(tmpdir), "test.json")
filename = tmp_path / "test.json"
write(filename, geometry, field_data, driver="GeoJSON", **meta)
assert os.path.exists(filename)
assert filename.exists()
data = json.loads(open(filename).read())
@@ -478,17 +512,21 @@ def test_write_no_fields(tmp_path, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
field_data = None
meta["fields"] = None
# naturalearth_lowres actually contains MultiPolygons. A shapefile doesn't make the
# distinction, so the metadata just reports Polygon. GPKG does, so override here to
# avoid GDAL warnings.
meta["geometry_type"] = "MultiPolygon"
# Test
filename = tmp_path / "test.gpkg"
write(filename, geometry, field_data, driver="GPKG", **meta)
# Check result
assert os.path.exists(filename)
assert filename.exists()
meta, _, geometry, fields = read(filename)
assert meta["crs"] == "EPSG:4326"
assert meta["geometry_type"] == "Polygon"
assert meta["geometry_type"] == "MultiPolygon"
assert meta["encoding"] == "UTF-8"
assert meta["fields"].shape == (0,)
assert len(fields) == 0
@@ -510,7 +548,7 @@ def test_write_no_geom(tmp_path, naturalearth_lowres):
write(filename, geometry, field_data, driver="GPKG", **meta)
# Check result
assert os.path.exists(filename)
assert filename.exists()
meta, _, geometry, fields = read(filename)
assert meta["crs"] is None
@@ -547,7 +585,7 @@ def test_write_no_geom_data(tmp_path, naturalearth_lowres):
write(filename, geometry, field_data, driver="GPKG", **meta)
# Check result
assert os.path.exists(filename)
assert filename.exists()
result_meta, _, result_geometry, result_field_data = read(filename)
assert result_meta["crs"] is None
@@ -581,17 +619,84 @@ def test_write_no_geom_no_fields():
__gdal_version__ < (3, 6, 0),
reason="OpenFileGDB write support only available for GDAL >= 3.6.0",
)
def test_write_openfilegdb(tmpdir, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
@pytest.mark.parametrize(
"write_int64",
[
False,
pytest.param(
True,
marks=pytest.mark.skipif(
__gdal_version__ < (3, 9, 0),
reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0",
),
),
],
)
def test_write_openfilegdb(tmp_path, write_int64):
# Point(0, 0)
expected_geometry = np.array(
[bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
)
expected_field_data = [
np.array([True, False, True], dtype="bool"),
np.array([1, 2, 3], dtype="int16"),
np.array([1, 2, 3], dtype="int32"),
np.array([1, 2, 3], dtype="int64"),
np.array([1, 2, 3], dtype="float32"),
np.array([1, 2, 3], dtype="float64"),
]
expected_fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
expected_meta = {
"geometry_type": "Point",
"crs": "EPSG:4326",
"fields": expected_fields,
}
filename = os.path.join(str(tmpdir), "test.gdb")
write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
filename = tmp_path / "test.gdb"
assert os.path.exists(filename)
# int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support
# it is converted to float64 by default and raises a warning
# (for GDAL >= 3.9.0 only)
write_params = (
{"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {}
)
if write_int64 or __gdal_version__ < (3, 9, 0):
ctx = contextlib.nullcontext()
else:
ctx = pytest.warns(
RuntimeWarning, match="Integer64 will be written as a Float64"
)
with ctx:
write(
filename,
expected_geometry,
expected_field_data,
driver="OpenFileGDB",
**expected_meta,
**write_params,
)
meta, _, geometry, field_data = read(filename)
if not write_int64:
expected_field_data[3] = expected_field_data[3].astype("float64")
# bool types are converted to int32
expected_field_data[0] = expected_field_data[0].astype("int32")
assert meta["crs"] == expected_meta["crs"]
assert np.array_equal(meta["fields"], expected_meta["fields"])
assert np.array_equal(geometry, expected_geometry)
for i in range(len(expected_field_data)):
assert field_data[i].dtype == expected_field_data[i].dtype
assert np.array_equal(field_data[i], expected_field_data[i])
@pytest.mark.parametrize("ext", DRIVERS)
def test_write_append(tmpdir, naturalearth_lowres, ext):
def test_write_append(tmp_path, naturalearth_lowres, ext):
if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
@@ -603,10 +708,10 @@ def test_write_append(tmpdir, naturalearth_lowres, ext):
# coerce output layer to MultiPolygon to avoid mixed type errors
meta["geometry_type"] = "MultiPolygon"
filename = os.path.join(str(tmpdir), f"test{ext}")
filename = tmp_path / f"test{ext}"
write(filename, geometry, field_data, **meta)
assert os.path.exists(filename)
assert filename.exists()
assert read_info(filename)["features"] == 177
@@ -617,17 +722,17 @@ def test_write_append(tmpdir, naturalearth_lowres, ext):
@pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
def test_write_append_unsupported(tmpdir, naturalearth_lowres, driver, ext):
def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
if ext == ".geojsons" and __gdal_version__ >= (3, 6, 0):
pytest.skip("Append to GeoJSONSeq supported for GDAL >= 3.6.0")
meta, _, geometry, field_data = read(naturalearth_lowres)
# GML does not support append functionality
filename = os.path.join(str(tmpdir), f"test{ext}")
filename = tmp_path / f"test{ext}"
write(filename, geometry, field_data, driver=driver, **meta)
assert os.path.exists(filename)
assert filename.exists()
assert read_info(filename, force_feature_count=True)["features"] == 177
@@ -639,16 +744,16 @@ def test_write_append_unsupported(tmpdir, naturalearth_lowres, driver, ext):
__gdal_version__ > (3, 5, 0),
reason="segfaults on FlatGeobuf limited to GDAL <= 3.5.0",
)
def test_write_append_prevent_gdal_segfault(tmpdir, naturalearth_lowres):
def test_write_append_prevent_gdal_segfault(tmp_path, naturalearth_lowres):
"""GDAL <= 3.5.0 segfaults when appending to FlatGeobuf; this test
verifies that we catch that before segfault"""
meta, _, geometry, field_data = read(naturalearth_lowres)
meta["geometry_type"] = "MultiPolygon"
filename = os.path.join(str(tmpdir), "test.fgb")
filename = tmp_path / "test.fgb"
write(filename, geometry, field_data, **meta)
assert os.path.exists(filename)
assert filename.exists()
with pytest.raises(
RuntimeError, # match="append to FlatGeobuf is not supported for GDAL <= 3.5.0"
@@ -664,7 +769,7 @@ def test_write_append_prevent_gdal_segfault(tmpdir, naturalearth_lowres):
if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
},
)
def test_write_supported(tmpdir, naturalearth_lowres, driver):
def test_write_supported(tmp_path, naturalearth_lowres, driver):
"""Test drivers known to work that are not specifically tested above"""
meta, _, geometry, field_data = read(naturalearth_lowres, columns=["iso_a3"])
@@ -673,7 +778,7 @@ def test_write_supported(tmpdir, naturalearth_lowres, driver):
# we take the first record only.
meta["geometry_type"] = "MultiPolygon"
filename = tmpdir / f"test{DRIVER_EXT[driver]}"
filename = tmp_path / f"test{DRIVER_EXT[driver]}"
write(
filename,
geometry[:1],
@@ -688,10 +793,10 @@ def test_write_supported(tmpdir, naturalearth_lowres, driver):
@pytest.mark.skipif(
__gdal_version__ >= (3, 6, 0), reason="OpenFileGDB supports write for GDAL >= 3.6.0"
)
def test_write_unsupported(tmpdir, naturalearth_lowres):
def test_write_unsupported(tmp_path, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
filename = os.path.join(str(tmpdir), "test.gdb")
filename = tmp_path / "test.gdb"
with pytest.raises(DataSourceError, match="does not support write functionality"):
write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
@@ -721,7 +826,7 @@ def assert_equal_result(result1, result2):
assert np.array_equal(meta1["fields"], meta2["fields"])
assert np.array_equal(index1, index2)
assert all([np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2)])
assert all(np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2))
if HAS_SHAPELY:
# a plain `assert np.array_equal(geometry1, geometry2)` doesn't work
@@ -734,10 +839,10 @@ def assert_equal_result(result1, result2):
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
def test_read_from_bytes(tmp_path, naturalearth_lowres, driver, ext):
meta, index, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "Unknown"})
filename = os.path.join(str(tmpdir), f"test.{ext}")
filename = tmp_path / f"test.{ext}"
write(filename, geometry, field_data, driver=driver, **meta)
with open(filename, "rb") as f:
@@ -747,7 +852,7 @@ def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
assert_equal_result((meta, index, geometry, field_data), result2)
def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
def test_read_from_bytes_zipped(naturalearth_lowres_vsi):
path, vsi_path = naturalearth_lowres_vsi
meta, index, geometry, field_data = read(vsi_path)
@@ -760,10 +865,10 @@ def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
def test_read_from_file_like(tmp_path, naturalearth_lowres, driver, ext):
meta, index, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "Unknown"})
filename = os.path.join(str(tmpdir), f"test.{ext}")
filename = tmp_path / f"test.{ext}"
write(filename, geometry, field_data, driver=driver, **meta)
with open(filename, "rb") as f:
@@ -772,6 +877,12 @@ def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
assert_equal_result((meta, index, geometry, field_data), result2)
def test_read_from_nonseekable_bytes(nonseekable_bytes):
meta, _, geometry, _ = read(nonseekable_bytes)
assert meta["fields"].shape == (0,)
assert len(geometry) == 1
@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
def test_read_write_data_types_numeric(tmp_path, ext):
# Point(0, 0)
@@ -787,13 +898,13 @@ def test_read_write_data_types_numeric(tmp_path, ext):
np.array([1, 2, 3], dtype="float64"),
]
fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
filename = tmp_path / f"test.{ext}"
write(filename, geometry, field_data, fields, **meta)
result = read(filename)[3]
assert all([np.array_equal(f1, f2) for f1, f2 in zip(result, field_data)])
assert all([f1.dtype == f2.dtype for f1, f2 in zip(result, field_data)])
assert all(np.array_equal(f1, f2) for f1, f2 in zip(result, field_data))
assert all(f1.dtype == f2.dtype for f1, f2 in zip(result, field_data))
# other integer data types that don't roundtrip exactly
# these are generally promoted to a larger integer type except for uint64
@@ -844,7 +955,7 @@ def test_read_write_datetime(tmp_path):
geometry = np.array(
[bytes.fromhex("010100000000000000000000000000000000000000")] * 2, dtype=object
)
meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
filename = tmp_path / "test.gpkg"
write(filename, geometry, field_data, fields, **meta)
@@ -867,7 +978,7 @@ def test_read_write_int64_large(tmp_path, ext):
)
field_data = [np.array([1, 2192502720, -5], dtype="int64")]
fields = ["overflow_int64"]
meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
filename = tmp_path / f"test.{ext}"
write(filename, geometry, field_data, fields, **meta)
@@ -890,17 +1001,17 @@ def test_read_data_types_numeric_with_null(test_gpkg_nulls):
assert field.dtype == "float64"
def test_read_unsupported_types(test_ogr_types_list):
fields = read(test_ogr_types_list)[3]
def test_read_unsupported_types(list_field_values_file):
fields = read(list_field_values_file)[3]
# list field gets skipped, only integer field is read
assert len(fields) == 1
fields = read(test_ogr_types_list, columns=["int64"])[3]
fields = read(list_field_values_file, columns=["int64"])[3]
assert len(fields) == 1
def test_read_datetime_millisecond(test_datetime):
field = read(test_datetime)[3][0]
def test_read_datetime_millisecond(datetime_file):
field = read(datetime_file)[3][0]
assert field.dtype == "datetime64[ms]"
assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
@@ -929,13 +1040,14 @@ def test_read_unsupported_ext_with_prefix(tmp_path):
assert field_data[0] == "data1"
def test_read_datetime_as_string(test_datetime_tz):
field = read(test_datetime_tz)[3][0]
def test_read_datetime_as_string(datetime_tz_file):
field = read(datetime_tz_file)[3][0]
assert field.dtype == "datetime64[ms]"
# timezone is ignored in numpy layer
assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
field = read(test_datetime_tz, datetime_as_string=True)[3][0]
field = read(datetime_tz_file, datetime_as_string=True)[3][0]
assert field.dtype == "object"
# GDAL doesn't return strings in ISO format (yet)
assert field[0] == "2020/01/01 09:00:00.123-05"
@@ -951,7 +1063,7 @@ def test_read_write_null_geometry(tmp_path, ext):
)
field_data = [np.array([1, 2], dtype="int32")]
fields = ["col"]
meta = dict(geometry_type="Point", crs="EPSG:4326")
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
if ext == "gpkg":
meta["spatial_index"] = False
@@ -971,12 +1083,12 @@ def test_write_float_nan_null(tmp_path, dtype):
)
field_data = [np.array([1.5, np.nan], dtype=dtype)]
fields = ["col"]
meta = dict(geometry_type="Point", crs="EPSG:4326")
fname = tmp_path / "test.geojson"
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
filename = tmp_path / "test.geojson"
# default nan_as_null=True
write(fname, geometry, field_data, fields, **meta)
with open(str(fname), "r") as f:
write(filename, geometry, field_data, fields, **meta)
with open(filename) as f:
content = f.read()
assert '{ "col": null }' in content
@@ -987,14 +1099,14 @@ def test_write_float_nan_null(tmp_path, dtype):
else:
ctx = contextlib.nullcontext()
with ctx:
write(fname, geometry, field_data, fields, **meta, nan_as_null=False)
with open(str(fname), "r") as f:
write(filename, geometry, field_data, fields, **meta, nan_as_null=False)
with open(filename) as f:
content = f.read()
assert '"properties": { }' in content
# but can instruct GDAL to write NaN to json
write(
fname,
filename,
geometry,
field_data,
fields,
@@ -1002,12 +1114,12 @@ def test_write_float_nan_null(tmp_path, dtype):
nan_as_null=False,
WRITE_NON_FINITE_VALUES="YES",
)
with open(str(fname), "r") as f:
with open(filename) as f:
content = f.read()
assert '{ "col": NaN }' in content
@requires_arrow_api
@requires_pyarrow_api
@pytest.mark.skipif(
"Arrow" not in list_drivers(), reason="Arrow driver is not available"
)
@@ -1021,7 +1133,7 @@ def test_write_float_nan_null_arrow(tmp_path):
)
field_data = [np.array([1.5, np.nan], dtype="float64")]
fields = ["col"]
meta = dict(geometry_type="Point", crs="EPSG:4326")
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
fname = tmp_path / "test.arrow"
# default nan_as_null=True
@@ -1039,6 +1151,112 @@ def test_write_float_nan_null_arrow(tmp_path):
assert pc.is_nan(table["col"]).to_pylist() == [False, True]
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
meta, _, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "MultiPolygon"})
buffer = BytesIO()
write(buffer, geometry, field_data, driver=driver, layer="test", **meta)
assert len(buffer.getbuffer()) > 0
assert list_layers(buffer)[0][0] == "test"
actual_meta, _, actual_geometry, actual_field_data = read(buffer)
assert np.array_equal(actual_meta["fields"], meta["fields"])
assert np.array_equal(actual_field_data, field_data)
assert len(actual_geometry) == len(geometry)
def test_write_memory_driver_required(naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
buffer = BytesIO()
with pytest.raises(
ValueError,
match="driver must be provided to write to in-memory file",
):
write(buffer, geometry, field_data, driver=None, layer="test", **meta)
@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
meta, _, geometry, field_data = read(naturalearth_lowres)
buffer = BytesIO()
with pytest.raises(
ValueError, match=f"writing to in-memory file is not supported for {driver}"
):
write(
buffer,
geometry,
field_data,
driver=driver,
layer="test",
append=True,
**meta,
)
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
meta, _, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "MultiPolygon"})
buffer = BytesIO()
with pytest.raises(
NotImplementedError, match="append is not supported for in-memory files"
):
write(
buffer,
geometry,
field_data,
driver=driver,
layer="test",
append=True,
**meta,
)
def test_write_memory_existing_unsupported(naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
buffer = BytesIO(b"0000")
with pytest.raises(
NotImplementedError,
match="writing to existing in-memory object is not supported",
):
write(buffer, geometry, field_data, driver="GeoJSON", layer="test", **meta)
def test_write_open_file_handle(tmp_path, naturalearth_lowres):
"""Verify that writing to an open file handle is not currently supported"""
meta, _, geometry, field_data = read(naturalearth_lowres)
# verify it fails for regular file handle
with pytest.raises(
NotImplementedError, match="writing to an open file handle is not yet supported"
):
with open(tmp_path / "test.geojson", "wb") as f:
write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta)
# verify it fails for ZipFile
with pytest.raises(
NotImplementedError, match="writing to an open file handle is not yet supported"
):
with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
with z.open("test.geojson", "w") as f:
write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta)
@pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"])
@pytest.mark.parametrize(
"read_encoding,write_encoding",
@@ -1075,7 +1293,7 @@ def test_encoding_io(tmp_path, ext, read_encoding, write_encoding):
np.array([mandarin], dtype=object),
]
fields = [arabic, cree, mandarin]
meta = dict(geometry_type="Point", crs="EPSG:4326", encoding=write_encoding)
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": write_encoding}
filename = tmp_path / f"test.{ext}"
write(filename, geometry, field_data, fields, **meta)
@@ -1125,7 +1343,7 @@ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
# character level) by GDAL when output to shapefile, so we have to truncate
# before writing
fields = [arabic[:5], cree[:3], mandarin]
meta = dict(geometry_type="Point", crs="EPSG:4326", encoding="UTF-8")
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": "UTF-8"}
filename = tmp_path / "test.shp"
# NOTE: GDAL automatically creates a cpg file with the encoding name, which
@@ -1141,7 +1359,7 @@ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
# verify that if cpg file is not present, that user-provided encoding is used,
# otherwise it defaults to ISO-8859-1
if read_encoding is not None:
os.unlink(str(filename).replace(".shp", ".cpg"))
filename.with_suffix(".cpg").unlink()
actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
assert np.array_equal(fields, actual_meta["fields"])
assert np.array_equal(field_data, actual_field_data)
@@ -1150,6 +1368,97 @@ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
)
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
"""Verify that we write non-UTF data to the data source
IMPORTANT: this may not be valid for the data source and will likely render
them unusable in other tools, but should successfully roundtrip unless we
disable writing using other encodings.
NOTE: FlatGeobuff driver cannot handle non-UTF data in GDAL >= 3.9
"""
encoding, text = encoded_text
# Point(0, 0)
geometry = np.array(
[bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
)
field_data = [np.array([text], dtype=object)]
fields = [text]
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": encoding}
filename = tmp_path / f"test.{ext}"
write(filename, geometry, field_data, fields, **meta)
# cannot open these files without specifying encoding
with pytest.raises(UnicodeDecodeError):
read(filename)
with pytest.raises(UnicodeDecodeError):
read_info(filename)
# must provide encoding to read these properly
actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
assert actual_meta["fields"][0] == text
assert actual_field_data[0] == text
assert read_info(filename, encoding=encoding)["fields"][0] == text
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
encoding, text = encoded_text
# Point(0, 0)
geometry = np.array(
[bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
)
field_data = [np.array([text], dtype=object)]
fields = [text]
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": encoding}
filename = tmp_path / "test.shp"
write(filename, geometry, field_data, fields, **meta)
# NOTE: GDAL automatically creates a cpg file with the encoding name, which
# means that if we read this without specifying the encoding it uses the
# correct one
actual_meta, _, _, actual_field_data = read(filename)
assert actual_meta["fields"][0] == text
assert actual_field_data[0] == text
assert read_info(filename)["fields"][0] == text
# verify that if cpg file is not present, that user-provided encoding must be used
filename.with_suffix(".cpg").unlink()
# We will assume ISO-8859-1, which is wrong
miscoded = text.encode(encoding).decode("ISO-8859-1")
bad_meta, _, _, bad_field_data = read(filename)
assert bad_meta["fields"][0] == miscoded
assert bad_field_data[0] == miscoded
assert read_info(filename)["fields"][0] == miscoded
# If encoding is provided, that should yield correct text
actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
assert actual_meta["fields"][0] == text
assert actual_field_data[0] == text
assert read_info(filename, encoding=encoding)["fields"][0] == text
# verify that setting encoding does not corrupt SHAPE_ENCODING option if set
# globally (it is ignored during read when encoding is specified by user)
try:
set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
_ = read(filename, encoding=encoding)
assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"
finally:
# reset to clear between tests
set_gdal_config_options({"SHAPE_ENCODING": None})
def test_write_with_mask(tmp_path):
# Point(0, 0), null
geometry = np.array(
@@ -1159,7 +1468,7 @@ def test_write_with_mask(tmp_path):
field_data = [np.array([1, 2, 3], dtype="int32")]
field_mask = [np.array([False, True, False])]
fields = ["col"]
meta = dict(geometry_type="Point", crs="EPSG:4326")
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
filename = tmp_path / "test.geojson"
write(filename, geometry, field_data, fields, field_mask, **meta)
@@ -1176,3 +1485,31 @@ def test_write_with_mask(tmp_path):
field_mask = [np.array([False, True, False])] * 2
with pytest.raises(ValueError):
write(filename, geometry, field_data, fields, field_mask, **meta)
@requires_arrow_api
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
# this test is included here instead of test_arrow.py to ensure we also run
# it when pyarrow is not installed
with open_arrow(naturalearth_lowres) as (meta, reader):
assert isinstance(meta, dict)
assert isinstance(reader, pyogrio._io._ArrowStream)
capsule = reader.__arrow_c_stream__()
assert (
ctypes.pythonapi.PyCapsule_IsValid(
ctypes.py_object(capsule), b"arrow_array_stream"
)
== 1
)
@pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed")
@requires_arrow_api
def test_open_arrow_error_no_pyarrow(naturalearth_lowres):
# this test is included here instead of test_arrow.py to ensure we run
# it when pyarrow is not installed
with pytest.raises(ImportError):
with open_arrow(naturalearth_lowres, use_pyarrow=True) as _:
pass

View File

@@ -1,86 +0,0 @@
"""Run pytest tests manually on Windows due to import errors
"""
from pathlib import Path
import platform
from tempfile import TemporaryDirectory
data_dir = Path(__file__).parent.resolve() / "fixtures"
if platform.system() == "Windows":
naturalearth_lowres = data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
test_fgdb_vsi = f"/vsizip/{data_dir}/test_fgdb.gdb.zip"
from pyogrio.tests.test_core import test_read_info
try:
test_read_info(naturalearth_lowres)
except Exception as ex:
print(ex)
from pyogrio.tests.test_raw_io import (
test_read,
test_read_no_geometry,
test_read_columns,
test_read_skip_features,
test_read_max_features,
test_read_where,
test_read_where_invalid,
test_write,
test_write_gpkg,
test_write_geojson,
)
try:
test_read(naturalearth_lowres)
except Exception as ex:
print(ex)
try:
test_read_no_geometry(naturalearth_lowres)
except Exception as ex:
print(ex)
try:
test_read_columns(naturalearth_lowres)
except Exception as ex:
print(ex)
try:
test_read_skip_features(naturalearth_lowres)
except Exception as ex:
print(ex)
try:
test_read_max_features(naturalearth_lowres)
except Exception as ex:
print(ex)
try:
test_read_where(naturalearth_lowres)
except Exception as ex:
print(ex)
try:
test_read_where_invalid(naturalearth_lowres)
except Exception as ex:
print(ex)
with TemporaryDirectory() as tmpdir:
try:
test_write(tmpdir, naturalearth_lowres)
except Exception as ex:
print(ex)
with TemporaryDirectory() as tmpdir:
try:
test_write_gpkg(tmpdir, naturalearth_lowres)
except Exception as ex:
print(ex)
with TemporaryDirectory() as tmpdir:
try:
test_write_geojson(tmpdir, naturalearth_lowres)
except Exception as ex:
print(ex)