venv
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,16 +1,26 @@
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
from zipfile import ZIP_DEFLATED, ZipFile
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from pyogrio import (
|
||||
__gdal_version_string__,
|
||||
__version__,
|
||||
list_drivers,
|
||||
)
|
||||
from pyogrio._compat import HAS_ARROW_API, HAS_GDAL_GEOS, HAS_SHAPELY
|
||||
from pyogrio._compat import (
|
||||
HAS_ARROW_API,
|
||||
HAS_ARROW_WRITE_API,
|
||||
HAS_GDAL_GEOS,
|
||||
HAS_PYARROW,
|
||||
HAS_PYPROJ,
|
||||
HAS_SHAPELY,
|
||||
)
|
||||
from pyogrio.core import vsi_rmtree
|
||||
from pyogrio.raw import read, write
|
||||
|
||||
import pytest
|
||||
|
||||
_data_dir = Path(__file__).parent.resolve() / "fixtures"
|
||||
|
||||
@@ -29,6 +39,15 @@ DRIVER_EXT = {driver: ext for ext, driver in DRIVERS.items()}
|
||||
|
||||
ALL_EXTS = [".fgb", ".geojson", ".geojsonl", ".gpkg", ".shp"]
|
||||
|
||||
START_FID = {
|
||||
".fgb": 0,
|
||||
".geojson": 0,
|
||||
".geojsonl": 0,
|
||||
".geojsons": 0,
|
||||
".gpkg": 1,
|
||||
".shp": 0,
|
||||
}
|
||||
|
||||
|
||||
def pytest_report_header(config):
|
||||
drivers = ", ".join(
|
||||
@@ -43,8 +62,16 @@ def pytest_report_header(config):
|
||||
|
||||
|
||||
# marks to skip tests if optional dependencies are not present
|
||||
requires_arrow_api = pytest.mark.skipif(
|
||||
not HAS_ARROW_API, reason="GDAL>=3.6 and pyarrow required"
|
||||
requires_arrow_api = pytest.mark.skipif(not HAS_ARROW_API, reason="GDAL>=3.6 required")
|
||||
requires_pyarrow_api = pytest.mark.skipif(
|
||||
not HAS_ARROW_API or not HAS_PYARROW, reason="GDAL>=3.6 and pyarrow required"
|
||||
)
|
||||
|
||||
requires_pyproj = pytest.mark.skipif(not HAS_PYPROJ, reason="pyproj required")
|
||||
|
||||
requires_arrow_write_api = pytest.mark.skipif(
|
||||
not HAS_ARROW_WRITE_API or not HAS_PYARROW,
|
||||
reason="GDAL>=3.8 required for Arrow write API",
|
||||
)
|
||||
|
||||
requires_gdal_geos = pytest.mark.skipif(
|
||||
@@ -99,20 +126,51 @@ def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request):
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres):
|
||||
"""Wrap naturalearth_lowres as a zip file for vsi tests"""
|
||||
"""Wrap naturalearth_lowres as a zip file for VSI tests"""
|
||||
|
||||
path = tmp_path / f"{naturalearth_lowres.name}.zip"
|
||||
with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
|
||||
for ext in ["dbf", "prj", "shp", "shx"]:
|
||||
for ext in ["dbf", "prj", "shp", "shx", "cpg"]:
|
||||
filename = f"{naturalearth_lowres.stem}.{ext}"
|
||||
out.write(naturalearth_lowres.parent / filename, filename)
|
||||
|
||||
return path, f"/vsizip/{path}/{naturalearth_lowres.name}"
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def naturalearth_lowres_vsimem(naturalearth_lowres):
|
||||
"""Write naturalearth_lowres to a vsimem file for VSI tests"""
|
||||
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
name = f"pyogrio_fixture_{naturalearth_lowres.stem}"
|
||||
dst_path = Path(f"/vsimem/{name}/{name}.gpkg")
|
||||
meta["spatial_index"] = False
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
write(dst_path, geometry, field_data, layer="naturalearth_lowres", **meta)
|
||||
yield dst_path
|
||||
|
||||
vsi_rmtree(dst_path.parent)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_fgdb_vsi():
|
||||
return f"/vsizip/{_data_dir}/test_fgdb.gdb.zip"
|
||||
def line_zm_file():
|
||||
return _data_dir / "line_zm.gpkg"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def curve_file():
|
||||
return _data_dir / "curve.gpkg"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def curve_polygon_file():
|
||||
return _data_dir / "curvepolygon.gpkg"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def multisurface_file():
|
||||
return _data_dir / "multisurface.gpkg"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -120,16 +178,221 @@ def test_gpkg_nulls():
|
||||
return _data_dir / "test_gpkg_nulls.gpkg"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_ogr_types_list():
|
||||
return _data_dir / "test_ogr_types_list.geojson"
|
||||
@pytest.fixture(scope="function")
|
||||
def no_geometry_file(tmp_path):
|
||||
# create a GPKG layer that does not include geometry
|
||||
filename = tmp_path / "test_no_geometry.gpkg"
|
||||
write(
|
||||
filename,
|
||||
layer="no_geometry",
|
||||
geometry=None,
|
||||
field_data=[np.array(["a", "b", "c"])],
|
||||
fields=["col"],
|
||||
)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_datetime():
|
||||
return _data_dir / "test_datetime.geojson"
|
||||
@pytest.fixture(scope="function")
|
||||
def list_field_values_file(tmp_path):
|
||||
# Create a GeoJSON file with list values in a property
|
||||
list_geojson = """{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "int64": 1, "list_int64": [0, 1] },
|
||||
"geometry": { "type": "Point", "coordinates": [0, 2] }
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "int64": 2, "list_int64": [2, 3] },
|
||||
"geometry": { "type": "Point", "coordinates": [1, 2] }
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "int64": 3, "list_int64": [4, 5] },
|
||||
"geometry": { "type": "Point", "coordinates": [2, 2] }
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "int64": 4, "list_int64": [6, 7] },
|
||||
"geometry": { "type": "Point", "coordinates": [3, 2] }
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "int64": 5, "list_int64": [8, 9] },
|
||||
"geometry": { "type": "Point", "coordinates": [4, 2] }
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
filename = tmp_path / "test_ogr_types_list.geojson"
|
||||
with open(filename, "w") as f:
|
||||
_ = f.write(list_geojson)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def test_datetime_tz():
|
||||
return _data_dir / "test_datetime_tz.geojson"
|
||||
@pytest.fixture(scope="function")
|
||||
def nested_geojson_file(tmp_path):
|
||||
# create GeoJSON file with nested properties
|
||||
nested_geojson = """{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [0, 0]
|
||||
},
|
||||
"properties": {
|
||||
"top_level": "A",
|
||||
"intermediate_level": {
|
||||
"bottom_level": "B"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
filename = tmp_path / "test_nested.geojson"
|
||||
with open(filename, "w") as f:
|
||||
_ = f.write(nested_geojson)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def datetime_file(tmp_path):
|
||||
# create GeoJSON file with millisecond precision
|
||||
datetime_geojson = """{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "col": "2020-01-01T09:00:00.123" },
|
||||
"geometry": { "type": "Point", "coordinates": [1, 1] }
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "col": "2020-01-01T10:00:00" },
|
||||
"geometry": { "type": "Point", "coordinates": [2, 2] }
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
filename = tmp_path / "test_datetime.geojson"
|
||||
with open(filename, "w") as f:
|
||||
_ = f.write(datetime_geojson)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def datetime_tz_file(tmp_path):
|
||||
# create GeoJSON file with datetimes with timezone
|
||||
datetime_tz_geojson = """{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" },
|
||||
"geometry": { "type": "Point", "coordinates": [1, 1] }
|
||||
},
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { "datetime_col": "2020-01-01T10:00:00-05:00" },
|
||||
"geometry": { "type": "Point", "coordinates": [2, 2] }
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
filename = tmp_path / "test_datetime_tz.geojson"
|
||||
with open(filename, "w") as f:
|
||||
f.write(datetime_tz_geojson)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def geojson_bytes(tmp_path):
|
||||
"""Extracts first 3 records from naturalearth_lowres and writes to GeoJSON,
|
||||
returning bytes"""
|
||||
meta, _, geometry, field_data = read(
|
||||
_data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp"), max_features=3
|
||||
)
|
||||
|
||||
filename = tmp_path / "test.geojson"
|
||||
write(filename, geometry, field_data, **meta)
|
||||
|
||||
with open(filename, "rb") as f:
|
||||
bytes_buffer = f.read()
|
||||
|
||||
return bytes_buffer
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def geojson_filelike(tmp_path):
|
||||
"""Extracts first 3 records from naturalearth_lowres and writes to GeoJSON,
|
||||
returning open file handle"""
|
||||
meta, _, geometry, field_data = read(
|
||||
_data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp"), max_features=3
|
||||
)
|
||||
|
||||
filename = tmp_path / "test.geojson"
|
||||
write(filename, geometry, field_data, layer="test", **meta)
|
||||
|
||||
with open(filename, "rb") as f:
|
||||
yield f
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def nonseekable_bytes(tmp_path):
|
||||
# mock a non-seekable byte stream, such as a zstandard handle
|
||||
class NonSeekableBytesIO(BytesIO):
|
||||
def seekable(self):
|
||||
return False
|
||||
|
||||
def seek(self, *args, **kwargs):
|
||||
raise OSError("cannot seek")
|
||||
|
||||
# wrap GeoJSON into a non-seekable BytesIO
|
||||
geojson = """{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"properties": { },
|
||||
"geometry": { "type": "Point", "coordinates": [1, 1] }
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
return NonSeekableBytesIO(geojson.encode("UTF-8"))
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
scope="session",
|
||||
params=[
|
||||
# Japanese
|
||||
("CP932", "ホ"),
|
||||
# Chinese
|
||||
("CP936", "中文"),
|
||||
# Central European
|
||||
("CP1250", "Đ"),
|
||||
# Latin 1 / Western European
|
||||
("CP1252", "ÿ"),
|
||||
# Greek
|
||||
("CP1253", "Φ"),
|
||||
# Arabic
|
||||
("CP1256", "ش"),
|
||||
],
|
||||
)
|
||||
def encoded_text(request):
|
||||
"""Return tuple with encoding name and very short sample text in that encoding
|
||||
NOTE: it was determined through testing that code pages for MS-DOS do not
|
||||
consistently work across all Python installations (in particular, fail with conda),
|
||||
but ANSI code pages appear to work properly.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
@@ -1,13 +1,28 @@
|
||||
# Test datasets
|
||||
|
||||
## Natural Earth lowres
|
||||
## Obtaining / creating test datasets
|
||||
|
||||
If a test dataset can be created in code, do that instead. If it is used in a
|
||||
single test, create the test dataset as part of that test. If it is used in
|
||||
more than a single test, add it to `pyogrio/tests/conftest.py` instead, as a
|
||||
function-scoped test fixture.
|
||||
|
||||
If you need to obtain 3rd party test files:
|
||||
|
||||
- add a section below that describes the source location and processing steps
|
||||
to derive that dataset
|
||||
- make sure the license is compatible with inclusion in Pyogrio (public domain or open-source)
|
||||
and record that license below
|
||||
|
||||
Please keep the test files no larger than necessary to use in tests.
|
||||
|
||||
## Included test datasets
|
||||
|
||||
### Natural Earth lowres
|
||||
|
||||
`naturalearth_lowres.shp` was copied from GeoPandas.
|
||||
|
||||
## FGDB test dataset
|
||||
|
||||
`test_fgdb.gdb.zip`
|
||||
Downloaded from http://trac.osgeo.org/gdal/raw-attachment/wiki/FileGDB/test_fgdb.gdb.zip
|
||||
License: public domain
|
||||
|
||||
### GPKG test dataset with null values
|
||||
|
||||
@@ -75,15 +90,19 @@ NOTE: Reading boolean values into GeoPandas using Fiona backend treats those
|
||||
values as `None` and column dtype as `object`; Pyogrio treats those values as
|
||||
`np.nan` and column dtype as `float64`.
|
||||
|
||||
### GPKG test with MultiSurface
|
||||
|
||||
This was extracted from https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_0308_HU4_GDB.zip
|
||||
`NHDWaterbody` layer using ogr2ogr:
|
||||
|
||||
```bash
|
||||
ogr2ogr test_mixed_surface.gpkg NHDPLUS_H_0308_HU4_GDB.gdb NHDWaterbody -where '"NHDPlusID" = 15000300070477' -select "NHDPlusID"
|
||||
```
|
||||
License: same as Pyogrio
|
||||
|
||||
### OSM PBF test
|
||||
|
||||
This was downloaded from https://github.com/openstreetmap/OSM-binary/blob/master/resources/sample.pbf
|
||||
|
||||
License: [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/)
|
||||
|
||||
### Test files for geometry types that are downgraded on read
|
||||
|
||||
`line_zm.gpkg` was created using QGIS to digitize a LineString GPKG layer with Z and M enabled. Downgraded to LineString Z on read.
|
||||
`curve.gpkg` was created using QGIS to digitize a Curve GPKG layer. Downgraded to LineString on read.
|
||||
`curvepolygon.gpkg` was created using QGIS to digitize a CurvePolygon GPKG layer. Downgraded to Polygon on read.
|
||||
`multisurface.gpkg` was created using QGIS to digitize a MultiSurface GPKG layer. Downgraded to MultiPolygon on read.
|
||||
|
||||
License: same as Pyogrio
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "col": "2020-01-01T09:00:00.123" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
|
||||
{ "type": "Feature", "properties": { "col": "2020-01-01T10:00:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
|
||||
]
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T09:00:00.123-05:00" }, "geometry": { "type": "Point", "coordinates": [ 1.0, 1.0 ] } },
|
||||
{ "type": "Feature", "properties": { "datetime_col": "2020-01-01T10:00:00-05:00" }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } }
|
||||
]
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [0, 0]
|
||||
},
|
||||
"properties": {
|
||||
"top_level": "A",
|
||||
"intermediate_level": {
|
||||
"bottom_level": "B"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"type": "FeatureCollection",
|
||||
"name": "test",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "int64": 1, "list_int64": [ 0, 1 ] }, "geometry": { "type": "Point", "coordinates": [ 0.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 2, "list_int64": [ 2, 3 ] }, "geometry": { "type": "Point", "coordinates": [ 1.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 3, "list_int64": [ 4, 5 ] }, "geometry": { "type": "Point", "coordinates": [ 2.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 4, "list_int64": [ 6, 7 ] }, "geometry": { "type": "Point", "coordinates": [ 3.0, 2.0 ] } },
|
||||
{ "type": "Feature", "properties": { "int64": 5, "list_int64": [ 8, 9 ] }, "geometry": { "type": "Point", "coordinates": [ 4.0, 2.0 ] } }
|
||||
]
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,28 +1,35 @@
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from numpy import array_equal, allclose
|
||||
import pytest
|
||||
from numpy import allclose, array_equal
|
||||
|
||||
from pyogrio import (
|
||||
__gdal_version__,
|
||||
__gdal_geos_version__,
|
||||
__gdal_version__,
|
||||
detect_write_driver,
|
||||
get_gdal_config_option,
|
||||
get_gdal_data_path,
|
||||
list_drivers,
|
||||
list_layers,
|
||||
read_bounds,
|
||||
read_info,
|
||||
set_gdal_config_options,
|
||||
get_gdal_config_option,
|
||||
get_gdal_data_path,
|
||||
vsi_listtree,
|
||||
vsi_rmtree,
|
||||
vsi_unlink,
|
||||
)
|
||||
from pyogrio.core import detect_write_driver
|
||||
from pyogrio.errors import DataSourceError, DataLayerError
|
||||
from pyogrio.tests.conftest import HAS_SHAPELY, prepare_testfile
|
||||
|
||||
from pyogrio._compat import GDAL_GE_38
|
||||
from pyogrio._env import GDALEnv
|
||||
from pyogrio.errors import DataLayerError, DataSourceError
|
||||
from pyogrio.raw import read, write
|
||||
from pyogrio.tests.conftest import START_FID, prepare_testfile, requires_shapely
|
||||
|
||||
import pytest
|
||||
|
||||
with GDALEnv():
|
||||
# NOTE: this must be AFTER above imports, which init the GDAL and PROJ data
|
||||
# search paths
|
||||
from pyogrio._ogr import ogr_driver_supports_write, has_gdal_data, has_proj_data
|
||||
from pyogrio._ogr import has_gdal_data, has_proj_data, ogr_driver_supports_write
|
||||
|
||||
|
||||
try:
|
||||
@@ -150,7 +157,16 @@ def test_list_drivers():
|
||||
assert len(drivers) == len(expected)
|
||||
|
||||
|
||||
def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi):
|
||||
def test_list_layers(
|
||||
naturalearth_lowres,
|
||||
naturalearth_lowres_vsi,
|
||||
naturalearth_lowres_vsimem,
|
||||
line_zm_file,
|
||||
curve_file,
|
||||
curve_polygon_file,
|
||||
multisurface_file,
|
||||
no_geometry_file,
|
||||
):
|
||||
assert array_equal(
|
||||
list_layers(naturalearth_lowres), [["naturalearth_lowres", "Polygon"]]
|
||||
)
|
||||
@@ -159,38 +175,98 @@ def test_list_layers(naturalearth_lowres, naturalearth_lowres_vsi, test_fgdb_vsi
|
||||
list_layers(naturalearth_lowres_vsi[1]), [["naturalearth_lowres", "Polygon"]]
|
||||
)
|
||||
|
||||
assert array_equal(
|
||||
list_layers(naturalearth_lowres_vsimem),
|
||||
[["naturalearth_lowres", "MultiPolygon"]],
|
||||
)
|
||||
|
||||
# Measured 3D is downgraded to plain 3D during read
|
||||
# Make sure this warning is raised
|
||||
with pytest.warns(
|
||||
UserWarning, match=r"Measured \(M\) geometry types are not supported"
|
||||
):
|
||||
fgdb_layers = list_layers(test_fgdb_vsi)
|
||||
# GDAL >= 3.4.0 includes 'another_relationship' layer
|
||||
assert len(fgdb_layers) >= 7
|
||||
assert array_equal(list_layers(line_zm_file), [["line_zm", "LineString Z"]])
|
||||
|
||||
# Make sure that nonspatial layer has None for geometry
|
||||
assert array_equal(fgdb_layers[0], ["basetable_2", None])
|
||||
# Curve / surface types are downgraded to plain types
|
||||
assert array_equal(list_layers(curve_file), [["curve", "LineString"]])
|
||||
assert array_equal(list_layers(curve_polygon_file), [["curvepolygon", "Polygon"]])
|
||||
assert array_equal(
|
||||
list_layers(multisurface_file), [["multisurface", "MultiPolygon"]]
|
||||
)
|
||||
|
||||
# Confirm that measured 3D is downgraded to plain 3D during read
|
||||
assert array_equal(fgdb_layers[3], ["test_lines", "MultiLineString Z"])
|
||||
assert array_equal(fgdb_layers[6], ["test_areas", "MultiPolygon Z"])
|
||||
# Make sure that nonspatial layer has None for geometry
|
||||
assert array_equal(list_layers(no_geometry_file), [["no_geometry", None]])
|
||||
|
||||
|
||||
def test_read_bounds(naturalearth_lowres):
|
||||
fids, bounds = read_bounds(naturalearth_lowres)
|
||||
def test_list_layers_bytes(geojson_bytes):
|
||||
layers = list_layers(geojson_bytes)
|
||||
|
||||
assert layers.shape == (1, 2)
|
||||
assert layers[0, 0] == "test"
|
||||
|
||||
|
||||
def test_list_layers_nonseekable_bytes(nonseekable_bytes):
|
||||
layers = list_layers(nonseekable_bytes)
|
||||
|
||||
assert layers.shape == (1, 2)
|
||||
assert layers[0, 1] == "Point"
|
||||
|
||||
|
||||
def test_list_layers_filelike(geojson_filelike):
|
||||
layers = list_layers(geojson_filelike)
|
||||
|
||||
assert layers.shape == (1, 2)
|
||||
assert layers[0, 0] == "test"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"testfile",
|
||||
["naturalearth_lowres", "naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"],
|
||||
)
|
||||
def test_read_bounds(testfile, request):
|
||||
path = request.getfixturevalue(testfile)
|
||||
path = path if not isinstance(path, tuple) else path[1]
|
||||
|
||||
fids, bounds = read_bounds(path)
|
||||
assert fids.shape == (177,)
|
||||
assert bounds.shape == (4, 177)
|
||||
|
||||
assert fids[0] == 0
|
||||
assert fids[0] == START_FID[Path(path).suffix]
|
||||
# Fiji; wraps antimeridian
|
||||
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
|
||||
|
||||
|
||||
def test_read_bounds_bytes(geojson_bytes):
|
||||
fids, bounds = read_bounds(geojson_bytes)
|
||||
assert fids.shape == (3,)
|
||||
assert bounds.shape == (4, 3)
|
||||
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
|
||||
|
||||
|
||||
def test_read_bounds_nonseekable_bytes(nonseekable_bytes):
|
||||
fids, bounds = read_bounds(nonseekable_bytes)
|
||||
assert fids.shape == (1,)
|
||||
assert bounds.shape == (4, 1)
|
||||
assert allclose(bounds[:, 0], [1, 1, 1, 1])
|
||||
|
||||
|
||||
def test_read_bounds_filelike(geojson_filelike):
|
||||
fids, bounds = read_bounds(geojson_filelike)
|
||||
assert fids.shape == (3,)
|
||||
assert bounds.shape == (4, 3)
|
||||
assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088])
|
||||
|
||||
|
||||
def test_read_bounds_max_features(naturalearth_lowres):
|
||||
bounds = read_bounds(naturalearth_lowres, max_features=2)[1]
|
||||
assert bounds.shape == (4, 2)
|
||||
|
||||
|
||||
def test_read_bounds_unspecified_layer_warning(data_dir):
|
||||
"""Reading a multi-layer file without specifying a layer gives a warning."""
|
||||
with pytest.warns(UserWarning, match="More than one layer found "):
|
||||
read_bounds(data_dir / "sample.osm.pbf")
|
||||
|
||||
|
||||
def test_read_bounds_negative_max_features(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
|
||||
read_bounds(naturalearth_lowres, max_features=-1)
|
||||
@@ -240,12 +316,9 @@ def test_read_bounds_bbox(naturalearth_lowres_all_ext):
|
||||
fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))
|
||||
|
||||
assert fids.shape == (2,)
|
||||
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
||||
# fid in gpkg is 1-based
|
||||
assert array_equal(fids, [34, 35]) # PAN, CRI
|
||||
else:
|
||||
# fid in other formats is 0-based
|
||||
assert array_equal(fids, [33, 34]) # PAN, CRI
|
||||
fids_expected = np.array([33, 34]) # PAN, CRI
|
||||
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
|
||||
assert array_equal(fids, fids_expected)
|
||||
|
||||
assert bounds.shape == (4, 2)
|
||||
assert allclose(
|
||||
@@ -257,9 +330,7 @@ def test_read_bounds_bbox(naturalearth_lowres_all_ext):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
@pytest.mark.parametrize(
|
||||
"mask",
|
||||
[
|
||||
@@ -273,9 +344,7 @@ def test_read_bounds_mask_invalid(naturalearth_lowres, mask):
|
||||
read_bounds(naturalearth_lowres, mask=mask)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
def test_read_bounds_bbox_mask_invalid(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
|
||||
read_bounds(
|
||||
@@ -283,9 +352,7 @@ def test_read_bounds_bbox_mask_invalid(naturalearth_lowres):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
@pytest.mark.parametrize(
|
||||
"mask,expected",
|
||||
[
|
||||
@@ -316,12 +383,8 @@ def test_read_bounds_mask(naturalearth_lowres_all_ext, mask, expected):
|
||||
|
||||
fids = read_bounds(naturalearth_lowres_all_ext, mask=mask)[0]
|
||||
|
||||
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
||||
# fid in gpkg is 1-based
|
||||
assert array_equal(fids, np.array(expected) + 1)
|
||||
else:
|
||||
# fid in other formats is 0-based
|
||||
assert array_equal(fids, expected)
|
||||
fids_expected = np.array(expected) + START_FID[naturalearth_lowres_all_ext.suffix]
|
||||
assert array_equal(fids, fids_expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
@@ -337,40 +400,87 @@ def test_read_bounds_bbox_intersects_vs_envelope_overlaps(naturalearth_lowres_al
|
||||
if __gdal_geos_version__ is None:
|
||||
# bboxes for CAN, RUS overlap but do not intersect geometries
|
||||
assert fids.shape == (4,)
|
||||
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
||||
# fid in gpkg is 1-based
|
||||
assert array_equal(fids, [4, 5, 19, 28]) # CAN, USA, RUS, MEX
|
||||
else:
|
||||
# fid in other formats is 0-based
|
||||
assert array_equal(fids, [3, 4, 18, 27]) # CAN, USA, RUS, MEX
|
||||
fids_expected = np.array([3, 4, 18, 27]) # CAN, USA, RUS, MEX
|
||||
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
|
||||
assert array_equal(fids, fids_expected)
|
||||
|
||||
else:
|
||||
assert fids.shape == (2,)
|
||||
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
||||
# fid in gpkg is 1-based
|
||||
assert array_equal(fids, [5, 28]) # USA, MEX
|
||||
else:
|
||||
# fid in other formats is 0-based
|
||||
assert array_equal(fids, [4, 27]) # USA, MEX
|
||||
fids_expected = np.array([4, 27]) # USA, MEX
|
||||
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
|
||||
assert array_equal(fids, fids_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("naturalearth_lowres", [".shp", ".gpkg"], indirect=True)
|
||||
def test_read_info(naturalearth_lowres):
|
||||
meta = read_info(naturalearth_lowres)
|
||||
|
||||
assert meta["layer_name"] == "naturalearth_lowres"
|
||||
assert meta["crs"] == "EPSG:4326"
|
||||
assert meta["geometry_type"] == "Polygon"
|
||||
assert meta["encoding"] == "UTF-8"
|
||||
assert meta["fields"].shape == (5,)
|
||||
assert meta["dtypes"].tolist() == ["int64", "object", "object", "object", "float64"]
|
||||
assert meta["features"] == 177
|
||||
assert allclose(meta["total_bounds"], (-180, -90, 180, 83.64513))
|
||||
assert meta["driver"] == "ESRI Shapefile"
|
||||
assert meta["capabilities"]["random_read"] is True
|
||||
assert meta["capabilities"]["fast_set_next_by_index"] is True
|
||||
assert meta["capabilities"]["fast_spatial_filter"] is False
|
||||
assert meta["capabilities"]["fast_feature_count"] is True
|
||||
assert meta["capabilities"]["fast_total_bounds"] is True
|
||||
|
||||
if naturalearth_lowres.suffix == ".gpkg":
|
||||
assert meta["fid_column"] == "fid"
|
||||
assert meta["geometry_name"] == "geom"
|
||||
assert meta["geometry_type"] == "MultiPolygon"
|
||||
assert meta["driver"] == "GPKG"
|
||||
if GDAL_GE_38:
|
||||
# this capability is only True for GPKG if GDAL >= 3.8
|
||||
assert meta["capabilities"]["fast_set_next_by_index"] is True
|
||||
elif naturalearth_lowres.suffix == ".shp":
|
||||
# fid_column == "" for formats where fid is not physically stored
|
||||
assert meta["fid_column"] == ""
|
||||
# geometry_name == "" for formats where geometry column name cannot be
|
||||
# customized
|
||||
assert meta["geometry_name"] == ""
|
||||
assert meta["geometry_type"] == "Polygon"
|
||||
assert meta["driver"] == "ESRI Shapefile"
|
||||
assert meta["capabilities"]["fast_set_next_by_index"] is True
|
||||
else:
|
||||
raise ValueError(f"test not implemented for ext {naturalearth_lowres.suffix}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"testfile", ["naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"]
|
||||
)
|
||||
def test_read_info_vsi(testfile, request):
|
||||
path = request.getfixturevalue(testfile)
|
||||
path = path if not isinstance(path, tuple) else path[1]
|
||||
|
||||
meta = read_info(path)
|
||||
|
||||
assert meta["fields"].shape == (5,)
|
||||
assert meta["features"] == 177
|
||||
|
||||
|
||||
def test_read_info_bytes(geojson_bytes):
|
||||
meta = read_info(geojson_bytes)
|
||||
|
||||
assert meta["fields"].shape == (5,)
|
||||
assert meta["features"] == 3
|
||||
|
||||
|
||||
def test_read_info_nonseekable_bytes(nonseekable_bytes):
|
||||
meta = read_info(nonseekable_bytes)
|
||||
|
||||
assert meta["fields"].shape == (0,)
|
||||
assert meta["features"] == 1
|
||||
|
||||
|
||||
def test_read_info_filelike(geojson_filelike):
|
||||
meta = read_info(geojson_filelike)
|
||||
|
||||
assert meta["fields"].shape == (5,)
|
||||
assert meta["features"] == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dataset_kwargs,fields",
|
||||
@@ -399,8 +509,8 @@ def test_read_info(naturalearth_lowres):
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_read_info_dataset_kwargs(data_dir, dataset_kwargs, fields):
|
||||
meta = read_info(data_dir / "test_nested.geojson", **dataset_kwargs)
|
||||
def test_read_info_dataset_kwargs(nested_geojson_file, dataset_kwargs, fields):
|
||||
meta = read_info(nested_geojson_file, **dataset_kwargs)
|
||||
assert meta["fields"].tolist() == fields
|
||||
|
||||
|
||||
@@ -440,10 +550,12 @@ def test_read_info_force_feature_count(data_dir, layer, force, expected):
|
||||
[(True, (-180.0, -90.0, 180.0, 83.64513)), (False, None)],
|
||||
)
|
||||
def test_read_info_force_total_bounds(
|
||||
tmpdir, naturalearth_lowres, force_total_bounds, expected_total_bounds
|
||||
tmp_path, naturalearth_lowres, force_total_bounds, expected_total_bounds
|
||||
):
|
||||
# GeoJSON files don't have a fast way to determine total_bounds
|
||||
geojson_path = prepare_testfile(naturalearth_lowres, dst_dir=tmpdir, ext=".geojson")
|
||||
geojson_path = prepare_testfile(
|
||||
naturalearth_lowres, dst_dir=tmp_path, ext=".geojsonl"
|
||||
)
|
||||
|
||||
info = read_info(geojson_path, force_total_bounds=force_total_bounds)
|
||||
if expected_total_bounds is not None:
|
||||
assert allclose(info["total_bounds"], expected_total_bounds)
|
||||
@@ -451,8 +563,14 @@ def test_read_info_force_total_bounds(
|
||||
assert info["total_bounds"] is None
|
||||
|
||||
|
||||
def test_read_info_without_geometry(test_fgdb_vsi):
|
||||
assert read_info(test_fgdb_vsi)["total_bounds"] is None
|
||||
def test_read_info_unspecified_layer_warning(data_dir):
|
||||
"""Reading a multi-layer file without specifying a layer gives a warning."""
|
||||
with pytest.warns(UserWarning, match="More than one layer found "):
|
||||
read_info(data_dir / "sample.osm.pbf")
|
||||
|
||||
|
||||
def test_read_info_without_geometry(no_geometry_file):
|
||||
assert read_info(no_geometry_file)["total_bounds"] is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -494,3 +612,67 @@ def test_error_handling_warning(capfd, naturalearth_lowres):
|
||||
read_info(naturalearth_lowres, INVALID="YES")
|
||||
|
||||
assert capfd.readouterr().err == ""
|
||||
|
||||
|
||||
def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres):
|
||||
"""Test all basic functionalities of file handling in /vsimem/."""
|
||||
# Prepare test data in /vsimem
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
meta["spatial_index"] = False
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
test_file_path = Path("/vsimem/pyogrio_test_naturalearth_lowres.gpkg")
|
||||
test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg")
|
||||
|
||||
write(test_file_path, geometry, field_data, **meta)
|
||||
write(test_dir_path, geometry, field_data, **meta)
|
||||
|
||||
# Check if everything was created properly with listtree
|
||||
files = vsi_listtree("/vsimem/")
|
||||
assert test_file_path.as_posix() in files
|
||||
assert test_dir_path.as_posix() in files
|
||||
|
||||
# Check listtree with pattern
|
||||
files = vsi_listtree("/vsimem/", pattern="pyogrio_dir_test*.gpkg")
|
||||
assert test_file_path.as_posix() not in files
|
||||
assert test_dir_path.as_posix() in files
|
||||
|
||||
files = vsi_listtree("/vsimem/", pattern="pyogrio_test*.gpkg")
|
||||
assert test_file_path.as_posix() in files
|
||||
assert test_dir_path.as_posix() not in files
|
||||
|
||||
# Remove test_dir and its contents
|
||||
vsi_rmtree(test_dir_path.parent)
|
||||
files = vsi_listtree("/vsimem/")
|
||||
assert test_file_path.as_posix() in files
|
||||
assert test_dir_path.as_posix() not in files
|
||||
|
||||
# Remove test_file
|
||||
vsi_unlink(test_file_path)
|
||||
|
||||
|
||||
def test_vsimem_rmtree_error(naturalearth_lowres_vsimem):
|
||||
with pytest.raises(NotADirectoryError, match="Path is not a directory"):
|
||||
vsi_rmtree(naturalearth_lowres_vsimem)
|
||||
|
||||
with pytest.raises(FileNotFoundError, match="Path does not exist"):
|
||||
vsi_rmtree("/vsimem/non-existent")
|
||||
|
||||
with pytest.raises(
|
||||
OSError, match="path to in-memory file or directory is required"
|
||||
):
|
||||
vsi_rmtree("/vsimem")
|
||||
with pytest.raises(
|
||||
OSError, match="path to in-memory file or directory is required"
|
||||
):
|
||||
vsi_rmtree("/vsimem/")
|
||||
|
||||
# Verify that naturalearth_lowres_vsimem still exists.
|
||||
assert naturalearth_lowres_vsimem.as_posix() in vsi_listtree("/vsimem")
|
||||
|
||||
|
||||
def test_vsimem_unlink_error(naturalearth_lowres_vsimem):
|
||||
with pytest.raises(IsADirectoryError, match="Path is a directory"):
|
||||
vsi_unlink(naturalearth_lowres_vsimem.parent)
|
||||
|
||||
with pytest.raises(FileNotFoundError, match="Path does not exist"):
|
||||
vsi_unlink("/vsimem/non-existent.gpkg")
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,15 +1,17 @@
|
||||
import os
|
||||
import contextlib
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
|
||||
import pytest
|
||||
import os
|
||||
from pathlib import Path
|
||||
from zipfile import ZIP_DEFLATED, ZipFile
|
||||
|
||||
import pyogrio
|
||||
import pyogrio.raw
|
||||
from pyogrio.util import vsi_path
|
||||
from pyogrio._compat import HAS_PYPROJ
|
||||
from pyogrio.util import get_vsi_path_or_buffer, vsi_path
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
import geopandas # NOQA
|
||||
import geopandas # noqa: F401
|
||||
|
||||
has_geopandas = True
|
||||
except ImportError:
|
||||
@@ -31,9 +33,11 @@ def change_cwd(path):
|
||||
[
|
||||
# local file paths that should be passed through as is
|
||||
("data.gpkg", "data.gpkg"),
|
||||
(Path("data.gpkg"), "data.gpkg"),
|
||||
("/home/user/data.gpkg", "/home/user/data.gpkg"),
|
||||
(r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
|
||||
("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
|
||||
("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
|
||||
# cloud URIs
|
||||
("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
|
||||
("s3://testing/data.gpkg", "/vsis3/testing/data.gpkg"),
|
||||
@@ -82,6 +86,8 @@ def change_cwd(path):
|
||||
"s3://testing/test.zip!a/b/item.shp",
|
||||
"/vsizip/vsis3/testing/test.zip/a/b/item.shp",
|
||||
),
|
||||
("/vsimem/data.gpkg", "/vsimem/data.gpkg"),
|
||||
(Path("/vsimem/data.gpkg"), "/vsimem/data.gpkg"),
|
||||
],
|
||||
)
|
||||
def test_vsi_path(path, expected):
|
||||
@@ -236,6 +242,9 @@ def test_detect_zip_path(tmp_path, naturalearth_lowres):
|
||||
path = tmp_path / "test.zip"
|
||||
with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out:
|
||||
for ext in ["dbf", "prj", "shp", "shx"]:
|
||||
if not HAS_PYPROJ and ext == "prj":
|
||||
continue
|
||||
|
||||
filename = f"test1.{ext}"
|
||||
out.write(tmp_path / filename, filename)
|
||||
|
||||
@@ -265,7 +274,7 @@ def test_detect_zip_path(tmp_path, naturalearth_lowres):
|
||||
|
||||
@pytest.mark.network
|
||||
def test_url():
|
||||
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp" # NOQA
|
||||
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"
|
||||
|
||||
result = pyogrio.raw.read(url)
|
||||
assert len(result[2]) == 177
|
||||
@@ -277,9 +286,10 @@ def test_url():
|
||||
assert len(result[0]) == 177
|
||||
|
||||
|
||||
@pytest.mark.network
|
||||
@pytest.mark.skipif(not has_geopandas, reason="GeoPandas not available")
|
||||
def test_url_dataframe():
|
||||
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp" # NOQA
|
||||
url = "https://raw.githubusercontent.com/geopandas/pyogrio/main/pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"
|
||||
|
||||
assert len(pyogrio.read_dataframe(url)) == 177
|
||||
|
||||
@@ -330,3 +340,25 @@ def test_uri_s3(aws_env_setup):
|
||||
def test_uri_s3_dataframe(aws_env_setup):
|
||||
df = pyogrio.read_dataframe("zip+s3://fiona-testing/coutwildrnp.zip")
|
||||
assert len(df) == 67
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"path, expected",
|
||||
[
|
||||
(Path("/tmp/test.gpkg"), str(Path("/tmp/test.gpkg"))),
|
||||
(Path("/vsimem/test.gpkg"), "/vsimem/test.gpkg"),
|
||||
],
|
||||
)
|
||||
def test_get_vsi_path_or_buffer_obj_to_string(path, expected):
|
||||
"""Verify that get_vsi_path_or_buffer retains forward slashes in /vsimem paths.
|
||||
|
||||
The /vsimem paths should keep forward slashes for GDAL to recognize them as such.
|
||||
However, on Windows systems, forward slashes are by default replaced by backslashes,
|
||||
so this test verifies that this doesn't happen for /vsimem paths.
|
||||
"""
|
||||
assert get_vsi_path_or_buffer(path) == expected
|
||||
|
||||
|
||||
def test_get_vsi_path_or_buffer_fixtures_to_string(tmp_path):
|
||||
path = tmp_path / "test.gpkg"
|
||||
assert get_vsi_path_or_buffer(path) == str(path)
|
||||
|
||||
@@ -1,29 +1,36 @@
|
||||
import contextlib
|
||||
import ctypes
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from io import BytesIO
|
||||
from zipfile import ZipFile
|
||||
|
||||
import numpy as np
|
||||
from numpy import array_equal
|
||||
import pytest
|
||||
|
||||
import pyogrio
|
||||
from pyogrio import (
|
||||
list_layers,
|
||||
__gdal_version__,
|
||||
get_gdal_config_option,
|
||||
list_drivers,
|
||||
list_layers,
|
||||
read_info,
|
||||
set_gdal_config_options,
|
||||
__gdal_version__,
|
||||
)
|
||||
from pyogrio._compat import HAS_SHAPELY
|
||||
from pyogrio.raw import read, write
|
||||
from pyogrio.errors import DataSourceError, DataLayerError, FeatureError
|
||||
from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
|
||||
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
|
||||
from pyogrio.raw import open_arrow, read, write
|
||||
from pyogrio.tests.conftest import (
|
||||
DRIVERS,
|
||||
DRIVER_EXT,
|
||||
DRIVERS,
|
||||
prepare_testfile,
|
||||
requires_arrow_api,
|
||||
requires_pyarrow_api,
|
||||
requires_shapely,
|
||||
)
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
import shapely
|
||||
except ImportError:
|
||||
@@ -79,6 +86,12 @@ def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
|
||||
assert len(geometry) == len(fields[0])
|
||||
|
||||
|
||||
def test_read_arrow_unspecified_layer_warning(data_dir):
|
||||
"""Reading a multi-layer file without specifying a layer gives a warning."""
|
||||
with pytest.warns(UserWarning, match="More than one layer found "):
|
||||
read(data_dir / "sample.osm.pbf")
|
||||
|
||||
|
||||
def test_read_invalid_layer(naturalearth_lowres):
|
||||
with pytest.raises(DataLayerError, match="Layer 'invalid' could not be opened"):
|
||||
read(naturalearth_lowres, layer="invalid")
|
||||
@@ -106,6 +119,29 @@ def test_read_no_geometry(naturalearth_lowres):
|
||||
assert geometry is None
|
||||
|
||||
|
||||
@requires_shapely
|
||||
def test_read_no_geometry__mask(naturalearth_lowres):
|
||||
geometry, fields = read(
|
||||
naturalearth_lowres,
|
||||
read_geometry=False,
|
||||
mask=shapely.Point(-105, 55),
|
||||
)[2:]
|
||||
|
||||
assert np.array_equal(fields[3], ["CAN"])
|
||||
assert geometry is None
|
||||
|
||||
|
||||
def test_read_no_geometry__bbox(naturalearth_lowres):
|
||||
geometry, fields = read(
|
||||
naturalearth_lowres,
|
||||
read_geometry=False,
|
||||
bbox=(-109.0, 55.0, -109.0, 55.0),
|
||||
)[2:]
|
||||
|
||||
assert np.array_equal(fields[3], ["CAN"])
|
||||
assert geometry is None
|
||||
|
||||
|
||||
def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres):
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
@@ -245,9 +281,7 @@ def test_read_bbox_where(naturalearth_lowres_all_ext):
|
||||
assert np.array_equal(fields[3], ["CAN"])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
@pytest.mark.parametrize(
|
||||
"mask",
|
||||
[
|
||||
@@ -261,17 +295,13 @@ def test_read_mask_invalid(naturalearth_lowres, mask):
|
||||
read(naturalearth_lowres, mask=mask)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
def test_read_bbox_mask_invalid(naturalearth_lowres):
|
||||
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
|
||||
read(naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=shapely.Point(-105, 55))
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
@pytest.mark.parametrize(
|
||||
"mask,expected",
|
||||
[
|
||||
@@ -306,9 +336,7 @@ def test_read_mask(naturalearth_lowres_all_ext, mask, expected):
|
||||
assert len(geometry) == len(expected)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
def test_read_mask_sql(naturalearth_lowres_all_ext):
|
||||
fields = read(
|
||||
naturalearth_lowres_all_ext,
|
||||
@@ -319,9 +347,7 @@ def test_read_mask_sql(naturalearth_lowres_all_ext):
|
||||
assert np.array_equal(fields[3], ["CAN"])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not HAS_SHAPELY, reason="Shapely is required for mask functionality"
|
||||
)
|
||||
@requires_shapely
|
||||
def test_read_mask_where(naturalearth_lowres_all_ext):
|
||||
fields = read(
|
||||
naturalearth_lowres_all_ext,
|
||||
@@ -414,35 +440,43 @@ def test_read_return_only_fids(naturalearth_lowres):
|
||||
assert len(field_data) == 0
|
||||
|
||||
|
||||
def test_write(tmpdir, naturalearth_lowres):
|
||||
@pytest.mark.parametrize("encoding", [None, "ISO-8859-1"])
|
||||
def test_write_shp(tmp_path, naturalearth_lowres, encoding):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.shp")
|
||||
filename = tmp_path / "test.shp"
|
||||
meta["encoding"] = encoding
|
||||
write(filename, geometry, field_data, **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
for ext in (".dbf", ".prj"):
|
||||
assert os.path.exists(filename.replace(".shp", ext))
|
||||
assert filename.with_suffix(ext).exists()
|
||||
|
||||
# We write shapefiles in UTF-8 by default on all platforms
|
||||
expected_encoding = encoding if encoding is not None else "UTF-8"
|
||||
with open(filename.with_suffix(".cpg")) as cpg_file:
|
||||
result_encoding = cpg_file.read()
|
||||
assert result_encoding == expected_encoding
|
||||
|
||||
|
||||
def test_write_gpkg(tmpdir, naturalearth_lowres):
|
||||
def test_write_gpkg(tmp_path, naturalearth_lowres):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
meta.update({"geometry_type": "MultiPolygon"})
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.gpkg")
|
||||
filename = tmp_path / "test.gpkg"
|
||||
write(filename, geometry, field_data, driver="GPKG", **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
|
||||
|
||||
def test_write_gpkg_multiple_layers(tmpdir, naturalearth_lowres):
|
||||
def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.gpkg")
|
||||
filename = tmp_path / "test.gpkg"
|
||||
write(filename, geometry, field_data, driver="GPKG", layer="first", **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
|
||||
assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
|
||||
|
||||
@@ -453,13 +487,13 @@ def test_write_gpkg_multiple_layers(tmpdir, naturalearth_lowres):
|
||||
)
|
||||
|
||||
|
||||
def test_write_geojson(tmpdir, naturalearth_lowres):
|
||||
def test_write_geojson(tmp_path, naturalearth_lowres):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.json")
|
||||
filename = tmp_path / "test.json"
|
||||
write(filename, geometry, field_data, driver="GeoJSON", **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
|
||||
data = json.loads(open(filename).read())
|
||||
|
||||
@@ -478,17 +512,21 @@ def test_write_no_fields(tmp_path, naturalearth_lowres):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
field_data = None
|
||||
meta["fields"] = None
|
||||
# naturalearth_lowres actually contains MultiPolygons. A shapefile doesn't make the
|
||||
# distinction, so the metadata just reports Polygon. GPKG does, so override here to
|
||||
# avoid GDAL warnings.
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
# Test
|
||||
filename = tmp_path / "test.gpkg"
|
||||
write(filename, geometry, field_data, driver="GPKG", **meta)
|
||||
|
||||
# Check result
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
meta, _, geometry, fields = read(filename)
|
||||
|
||||
assert meta["crs"] == "EPSG:4326"
|
||||
assert meta["geometry_type"] == "Polygon"
|
||||
assert meta["geometry_type"] == "MultiPolygon"
|
||||
assert meta["encoding"] == "UTF-8"
|
||||
assert meta["fields"].shape == (0,)
|
||||
assert len(fields) == 0
|
||||
@@ -510,7 +548,7 @@ def test_write_no_geom(tmp_path, naturalearth_lowres):
|
||||
write(filename, geometry, field_data, driver="GPKG", **meta)
|
||||
|
||||
# Check result
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
meta, _, geometry, fields = read(filename)
|
||||
|
||||
assert meta["crs"] is None
|
||||
@@ -547,7 +585,7 @@ def test_write_no_geom_data(tmp_path, naturalearth_lowres):
|
||||
write(filename, geometry, field_data, driver="GPKG", **meta)
|
||||
|
||||
# Check result
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
result_meta, _, result_geometry, result_field_data = read(filename)
|
||||
|
||||
assert result_meta["crs"] is None
|
||||
@@ -581,17 +619,84 @@ def test_write_no_geom_no_fields():
|
||||
__gdal_version__ < (3, 6, 0),
|
||||
reason="OpenFileGDB write support only available for GDAL >= 3.6.0",
|
||||
)
|
||||
def test_write_openfilegdb(tmpdir, naturalearth_lowres):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
@pytest.mark.parametrize(
|
||||
"write_int64",
|
||||
[
|
||||
False,
|
||||
pytest.param(
|
||||
True,
|
||||
marks=pytest.mark.skipif(
|
||||
__gdal_version__ < (3, 9, 0),
|
||||
reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_write_openfilegdb(tmp_path, write_int64):
|
||||
# Point(0, 0)
|
||||
expected_geometry = np.array(
|
||||
[bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
|
||||
)
|
||||
expected_field_data = [
|
||||
np.array([True, False, True], dtype="bool"),
|
||||
np.array([1, 2, 3], dtype="int16"),
|
||||
np.array([1, 2, 3], dtype="int32"),
|
||||
np.array([1, 2, 3], dtype="int64"),
|
||||
np.array([1, 2, 3], dtype="float32"),
|
||||
np.array([1, 2, 3], dtype="float64"),
|
||||
]
|
||||
expected_fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
|
||||
expected_meta = {
|
||||
"geometry_type": "Point",
|
||||
"crs": "EPSG:4326",
|
||||
"fields": expected_fields,
|
||||
}
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.gdb")
|
||||
write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
|
||||
filename = tmp_path / "test.gdb"
|
||||
|
||||
assert os.path.exists(filename)
|
||||
# int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support
|
||||
# it is converted to float64 by default and raises a warning
|
||||
# (for GDAL >= 3.9.0 only)
|
||||
write_params = (
|
||||
{"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {}
|
||||
)
|
||||
|
||||
if write_int64 or __gdal_version__ < (3, 9, 0):
|
||||
ctx = contextlib.nullcontext()
|
||||
else:
|
||||
ctx = pytest.warns(
|
||||
RuntimeWarning, match="Integer64 will be written as a Float64"
|
||||
)
|
||||
|
||||
with ctx:
|
||||
write(
|
||||
filename,
|
||||
expected_geometry,
|
||||
expected_field_data,
|
||||
driver="OpenFileGDB",
|
||||
**expected_meta,
|
||||
**write_params,
|
||||
)
|
||||
|
||||
meta, _, geometry, field_data = read(filename)
|
||||
|
||||
if not write_int64:
|
||||
expected_field_data[3] = expected_field_data[3].astype("float64")
|
||||
|
||||
# bool types are converted to int32
|
||||
expected_field_data[0] = expected_field_data[0].astype("int32")
|
||||
|
||||
assert meta["crs"] == expected_meta["crs"]
|
||||
assert np.array_equal(meta["fields"], expected_meta["fields"])
|
||||
|
||||
assert np.array_equal(geometry, expected_geometry)
|
||||
for i in range(len(expected_field_data)):
|
||||
assert field_data[i].dtype == expected_field_data[i].dtype
|
||||
assert np.array_equal(field_data[i], expected_field_data[i])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ext", DRIVERS)
|
||||
def test_write_append(tmpdir, naturalearth_lowres, ext):
|
||||
def test_write_append(tmp_path, naturalearth_lowres, ext):
|
||||
if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
|
||||
pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
|
||||
|
||||
@@ -603,10 +708,10 @@ def test_write_append(tmpdir, naturalearth_lowres, ext):
|
||||
# coerce output layer to MultiPolygon to avoid mixed type errors
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
filename = os.path.join(str(tmpdir), f"test{ext}")
|
||||
filename = tmp_path / f"test{ext}"
|
||||
write(filename, geometry, field_data, **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
|
||||
assert read_info(filename)["features"] == 177
|
||||
|
||||
@@ -617,17 +722,17 @@ def test_write_append(tmpdir, naturalearth_lowres, ext):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
|
||||
def test_write_append_unsupported(tmpdir, naturalearth_lowres, driver, ext):
|
||||
def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
|
||||
if ext == ".geojsons" and __gdal_version__ >= (3, 6, 0):
|
||||
pytest.skip("Append to GeoJSONSeq supported for GDAL >= 3.6.0")
|
||||
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
|
||||
# GML does not support append functionality
|
||||
filename = os.path.join(str(tmpdir), f"test{ext}")
|
||||
filename = tmp_path / f"test{ext}"
|
||||
write(filename, geometry, field_data, driver=driver, **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
|
||||
assert read_info(filename, force_feature_count=True)["features"] == 177
|
||||
|
||||
@@ -639,16 +744,16 @@ def test_write_append_unsupported(tmpdir, naturalearth_lowres, driver, ext):
|
||||
__gdal_version__ > (3, 5, 0),
|
||||
reason="segfaults on FlatGeobuf limited to GDAL <= 3.5.0",
|
||||
)
|
||||
def test_write_append_prevent_gdal_segfault(tmpdir, naturalearth_lowres):
|
||||
def test_write_append_prevent_gdal_segfault(tmp_path, naturalearth_lowres):
|
||||
"""GDAL <= 3.5.0 segfaults when appending to FlatGeobuf; this test
|
||||
verifies that we catch that before segfault"""
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.fgb")
|
||||
filename = tmp_path / "test.fgb"
|
||||
write(filename, geometry, field_data, **meta)
|
||||
|
||||
assert os.path.exists(filename)
|
||||
assert filename.exists()
|
||||
|
||||
with pytest.raises(
|
||||
RuntimeError, # match="append to FlatGeobuf is not supported for GDAL <= 3.5.0"
|
||||
@@ -664,7 +769,7 @@ def test_write_append_prevent_gdal_segfault(tmpdir, naturalearth_lowres):
|
||||
if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
|
||||
},
|
||||
)
|
||||
def test_write_supported(tmpdir, naturalearth_lowres, driver):
|
||||
def test_write_supported(tmp_path, naturalearth_lowres, driver):
|
||||
"""Test drivers known to work that are not specifically tested above"""
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres, columns=["iso_a3"])
|
||||
|
||||
@@ -673,7 +778,7 @@ def test_write_supported(tmpdir, naturalearth_lowres, driver):
|
||||
# we take the first record only.
|
||||
meta["geometry_type"] = "MultiPolygon"
|
||||
|
||||
filename = tmpdir / f"test{DRIVER_EXT[driver]}"
|
||||
filename = tmp_path / f"test{DRIVER_EXT[driver]}"
|
||||
write(
|
||||
filename,
|
||||
geometry[:1],
|
||||
@@ -688,10 +793,10 @@ def test_write_supported(tmpdir, naturalearth_lowres, driver):
|
||||
@pytest.mark.skipif(
|
||||
__gdal_version__ >= (3, 6, 0), reason="OpenFileGDB supports write for GDAL >= 3.6.0"
|
||||
)
|
||||
def test_write_unsupported(tmpdir, naturalearth_lowres):
|
||||
def test_write_unsupported(tmp_path, naturalearth_lowres):
|
||||
meta, _, geometry, field_data = read(naturalearth_lowres)
|
||||
|
||||
filename = os.path.join(str(tmpdir), "test.gdb")
|
||||
filename = tmp_path / "test.gdb"
|
||||
|
||||
with pytest.raises(DataSourceError, match="does not support write functionality"):
|
||||
write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
|
||||
@@ -721,7 +826,7 @@ def assert_equal_result(result1, result2):
|
||||
|
||||
assert np.array_equal(meta1["fields"], meta2["fields"])
|
||||
assert np.array_equal(index1, index2)
|
||||
assert all([np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2)])
|
||||
assert all(np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2))
|
||||
|
||||
if HAS_SHAPELY:
|
||||
# a plain `assert np.array_equal(geometry1, geometry2)` doesn't work
|
||||
@@ -734,10 +839,10 @@ def assert_equal_result(result1, result2):
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
|
||||
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
|
||||
def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
|
||||
def test_read_from_bytes(tmp_path, naturalearth_lowres, driver, ext):
|
||||
meta, index, geometry, field_data = read(naturalearth_lowres)
|
||||
meta.update({"geometry_type": "Unknown"})
|
||||
filename = os.path.join(str(tmpdir), f"test.{ext}")
|
||||
filename = tmp_path / f"test.{ext}"
|
||||
write(filename, geometry, field_data, driver=driver, **meta)
|
||||
|
||||
with open(filename, "rb") as f:
|
||||
@@ -747,7 +852,7 @@ def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
|
||||
assert_equal_result((meta, index, geometry, field_data), result2)
|
||||
|
||||
|
||||
def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
|
||||
def test_read_from_bytes_zipped(naturalearth_lowres_vsi):
|
||||
path, vsi_path = naturalearth_lowres_vsi
|
||||
meta, index, geometry, field_data = read(vsi_path)
|
||||
|
||||
@@ -760,10 +865,10 @@ def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
|
||||
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
|
||||
def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
|
||||
def test_read_from_file_like(tmp_path, naturalearth_lowres, driver, ext):
|
||||
meta, index, geometry, field_data = read(naturalearth_lowres)
|
||||
meta.update({"geometry_type": "Unknown"})
|
||||
filename = os.path.join(str(tmpdir), f"test.{ext}")
|
||||
filename = tmp_path / f"test.{ext}"
|
||||
write(filename, geometry, field_data, driver=driver, **meta)
|
||||
|
||||
with open(filename, "rb") as f:
|
||||
@@ -772,6 +877,12 @@ def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
|
||||
assert_equal_result((meta, index, geometry, field_data), result2)
|
||||
|
||||
|
||||
def test_read_from_nonseekable_bytes(nonseekable_bytes):
|
||||
meta, _, geometry, _ = read(nonseekable_bytes)
|
||||
assert meta["fields"].shape == (0,)
|
||||
assert len(geometry) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
|
||||
def test_read_write_data_types_numeric(tmp_path, ext):
|
||||
# Point(0, 0)
|
||||
@@ -787,13 +898,13 @@ def test_read_write_data_types_numeric(tmp_path, ext):
|
||||
np.array([1, 2, 3], dtype="float64"),
|
||||
]
|
||||
fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
|
||||
|
||||
filename = tmp_path / f"test.{ext}"
|
||||
write(filename, geometry, field_data, fields, **meta)
|
||||
result = read(filename)[3]
|
||||
assert all([np.array_equal(f1, f2) for f1, f2 in zip(result, field_data)])
|
||||
assert all([f1.dtype == f2.dtype for f1, f2 in zip(result, field_data)])
|
||||
assert all(np.array_equal(f1, f2) for f1, f2 in zip(result, field_data))
|
||||
assert all(f1.dtype == f2.dtype for f1, f2 in zip(result, field_data))
|
||||
|
||||
# other integer data types that don't roundtrip exactly
|
||||
# these are generally promoted to a larger integer type except for uint64
|
||||
@@ -844,7 +955,7 @@ def test_read_write_datetime(tmp_path):
|
||||
geometry = np.array(
|
||||
[bytes.fromhex("010100000000000000000000000000000000000000")] * 2, dtype=object
|
||||
)
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
|
||||
|
||||
filename = tmp_path / "test.gpkg"
|
||||
write(filename, geometry, field_data, fields, **meta)
|
||||
@@ -867,7 +978,7 @@ def test_read_write_int64_large(tmp_path, ext):
|
||||
)
|
||||
field_data = [np.array([1, 2192502720, -5], dtype="int64")]
|
||||
fields = ["overflow_int64"]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
|
||||
|
||||
filename = tmp_path / f"test.{ext}"
|
||||
write(filename, geometry, field_data, fields, **meta)
|
||||
@@ -890,17 +1001,17 @@ def test_read_data_types_numeric_with_null(test_gpkg_nulls):
|
||||
assert field.dtype == "float64"
|
||||
|
||||
|
||||
def test_read_unsupported_types(test_ogr_types_list):
|
||||
fields = read(test_ogr_types_list)[3]
|
||||
def test_read_unsupported_types(list_field_values_file):
|
||||
fields = read(list_field_values_file)[3]
|
||||
# list field gets skipped, only integer field is read
|
||||
assert len(fields) == 1
|
||||
|
||||
fields = read(test_ogr_types_list, columns=["int64"])[3]
|
||||
fields = read(list_field_values_file, columns=["int64"])[3]
|
||||
assert len(fields) == 1
|
||||
|
||||
|
||||
def test_read_datetime_millisecond(test_datetime):
|
||||
field = read(test_datetime)[3][0]
|
||||
def test_read_datetime_millisecond(datetime_file):
|
||||
field = read(datetime_file)[3][0]
|
||||
assert field.dtype == "datetime64[ms]"
|
||||
assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
|
||||
assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
|
||||
@@ -929,13 +1040,14 @@ def test_read_unsupported_ext_with_prefix(tmp_path):
|
||||
assert field_data[0] == "data1"
|
||||
|
||||
|
||||
def test_read_datetime_as_string(test_datetime_tz):
|
||||
field = read(test_datetime_tz)[3][0]
|
||||
def test_read_datetime_as_string(datetime_tz_file):
|
||||
field = read(datetime_tz_file)[3][0]
|
||||
assert field.dtype == "datetime64[ms]"
|
||||
# timezone is ignored in numpy layer
|
||||
assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
|
||||
assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
|
||||
field = read(test_datetime_tz, datetime_as_string=True)[3][0]
|
||||
|
||||
field = read(datetime_tz_file, datetime_as_string=True)[3][0]
|
||||
assert field.dtype == "object"
|
||||
# GDAL doesn't return strings in ISO format (yet)
|
||||
assert field[0] == "2020/01/01 09:00:00.123-05"
|
||||
@@ -951,7 +1063,7 @@ def test_read_write_null_geometry(tmp_path, ext):
|
||||
)
|
||||
field_data = [np.array([1, 2], dtype="int32")]
|
||||
fields = ["col"]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326")
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
|
||||
if ext == "gpkg":
|
||||
meta["spatial_index"] = False
|
||||
|
||||
@@ -971,12 +1083,12 @@ def test_write_float_nan_null(tmp_path, dtype):
|
||||
)
|
||||
field_data = [np.array([1.5, np.nan], dtype=dtype)]
|
||||
fields = ["col"]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326")
|
||||
fname = tmp_path / "test.geojson"
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
|
||||
filename = tmp_path / "test.geojson"
|
||||
|
||||
# default nan_as_null=True
|
||||
write(fname, geometry, field_data, fields, **meta)
|
||||
with open(str(fname), "r") as f:
|
||||
write(filename, geometry, field_data, fields, **meta)
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert '{ "col": null }' in content
|
||||
|
||||
@@ -987,14 +1099,14 @@ def test_write_float_nan_null(tmp_path, dtype):
|
||||
else:
|
||||
ctx = contextlib.nullcontext()
|
||||
with ctx:
|
||||
write(fname, geometry, field_data, fields, **meta, nan_as_null=False)
|
||||
with open(str(fname), "r") as f:
|
||||
write(filename, geometry, field_data, fields, **meta, nan_as_null=False)
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert '"properties": { }' in content
|
||||
|
||||
# but can instruct GDAL to write NaN to json
|
||||
write(
|
||||
fname,
|
||||
filename,
|
||||
geometry,
|
||||
field_data,
|
||||
fields,
|
||||
@@ -1002,12 +1114,12 @@ def test_write_float_nan_null(tmp_path, dtype):
|
||||
nan_as_null=False,
|
||||
WRITE_NON_FINITE_VALUES="YES",
|
||||
)
|
||||
with open(str(fname), "r") as f:
|
||||
with open(filename) as f:
|
||||
content = f.read()
|
||||
assert '{ "col": NaN }' in content
|
||||
|
||||
|
||||
@requires_arrow_api
|
||||
@requires_pyarrow_api
|
||||
@pytest.mark.skipif(
|
||||
"Arrow" not in list_drivers(), reason="Arrow driver is not available"
|
||||
)
|
||||
@@ -1021,7 +1133,7 @@ def test_write_float_nan_null_arrow(tmp_path):
|
||||
)
|
||||
field_data = [np.array([1.5, np.nan], dtype="float64")]
|
||||
fields = ["col"]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326")
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
|
||||
fname = tmp_path / "test.arrow"
|
||||
|
||||
# default nan_as_null=True
|
||||
@@ -1039,6 +1151,112 @@ def test_write_float_nan_null_arrow(tmp_path):
|
||||
assert pc.is_nan(table["col"]).to_pylist() == [False, True]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
    """Write a layer to an in-memory buffer and read it back for each driver."""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    # override the geometry type used for writing
    meta["geometry_type"] = "MultiPolygon"

    mem_file = BytesIO()
    write(mem_file, geometry, field_data, driver=driver, layer="test", **meta)

    # the buffer must contain data and expose the layer under the requested name
    assert len(mem_file.getbuffer()) > 0
    first_layer_name = list_layers(mem_file)[0][0]
    assert first_layer_name == "test"

    result_meta, _, result_geometry, result_field_data = read(mem_file)

    assert np.array_equal(result_meta["fields"], meta["fields"])
    assert np.array_equal(result_field_data, field_data)
    assert len(result_geometry) == len(geometry)
|
||||
|
||||
|
||||
def test_write_memory_driver_required(naturalearth_lowres):
    """Writing to an in-memory buffer with ``driver=None`` raises ValueError."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    mem_file = BytesIO()
    expected_msg = "driver must be provided to write to in-memory file"

    with pytest.raises(ValueError, match=expected_msg):
        write(mem_file, geometry, field_data, driver=None, layer="test", **meta)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    """Writing to an in-memory buffer with these drivers raises ValueError."""
    gdal_too_old = driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0)
    if gdal_too_old:
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    meta, _, geometry, field_data = read(naturalearth_lowres)

    mem_file = BytesIO()
    expected_msg = f"writing to in-memory file is not supported for {driver}"
    write_options = {"driver": driver, "layer": "test", "append": True}

    with pytest.raises(ValueError, match=expected_msg):
        write(mem_file, geometry, field_data, **write_options, **meta)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    """Appending to an in-memory buffer raises NotImplementedError."""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    # override the geometry type used for writing
    meta["geometry_type"] = "MultiPolygon"

    mem_file = BytesIO()
    expected_msg = "append is not supported for in-memory files"
    write_options = {"driver": driver, "layer": "test", "append": True}

    with pytest.raises(NotImplementedError, match=expected_msg):
        write(mem_file, geometry, field_data, **write_options, **meta)
|
||||
|
||||
|
||||
def test_write_memory_existing_unsupported(naturalearth_lowres):
    """Writing into a buffer that already holds bytes raises NotImplementedError."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    # buffer is pre-populated, i.e. it is an existing in-memory object
    prefilled = BytesIO(b"0000")
    expected_msg = "writing to existing in-memory object is not supported"

    with pytest.raises(NotImplementedError, match=expected_msg):
        write(prefilled, geometry, field_data, driver="GeoJSON", layer="test", **meta)
|
||||
|
||||
|
||||
def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Verify that writing to an open file handle is not currently supported."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    expected_msg = "writing to an open file handle is not yet supported"
    geojson_path = tmp_path / "test.geojson"
    zip_path = tmp_path / "test.geojson.zip"

    # a regular binary file handle is rejected
    with pytest.raises(NotImplementedError, match=expected_msg):
        with open(geojson_path, "wb") as handle:
            write(handle, geometry, field_data, driver="GeoJSON", layer="test", **meta)

    # a handle to a member of an open ZipFile is rejected as well
    with pytest.raises(NotImplementedError, match=expected_msg):
        with ZipFile(zip_path, "w") as archive:
            with archive.open("test.geojson", "w") as handle:
                write(
                    handle,
                    geometry,
                    field_data,
                    driver="GeoJSON",
                    layer="test",
                    **meta,
                )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"])
|
||||
@pytest.mark.parametrize(
|
||||
"read_encoding,write_encoding",
|
||||
@@ -1075,7 +1293,7 @@ def test_encoding_io(tmp_path, ext, read_encoding, write_encoding):
|
||||
np.array([mandarin], dtype=object),
|
||||
]
|
||||
fields = [arabic, cree, mandarin]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326", encoding=write_encoding)
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": write_encoding}
|
||||
|
||||
filename = tmp_path / f"test.{ext}"
|
||||
write(filename, geometry, field_data, fields, **meta)
|
||||
@@ -1125,7 +1343,7 @@ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
|
||||
# character level) by GDAL when output to shapefile, so we have to truncate
|
||||
# before writing
|
||||
fields = [arabic[:5], cree[:3], mandarin]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326", encoding="UTF-8")
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": "UTF-8"}
|
||||
|
||||
filename = tmp_path / "test.shp"
|
||||
# NOTE: GDAL automatically creates a cpg file with the encoding name, which
|
||||
@@ -1141,7 +1359,7 @@ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
|
||||
# verify that if cpg file is not present, that user-provided encoding is used,
|
||||
# otherwise it defaults to ISO-8859-1
|
||||
if read_encoding is not None:
|
||||
os.unlink(str(filename).replace(".shp", ".cpg"))
|
||||
filename.with_suffix(".cpg").unlink()
|
||||
actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
|
||||
assert np.array_equal(fields, actual_meta["fields"])
|
||||
assert np.array_equal(field_data, actual_field_data)
|
||||
@@ -1150,6 +1368,97 @@ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
    """Verify that non-UTF-8 data is written to the data source and roundtrips.

    IMPORTANT: the result may not be valid for the data source and will likely
    be unusable in other tools, but it should successfully roundtrip unless we
    disable writing using other encodings.

    NOTE: FlatGeobuf driver cannot handle non-UTF data in GDAL >= 3.9
    """
    encoding, text = encoded_text

    # WKB for Point(0, 0)
    point_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    geometry = np.array([point_wkb], dtype=object)

    # the encoded text is used both as the field name and as the field value
    field_data = [np.array([text], dtype=object)]
    fields = [text]

    filename = tmp_path / f"test.{ext}"
    write(
        filename,
        geometry,
        field_data,
        fields,
        geometry_type="Point",
        crs="EPSG:4326",
        encoding=encoding,
    )

    # cannot open these files without specifying encoding
    for reader in (read, read_info):
        with pytest.raises(UnicodeDecodeError):
            reader(filename)

    # must provide encoding to read these properly
    result_meta, _, _, result_field_data = read(filename, encoding=encoding)
    assert result_meta["fields"][0] == text
    assert result_field_data[0] == text

    info = read_info(filename, encoding=encoding)
    assert info["fields"][0] == text
|
||||
|
||||
|
||||
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
    """Verify non-UTF-8 shapefile IO with and without the cpg sidecar file."""
    encoding, text = encoded_text

    # WKB for Point(0, 0)
    point_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    geometry = np.array([point_wkb], dtype=object)

    # the encoded text is used both as the field name and as the field value
    field_data = [np.array([text], dtype=object)]
    fields = [text]

    filename = tmp_path / "test.shp"
    write(
        filename,
        geometry,
        field_data,
        fields,
        geometry_type="Point",
        crs="EPSG:4326",
        encoding=encoding,
    )

    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that reading without specifying the encoding uses the correct one
    result_meta, _, _, result_field_data = read(filename)
    assert result_meta["fields"][0] == text
    assert result_field_data[0] == text
    assert read_info(filename)["fields"][0] == text

    # once the cpg file is gone, the user-provided encoding must be used
    cpg_path = filename.with_suffix(".cpg")
    cpg_path.unlink()

    # without an explicit encoding ISO-8859-1 is assumed, which is wrong here
    miscoded = text.encode(encoding).decode("ISO-8859-1")
    wrong_meta, _, _, wrong_field_data = read(filename)
    assert wrong_meta["fields"][0] == miscoded
    assert wrong_field_data[0] == miscoded
    assert read_info(filename)["fields"][0] == miscoded

    # providing the encoding yields the correct text again
    result_meta, _, _, result_field_data = read(filename, encoding=encoding)
    assert result_meta["fields"][0] == text
    assert result_field_data[0] == text
    assert read_info(filename, encoding=encoding)["fields"][0] == text

    # verify that setting encoding does not corrupt SHAPE_ENCODING option if set
    # globally (it is ignored during read when encoding is specified by user)
    try:
        set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
        _ = read(filename, encoding=encoding)
        assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"
    finally:
        # reset to clear between tests
        set_gdal_config_options({"SHAPE_ENCODING": None})
|
||||
|
||||
|
||||
def test_write_with_mask(tmp_path):
|
||||
# Point(0, 0), null
|
||||
geometry = np.array(
|
||||
@@ -1159,7 +1468,7 @@ def test_write_with_mask(tmp_path):
|
||||
field_data = [np.array([1, 2, 3], dtype="int32")]
|
||||
field_mask = [np.array([False, True, False])]
|
||||
fields = ["col"]
|
||||
meta = dict(geometry_type="Point", crs="EPSG:4326")
|
||||
meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
|
||||
|
||||
filename = tmp_path / "test.geojson"
|
||||
write(filename, geometry, field_data, fields, field_mask, **meta)
|
||||
@@ -1176,3 +1485,31 @@ def test_write_with_mask(tmp_path):
|
||||
field_mask = [np.array([False, True, False])] * 2
|
||||
with pytest.raises(ValueError):
|
||||
write(filename, geometry, field_data, fields, field_mask, **meta)
|
||||
|
||||
|
||||
@requires_arrow_api
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
    """Check that open_arrow yields a stream exposing a valid Arrow capsule."""
    # kept in this module rather than test_arrow.py so that it also runs when
    # pyarrow is not installed
    with open_arrow(naturalearth_lowres) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyogrio._io._ArrowStream)

        capsule = reader.__arrow_c_stream__()
        # PyCapsule_IsValid returns 1 when the capsule is valid and its name matches
        is_valid = ctypes.pythonapi.PyCapsule_IsValid(
            ctypes.py_object(capsule), b"arrow_array_stream"
        )
        assert is_valid == 1
|
||||
|
||||
|
||||
@pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed")
@requires_arrow_api
def test_open_arrow_error_no_pyarrow(naturalearth_lowres):
    """Requesting ``use_pyarrow=True`` without pyarrow raises ImportError."""
    # kept in this module rather than test_arrow.py so that it runs when
    # pyarrow is not installed
    with pytest.raises(ImportError), open_arrow(naturalearth_lowres, use_pyarrow=True):
        pass
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
"""Run pytest tests manually on Windows due to import errors.

Each selected test function is called directly; any exception it raises is
printed instead of propagated, so that the remaining tests still run.
"""

import platform
from pathlib import Path
from tempfile import TemporaryDirectory

data_dir = Path(__file__).parent.resolve() / "fixtures"


def _call_and_print_error(test_func, *args):
    """Call ``test_func(*args)`` and print any exception instead of raising it."""
    try:
        test_func(*args)
    except Exception as ex:
        print(ex)


if platform.system() == "Windows":
    naturalearth_lowres = data_dir / Path("naturalearth_lowres/naturalearth_lowres.shp")
    test_fgdb_vsi = f"/vsizip/{data_dir}/test_fgdb.gdb.zip"

    from pyogrio.tests.test_core import test_read_info

    _call_and_print_error(test_read_info, naturalearth_lowres)

    from pyogrio.tests.test_raw_io import (
        test_read,
        test_read_no_geometry,
        test_read_columns,
        test_read_skip_features,
        test_read_max_features,
        test_read_where,
        test_read_where_invalid,
        test_write,
        test_write_gpkg,
        test_write_geojson,
    )

    # read tests only need the path to the input dataset
    for read_test in (
        test_read,
        test_read_no_geometry,
        test_read_columns,
        test_read_skip_features,
        test_read_max_features,
        test_read_where,
        test_read_where_invalid,
    ):
        _call_and_print_error(read_test, naturalearth_lowres)

    # write tests each get their own fresh temporary directory
    for write_test in (test_write, test_write_gpkg, test_write_geojson):
        with TemporaryDirectory() as tmpdir:
            _call_and_print_error(write_test, tmpdir, naturalearth_lowres)
|
||||
Reference in New Issue
Block a user