This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -1,41 +1,65 @@
"""Utility functions."""
import re
import sys
from packaging.version import Version
from pathlib import Path
from typing import Union
from urllib.parse import urlparse
from packaging.version import Version
from pyogrio._env import GDALEnv
with GDALEnv():
from pyogrio._ogr import buffer_to_virtual_file
from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel
def get_vsi_path(path_or_buffer):
def get_vsi_path_or_buffer(path_or_buffer):
"""Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.
If path_or_buffer is a bytes object, it will be returned directly and will
be read into an in-memory dataset when passed to one of the Cython functions.
If path_or_buffer is a file-like object with a read method, bytes will be
read from the file-like object and returned.
Otherwise, it will be converted to a string, and parsed to prefix with
appropriate GDAL /vsi*/ prefixes.
Parameters
----------
path_or_buffer : str, pathlib.Path, bytes, or file-like
A dataset path or URI, raw buffer, or file-like object with a read method.
Returns
-------
str or bytes
"""
# treat Path objects here already to ignore their read method + to avoid backslashes
# on Windows.
if isinstance(path_or_buffer, Path):
return vsi_path(path_or_buffer)
if isinstance(path_or_buffer, bytes):
return path_or_buffer
if hasattr(path_or_buffer, "read"):
path_or_buffer = path_or_buffer.read()
bytes_buffer = path_or_buffer.read()
buffer = None
if isinstance(path_or_buffer, bytes):
buffer = path_or_buffer
ext = ""
is_zipped = path_or_buffer[:4].startswith(b"PK\x03\x04")
if is_zipped:
ext = ".zip"
path = buffer_to_virtual_file(path_or_buffer, ext=ext)
if is_zipped:
path = "/vsizip/" + path
else:
path = vsi_path(str(path_or_buffer))
# rewind buffer if possible so that subsequent operations do not need to rewind
if hasattr(path_or_buffer, "seekable") and path_or_buffer.seekable():
path_or_buffer.seek(0)
return path, buffer
return bytes_buffer
return vsi_path(str(path_or_buffer))
def vsi_path(path: str) -> str:
"""
Ensure path is a local path or a GDAL-compatible vsi path.
"""
def vsi_path(path: Union[str, Path]) -> str:
"""Ensure path is a local path or a GDAL-compatible VSI path."""
# Convert Path objects to string, but for VSI paths, keep posix style path.
if isinstance(path, Path):
if sys.platform == "win32" and path.as_posix().startswith("/vsi"):
path = path.as_posix()
else:
path = str(path)
# path is already in GDAL format
if path.startswith("/vsi"):
@@ -78,12 +102,11 @@ SCHEMES = {
# those are for now not added as supported URI
}
CURLSCHEMES = set([k for k, v in SCHEMES.items() if v == "curl"])
CURLSCHEMES = {k for k, v in SCHEMES.items() if v == "curl"}
def _parse_uri(path: str):
"""
Parse a URI
"""Parse a URI.
Returns a tuple of (path, archive, scheme)
@@ -95,7 +118,7 @@ def _parse_uri(path: str):
scheme : str
URI scheme such as "https" or "zip+s3".
"""
parts = urlparse(path)
parts = urlparse(path, allow_fragments=False)
# if the scheme is not one of GDAL's supported schemes, return raw path
if parts.scheme and not all(p in SCHEMES for p in parts.scheme.split("+")):
@@ -118,8 +141,7 @@ def _parse_uri(path: str):
def _construct_vsi_path(path, archive, scheme) -> str:
"""Convert a parsed path to a GDAL VSI path"""
"""Convert a parsed path to a GDAL VSI path."""
prefix = ""
suffix = ""
schemes = scheme.split("+")
@@ -128,9 +150,7 @@ def _construct_vsi_path(path, archive, scheme) -> str:
schemes.insert(0, "zip")
if schemes:
prefix = "/".join(
"vsi{0}".format(SCHEMES[p]) for p in schemes if p and p != "file"
)
prefix = "/".join(f"vsi{SCHEMES[p]}" for p in schemes if p and p != "file")
if schemes[-1] in CURLSCHEMES:
suffix = f"{schemes[-1]}://"
@@ -139,15 +159,15 @@ def _construct_vsi_path(path, archive, scheme) -> str:
if archive:
return "/{}/{}{}/{}".format(prefix, suffix, archive, path.lstrip("/"))
else:
return "/{}/{}{}".format(prefix, suffix, path)
return f"/{prefix}/{suffix}{path}"
return path
def _preprocess_options_key_value(options):
"""
Preprocess options, eg `spatial_index=True` gets converted
to `SPATIAL_INDEX="YES"`.
"""Preprocess options.
For example, `spatial_index=True` gets converted to `SPATIAL_INDEX="YES"`.
"""
if not isinstance(options, dict):
raise TypeError(f"Expected options to be a dict, got {type(options)}")
@@ -171,6 +191,7 @@ def _mask_to_wkb(mask):
Parameters
----------
mask : Shapely geometry
The geometry to convert to WKB.
Returns
-------
@@ -181,8 +202,8 @@ def _mask_to_wkb(mask):
ValueError
raised if Shapely >= 2.0 is not available or mask is not a Shapely
Geometry object
"""
"""
if mask is None:
return mask
@@ -201,3 +222,26 @@ def _mask_to_wkb(mask):
raise ValueError("'mask' parameter must be a Shapely geometry")
return shapely.to_wkb(mask)
def vsimem_rmtree_toplevel(path: Union[str, Path]):
    """Recursively remove the parent directory of an in-memory file path.

    Intended as the final cleanup step for an in-memory dataset, which may
    have been created inside a directory so that sibling files can live next
    to it.

    Any additional VSI handlers chained to the left of /vsimem/ in ``path``
    are ignored.

    Remark: this thin wrapper lives in this module so that tests can be run
    against it; the removal itself is delegated to the imported
    ``_vsimem_rmtree_toplevel`` helper.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the in-memory file.

    """
    # Path objects are handed over in posix form (forward slashes); plain
    # strings are passed through untouched.
    posix_path = path.as_posix() if isinstance(path, Path) else path
    _vsimem_rmtree_toplevel(posix_path)