#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
# NOTE(review): in Cython source, '#' starts a comment, so the line above is
# inert here; the NumPy deprecation macro is presumably injected via build
# flags — confirm against setup configuration.
"""IO support for OGR vector data sources
"""

import contextlib
import datetime
import locale
import logging
import math
import os
import warnings

from libc.stdint cimport uint8_t, uintptr_t
from libc.stdlib cimport malloc, free
from libc.string cimport strlen
from libc.math cimport isnan

cimport cython
import numpy as np
cimport numpy as np

from pyogrio._ogr cimport *
from pyogrio._err cimport *
from pyogrio._err import CPLE_BaseError, CPLE_NotSupportedError, NullPointerError
from pyogrio._geometry cimport get_geometry_type, get_geometry_type_code
from pyogrio.errors import CRSError, DataSourceError, DataLayerError, GeometryError, FieldError, FeatureError


log = logging.getLogger(__name__)


# Mapping of OGR integer field types to Python field type names
# (index in array is the integer field type)
FIELD_TYPES = [
    'int32',          # OFTInteger, Simple 32bit integer
    None,             # OFTIntegerList, List of 32bit integers, not supported
    'float64',        # OFTReal, Double Precision floating point
    None,             # OFTRealList, List of doubles, not supported
    'object',         # OFTString, String of UTF-8 chars
    None,             # OFTStringList, Array of strings, not supported
    None,             # OFTWideString, deprecated, not supported
    None,             # OFTWideStringList, deprecated, not supported
    'object',         # OFTBinary, Raw Binary data
    'datetime64[D]',  # OFTDate, Date
    None,             # OFTTime, Time, NOTE: not directly supported in numpy
    'datetime64[ms]', # OFTDateTime, Date and Time
    'int64',          # OFTInteger64, Single 64bit integer
    None              # OFTInteger64List, List of 64bit integers, not supported
]

# Mapping of OGR field subtypes to the numpy dtype name that represents them;
# a subtype of OFSTNone maps to None so the base field type is used instead.
FIELD_SUBTYPES = {
    OFSTNone: None,         # No subtype
    OFSTBoolean: "bool",    # Boolean integer
    OFSTInt16: "int16",     # Signed 16-bit integer
    OFSTFloat32: "float32", # Single precision (32 bit) floating point
}

# Mapping of numpy ndarray dtypes to (field type, subtype)
DTYPE_OGR_FIELD_TYPES = {
    'int8': (OFTInteger, OFSTInt16),
    'int16': (OFTInteger, OFSTInt16),
    'int32': (OFTInteger, OFSTNone),
    'int': (OFTInteger64, OFSTNone),
    'int64': (OFTInteger64, OFSTNone),
    # unsigned ints have to be converted to ints; these are converted
    # to the next largest integer size
    'uint8': (OFTInteger, OFSTInt16),
    'uint16': (OFTInteger, OFSTNone),
    'uint32': (OFTInteger64, OFSTNone),
    # TODO: these might get truncated, check maximum value and raise error
    'uint': (OFTInteger64, OFSTNone),
    'uint64': (OFTInteger64, OFSTNone),

    # bool is handled as integer with boolean subtype
    'bool': (OFTInteger, OFSTBoolean),

    'float32': (OFTReal, OFSTFloat32),
    'float': (OFTReal, OFSTNone),
    'float64': (OFTReal, OFSTNone),

    'datetime64[D]': (OFTDate, OFSTNone),
    'datetime64': (OFTDateTime, OFSTNone),
}


cdef int start_transaction(OGRDataSourceH ogr_dataset, int force) except 1:
    """Start a transaction on the dataset; raises DataSourceError on failure."""
    cdef int err = GDALDatasetStartTransaction(ogr_dataset, force)
    if err == OGRERR_FAILURE:
        raise DataSourceError("Failed to start transaction")

    return 0


cdef int commit_transaction(OGRDataSourceH ogr_dataset) except 1:
    """Commit the current transaction; raises DataSourceError on failure."""
    cdef int err = GDALDatasetCommitTransaction(ogr_dataset)
    if err == OGRERR_FAILURE:
        raise DataSourceError("Failed to commit transaction")

    return 0


# Not currently used; uncomment when used
# cdef int rollback_transaction(OGRDataSourceH ogr_dataset) except 1:
#     cdef int err = GDALDatasetRollbackTransaction(ogr_dataset)
#     if err == OGRERR_FAILURE:
#         raise DataSourceError("Failed to rollback transaction")

#     return 0


cdef char** dict_to_options(object values):
    """Convert a python dictionary into name / value pairs (stored in a char**)

    Parameters
    ----------
    values: dict
        all keys and values must be strings

    Returns
    -------
    char**
    """
    cdef char **options = NULL

    if values is None:
        return NULL

    for k, v in values.items():
        k = k.encode('UTF-8')
        v = v.encode('UTF-8')
        options = CSLAddNameValue(options, k, v)

    return options


cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
    """Open an OGR dataset at path_c for read (mode=0) or update (mode=1).

    Raises DataSourceError with a helpful message if the dataset cannot be
    opened or the format is not recognized.
    """
    cdef void* ogr_dataset = NULL

    # Force linear approximations in all cases
    OGRSetNonLinearGeometriesEnabledFlag(0)

    flags = GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR
    if mode == 1:
        flags |= GDAL_OF_UPDATE
    else:
        flags |= GDAL_OF_READONLY

    try:
        # WARNING: GDAL logs warnings about invalid open options to stderr
        # instead of raising an error
        ogr_dataset = exc_wrap_pointer(
            GDALOpenEx(path_c, flags, NULL, options, NULL)
        )
        return ogr_dataset

    except NullPointerError:
        raise DataSourceError(
            "Failed to open dataset (mode={}): {}".format(mode, path_c.decode("utf-8"))
        ) from None

    except CPLE_BaseError as exc:
        if str(exc).endswith("not recognized as a supported file format."):
            raise DataSourceError(
                f"{str(exc)} It might help to specify the correct driver explicitly by "
                "prefixing the file path with ':', e.g. 'CSV:path'."
            ) from None
        raise DataSourceError(str(exc)) from None


cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
    """Open OGR layer by index or name.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    layer : str or int
        name or index of layer

    Returns
    -------
    pointer to OGR layer
    """
    cdef OGRLayerH ogr_layer = NULL

    try:
        if isinstance(layer, str):
            name_b = layer.encode('utf-8')
            name_c = name_b
            ogr_layer = exc_wrap_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c))

        elif isinstance(layer, int):
            ogr_layer = exc_wrap_pointer(GDALDatasetGetLayer(ogr_dataset, layer))

    # GDAL does not always raise exception messages in this case
    except NullPointerError:
        raise DataLayerError(f"Layer '{layer}' could not be opened") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))

    # if the driver is OSM, we need to execute SQL to set the layer to read in
    # order to read it properly
    if get_driver(ogr_dataset) == "OSM":
        # Note: this returns NULL and does not need to be freed via
        # GDALDatasetReleaseResultSet()
        layer_name = get_string(OGR_L_GetName(ogr_layer))
        sql_b = f"SET interest_layers = {layer_name}".encode('utf-8')
        sql_c = sql_b

        GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)

    return ogr_layer


cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, str sql, str sql_dialect=None) except NULL:
    """Execute an SQL statement on a dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    sql : str
        The sql statement to execute
    sql_dialect : str, optional (default: None)
        The sql dialect the sql statement is written in

    Returns
    -------
    pointer to OGR layer
    """
    try:
        sql_b = sql.encode('utf-8')
        sql_c = sql_b
        if sql_dialect is None:
            return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL))

        sql_dialect_b = sql_dialect.encode('utf-8')
        sql_dialect_c = sql_dialect_b
        return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c))

    # GDAL does not always raise exception messages in this case
    except NullPointerError:
        raise DataLayerError(f"Error executing sql '{sql}'") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))


cdef str get_crs(OGRLayerH ogr_layer):
    """Read CRS from layer as EPSG code if available or WKT.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer

    Returns
    -------
    str or None
        "EPSG:<code>" or WKT; None if the layer has no CRS (e.g., nonspatial
        tables)
    """
    cdef void *ogr_crs = NULL
    cdef const char *authority_key = NULL
    cdef const char *authority_val = NULL
    cdef char *ogr_wkt = NULL

    try:
        ogr_crs = exc_wrap_pointer(OGR_L_GetSpatialRef(ogr_layer))

    except NullPointerError:
        # No coordinate system defined.
        # This is expected and valid for nonspatial tables.
        return None

    except CPLE_BaseError as exc:
        raise CRSError(str(exc))

    # If CRS can be decoded to an EPSG code, use that.
    # The following pointers will be NULL if it cannot be decoded.
    retval = OSRAutoIdentifyEPSG(ogr_crs)
    authority_key = OSRGetAuthorityName(ogr_crs, NULL)
    authority_val = OSRGetAuthorityCode(ogr_crs, NULL)

    if authority_key != NULL and authority_val != NULL:
        key = get_string(authority_key)
        if key == 'EPSG':
            value = get_string(authority_val)
            return f"EPSG:{value}"

    try:
        OSRExportToWkt(ogr_crs, &ogr_wkt)
        if ogr_wkt == NULL:
            raise CRSError("CRS could not be extracted as WKT") from None

        wkt = get_string(ogr_wkt)

    finally:
        # always release the WKT buffer allocated by OGR
        CPLFree(ogr_wkt)

    return wkt


cdef get_driver(OGRDataSourceH ogr_dataset):
    """Get the driver for a dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    str or None
    """
    cdef void *ogr_driver

    try:
        ogr_driver = exc_wrap_pointer(GDALGetDatasetDriver(ogr_dataset))

    except NullPointerError:
        raise DataLayerError(f"Could not detect driver of dataset") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))

    driver = OGR_Dr_GetName(ogr_driver).decode("UTF-8")
    return driver


cdef get_feature_count(OGRLayerH ogr_layer, int force):
    """Get the feature count of a layer.

    If GDAL returns an unknown count (-1), this iterates over every feature
    to calculate the count.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    force : bool
        True if the feature count should be computed even if it is expensive

    Returns
    -------
    int
        count of features
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int feature_count = OGR_L_GetFeatureCount(ogr_layer, force)

    # if GDAL refuses to give us the feature count, we have to loop over all
    # features ourselves and get the count.  This can happen for some drivers
    # (e.g., OSM) or if a where clause is invalid but not rejected as error
    if force and feature_count == -1:
        # make sure layer is read from beginning
        OGR_L_ResetReading(ogr_layer)

        feature_count = 0
        while True:
            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
                feature_count +=1

            except NullPointerError:
                # No more rows available, so stop reading
                break

            # driver may raise other errors, e.g., for OSM if node ids are not
            # increasing, the default config option OSM_USE_CUSTOM_INDEXING=YES
            # causes errors iterating over features
            except CPLE_BaseError as exc:
                # if an invalid where clause is used for a GPKG file, it is not
                # caught as an error until attempting to iterate over features;
                # catch it here
                if "failed to prepare SQL" in str(exc):
                    raise ValueError(f"Invalid SQL query: {str(exc)}") from None

                raise DataLayerError(f"Could not iterate over features: {str(exc)}") from None

            finally:
                if ogr_feature != NULL:
                    OGR_F_Destroy(ogr_feature)
                    ogr_feature = NULL

    return feature_count


cdef get_total_bounds(OGRLayerH ogr_layer, int force):
    """Get the total bounds of a layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    force : bool
        True if the total bounds should be computed even if it is expensive

    Returns
    -------
    tuple of (xmin, ymin, xmax, ymax) or None
        The total bounds of the layer, or None if they could not be determined.
    """
    cdef OGREnvelope ogr_envelope

    try:
        exc_wrap_ogrerr(OGR_L_GetExtent(ogr_layer, &ogr_envelope, force))
        bounds = (
            ogr_envelope.MinX, ogr_envelope.MinY, ogr_envelope.MaxX, ogr_envelope.MaxY
        )

    except CPLE_BaseError:
        bounds = None

    return bounds


cdef set_metadata(GDALMajorObjectH obj, object metadata):
    """Set metadata on a dataset or layer

    Parameters
    ----------
    obj : pointer to dataset or layer
    metadata : dict, optional (default None)
        keys and values must be strings
    """
    cdef char **metadata_items = NULL
    cdef int err = 0

    metadata_items = dict_to_options(metadata)
    if metadata_items != NULL:
        # only default namespace is currently supported
        err = GDALSetMetadata(obj, metadata_items, NULL)

        CSLDestroy(metadata_items)
        metadata_items = NULL

    if err:
        raise RuntimeError("Could not set metadata") from None


cdef get_metadata(GDALMajorObjectH obj):
    """Get metadata for a dataset or layer

    Parameters
    ----------
    obj : pointer to dataset or layer

    Returns
    -------
    dict or None
        metadata as key, value pairs
    """
    # only default namespace is currently supported
    cdef char **metadata = GDALGetMetadata(obj, NULL)

    if metadata != NULL:
        return dict(
            metadata[i].decode('UTF-8').split('=', 1)
            for i in range(CSLCount(metadata))
        )

    return None


cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
    """Attempt to detect the encoding of the layer.

    If it supports UTF-8, use that.  If it is a shapefile, it must otherwise
    be ISO-8859-1.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    ogr_layer : pointer to open OGR layer

    Returns
    -------
    str or None
    """
    if OGR_L_TestCapability(ogr_layer, OLCStringsAsUTF8):
        return 'UTF-8'

    driver = get_driver(ogr_dataset)
    if driver == 'ESRI Shapefile':
        return 'ISO-8859-1'

    if driver == "OSM":
        # always set OSM data to UTF-8
        # per https://help.openstreetmap.org/questions/2172/what-encoding-does-openstreetmap-use
        return "UTF-8"

    return None


cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False):
    """Get field names and types for layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    encoding : str
        encoding to use when reading field name
    use_arrow : bool, default False
        If using arrow, all types are supported, and we don't have to
        raise warnings

    Returns
    -------
    ndarray(n, 4)
        array of index, ogr type, name, numpy type
    """
    cdef int i
    cdef int field_count
    cdef OGRFeatureDefnH ogr_featuredef = NULL
    cdef OGRFieldDefnH ogr_fielddef = NULL
    cdef int field_subtype
    cdef const char *key_c

    try:
        ogr_featuredef = exc_wrap_pointer(OGR_L_GetLayerDefn(ogr_layer))

    except NullPointerError:
        raise DataLayerError("Could not get layer definition") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))

    field_count = OGR_FD_GetFieldCount(ogr_featuredef)

    fields = np.empty(shape=(field_count, 4), dtype=object)
    fields_view = fields[:,:]

    skipped_fields = False

    for i in range(field_count):
        try:
            ogr_fielddef = exc_wrap_pointer(OGR_FD_GetFieldDefn(ogr_featuredef, i))

        except NullPointerError:
            raise FieldError(f"Could not get field definition for field at index {i}") from None

        except CPLE_BaseError as exc:
            raise FieldError(str(exc))

        field_name = get_string(OGR_Fld_GetNameRef(ogr_fielddef), encoding=encoding)

        field_type = OGR_Fld_GetType(ogr_fielddef)
        np_type = FIELD_TYPES[field_type]
        if not np_type and not use_arrow:
            skipped_fields = True
            log.warning(
                f"Skipping field {field_name}: unsupported OGR type: {field_type}")
            continue

        field_subtype = OGR_Fld_GetSubType(ogr_fielddef)
        subtype = FIELD_SUBTYPES.get(field_subtype)
        if subtype is not None:
            # bool, int16, float32 dtypes
            np_type = subtype

        fields_view[i,0] = i
        fields_view[i,1] = field_type
        fields_view[i,2] = field_name
        fields_view[i,3] = np_type

    if skipped_fields:
        # filter out skipped fields; rows never assigned keep index None
        mask = np.array([idx is not None for idx in fields[:, 0]])
        fields = fields[mask]

    return fields


cdef apply_where_filter(OGRLayerH ogr_layer, str where):
    """Applies where filter to layer.

    WARNING: GDAL does not raise an error for GPKG when SQL query is invalid
    but instead only logs to stderr.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    where : str
        See http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
        restricted_where for more information about valid expressions.

    Raises
    ------
    ValueError: if SQL query is not valid
    """
    where_b = where.encode('utf-8')
    where_c = where_b
    err = OGR_L_SetAttributeFilter(ogr_layer, where_c)
    # WARNING: GDAL does not raise this error for GPKG but instead only
    # logs to stderr
    if err != OGRERR_NONE:
        try:
            exc_check()
        except CPLE_BaseError as exc:
            raise ValueError(str(exc))

        raise ValueError(f"Invalid SQL query for layer '{OGR_L_GetName(ogr_layer)}': '{where}'")


cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox):
    """Applies bounding box spatial filter to layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    bbox: list or tuple of xmin, ymin, xmax, ymax

    Raises
    ------
    ValueError: if bbox is not a list or tuple or does not have proper number of
        items
    """
    if not (isinstance(bbox, (tuple, list)) and len(bbox) == 4):
        raise ValueError(f"Invalid bbox: {bbox}")

    xmin, ymin, xmax, ymax = bbox
    OGR_L_SetSpatialFilterRect(ogr_layer, xmin, ymin, xmax, ymax)


cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
    """Applies geometry spatial filter to layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    wkb: WKB encoding of geometry
    """
    cdef OGRGeometryH ogr_geometry = NULL
    cdef unsigned char *wkb_buffer = wkb

    err = OGR_G_CreateFromWkb(wkb_buffer, NULL, &ogr_geometry, len(wkb))
    if err:
        if ogr_geometry != NULL:
            OGR_G_DestroyGeometry(ogr_geometry)
        raise GeometryError("Could not create mask geometry") from None

    OGR_L_SetSpatialFilter(ogr_layer, ogr_geometry)
    OGR_G_DestroyGeometry(ogr_geometry)


cdef validate_feature_range(OGRLayerH ogr_layer, int skip_features=0, int max_features=0):
    """Limit skip_features and max_features to bounds available for dataset.

    This is typically performed after applying where and spatial filters, which
    reduce the available range of features.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    skip_features : number of features to skip from beginning of available range
    max_features : maximum number of features to read from available range

    Returns
    -------
    tuple of (skip_features, num_features), clamped to the layer's actual
    feature count
    """
    feature_count = get_feature_count(ogr_layer, 1)
    num_features = max_features

    if feature_count == 0:
        return 0, 0

    if skip_features >= feature_count:
        skip_features = feature_count

    elif max_features == 0:
        num_features = feature_count - skip_features

    elif max_features > feature_count:
        num_features = feature_count

    return skip_features, num_features


@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef process_geometry(OGRFeatureH ogr_feature, int i, geom_view, uint8_t force_2d):
    """Read the feature's geometry as WKB into geom_view[i].

    M values are stripped; Z values are stripped when force_2d is set;
    non-linear geometries (curves) are converted to their linear equivalent.
    geom_view[i] is set to None when the feature has no geometry.
    """
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGRwkbGeometryType ogr_geometry_type

    cdef unsigned char *wkb = NULL
    cdef int ret_length

    ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)

    if ogr_geometry == NULL:
        geom_view[i] = None
    else:
        try:
            ogr_geometry_type = OGR_G_GetGeometryType(ogr_geometry)

            # if geometry has M values, these need to be removed first
            if (OGR_G_IsMeasured(ogr_geometry)):
                OGR_G_SetMeasured(ogr_geometry, 0)

            if force_2d and OGR_G_Is3D(ogr_geometry):
                OGR_G_Set3D(ogr_geometry, 0)

            # if non-linear (e.g., curve), force to linear type
            if OGR_GT_IsNonLinear(ogr_geometry_type):
                ogr_geometry = OGR_G_GetLinearGeometry(ogr_geometry, 0, NULL)

            ret_length = OGR_G_WkbSize(ogr_geometry)
            # NOTE(review): Cython normally requires an explicit
            # <unsigned char*> cast of malloc's void* here — the cast may have
            # been lost in transit; verify against the original source.
            wkb = malloc(sizeof(unsigned char)*ret_length)
            OGR_G_ExportToWkb(ogr_geometry, 1, wkb)
            geom_view[i] = wkb[:ret_length]
        finally:
            free(wkb)


@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef process_fields(
    OGRFeatureH ogr_feature,
    int i,
    int n_fields,
    object field_data,
    object field_data_view,
    object field_indexes,
    object field_ogr_types,
    encoding,
    bint datetime_as_string
):
    """Read all field values of a single feature into row i of field_data.

    Null numeric fields force the column to float64 so NaN can be stored;
    null datetime fields become NaT (or None when datetime_as_string);
    datetimes are read as ISO strings when datetime_as_string is set,
    otherwise parsed into isoformat strings via OGR_F_GetFieldAsDateTimeEx.
    """
    cdef int j
    cdef int success
    cdef int field_index
    cdef int ret_length
    cdef GByte *bin_value

    cdef int year = 0
    cdef int month = 0
    cdef int day = 0
    cdef int hour = 0
    cdef int minute = 0
    cdef float fsecond = 0.0
    cdef int timezone = 0

    for j in range(n_fields):
        field_index = field_indexes[j]
        field_type = field_ogr_types[j]
        data = field_data_view[j]

        isnull = OGR_F_IsFieldSetAndNotNull(ogr_feature, field_index) == 0
        if isnull:
            if field_type in (OFTInteger, OFTInteger64, OFTReal):
                # if a boolean or integer type, have to cast to float to hold
                # NaN values
                if data.dtype.kind in ('b', 'i', 'u'):
                    field_data[j] = field_data[j].astype(np.float64)
                    field_data_view[j] = field_data[j][:]
                    field_data_view[j][i] = np.nan
                else:
                    data[i] = np.nan

            elif field_type in (OFTDate, OFTDateTime) and not datetime_as_string:
                data[i] = np.datetime64('NaT')

            else:
                data[i] = None

            continue

        if field_type == OFTInteger:
            data[i] = OGR_F_GetFieldAsInteger(ogr_feature, field_index)

        elif field_type == OFTInteger64:
            data[i] = OGR_F_GetFieldAsInteger64(ogr_feature, field_index)

        elif field_type == OFTReal:
            data[i] = OGR_F_GetFieldAsDouble(ogr_feature, field_index)

        elif field_type == OFTString:
            value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
            data[i] = value

        elif field_type == OFTBinary:
            bin_value = OGR_F_GetFieldAsBinary(ogr_feature, field_index, &ret_length)
            data[i] = bin_value[:ret_length]

        elif field_type == OFTDateTime or field_type == OFTDate:
            if datetime_as_string:
                # defer datetime parsing to user/ pandas layer
                # Update to OGR_F_GetFieldAsISO8601DateTime when GDAL 3.7+ only
                data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
            else:
                success = OGR_F_GetFieldAsDateTimeEx(
                    ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)

                ms, ss = math.modf(fsecond)
                second = int(ss)
                # fsecond has millisecond accuracy
                microsecond = round(ms * 1000) * 1000

                if not success:
                    data[i] = np.datetime64('NaT')

                elif field_type == OFTDate:
                    data[i] = datetime.date(year, month, day).isoformat()

                elif field_type == OFTDateTime:
                    data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()


@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef get_features(
    OGRLayerH ogr_layer,
    object[:,:] fields,
    encoding,
    uint8_t read_geometry,
    uint8_t force_2d,
    int skip_features,
    int num_features,
    uint8_t return_fids,
    bint datetime_as_string
):
    """Sequentially read up to num_features features starting at skip_features.

    Returns (fid_data, geometries, field_data); fid_data and geometries are
    None when return_fids / read_geometry are not set.  Arrays are trimmed
    when fewer features are available than expected.
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int n_fields
    cdef int i
    cdef int field_index

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    if skip_features > 0:
        OGR_L_SetNextByIndex(ogr_layer, skip_features)

    if return_fids:
        fid_data = np.empty(shape=(num_features), dtype=np.int64)
        fid_view = fid_data[:]
    else:
        fid_data = None

    if read_geometry:
        geometries = np.empty(shape=(num_features, ), dtype='object')
        geom_view = geometries[:]

    else:
        geometries = None

    n_fields = fields.shape[0]
    field_indexes = fields[:,0]
    field_ogr_types = fields[:,1]

    # datetime columns are read as object (string) arrays when
    # datetime_as_string is set
    field_data = [
        np.empty(shape=(num_features, ),
        dtype = ("object" if datetime_as_string and fields[field_index,3].startswith("datetime")
                 else fields[field_index,3])
        ) for field_index in range(n_fields)
    ]

    field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]

    if num_features == 0:
        return fid_data, geometries, field_data

    i = 0
    while True:
        try:
            if num_features > 0 and i == num_features:
                break

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))

            except NullPointerError:
                # No more rows available, so stop reading
                break

            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if i >= num_features:
                raise FeatureError(
                    "GDAL returned more records than expected based on the count of "
                    "records that may meet your combination of filters against this "
                    "dataset. Please open an issue on Github "
                    "(https://github.com/geopandas/pyogrio/issues) to report encountering "
                    "this error."
                ) from None

            if return_fids:
                fid_view[i] = OGR_F_GetFID(ogr_feature)

            if read_geometry:
                process_geometry(ogr_feature, i, geom_view, force_2d)

            process_fields(
                ogr_feature, i, n_fields, field_data, field_data_view,
                field_indexes, field_ogr_types, encoding, datetime_as_string
            )
            i += 1
        finally:
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    # There may be fewer rows available than expected from OGR_L_GetFeatureCount,
    # such as features with bounding boxes that intersect the bbox
    # but do not themselves intersect the bbox.
    # Empty rows are dropped.
    if i < num_features:
        if return_fids:
            fid_data = fid_data[:i]
        if read_geometry:
            geometries = geometries[:i]
        field_data = [data_field[:i] for data_field in field_data]

    return fid_data, geometries, field_data


@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef get_features_by_fid(
    OGRLayerH ogr_layer,
    int[:] fids,
    object[:,:] fields,
    encoding,
    uint8_t read_geometry,
    uint8_t force_2d,
    bint datetime_as_string
):
    """Read specific features by their FIDs.

    Returns (geometries, field_data); geometries is None when read_geometry
    is not set.  Raises FeatureError for any fid that cannot be read.
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int n_fields
    cdef int i
    cdef int fid
    cdef int field_index
    cdef int count = len(fids)

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    if read_geometry:
        geometries = np.empty(shape=(count, ), dtype='object')
        geom_view = geometries[:]

    else:
        geometries = None

    n_fields = fields.shape[0]
    field_indexes = fields[:,0]
    field_ogr_types = fields[:,1]
    # datetime columns are read as object (string) arrays when
    # datetime_as_string is set
    field_data = [
        np.empty(shape=(count, ),
        dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime")
               else fields[field_index,3]))
        for field_index in range(n_fields)
    ]

    field_data_view = [field_data[field_index][:] for field_index in range(n_fields)]

    for i in range(count):
        try:
            fid = fids[i]

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetFeature(ogr_layer, fid))

            except NullPointerError:
                raise FeatureError(f"Could not read feature with fid {fid}") from None

            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if read_geometry:
                process_geometry(ogr_feature, i, geom_view, force_2d)

            process_fields(
                ogr_feature, i, n_fields, field_data, field_data_view,
                field_indexes, field_ogr_types, encoding, datetime_as_string
            )
        finally:
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    return (geometries, field_data)


@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef get_bounds(
    OGRLayerH ogr_layer,
    int skip_features,
    int num_features):
    """Read FIDs and envelope bounds for up to num_features features.

    Returns (fid_data, bounds_data) where bounds_data has shape
    (4, num_features) with rows xmin, ymin, xmax, ymax; rows for features
    without geometry are NaN.  Arrays are trimmed when fewer features are
    available than expected.
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGREnvelope ogr_envelope # = NULL
    cdef int i

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    if skip_features > 0:
        OGR_L_SetNextByIndex(ogr_layer, skip_features)

    fid_data = np.empty(shape=(num_features), dtype=np.int64)
    fid_view = fid_data[:]

    bounds_data = np.empty(shape=(4, num_features), dtype='float64')
    bounds_view = bounds_data[:]

    i = 0
    while True:
        try:
            if num_features > 0 and i == num_features:
                break

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))

            except NullPointerError:
                # No more rows available, so stop reading
                break

            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if i >= num_features:
                raise FeatureError(
                    "Reading more features than indicated by OGR_L_GetFeatureCount is not supported"
                ) from None

            fid_view[i] = OGR_F_GetFID(ogr_feature)

            ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)

            if ogr_geometry == NULL:
                bounds_view[:,i] = np.nan

            else:
                OGR_G_GetEnvelope(ogr_geometry, &ogr_envelope)
                bounds_view[0, i] = ogr_envelope.MinX
                bounds_view[1, i] = ogr_envelope.MinY
                bounds_view[2, i] = ogr_envelope.MaxX
                bounds_view[3, i] = ogr_envelope.MaxY

            i += 1
        finally:
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    # Less rows read than anticipated, so drop empty rows
    if i < num_features:
        fid_data = fid_data[:i]
        bounds_data = bounds_data[:, :i]

    return fid_data, bounds_data


# NOTE(review): ogr_read continues (collapsed) on the following lines and is
# truncated in this view; its text is carried over unchanged below.
def ogr_read( str path, object dataset_kwargs, object layer=None, object encoding=None, int read_geometry=True, int force_2d=False, object columns=None, int skip_features=0, int max_features=0, object where=None, tuple bbox=None, object mask=None, object fids=None, str sql=None, str sql_dialect=None, int return_fids=False, bint datetime_as_string=False ): cdef int err = 0 cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL cdef const
char *field_c = NULL cdef char **fields_c = NULL cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL cdef int feature_count = 0 cdef double xmin, ymin, xmax, ymax path_b = path.encode('utf-8') path_c = path_b if fids is not None: if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features: raise ValueError( "cannot set both 'fids' and any of 'where', 'bbox', 'mask', " "'sql', 'skip_features' or 'max_features'" ) fids = np.asarray(fids, dtype=np.intc) if sql is not None and layer is not None: raise ValueError("'sql' paramater cannot be combined with 'layer'") if not (read_geometry or return_fids or columns is None or len(columns) > 0): raise ValueError( "at least one of read_geometry or return_fids must be True or columns must " "be None or non-empty" ) if bbox and mask: raise ValueError("cannot set both 'bbox' and 'mask'") if skip_features < 0: raise ValueError("'skip_features' must be >= 0") if max_features < 0: raise ValueError("'max_features' must be >= 0") try: dataset_options = dict_to_options(dataset_kwargs) ogr_dataset = ogr_open(path_c, 0, dataset_options) if sql is None: # layer defaults to index 0 if layer is None: layer = 0 ogr_layer = get_ogr_layer(ogr_dataset, layer) else: ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect) crs = get_crs(ogr_layer) # Encoding is derived from the user, from the dataset capabilities / type, # or from the system locale encoding = ( encoding or detect_encoding(ogr_dataset, ogr_layer) or locale.getpreferredencoding() ) fields = get_fields(ogr_layer, encoding) ignored_fields = [] if columns is not None: # Fields are matched exactly by name, duplicates are dropped. 
# Find index of each field into fields idx = np.intersect1d(fields[:,2], columns, return_indices=True)[1] fields = fields[idx, :] ignored_fields = list(set(fields[:,2]) - set(columns)) if not read_geometry: ignored_fields.append("OGR_GEOMETRY") # Instruct GDAL to ignore reading fields not # included in output columns for faster I/O if ignored_fields: for field in ignored_fields: field_b = field.encode("utf-8") field_c = field_b fields_c = CSLAddString(fields_c, field_c) OGR_L_SetIgnoredFields(ogr_layer, fields_c) geometry_type = get_geometry_type(ogr_layer) if fids is not None: geometries, field_data = get_features_by_fid( ogr_layer, fids, fields, encoding, read_geometry=read_geometry and geometry_type is not None, force_2d=force_2d, datetime_as_string=datetime_as_string ) # bypass reading fids since these should match fids used for read if return_fids: fid_data = fids.astype(np.int64) else: fid_data = None else: # Apply the attribute filter if where is not None and where != "": apply_where_filter(ogr_layer, where) # Apply the spatial filter if bbox is not None: apply_bbox_filter(ogr_layer, bbox) elif mask is not None: apply_geometry_filter(ogr_layer, mask) # Limit feature range to available range skip_features, num_features = validate_feature_range( ogr_layer, skip_features, max_features ) fid_data, geometries, field_data = get_features( ogr_layer, fields, encoding, read_geometry=read_geometry and geometry_type is not None, force_2d=force_2d, skip_features=skip_features, num_features=num_features, return_fids=return_fids, datetime_as_string=datetime_as_string ) meta = { 'crs': crs, 'encoding': encoding, 'fields': fields[:,2], # return only names 'dtypes':fields[:,3], 'geometry_type': geometry_type, } finally: if dataset_options != NULL: CSLDestroy(dataset_options) dataset_options = NULL if ogr_dataset != NULL: if sql is not None: GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer) GDALClose(ogr_dataset) ogr_dataset = NULL return ( meta, fid_data, geometries, 
field_data ) @contextlib.contextmanager def ogr_open_arrow( str path, dataset_kwargs, object layer=None, object encoding=None, int read_geometry=True, int force_2d=False, object columns=None, int skip_features=0, int max_features=0, object where=None, tuple bbox=None, object mask=None, object fids=None, str sql=None, str sql_dialect=None, int return_fids=False, int batch_size=0): cdef int err = 0 cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL cdef char **fields_c = NULL cdef const char *field_c = NULL cdef char **options = NULL cdef ArrowArrayStream stream cdef ArrowSchema schema IF CTE_GDAL_VERSION < (3, 6, 0): raise RuntimeError("Need GDAL>=3.6 for Arrow support") path_b = path.encode('utf-8') path_c = path_b if force_2d: raise ValueError("forcing 2D is not supported for Arrow") if fids is not None: raise ValueError("reading by FID is not supported for Arrow") IF CTE_GDAL_VERSION < (3, 8, 0): if skip_features: raise ValueError( "specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0" ) if skip_features < 0: raise ValueError("'skip_features' must be >= 0") if max_features: raise ValueError( "specifying 'max_features' is not supported for Arrow" ) if sql is not None and layer is not None: raise ValueError("'sql' paramater cannot be combined with 'layer'") if not (read_geometry or return_fids or columns is None or len(columns) > 0): raise ValueError( "at least one of read_geometry or return_fids must be True or columns must " "be None or non-empty" ) if bbox and mask: raise ValueError("cannot set both 'bbox' and 'mask'") reader = None try: dataset_options = dict_to_options(dataset_kwargs) ogr_dataset = ogr_open(path_c, 0, dataset_options) if sql is None: # layer defaults to index 0 if layer is None: layer = 0 ogr_layer = get_ogr_layer(ogr_dataset, layer) else: ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect) crs = 
get_crs(ogr_layer) # Encoding is derived from the user, from the dataset capabilities / type, # or from the system locale encoding = ( encoding or detect_encoding(ogr_dataset, ogr_layer) or locale.getpreferredencoding() ) fields = get_fields(ogr_layer, encoding, use_arrow=True) ignored_fields = [] if columns is not None: # Fields are matched exactly by name, duplicates are dropped. ignored_fields = list(set(fields[:,2]) - set(columns)) if not read_geometry: ignored_fields.append("OGR_GEOMETRY") geometry_type = get_geometry_type(ogr_layer) geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer)) fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer)) # OGR_L_GetFIDColumn returns the column name if it is a custom column, # or "" if not. For arrow, the default column name is "OGC_FID". if fid_column == "": fid_column = "OGC_FID" # Apply the attribute filter if where is not None and where != "": apply_where_filter(ogr_layer, where) # Apply the spatial filter if bbox is not None: apply_bbox_filter(ogr_layer, bbox) elif mask is not None: apply_geometry_filter(ogr_layer, mask) # Limit to specified columns if ignored_fields: for field in ignored_fields: field_b = field.encode("utf-8") field_c = field_b fields_c = CSLAddString(fields_c, field_c) OGR_L_SetIgnoredFields(ogr_layer, fields_c) if not return_fids: options = CSLSetNameValue(options, "INCLUDE_FID", "NO") if batch_size > 0: options = CSLSetNameValue( options, "MAX_FEATURES_IN_BATCH", str(batch_size).encode('UTF-8') ) # make sure layer is read from beginning OGR_L_ResetReading(ogr_layer) if not OGR_L_GetArrowStream(ogr_layer, &stream, options): raise RuntimeError("Failed to open ArrowArrayStream from Layer") stream_ptr = &stream if skip_features: # only supported for GDAL >= 3.8.0; have to do this after getting # the Arrow stream OGR_L_SetNextByIndex(ogr_layer, skip_features) # stream has to be consumed before the Dataset is closed import pyarrow as pa reader = 
pa.RecordBatchStreamReader._import_from_c(stream_ptr) meta = { 'crs': crs, 'encoding': encoding, 'fields': fields[:,2], # return only names 'geometry_type': geometry_type, 'geometry_name': geometry_name, 'fid_column': fid_column, } yield meta, reader finally: if reader is not None: # Mark reader as closed to prevent reading batches reader.close() CSLDestroy(options) if fields_c != NULL: CSLDestroy(fields_c) fields_c = NULL if dataset_options != NULL: CSLDestroy(dataset_options) dataset_options = NULL if ogr_dataset != NULL: if sql is not None: GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer) GDALClose(ogr_dataset) ogr_dataset = NULL def ogr_read_bounds( str path, object layer=None, object encoding=None, int read_geometry=True, int force_2d=False, object columns=None, int skip_features=0, int max_features=0, object where=None, tuple bbox=None, object mask=None): cdef int err = 0 cdef const char *path_c = NULL cdef const char *where_c = NULL cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL cdef int feature_count = 0 cdef double xmin, ymin, xmax, ymax if bbox and mask: raise ValueError("cannot set both 'bbox' and 'mask'") if skip_features < 0: raise ValueError("'skip_features' must be >= 0") if max_features < 0: raise ValueError("'max_features' must be >= 0") path_b = path.encode('utf-8') path_c = path_b # layer defaults to index 0 if layer is None: layer = 0 ogr_dataset = ogr_open(path_c, 0, NULL) ogr_layer = get_ogr_layer(ogr_dataset, layer) # Apply the attribute filter if where is not None and where != "": apply_where_filter(ogr_layer, where) # Apply the spatial filter if bbox is not None: apply_bbox_filter(ogr_layer, bbox) elif mask is not None: apply_geometry_filter(ogr_layer, mask) # Limit feature range to available range skip_features, num_features = validate_feature_range(ogr_layer, skip_features, max_features) return get_bounds(ogr_layer, skip_features, num_features) def ogr_read_info( str path, dataset_kwargs, object layer=None, 
object encoding=None, int force_feature_count=False, int force_total_bounds=False): cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL path_b = path.encode('utf-8') path_c = path_b # layer defaults to index 0 if layer is None: layer = 0 try: dataset_options = dict_to_options(dataset_kwargs) ogr_dataset = ogr_open(path_c, 0, dataset_options) ogr_layer = get_ogr_layer(ogr_dataset, layer) # Encoding is derived from the user, from the dataset capabilities / type, # or from the system locale encoding = ( encoding or detect_encoding(ogr_dataset, ogr_layer) or locale.getpreferredencoding() ) fields = get_fields(ogr_layer, encoding) meta = { 'crs': get_crs(ogr_layer), 'encoding': encoding, 'fields': fields[:,2], # return only names 'dtypes': fields[:,3], 'geometry_type': get_geometry_type(ogr_layer), 'features': get_feature_count(ogr_layer, force_feature_count), 'total_bounds': get_total_bounds(ogr_layer, force_total_bounds), 'driver': get_driver(ogr_dataset), "capabilities": { "random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead) == 1, "fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex) == 1, "fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter) == 1, "fast_feature_count": OGR_L_TestCapability(ogr_layer, OLCFastFeatureCount) == 1, "fast_total_bounds": OGR_L_TestCapability(ogr_layer, OLCFastGetExtent) == 1, }, 'layer_metadata': get_metadata(ogr_layer), 'dataset_metadata': get_metadata(ogr_dataset), } finally: if dataset_options != NULL: CSLDestroy(dataset_options) dataset_options = NULL if ogr_dataset != NULL: GDALClose(ogr_dataset) ogr_dataset = NULL return meta def ogr_list_layers(str path): cdef const char *path_c = NULL cdef const char *ogr_name = NULL cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL path_b = path.encode('utf-8') path_c = path_b ogr_dataset = ogr_open(path_c, 0, NULL) layer_count = 
GDALDatasetGetLayerCount(ogr_dataset) data = np.empty(shape=(layer_count, 2), dtype=object) data_view = data[:] for i in range(layer_count): ogr_layer = GDALDatasetGetLayer(ogr_dataset, i) data_view[i, 0] = get_string(OGR_L_GetName(ogr_layer)) data_view[i, 1] = get_geometry_type(ogr_layer) if ogr_dataset != NULL: GDALClose(ogr_dataset) ogr_dataset = NULL return data # NOTE: all modes are write-only # some data sources have multiple layers cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except NULL: cdef void *ogr_driver = NULL cdef OGRDataSourceH ogr_dataset = NULL # Get the driver try: ogr_driver = exc_wrap_pointer(GDALGetDriverByName(driver_c)) except NullPointerError: raise DataSourceError(f"Could not obtain driver: {driver_c.decode('utf-8')} (check that it was installed correctly into GDAL)") except CPLE_BaseError as exc: raise DataSourceError(str(exc)) # Create the dataset try: ogr_dataset = exc_wrap_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options)) except NullPointerError: raise DataSourceError(f"Failed to create dataset with driver: {path_c.decode('utf-8')} {driver_c.decode('utf-8')}") from None except CPLE_NotSupportedError as exc: raise DataSourceError(f"Driver {driver_c.decode('utf-8')} does not support write functionality") from None except CPLE_BaseError as exc: raise DataSourceError(str(exc)) return ogr_dataset cdef void * create_crs(str crs) except NULL: cdef char *crs_c = NULL cdef void *ogr_crs = NULL crs_b = crs.encode('UTF-8') crs_c = crs_b try: ogr_crs = exc_wrap_pointer(OSRNewSpatialReference(NULL)) err = OSRSetFromUserInput(ogr_crs, crs_c) if err: raise CRSError("Could not set CRS: {}".format(crs_c.decode('UTF-8'))) from None except CPLE_BaseError as exc: OSRRelease(ogr_crs) raise CRSError("Could not set CRS: {}".format(exc)) return ogr_crs cdef infer_field_types(list dtypes): cdef int field_type = 0 cdef int field_subtype = 0 cdef int width = 0 cdef int precision = 0 field_types = 
np.zeros(shape=(len(dtypes), 4), dtype=int) field_types_view = field_types[:] for i in range(len(dtypes)): dtype = dtypes[i] if dtype.name in DTYPE_OGR_FIELD_TYPES: field_type, field_subtype = DTYPE_OGR_FIELD_TYPES[dtype.name] field_types_view[i, 0] = field_type field_types_view[i, 1] = field_subtype # Determine field type from ndarray values elif dtype == np.dtype('O'): # Object type is ambiguous: could be a string or binary data # TODO: handle binary or other types # for now fall back to string (same as Geopandas) field_types_view[i, 0] = OFTString # Convert to unicode string then take itemsize # TODO: better implementation of this # width = values.astype(np.unicode_).dtype.itemsize // 4 # DO WE NEED WIDTH HERE? elif dtype.type is np.unicode_ or dtype.type is np.string_: field_types_view[i, 0] = OFTString field_types_view[i, 2] = int(dtype.itemsize // 4) elif dtype.name.startswith("datetime64"): # datetime dtype precision is specified with eg. [ms], but this isn't # usefull when writing to gdal. field_type, field_subtype = DTYPE_OGR_FIELD_TYPES["datetime64"] field_types_view[i, 0] = field_type field_types_view[i, 1] = field_subtype else: raise NotImplementedError(f"field type is not supported {dtype.name} (field index: {i})") return field_types # TODO: set geometry and field data as memory views? 
def ogr_write(
    str path,
    str layer,
    str driver,
    geometry,
    fields,
    field_data,
    field_mask,
    str crs,
    str geometry_type,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint promote_to_multi=False,
    bint nan_as_null=True,
    bint append=False,
    dataset_metadata=None,
    layer_metadata=None,
    gdal_tz_offsets=None
):
    """Write geometries (WKB) and field arrays to a vector data source.

    Creates or opens the dataset at `path` with `driver`, creates or reuses
    `layer` (appending when `append` is True and the layer exists), defines
    fields inferred from the dtypes of `field_data`, then writes one feature
    per record inside a transaction when the layer supports it.

    Raises ValueError for inconsistent inputs; FieldError / GeometryError /
    FeatureError / DataLayerError / DataSourceError for OGR-level failures.
    """
    cdef const char *path_c = NULL
    cdef const char *layer_c = NULL
    cdef const char *driver_c = NULL
    cdef const char *crs_c = NULL
    cdef const char *encoding_c = NULL
    cdef char **dataset_options = NULL
    cdef char **layer_options = NULL
    cdef const char *ogr_name = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef OGRFeatureH ogr_feature = NULL
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGRGeometryH ogr_geometry_multi = NULL
    cdef OGRFeatureDefnH ogr_featuredef = NULL
    cdef OGRFieldDefnH ogr_fielddef = NULL
    cdef unsigned char *wkb_buffer = NULL
    cdef OGRSpatialReferenceH ogr_crs = NULL
    cdef int layer_idx = -1
    cdef int supports_transactions = 0
    cdef OGRwkbGeometryType geometry_code
    cdef int err = 0
    cdef int i = 0
    cdef int num_records = -1
    cdef int num_field_data = len(field_data) if field_data is not None else 0
    cdef int num_fields = len(fields) if fields is not None else 0

    ### Validate input shapes: fields, field_data, geometry and field_mask
    ### must all agree on the number of columns and records
    if num_fields != num_field_data:
        raise ValueError("field_data array needs to be same length as fields array")

    if num_fields == 0 and geometry is None:
        raise ValueError("You must provide at least a geometry column or a field")

    if num_fields > 0:
        num_records = len(field_data[0])
        for i in range(1, len(field_data)):
            if len(field_data[i]) != num_records:
                raise ValueError("field_data arrays must be same length")

    if geometry is None:
        # If no geometry data, we ignore the geometry_type and don't create a geometry
        # column
        geometry_type = None

    else:
        if num_fields > 0:
            if len(geometry) != num_records:
                raise ValueError(
                    "field_data arrays must be same length as geometry array"
                )
        else:
            num_records = len(geometry)

    if field_mask is not None:
        if len(field_data) != len(field_mask):
            raise ValueError("field_data and field_mask must be same length")
        for i in range(0, len(field_mask)):
            if field_mask[i] is not None and len(field_mask[i]) != num_records:
                raise ValueError("field_mask arrays must be same length as geometry array")
    else:
        field_mask = [None] * num_fields

    path_b = path.encode('UTF-8')
    path_c = path_b

    driver_b = driver.encode('UTF-8')
    driver_c = driver_b

    if not layer:
        # default layer name is the file stem of `path`
        layer = os.path.splitext(os.path.split(path)[1])[0]

    if gdal_tz_offsets is None:
        gdal_tz_offsets = {}

    # if shapefile, GeoJSON, or FlatGeobuf, always delete first
    # for other types, check if we can create layers
    # GPKG might be the only multi-layer writeable type.  TODO: check this
    if driver in ('ESRI Shapefile', 'GeoJSON', 'GeoJSONSeq', 'FlatGeobuf') and os.path.exists(path):
        if not append:
            os.unlink(path)

    ### Determine whether the target layer already exists in the dataset;
    ### when not appending, an existing layer is deleted and recreated
    layer_exists = False
    if os.path.exists(path):
        try:
            ogr_dataset = ogr_open(path_c, 1, NULL)

            for i in range(GDALDatasetGetLayerCount(ogr_dataset)):
                name = OGR_L_GetName(GDALDatasetGetLayer(ogr_dataset, i))
                if layer == name.decode('UTF-8'):
                    layer_idx = i
                    break

            if layer_idx >= 0:
                layer_exists = True

                if not append:
                    GDALDatasetDeleteLayer(ogr_dataset, layer_idx)

        except DataSourceError as exc:
            # open failed
            if append:
                raise exc

            # otherwise create from scratch
            os.unlink(path)
            ogr_dataset = NULL

    # either it didn't exist or could not open it in write mode
    if ogr_dataset == NULL:
        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_create(path_c, driver_c, dataset_options)

    # if we are not appending to an existing layer, we need to create
    # the layer and all associated properties (CRS, field defs, etc)
    create_layer = not (append and layer_exists)

    ### Create the layer
    if create_layer:
        # Create the CRS
        if crs is not None:
            try:
                ogr_crs = create_crs(crs)

            except Exception as exc:
                # clean up the dataset and options before propagating
                OGRReleaseDataSource(ogr_dataset)
                ogr_dataset = NULL
                if dataset_options != NULL:
                    CSLDestroy(dataset_options)
                    dataset_options = NULL
                raise exc

        # Setup layer creation options
        if not encoding:
            encoding = locale.getpreferredencoding()

        if driver == 'ESRI Shapefile':
            # Fiona only sets encoding for shapefiles; other drivers do not support
            # encoding as an option.
            encoding_b = encoding.upper().encode('UTF-8')
            encoding_c = encoding_b
            layer_options = CSLSetNameValue(layer_options, "ENCODING", encoding_c)

        # Setup other layer creation options
        for k, v in layer_kwargs.items():
            k = k.encode('UTF-8')
            v = v.encode('UTF-8')
            layer_options = CSLAddNameValue(layer_options, k, v)

        ### Get geometry type
        # TODO: this is brittle for 3D / ZM / M types
        # TODO: fail on M / ZM types
        geometry_code = get_geometry_type_code(geometry_type)

    try:
        if create_layer:
            layer_b = layer.encode('UTF-8')
            layer_c = layer_b

            ogr_layer = exc_wrap_pointer(
                GDALDatasetCreateLayer(ogr_dataset, layer_c, ogr_crs,
                                       geometry_code, layer_options))

        else:
            ogr_layer = exc_wrap_pointer(get_ogr_layer(ogr_dataset, layer))

        # Set dataset and layer metadata
        set_metadata(ogr_dataset, dataset_metadata)
        set_metadata(ogr_layer, layer_metadata)

    except Exception as exc:
        OGRReleaseDataSource(ogr_dataset)
        ogr_dataset = NULL
        raise DataLayerError(str(exc))

    finally:
        # layer creation inputs are no longer needed whether or not it succeeded
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if layer_options != NULL:
            CSLDestroy(layer_options)
            layer_options = NULL

    ### Create the fields
    field_types = None
    if num_fields > 0:
        field_types = infer_field_types([field.dtype for field in field_data])

    ### Create the fields
    if create_layer:
        for i in range(num_fields):
            field_type, field_subtype, width, precision = field_types[i]

            name_b = fields[i].encode(encoding)
            try:
                ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))

                # subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
                if field_subtype != OFSTNone:
                    OGR_Fld_SetSubType(ogr_fielddef, field_subtype)

                if width:
                    OGR_Fld_SetWidth(ogr_fielddef, width)

                # TODO: set precision

            except:
                if ogr_fielddef != NULL:
                    OGR_Fld_Destroy(ogr_fielddef)
                    ogr_fielddef = NULL

                OGRReleaseDataSource(ogr_dataset)
                ogr_dataset = NULL
                raise FieldError(f"Error creating field '{fields[i]}' from field_data") from None

            try:
                exc_wrap_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1))

            except:
                OGRReleaseDataSource(ogr_dataset)
                ogr_dataset = NULL
                raise FieldError(f"Error adding field '{fields[i]}' to layer") from None

            finally:
                # the layer keeps its own copy of the field definition
                if ogr_fielddef != NULL:
                    OGR_Fld_Destroy(ogr_fielddef)

    ### Create the features
    ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer)

    # batch the writes in a transaction when the layer supports it
    supports_transactions = OGR_L_TestCapability(ogr_layer, OLCTransactions)
    if supports_transactions:
        start_transaction(ogr_dataset, 0)

    for i in range(num_records):
        try:
            # create the feature
            ogr_feature = OGR_F_Create(ogr_featuredef)
            if ogr_feature == NULL:
                raise FeatureError(f"Could not create feature at index {i}") from None

            # create the geometry based on specific WKB type (there might be mixed types in geometries)
            # TODO: geometry must not be null or errors
            wkb = None if geometry is None else geometry[i]
            if wkb is not None:
                # WKB byte 1 is the geometry type (assumes little-endian WKB)
                wkbtype = bytearray(wkb)[1]
                # may need to consider all 4 bytes: int.from_bytes(wkb[0][1:4], byteorder="little")
                # use "little" if the first byte == 1
                ogr_geometry = OGR_G_CreateGeometry(wkbtype)
                if ogr_geometry == NULL:
                    raise GeometryError(f"Could not create geometry at index {i} for WKB type {wkbtype}") from None

                # import the WKB
                wkb_buffer = wkb
                err = OGR_G_ImportFromWkb(ogr_geometry, wkb_buffer, len(wkb))
                if err:
                    if ogr_geometry != NULL:
                        OGR_G_DestroyGeometry(ogr_geometry)
                        ogr_geometry = NULL
                    raise GeometryError(f"Could not create geometry from WKB at index {i}") from None

                # Convert to multi type
                if promote_to_multi:
                    if wkbtype in (wkbPoint, wkbPoint25D, wkbPointM, wkbPointZM):
                        ogr_geometry = OGR_G_ForceToMultiPoint(ogr_geometry)
                    elif wkbtype in (wkbLineString, wkbLineString25D, wkbLineStringM, wkbLineStringZM):
                        ogr_geometry = OGR_G_ForceToMultiLineString(ogr_geometry)
                    elif wkbtype in (wkbPolygon, wkbPolygon25D, wkbPolygonM, wkbPolygonZM):
                        ogr_geometry = OGR_G_ForceToMultiPolygon(ogr_geometry)

                # Set the geometry on the feature
                # this assumes ownership of the geometry and it's cleanup
                err = OGR_F_SetGeometryDirectly(ogr_feature, ogr_geometry)
                if err:
                    raise GeometryError(f"Could not set geometry for feature at index {i}") from None

            # Set field values
            for field_idx in range(num_fields):
                field_value = field_data[field_idx][i]
                field_type = field_types[field_idx][0]

                # an explicit mask entry wins over any value-based null checks
                mask = field_mask[field_idx]
                if mask is not None and mask[i]:
                    OGR_F_SetFieldNull(ogr_feature, field_idx)

                elif field_type == OFTString:
                    # TODO: encode string using approach from _get_internal_encoding which checks layer capabilities
                    if (
                        field_value is None
                        or (isinstance(field_value, float) and isnan(field_value))
                    ):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        if not isinstance(field_value, str):
                            field_value = str(field_value)

                        try:
                            value_b = field_value.encode("UTF-8")
                            OGR_F_SetFieldString(ogr_feature, field_idx, value_b)

                        except AttributeError:
                            raise ValueError(f"Could not encode value '{field_value}' in field '{fields[field_idx]}' to string")

                        except Exception:
                            raise

                elif field_type == OFTInteger:
                    OGR_F_SetFieldInteger(ogr_feature, field_idx, field_value)

                elif field_type == OFTInteger64:
                    OGR_F_SetFieldInteger64(ogr_feature, field_idx, field_value)

                elif field_type == OFTReal:
                    # NaN is written as null unless nan_as_null is disabled
                    if nan_as_null and isnan(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        OGR_F_SetFieldDouble(ogr_feature, field_idx, field_value)

                elif field_type == OFTDate:
                    if np.isnat(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        # NOTE: local `datetime` shadows the imported module here
                        datetime = field_value.item()
                        OGR_F_SetFieldDateTimeEx(
                            ogr_feature,
                            field_idx,
                            datetime.year,
                            datetime.month,
                            datetime.day,
                            0,
                            0,
                            0.0,
                            0
                        )

                elif field_type == OFTDateTime:
                    if np.isnat(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        datetime = field_value.astype("datetime64[ms]").item()
                        # per-field timezone offsets, when supplied by the caller
                        tz_array = gdal_tz_offsets.get(fields[field_idx], None)
                        if tz_array is None:
                            gdal_tz = 0
                        else:
                            gdal_tz = tz_array[i]
                        OGR_F_SetFieldDateTimeEx(
                            ogr_feature,
                            field_idx,
                            datetime.year,
                            datetime.month,
                            datetime.day,
                            datetime.hour,
                            datetime.minute,
                            datetime.second + datetime.microsecond / 10**6,
                            gdal_tz
                        )

                else:
                    raise NotImplementedError(f"OGR field type is not supported for writing: {field_type}")

            # Add feature to the layer
            try:
                exc_wrap_int(OGR_L_CreateFeature(ogr_layer, ogr_feature))
            except CPLE_BaseError as exc:
                raise FeatureError(f"Could not add feature to layer at index {i}: {exc}") from None

        finally:
            # always release the feature, even when writing it failed
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    if supports_transactions:
        commit_transaction(ogr_dataset)

    log.info(f"Created {num_records:,} records" )

    ### Final cleanup
    if ogr_dataset != NULL:
        GDALClose(ogr_dataset)

        # GDAL will set an error if there was an error writing the data source
        # on close
        exc = exc_check()
        if exc:
            raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")