library packages
This commit is contained in:
473
.venv/lib/python3.12/site-packages/geopandas/io/sql.py
Normal file
473
.venv/lib/python3.12/site-packages/geopandas/io/sql.py
Normal file
@@ -0,0 +1,473 @@
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from functools import lru_cache
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import shapely
|
||||
import shapely.wkb
|
||||
|
||||
from geopandas import GeoDataFrame
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _get_conn(conn_or_engine):
|
||||
"""
|
||||
Yield a connection within a transaction context.
|
||||
|
||||
Engine.begin() returns a Connection with an implicit Transaction while
|
||||
Connection.begin() returns the Transaction. This helper will always return a
|
||||
Connection with an implicit (possibly nested) Transaction.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
conn_or_engine : Connection or Engine
|
||||
A sqlalchemy Connection or Engine instance
|
||||
Returns
|
||||
-------
|
||||
Connection
|
||||
"""
|
||||
from sqlalchemy.engine.base import Connection, Engine
|
||||
|
||||
if isinstance(conn_or_engine, Connection):
|
||||
if not conn_or_engine.in_transaction():
|
||||
with conn_or_engine.begin():
|
||||
yield conn_or_engine
|
||||
else:
|
||||
yield conn_or_engine
|
||||
elif isinstance(conn_or_engine, Engine):
|
||||
with conn_or_engine.begin() as conn:
|
||||
yield conn
|
||||
else:
|
||||
raise ValueError(f"Unknown Connectable: {conn_or_engine}")
|
||||
|
||||
|
||||
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
|
||||
"""
|
||||
Transforms a pandas DataFrame into a GeoDataFrame.
|
||||
The column 'geom_col' must be a geometry column in WKB representation.
|
||||
To be used to convert df based on pd.read_sql to gdf.
|
||||
Parameters
|
||||
----------
|
||||
df : DataFrame
|
||||
pandas DataFrame with geometry column in WKB representation.
|
||||
geom_col : string, default 'geom'
|
||||
column name to convert to shapely geometries
|
||||
crs : pyproj.CRS, optional
|
||||
CRS to use for the returned GeoDataFrame. The value can be anything accepted
|
||||
by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
|
||||
such as an authority string (eg "EPSG:4326") or a WKT string.
|
||||
If not set, tries to determine CRS from the SRID associated with the
|
||||
first geometry in the database, and assigns that to all geometries.
|
||||
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
|
||||
Active connection to the database to query.
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
"""
|
||||
|
||||
if geom_col not in df:
|
||||
raise ValueError("Query missing geometry column '{}'".format(geom_col))
|
||||
|
||||
if df.columns.to_list().count(geom_col) > 1:
|
||||
raise ValueError(
|
||||
f"Duplicate geometry column '{geom_col}' detected in SQL query output. Only"
|
||||
"one geometry column is allowed."
|
||||
)
|
||||
|
||||
geoms = df[geom_col].dropna()
|
||||
|
||||
if not geoms.empty:
|
||||
load_geom_bytes = shapely.wkb.loads
|
||||
"""Load from Python 3 binary."""
|
||||
|
||||
def load_geom_text(x):
|
||||
"""Load from binary encoded as text."""
|
||||
return shapely.wkb.loads(str(x), hex=True)
|
||||
|
||||
if isinstance(geoms.iat[0], bytes):
|
||||
load_geom = load_geom_bytes
|
||||
else:
|
||||
load_geom = load_geom_text
|
||||
|
||||
df[geom_col] = geoms = geoms.apply(load_geom)
|
||||
if crs is None:
|
||||
srid = shapely.get_srid(geoms.iat[0])
|
||||
# if no defined SRID in geodatabase, returns SRID of 0
|
||||
if srid != 0:
|
||||
try:
|
||||
spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
|
||||
except pd.errors.DatabaseError:
|
||||
warning_msg = (
|
||||
f"Could not find the spatial reference system table "
|
||||
f"(spatial_ref_sys) in PostGIS."
|
||||
f"Trying epsg:{srid} as a fallback."
|
||||
)
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=3)
|
||||
crs = "epsg:{}".format(srid)
|
||||
else:
|
||||
if not spatial_ref_sys_df.empty:
|
||||
auth_name = spatial_ref_sys_df["auth_name"].item()
|
||||
crs = f"{auth_name}:{srid}"
|
||||
else:
|
||||
warning_msg = (
|
||||
f"Could not find srid {srid} in the "
|
||||
f"spatial_ref_sys table. "
|
||||
f"Trying epsg:{srid} as a fallback."
|
||||
)
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=3)
|
||||
crs = "epsg:{}".format(srid)
|
||||
|
||||
return GeoDataFrame(df, crs=crs, geometry=geom_col)
|
||||
|
||||
|
||||
def _read_postgis(
|
||||
sql,
|
||||
con,
|
||||
geom_col="geom",
|
||||
crs=None,
|
||||
index_col=None,
|
||||
coerce_float=True,
|
||||
parse_dates=None,
|
||||
params=None,
|
||||
chunksize=None,
|
||||
):
|
||||
"""
|
||||
Returns a GeoDataFrame corresponding to the result of the query
|
||||
string, which must contain a geometry column in WKB representation.
|
||||
|
||||
It is also possible to use :meth:`~GeoDataFrame.read_file` to read from a database.
|
||||
Especially for file geodatabases like GeoPackage or SpatiaLite this can be easier.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
sql : string
|
||||
SQL query to execute in selecting entries from database, or name
|
||||
of the table to read from the database.
|
||||
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
|
||||
Active connection to the database to query.
|
||||
geom_col : string, default 'geom'
|
||||
column name to convert to shapely geometries
|
||||
crs : dict or str, optional
|
||||
CRS to use for the returned GeoDataFrame; if not set, tries to
|
||||
determine CRS from the SRID associated with the first geometry in
|
||||
the database, and assigns that to all geometries.
|
||||
chunksize : int, default None
|
||||
If specified, return an iterator where chunksize is the number of rows to
|
||||
include in each chunk.
|
||||
|
||||
See the documentation for pandas.read_sql for further explanation
|
||||
of the following parameters:
|
||||
index_col, coerce_float, parse_dates, params, chunksize
|
||||
|
||||
Returns
|
||||
-------
|
||||
GeoDataFrame
|
||||
|
||||
Examples
|
||||
--------
|
||||
PostGIS
|
||||
|
||||
>>> from sqlalchemy import create_engine # doctest: +SKIP
|
||||
>>> db_connection_url = "postgresql://myusername:mypassword@myhost:5432/mydatabase"
|
||||
>>> con = create_engine(db_connection_url) # doctest: +SKIP
|
||||
>>> sql = "SELECT geom, highway FROM roads"
|
||||
>>> df = geopandas.read_postgis(sql, con) # doctest: +SKIP
|
||||
|
||||
SpatiaLite
|
||||
|
||||
>>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
|
||||
>>> df = geopandas.read_postgis(sql, con) # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if chunksize is None:
|
||||
# read all in one chunk and return a single GeoDataFrame
|
||||
df = pd.read_sql(
|
||||
sql,
|
||||
con,
|
||||
index_col=index_col,
|
||||
coerce_float=coerce_float,
|
||||
parse_dates=parse_dates,
|
||||
params=params,
|
||||
chunksize=chunksize,
|
||||
)
|
||||
return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)
|
||||
|
||||
else:
|
||||
# read data in chunks and return a generator
|
||||
df_generator = pd.read_sql(
|
||||
sql,
|
||||
con,
|
||||
index_col=index_col,
|
||||
coerce_float=coerce_float,
|
||||
parse_dates=parse_dates,
|
||||
params=params,
|
||||
chunksize=chunksize,
|
||||
)
|
||||
return (
|
||||
_df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
|
||||
)
|
||||
|
||||
|
||||
def _get_geometry_type(gdf):
|
||||
"""
|
||||
Get basic geometry type of a GeoDataFrame. See more info from:
|
||||
https://geoalchemy-2.readthedocs.io/en/latest/types.html#geoalchemy2.types._GISType
|
||||
|
||||
Following rules apply:
|
||||
- if geometries all share the same geometry-type,
|
||||
geometries are inserted with the given GeometryType with following types:
|
||||
- Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
|
||||
GeometryCollection.
|
||||
- LinearRing geometries will be converted into LineString -objects.
|
||||
- in all other cases, geometries will be inserted with type GEOMETRY:
|
||||
- a mix of Polygons and MultiPolygons in GeoSeries
|
||||
- a mix of Points and LineStrings in GeoSeries
|
||||
- geometry is of type GeometryCollection,
|
||||
such as GeometryCollection([Point, LineStrings])
|
||||
- if any of the geometries has Z-coordinate, all records will
|
||||
be written with 3D.
|
||||
"""
|
||||
geom_types = list(gdf.geometry.geom_type.unique())
|
||||
has_curve = False
|
||||
|
||||
for gt in geom_types:
|
||||
if gt is None:
|
||||
continue
|
||||
elif "LinearRing" in gt:
|
||||
has_curve = True
|
||||
|
||||
if len(geom_types) == 1:
|
||||
if has_curve:
|
||||
target_geom_type = "LINESTRING"
|
||||
else:
|
||||
if geom_types[0] is None:
|
||||
raise ValueError("No valid geometries in the data.")
|
||||
else:
|
||||
target_geom_type = geom_types[0].upper()
|
||||
else:
|
||||
target_geom_type = "GEOMETRY"
|
||||
|
||||
# Check for 3D-coordinates
|
||||
if any(gdf.geometry.has_z):
|
||||
target_geom_type += "Z"
|
||||
|
||||
return target_geom_type, has_curve
|
||||
|
||||
|
||||
def _get_srid_from_crs(gdf):
|
||||
"""
|
||||
Get EPSG code from CRS if available. If not, return 0.
|
||||
"""
|
||||
|
||||
# Use geoalchemy2 default for srid
|
||||
# Note: undefined srid in PostGIS is 0
|
||||
srid = None
|
||||
warning_msg = (
|
||||
"Could not parse CRS from the GeoDataFrame. "
|
||||
"Inserting data without defined CRS."
|
||||
)
|
||||
if gdf.crs is not None:
|
||||
try:
|
||||
for confidence in (100, 70, 25):
|
||||
srid = gdf.crs.to_epsg(min_confidence=confidence)
|
||||
if srid is not None:
|
||||
break
|
||||
auth_srid = gdf.crs.to_authority(
|
||||
auth_name="ESRI", min_confidence=confidence
|
||||
)
|
||||
if auth_srid is not None:
|
||||
srid = int(auth_srid[1])
|
||||
break
|
||||
except Exception:
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=2)
|
||||
|
||||
if srid is None:
|
||||
srid = 0
|
||||
warnings.warn(warning_msg, UserWarning, stacklevel=2)
|
||||
|
||||
return srid
|
||||
|
||||
|
||||
def _convert_linearring_to_linestring(gdf, geom_name):
|
||||
from shapely.geometry import LineString
|
||||
|
||||
# Todo: Use shapely function once it's implemented:
|
||||
# https://github.com/shapely/shapely/issues/1617
|
||||
|
||||
mask = gdf.geom_type == "LinearRing"
|
||||
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
|
||||
lambda geom: LineString(geom)
|
||||
)
|
||||
return gdf
|
||||
|
||||
|
||||
def _convert_to_ewkb(gdf, geom_name, srid):
|
||||
"""Convert geometries to ewkb."""
|
||||
geoms = shapely.to_wkb(
|
||||
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
|
||||
hex=True,
|
||||
include_srid=True,
|
||||
)
|
||||
|
||||
# The gdf will warn that the geometry column doesn't hold in-memory geometries
|
||||
# now that they are EWKB, so convert back to a regular dataframe to avoid warning
|
||||
# the user that the dtypes are unexpected.
|
||||
df = pd.DataFrame(gdf, copy=False)
|
||||
df[geom_name] = geoms
|
||||
return df
|
||||
|
||||
|
||||
def _psql_insert_copy(tbl, conn, keys, data_iter):
|
||||
import csv
|
||||
import io
|
||||
|
||||
s_buf = io.StringIO()
|
||||
writer = csv.writer(s_buf)
|
||||
writer.writerows(data_iter)
|
||||
s_buf.seek(0)
|
||||
|
||||
columns = ", ".join('"{}"'.format(k) for k in keys)
|
||||
|
||||
dbapi_conn = conn.connection
|
||||
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
|
||||
tbl.table.schema, tbl.table.name, columns
|
||||
)
|
||||
with dbapi_conn.cursor() as cur:
|
||||
# Use psycopg method if it's available
|
||||
if hasattr(cur, "copy") and callable(cur.copy):
|
||||
with cur.copy(sql) as copy:
|
||||
copy.write(s_buf.read())
|
||||
else: # otherwise use psycopg2 method
|
||||
cur.copy_expert(sql, s_buf)
|
||||
|
||||
|
||||
def _write_postgis(
|
||||
gdf,
|
||||
name,
|
||||
con,
|
||||
schema=None,
|
||||
if_exists="fail",
|
||||
index=False,
|
||||
index_label=None,
|
||||
chunksize=None,
|
||||
dtype=None,
|
||||
):
|
||||
"""
|
||||
Upload GeoDataFrame into PostGIS database.
|
||||
|
||||
This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
|
||||
Python driver (e.g. psycopg2) to be installed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : str
|
||||
Name of the target table.
|
||||
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
|
||||
Active connection to the PostGIS database.
|
||||
if_exists : {'fail', 'replace', 'append'}, default 'fail'
|
||||
How to behave if the table already exists:
|
||||
|
||||
- fail: Raise a ValueError.
|
||||
- replace: Drop the table before inserting new values.
|
||||
- append: Insert new values to the existing table.
|
||||
schema : string, optional
|
||||
Specify the schema. If None, use default schema: 'public'.
|
||||
index : bool, default True
|
||||
Write DataFrame index as a column.
|
||||
Uses *index_label* as the column name in the table.
|
||||
index_label : string or sequence, default None
|
||||
Column label for index column(s).
|
||||
If None is given (default) and index is True,
|
||||
then the index names are used.
|
||||
chunksize : int, optional
|
||||
Rows will be written in batches of this size at a time.
|
||||
By default, all rows will be written at once.
|
||||
dtype : dict of column name to SQL type, default None
|
||||
Specifying the datatype for columns.
|
||||
The keys should be the column names and the values
|
||||
should be the SQLAlchemy types.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> from sqlalchemy import create_engine # doctest: +SKIP
|
||||
>>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432\
|
||||
/mydatabase";) # doctest: +SKIP
|
||||
>>> gdf.to_postgis("my_table", engine) # doctest: +SKIP
|
||||
"""
|
||||
try:
|
||||
from geoalchemy2 import Geometry
|
||||
from sqlalchemy import text
|
||||
except ImportError:
|
||||
raise ImportError("'to_postgis()' requires geoalchemy2 package.")
|
||||
|
||||
gdf = gdf.copy()
|
||||
geom_name = gdf.geometry.name
|
||||
|
||||
# Get srid
|
||||
srid = _get_srid_from_crs(gdf)
|
||||
|
||||
# Get geometry type and info whether data contains LinearRing.
|
||||
geometry_type, has_curve = _get_geometry_type(gdf)
|
||||
|
||||
# Build dtype with Geometry
|
||||
if dtype is not None:
|
||||
dtype[geom_name] = Geometry(geometry_type=geometry_type, srid=srid)
|
||||
else:
|
||||
dtype = {geom_name: Geometry(geometry_type=geometry_type, srid=srid)}
|
||||
|
||||
# Convert LinearRing geometries to LineString
|
||||
if has_curve:
|
||||
gdf = _convert_linearring_to_linestring(gdf, geom_name)
|
||||
|
||||
# Convert geometries to EWKB
|
||||
gdf = _convert_to_ewkb(gdf, geom_name, srid)
|
||||
|
||||
if schema is not None:
|
||||
schema_name = schema
|
||||
else:
|
||||
schema_name = "public"
|
||||
|
||||
if if_exists == "append":
|
||||
# Check that the geometry srid matches with the current GeoDataFrame
|
||||
with _get_conn(con) as connection:
|
||||
# Only check SRID if table exists
|
||||
if connection.dialect.has_table(connection, name, schema):
|
||||
target_srid = connection.execute(
|
||||
text(
|
||||
"SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format(
|
||||
schema=schema_name, table=name, geom_col=geom_name
|
||||
)
|
||||
)
|
||||
).fetchone()[0]
|
||||
|
||||
if target_srid != srid:
|
||||
msg = (
|
||||
"The CRS of the target table (EPSG:{epsg_t}) differs from the "
|
||||
"CRS of current GeoDataFrame (EPSG:{epsg_src}).".format(
|
||||
epsg_t=target_srid, epsg_src=srid
|
||||
)
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
with _get_conn(con) as connection:
|
||||
gdf.to_sql(
|
||||
name,
|
||||
connection,
|
||||
schema=schema_name,
|
||||
if_exists=if_exists,
|
||||
index=index,
|
||||
index_label=index_label,
|
||||
chunksize=chunksize,
|
||||
dtype=dtype,
|
||||
method=_psql_insert_copy,
|
||||
)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def _get_spatial_ref_sys_df(con, srid):
|
||||
spatial_ref_sys_sql = (
|
||||
f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
|
||||
)
|
||||
return pd.read_sql(spatial_ref_sys_sql, con)
|
||||
Reference in New Issue
Block a user