This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -1,5 +1,6 @@
import warnings
from contextlib import contextmanager
from functools import lru_cache
import pandas as pd
@@ -8,8 +9,6 @@ import shapely.wkb
from geopandas import GeoDataFrame
from geopandas import _compat as compat
@contextmanager
def _get_conn(conn_or_engine):
@@ -28,7 +27,7 @@ def _get_conn(conn_or_engine):
-------
Connection
"""
from sqlalchemy.engine.base import Engine, Connection
from sqlalchemy.engine.base import Connection, Engine
if isinstance(conn_or_engine, Connection):
if not conn_or_engine.in_transaction():
@@ -43,7 +42,7 @@ def _get_conn(conn_or_engine):
raise ValueError(f"Unknown Connectable: {conn_or_engine}")
def _df_to_geodf(df, geom_col="geom", crs=None):
def _df_to_geodf(df, geom_col="geom", crs=None, con=None):
"""
Transforms a pandas DataFrame into a GeoDataFrame.
The column 'geom_col' must be a geometry column in WKB representation.
@@ -60,6 +59,8 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
such as an authority string (eg "EPSG:4326") or a WKT string.
If not set, tries to determine CRS from the SRID associated with the
first geometry in the database, and assigns that to all geometries.
con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
Active connection to the database to query.
Returns
-------
GeoDataFrame
@@ -80,10 +81,6 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
load_geom_bytes = shapely.wkb.loads
"""Load from Python 3 binary."""
def load_geom_buffer(x):
"""Load from Python 2 binary."""
return shapely.wkb.loads(str(x))
def load_geom_text(x):
"""Load from binary encoded as text."""
return shapely.wkb.loads(str(x), hex=True)
@@ -95,13 +92,31 @@ def _df_to_geodf(df, geom_col="geom", crs=None):
df[geom_col] = geoms = geoms.apply(load_geom)
if crs is None:
if compat.SHAPELY_GE_20:
srid = shapely.get_srid(geoms.iat[0])
else:
srid = shapely.geos.lgeos.GEOSGetSRID(geoms.iat[0]._geom)
srid = shapely.get_srid(geoms.iat[0])
# if no defined SRID in geodatabase, returns SRID of 0
if srid != 0:
crs = "epsg:{}".format(srid)
try:
spatial_ref_sys_df = _get_spatial_ref_sys_df(con, srid)
except pd.errors.DatabaseError:
warning_msg = (
f"Could not find the spatial reference system table "
f"(spatial_ref_sys) in PostGIS."
f"Trying epsg:{srid} as a fallback."
)
warnings.warn(warning_msg, UserWarning, stacklevel=3)
crs = "epsg:{}".format(srid)
else:
if not spatial_ref_sys_df.empty:
auth_name = spatial_ref_sys_df["auth_name"].item()
crs = f"{auth_name}:{srid}"
else:
warning_msg = (
f"Could not find srid {srid} in the "
f"spatial_ref_sys table. "
f"Trying epsg:{srid} as a fallback."
)
warnings.warn(warning_msg, UserWarning, stacklevel=3)
crs = "epsg:{}".format(srid)
return GeoDataFrame(df, crs=crs, geometry=geom_col)
@@ -176,7 +191,7 @@ def _read_postgis(
params=params,
chunksize=chunksize,
)
return _df_to_geodf(df, geom_col=geom_col, crs=crs)
return _df_to_geodf(df, geom_col=geom_col, crs=crs, con=con)
else:
# read data in chunks and return a generator
@@ -189,20 +204,9 @@ def _read_postgis(
params=params,
chunksize=chunksize,
)
return (_df_to_geodf(df, geom_col=geom_col, crs=crs) for df in df_generator)
def read_postgis(*args, **kwargs):
import warnings
warnings.warn(
"geopandas.io.sql.read_postgis() is intended for internal "
"use only, and will be deprecated. Use geopandas.read_postgis() instead.",
FutureWarning,
stacklevel=2,
)
return _read_postgis(*args, **kwargs)
return (
_df_to_geodf(df, geom_col=geom_col, crs=crs, con=con) for df in df_generator
)
def _get_geometry_type(gdf):
@@ -253,7 +257,7 @@ def _get_geometry_type(gdf):
def _get_srid_from_crs(gdf):
"""
Get EPSG code from CRS if available. If not, return -1.
Get EPSG code from CRS if available. If not, return 0.
"""
# Use geoalchemy2 default for srid
@@ -279,7 +283,7 @@ def _get_srid_from_crs(gdf):
warnings.warn(warning_msg, UserWarning, stacklevel=2)
if srid is None:
srid = -1
srid = 0
warnings.warn(warning_msg, UserWarning, stacklevel=2)
return srid
@@ -288,8 +292,8 @@ def _get_srid_from_crs(gdf):
def _convert_linearring_to_linestring(gdf, geom_name):
from shapely.geometry import LineString
# Todo: Use Pygeos function once it's implemented:
# https://github.com/pygeos/pygeos/issues/76
# Todo: Use shapely function once it's implemented:
# https://github.com/shapely/shapely/issues/1617
mask = gdf.geom_type == "LinearRing"
gdf.loc[mask, geom_name] = gdf.loc[mask, geom_name].apply(
@@ -300,26 +304,11 @@ def _convert_linearring_to_linestring(gdf, geom_name):
def _convert_to_ewkb(gdf, geom_name, srid):
"""Convert geometries to ewkb."""
if compat.USE_SHAPELY_20:
geoms = shapely.to_wkb(
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
elif compat.USE_PYGEOS:
from pygeos import set_srid, to_wkb
geoms = to_wkb(
set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
else:
from shapely.wkb import dumps
geoms = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]
geoms = shapely.to_wkb(
shapely.set_srid(gdf[geom_name].values._data, srid=srid),
hex=True,
include_srid=True,
)
# The gdf will warn that the geometry column doesn't hold in-memory geometries
# now that they are EWKB, so convert back to a regular dataframe to avoid warning
@@ -330,8 +319,8 @@ def _convert_to_ewkb(gdf, geom_name, srid):
def _psql_insert_copy(tbl, conn, keys, data_iter):
import io
import csv
import io
s_buf = io.StringIO()
writer = csv.writer(s_buf)
@@ -341,11 +330,16 @@ def _psql_insert_copy(tbl, conn, keys, data_iter):
columns = ", ".join('"{}"'.format(k) for k in keys)
dbapi_conn = conn.connection
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
tbl.table.schema, tbl.table.name, columns
)
with dbapi_conn.cursor() as cur:
sql = 'COPY "{}"."{}" ({}) FROM STDIN WITH CSV'.format(
tbl.table.schema, tbl.table.name, columns
)
cur.copy_expert(sql=sql, file=s_buf)
# Use psycopg method if it's available
if hasattr(cur, "copy") and callable(cur.copy):
with cur.copy(sql) as copy:
copy.write(s_buf.read())
else: # otherwise use psycopg2 method
cur.copy_expert(sql, s_buf)
def _write_postgis(
@@ -469,3 +463,11 @@ def _write_postgis(
dtype=dtype,
method=_psql_insert_copy,
)
@lru_cache
def _get_spatial_ref_sys_df(con, srid):
spatial_ref_sys_sql = (
f"SELECT srid, auth_name FROM spatial_ref_sys WHERE srid = {srid}"
)
return pd.read_sql(spatial_ref_sys_sql, con)