updated maps for redux

This commit is contained in:
2025-12-25 23:31:01 -08:00
parent 6342f8f78c
commit affae050a3
6 changed files with 59 additions and 22 deletions

View File

@@ -22,6 +22,38 @@ OUT = ROOT / 'out_tables'
OUT.mkdir(parents=True, exist_ok=True)
DB_NAME = 'colorado_spills'
SPILLS_TABLE_PRIMARY = 'spills_with_ruca'
SPILLS_TABLE_FALLBACK = 'spills_with_demographics_geog'
START_DATE = pd.Timestamp('2015-01-01')
END_DATE = pd.Timestamp('2024-12-31')
def filter_spills_by_date(df: pd.DataFrame) -> pd.DataFrame:
"""Filter rows to [START_DATE, END_DATE] using 'Initial Report Date' and 'Date of Discovery'.
Uses the earliest non-null of the two columns (when both exist).
If neither column exists, returns df unchanged.
"""
report_col = 'Initial Report Date'
discovery_col = 'Date of Discovery'
if report_col not in df.columns and discovery_col not in df.columns:
return df
report_dt = pd.to_datetime(df[report_col], errors='coerce') if report_col in df.columns else pd.NaT
disc_dt = pd.to_datetime(df[discovery_col], errors='coerce') if discovery_col in df.columns else pd.NaT
# earliest non-null
if isinstance(report_dt, pd.Series) and isinstance(disc_dt, pd.Series):
use_dt = report_dt.where(report_dt.notna(), disc_dt)
use_dt = use_dt.where(disc_dt.isna() | (use_dt <= disc_dt), disc_dt)
elif isinstance(report_dt, pd.Series):
use_dt = report_dt
else:
use_dt = disc_dt
mask = (use_dt >= START_DATE) & (use_dt <= END_DATE)
return df.loc[mask].copy()
def get_engine():
@@ -42,33 +74,38 @@ def load_spills() -> gpd.GeoDataFrame:
"""Load spills as a GeoDataFrame, preferring PostGIS, else CSV fallback."""
engine = get_engine()
if engine is not None:
# Try multiple geometry options from the PostGIS table
try_statements = [
("SELECT *, geom FROM spills_with_demographics_geog", 'geom'),
("SELECT *, geometry FROM spills_with_demographics_geog", 'geometry'),
("SELECT *, CAST(geog AS geometry) AS geom FROM spills_with_demographics_geog", 'geom'),
("SELECT *, ST_SetSRID(CAST(geog AS geometry), 4326) AS geom FROM spills_with_demographics_geog", 'geom'),
]
for sql, geom_col in try_statements:
# Try multiple geometry options from the PostGIS table (prefer spills_with_ruca)
for table in (SPILLS_TABLE_PRIMARY, SPILLS_TABLE_FALLBACK):
try_statements = [
(f"SELECT *, geom FROM {table}", 'geom'),
(f"SELECT *, geometry FROM {table}", 'geometry'),
(f"SELECT *, CAST(geog AS geometry) AS geom FROM {table}", 'geom'),
(f"SELECT *, ST_SetSRID(CAST(geog AS geometry), 4326) AS geom FROM {table}", 'geom'),
]
for sql, geom_col in try_statements:
try:
gdf = gpd.read_postgis(sql, engine, geom_col=geom_col)
# Ensure CRS
if gdf.crs is None:
gdf.set_crs('EPSG:4326', inplace=True)
gdf = gdf.loc[filter_spills_by_date(gdf).index]
return gdf
except Exception:
pass
# Fallback to pandas + lat/lon if present in the DB table
try:
gdf = gpd.read_postgis(sql, engine, geom_col=geom_col)
# Ensure CRS
if gdf.crs is None:
gdf.set_crs('EPSG:4326', inplace=True)
return gdf
df = pd.read_sql_table(table, engine)
df = filter_spills_by_date(df)
if {'Latitude', 'Longitude'}.issubset(df.columns):
return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
except Exception:
pass
# Fallback to pandas + lat/lon if present in the DB table
try:
df = pd.read_sql_table('spills_with_demographics_geog', engine)
if {'Latitude', 'Longitude'}.issubset(df.columns):
return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
except Exception:
pass
# CSV fallback
csv = ANALYSIS_DIR / 'spills_trimmed.csv'
df = pd.read_csv(csv)
df = filter_spills_by_date(df)
if not {'Latitude', 'Longitude'}.issubset(df.columns):
raise ValueError('Expected Latitude/Longitude columns in spills_trimmed.csv')
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')