updated maps for redux
This commit is contained in:
@@ -10874,7 +10874,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
"version": "3.13.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1077,7 +1077,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
"version": "3.13.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 1.2 MiB After Width: | Height: | Size: 1.2 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 476 KiB After Width: | Height: | Size: 476 KiB |
@@ -22,6 +22,38 @@ OUT = ROOT / 'out_tables'
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Database / table configuration for loading spill records from PostGIS.
DB_NAME = 'colorado_spills'
SPILLS_TABLE_PRIMARY = 'spills_with_ruca'  # preferred table (includes RUCA codes)
SPILLS_TABLE_FALLBACK = 'spills_with_demographics_geog'  # used when the primary table is unavailable

# Inclusive analysis window; spills outside this range are filtered out.
START_DATE = pd.Timestamp('2015-01-01')
END_DATE = pd.Timestamp('2024-12-31')
def filter_spills_by_date(df: pd.DataFrame,
                          start: 'pd.Timestamp | None' = None,
                          end: 'pd.Timestamp | None' = None) -> pd.DataFrame:
    """Filter rows to the inclusive [start, end] date window.

    Each row's date is the earliest non-null of 'Initial Report Date' and
    'Date of Discovery' (when both columns exist). Rows where neither value
    parses to a date are dropped. If neither column exists, *df* is returned
    unchanged.

    Args:
        df: Spill records, possibly containing the two date columns.
        start: Window start; defaults to the module-level START_DATE.
        end: Window end; defaults to the module-level END_DATE.

    Returns:
        A copy of the filtered frame (or *df* itself when no date column
        is present).
    """
    report_col = 'Initial Report Date'
    discovery_col = 'Date of Discovery'
    if report_col not in df.columns and discovery_col not in df.columns:
        return df

    # Resolve defaults lazily so callers passing an explicit window never
    # touch the module-level constants.
    start = START_DATE if start is None else start
    end = END_DATE if end is None else end

    # Unparseable values become NaT rather than raising.
    report_dt = pd.to_datetime(df[report_col], errors='coerce') if report_col in df.columns else None
    disc_dt = pd.to_datetime(df[discovery_col], errors='coerce') if discovery_col in df.columns else None

    if report_dt is not None and disc_dt is not None:
        # Earliest non-null of the two: row-wise min skips NaT by default,
        # so a row with one missing date falls back to the other.
        use_dt = pd.concat([report_dt, disc_dt], axis=1).min(axis=1)
    elif report_dt is not None:
        use_dt = report_dt
    else:
        use_dt = disc_dt

    # Rows whose resolved date is NaT compare False on both sides and drop out.
    mask = (use_dt >= start) & (use_dt <= end)
    return df.loc[mask].copy()
|
||||
|
||||
|
||||
def get_engine():
|
||||
@@ -42,33 +74,38 @@ def load_spills() -> gpd.GeoDataFrame:
|
||||
"""Load spills as a GeoDataFrame, preferring PostGIS, else CSV fallback."""
|
||||
engine = get_engine()
|
||||
if engine is not None:
|
||||
# Try multiple geometry options from the PostGIS table
|
||||
try_statements = [
|
||||
("SELECT *, geom FROM spills_with_demographics_geog", 'geom'),
|
||||
("SELECT *, geometry FROM spills_with_demographics_geog", 'geometry'),
|
||||
("SELECT *, CAST(geog AS geometry) AS geom FROM spills_with_demographics_geog", 'geom'),
|
||||
("SELECT *, ST_SetSRID(CAST(geog AS geometry), 4326) AS geom FROM spills_with_demographics_geog", 'geom'),
|
||||
]
|
||||
for sql, geom_col in try_statements:
|
||||
# Try multiple geometry options from the PostGIS table (prefer spills_with_ruca)
|
||||
for table in (SPILLS_TABLE_PRIMARY, SPILLS_TABLE_FALLBACK):
|
||||
try_statements = [
|
||||
(f"SELECT *, geom FROM {table}", 'geom'),
|
||||
(f"SELECT *, geometry FROM {table}", 'geometry'),
|
||||
(f"SELECT *, CAST(geog AS geometry) AS geom FROM {table}", 'geom'),
|
||||
(f"SELECT *, ST_SetSRID(CAST(geog AS geometry), 4326) AS geom FROM {table}", 'geom'),
|
||||
]
|
||||
for sql, geom_col in try_statements:
|
||||
try:
|
||||
gdf = gpd.read_postgis(sql, engine, geom_col=geom_col)
|
||||
# Ensure CRS
|
||||
if gdf.crs is None:
|
||||
gdf.set_crs('EPSG:4326', inplace=True)
|
||||
gdf = gdf.loc[filter_spills_by_date(gdf).index]
|
||||
return gdf
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to pandas + lat/lon if present in the DB table
|
||||
try:
|
||||
gdf = gpd.read_postgis(sql, engine, geom_col=geom_col)
|
||||
# Ensure CRS
|
||||
if gdf.crs is None:
|
||||
gdf.set_crs('EPSG:4326', inplace=True)
|
||||
return gdf
|
||||
df = pd.read_sql_table(table, engine)
|
||||
df = filter_spills_by_date(df)
|
||||
if {'Latitude', 'Longitude'}.issubset(df.columns):
|
||||
return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
|
||||
except Exception:
|
||||
pass
|
||||
# Fallback to pandas + lat/lon if present in the DB table
|
||||
try:
|
||||
df = pd.read_sql_table('spills_with_demographics_geog', engine)
|
||||
if {'Latitude', 'Longitude'}.issubset(df.columns):
|
||||
return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# CSV fallback
|
||||
csv = ANALYSIS_DIR / 'spills_trimmed.csv'
|
||||
df = pd.read_csv(csv)
|
||||
df = filter_spills_by_date(df)
|
||||
if not {'Latitude', 'Longitude'}.issubset(df.columns):
|
||||
raise ValueError('Expected Latitude/Longitude columns in spills_trimmed.csv')
|
||||
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
|
||||
|
||||
Reference in New Issue
Block a user