tables and maps
This commit is contained in:
249
scripts/build_publication_tables.py
Normal file
249
scripts/build_publication_tables.py
Normal file
@@ -0,0 +1,249 @@
|
||||
#!/usr/bin/env python3
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path('/home/dadams/Repos/colorado_spills')
|
||||
ANALYSIS_DIR = ROOT / 'analysis' / 'new analysis Aug 2025'
|
||||
OUT = ROOT / 'out_tables'
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Helpers
|
||||
|
||||
def read_csv_safely(path: Path) -> pd.DataFrame:
    """Read *path* as a CSV, raising ``FileNotFoundError(str(path))`` if it is missing.

    EAFP rewrite: the original checked ``path.exists()`` before reading, which
    is race-prone (file could vanish between check and read). The exception
    message still carries the full path for easier debugging.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        # Re-raise with the full path as the message, matching the original contract.
        raise FileNotFoundError(str(path)) from None
|
||||
|
||||
# -------------------------
|
||||
# Table 1: Data and Sample Overview
|
||||
# -------------------------
|
||||
|
||||
def build_table1():
    """Build Table 1 (data and sample overview) and write ``table1_data_overview.csv``.

    Sample sizes come from the trimmed analysis file; the raw and removed-rows
    files are optional inputs. Returns the assembled table as a DataFrame.
    """
    rows = []
    # Attempt to load sample sizes and exclusions; only the trimmed file is required.
    raw_path = ANALYSIS_DIR / 'spills_with_trim_flag.csv'
    trimmed_path = ANALYSIS_DIR / 'spills_trimmed.csv'
    removed_path = ANALYSIS_DIR / 'spills_trimmed_removed.csv'

    n_raw = read_csv_safely(raw_path).shape[0] if raw_path.exists() else None
    n_trimmed = read_csv_safely(trimmed_path).shape[0]
    if removed_path.exists():
        n_removed = read_csv_safely(removed_path).shape[0]
    elif n_raw is not None:
        # Derive the exclusion count when the removed-rows file is absent.
        n_removed = n_raw - n_trimmed
    else:
        n_removed = None

    rows.append({'Component': 'Primary data source', 'Details': 'COGCC Spill/Release Reports (Form 19), statewide Colorado'})
    rows.append({'Component': 'Units and variables', 'Details': 'Dates, volumes, locations, facility, county; outcome = report delay (days)'})
    rows.append({'Component': 'Temporal coverage', 'Details': '2015–2022 (pre-policy and post-2020 policy)'})
    rows.append({'Component': 'Geographic coverage', 'Details': 'Statewide; top counties: Weld, Garfield, Las Animas'})

    # Fix: use an explicit None check (``if n_raw:`` would also skip a legitimate
    # N of 0), and report the analytical N / exclusions unconditionally — they
    # were previously nested under the raw-file check despite not depending on it.
    if n_raw is not None:
        rows.append({'Component': 'Initial sample size', 'Details': f'N = {n_raw:,} spill reports'})
    rows.append({'Component': 'Analytical sample size', 'Details': f'N = {n_trimmed:,} after IQR trimming'})
    if n_removed is not None:
        rows.append({'Component': 'Exclusions', 'Details': f'{n_removed:,} observations removed by IQR/outlier rules'})

    # Optional joins
    rows.append({'Component': 'Demographics join', 'Details': 'Census tract RUCA and demographics (ACS), spatial join to spill locations'})

    t1 = pd.DataFrame(rows)
    t1.to_csv(OUT / 'table1_data_overview.csv', index=False)
    return t1
|
||||
|
||||
# -------------------------
|
||||
# Table 2: Variable Definitions and Summary Statistics
|
||||
# -------------------------
|
||||
|
||||
def build_table2():
    """Build Table 2: variable definitions plus summary statistics.

    Writes three CSVs to OUT (definitions, summary statistics, and a combined
    left-merge of the two) and returns the combined DataFrame.
    """
    df = read_csv_safely(ANALYSIS_DIR / 'spills_trimmed.csv')
    # Ensure Period exists; derive it from the report year when absent.
    # NOTE(review): assumes 'Initial Report Date' parses with pd.to_datetime — confirm format.
    if 'Period' not in df.columns:
        df['Period'] = pd.to_datetime(df['Initial Report Date']).dt.year.map(lambda y: 'Before 2020' if y < 2020 else '2020 and After')

    # Variable definitions (static text, one row per variable).
    definitions = [
        {'Variable': 'Report Delay (days)', 'Definition': 'Days between discovery and initial report; outcome variable', 'Type': 'Continuous'},
        {'Variable': 'Spill Type', 'Definition': 'Historical vs Recent spill classification', 'Type': 'Categorical'},
        {'Variable': 'Policy Period', 'Definition': 'Before 2020 vs 2020 and After (SB 19-181 implementation)', 'Type': 'Categorical'},
        {'Variable': 'Rurality', 'Definition': 'Rural, Suburban, Urban (RUCA-based)', 'Type': 'Categorical'},
        {'Variable': 'Median Household Income', 'Definition': 'Census tract ACS median income (USD)', 'Type': 'Continuous (optional)'},
        {'Variable': 'Percent Poverty', 'Definition': 'Census tract percent below poverty line', 'Type': 'Continuous (optional)'}
    ]
    defs_df = pd.DataFrame(definitions)

    # Summary stats.
    # Outcome: median [IQR] preferred due to skew; also mean (SD).
    y = df['report_delay_full_days'].dropna()
    outcome_row = {
        'Variable': 'Report Delay (days)',
        'N': len(y),
        'Mean (SD)': f"{y.mean():.2f} ({y.std():.2f})",
        'Median [IQR]': f"{y.median():.2f} [{y.quantile(0.25):.2f}, {y.quantile(0.75):.2f}]"
    }

    # Categorical stats: one "label: level" row per category level, with N and percent.
    def cat_summary(series: pd.Series, label: str):
        counts = series.value_counts(dropna=False)
        rows = []
        for k, v in counts.items():
            # Percent is computed over ALL rows of the series, including NaN.
            pct = 100 * v / len(series)
            rows.append({'Variable': f"{label}: {k}", 'N': int(v), 'Percent': f"{pct:.2f}%"})
        return rows

    cat_rows = []
    # Spill Type and rurality are optional columns; Period is guaranteed by the block above.
    cat_rows += cat_summary(df['Spill Type'], 'Spill Type') if 'Spill Type' in df.columns else []
    cat_rows += cat_summary(df['Period'], 'Period')
    cat_rows += cat_summary(df['rurality'], 'Rurality') if 'rurality' in df.columns else []

    # Continuous demographic covariates — summarized only when present.
    demo_rows = []
    for col, label in [
        ('median_household_income', 'Median Household Income'),
        ('percent_poverty', 'Percent Poverty'),
        ('percent_white', 'Percent White'),
        ('percent_hispanic', 'Percent Hispanic'),
        ('unemployment_rate', 'Unemployment Rate'),
    ]:
        if col in df.columns:
            s = df[col].dropna()
            demo_rows.append({'Variable': label, 'N': len(s), 'Mean (SD)': f"{s.mean():.2f} ({s.std():.2f})", 'Median [IQR]': f"{s.median():.2f} [{s.quantile(0.25):.2f}, {s.quantile(0.75):.2f}]"})

    summary_df = pd.DataFrame([outcome_row] + cat_rows + demo_rows)

    # Save the two component tables.
    defs_df.to_csv(OUT / 'table2_variable_definitions.csv', index=False)
    summary_df.to_csv(OUT / 'table2_summary_statistics.csv', index=False)

    # Combined view: left-merge on Variable. Categorical rows ("label: level")
    # intentionally do not match definition names, so they carry no stats here.
    combined = defs_df.merge(summary_df[['Variable','N','Mean (SD)','Median [IQR]']], on='Variable', how='left')
    combined.to_csv(OUT / 'table2_definitions_and_summary.csv', index=False)
    return combined
|
||||
|
||||
# -------------------------
|
||||
# Table 3: Core Model Results (GLM) — contrasts
|
||||
# -------------------------
|
||||
|
||||
def build_table3():
    """Export the core GLM (Poisson) contrasts as ``table3_core_glm_contrasts.csv``.

    Reads the combined contrast file, keeps the Poisson rows, formats CI and
    p-value strings, orders rows by period then rurality, and returns the table.
    """
    contrasts = read_csv_safely(ANALYSIS_DIR / 'final_contrast_comparison_combined.csv')
    # Poisson is the primary model specification.
    poisson = contrasts[contrasts['model'] == 'poisson'].copy()

    # Formatted display columns (days, hours, CI strings, p-values).
    poisson['Delta (days)'] = poisson['poisson_median']
    poisson['95% CI'] = [
        f"({lo:.2f}, {hi:.2f})"
        for lo, hi in zip(poisson['poisson_ci_lower'], poisson['poisson_ci_upper'])
    ]
    poisson['p'] = [
        '<0.001' if p < 0.001 else f"{p:.3f}"
        for p in poisson['poisson_p']
    ]
    poisson['Delta (hours)'] = poisson['poisson_median_hours']
    poisson['Hours 95% CI'] = [
        f"({lo:.2f}, {hi:.2f})"
        for lo, hi in zip(poisson['poisson_ci_lower_hours'], poisson['poisson_ci_upper_hours'])
    ]

    # Stable presentation order: period first, then rurality.
    period_rank = {'Before 2020': 0, '2020 and After': 1}
    rurality_rank = {'Rural': 0, 'Suburban': 1, 'Urban': 2}
    poisson['po'] = poisson['Period'].map(period_rank)
    poisson['ro'] = poisson['rurality'].map(rurality_rank)
    poisson = poisson.sort_values(['po', 'ro']).drop(columns=['po', 'ro'])

    columns = ['Period', 'rurality', 'Delta (days)', '95% CI', 'p', 'Delta (hours)', 'Hours 95% CI']
    table = poisson[columns]
    table.to_csv(OUT / 'table3_core_glm_contrasts.csv', index=False)
    return table
|
||||
|
||||
# -------------------------
|
||||
# Table 4: Predicted Means or Contrasts (predicted reporting delays)
|
||||
# -------------------------
|
||||
|
||||
def build_table4():
    """Build Table 4 (predicted reporting delays by group) and write
    ``table4_predicted_means.csv``.

    Column names in the bootstrap predictions file vary between pipeline runs,
    so prediction/CI columns are inferred by name. When inference fails, the
    raw file is exported for manual review and returned unchanged.
    """
    path = ANALYSIS_DIR / 'poisson_predicted_groups_boot_final.csv'
    df = read_csv_safely(path)

    # Infer prediction and CI columns. Fix: take only the FIRST match per role.
    # The original last-match-wins assignment meant e.g. 'poisson_ci_lower_hours'
    # (which contains 'lower') could overwrite the day-scale CI column and pair
    # day predictions with hour-scale CIs.
    col_map = {}
    for c in df.columns:
        lc = c.lower()
        if 'pred' not in col_map and 'pred' in lc and ('mean' in lc or 'delay' in lc):
            col_map['pred'] = c
        if 'low' not in col_map and ('ci_low' in lc or 'lower' in lc):
            col_map['low'] = c
        if 'high' not in col_map and ('ci_high' in lc or 'upper' in lc):
            col_map['high'] = c

    # Required grouping fields (accept either naming convention).
    gcols = []
    for name in ['spill_type', 'Spill Type']:
        if name in df.columns:
            gcols.append(name)
            break
    if 'Period' in df.columns:
        gcols.append('Period')
    for name in ['rurality', 'Rurality']:
        if name in df.columns:
            gcols.append(name)
            break

    if len(gcols) < 3 or not {'pred', 'low', 'high'} <= set(col_map):
        # Fall back to exporting raw for manual review.
        df.to_csv(OUT / 'table4_predicted_means_raw.csv', index=False)
        return df

    tidy = df[gcols + [col_map['pred'], col_map['low'], col_map['high']]].copy()
    tidy = tidy.rename(columns={
        gcols[0]: 'Spill Type',
        gcols[1]: 'Period',
        gcols[2]: 'Rurality',
        col_map['pred']: 'Predicted (days)',
        col_map['low']: 'Lower',
        col_map['high']: 'Upper',
    })
    tidy['95% CI'] = tidy.apply(lambda r: f"({r['Lower']:.2f}, {r['Upper']:.2f})", axis=1)
    out = tidy[['Spill Type', 'Period', 'Rurality', 'Predicted (days)', '95% CI']].copy()

    # Sort: spill type, then period, then rurality.
    order_period = {'Before 2020': 0, '2020 and After': 1}
    order_r = {'Rural': 0, 'Suburban': 1, 'Urban': 2}
    out['po'] = out['Period'].map(order_period)
    out['ro'] = out['Rurality'].map(order_r)
    out = out.sort_values(['Spill Type', 'po', 'ro']).drop(columns=['po', 'ro'])

    out.to_csv(OUT / 'table4_predicted_means.csv', index=False)
    return out
|
||||
|
||||
# -------------------------
|
||||
# Table 5: ITS or Robustness Checks
|
||||
# -------------------------
|
||||
|
||||
def build_table5():
    """Build Table 5 (ITS summary and robustness checks) from optional inputs.

    Writes per-section CSVs when their source files exist and always writes
    ``table5_overview.csv`` describing what was produced. Returns the overview.
    """
    rows = []

    # ITS coefficients summary, if available.
    its_path = ANALYSIS_DIR / 'its_summary_with_boot.csv'
    if its_path.exists():
        its = pd.read_csv(its_path)
        # Column names vary; infer level-change / slope-change / CI columns.
        candidates = {
            'level': [c for c in its.columns if 'level' in c.lower() and ('change' in c.lower() or 'beta' in c.lower() or 'immediate' in c.lower())],
            'slope': [c for c in its.columns if 'slope' in c.lower() and ('change' in c.lower() or 'beta' in c.lower())],
            'low': [c for c in its.columns if 'low' in c.lower()],
            'high': [c for c in its.columns if 'high' in c.lower()],
        }
        # Grouping keys (accept either naming convention).
        key_cols = []
        for name in ['spill_type', 'Spill Type']:
            if name in its.columns:
                key_cols.append(name)
                break
        for name in ['rurality', 'Rurality']:
            if name in its.columns:
                key_cols.append(name)
                break
        # Fix: require BOTH grouping columns. The original condition tested only
        # truthiness of key_cols, so a file with exactly one key column raised
        # IndexError at key_cols[1] in the rename below.
        if len(key_cols) == 2 and candidates['level'] and candidates['slope']:
            summary = its[key_cols + [candidates['level'][0], candidates['slope'][0]]].copy()
            summary = summary.rename(columns={
                key_cols[0]: 'Spill Type',
                key_cols[1]: 'Rurality',
                candidates['level'][0]: 'Level Change (days)',
                candidates['slope'][0]: 'Slope Change (days/month)',
            })
            summary.to_csv(OUT / 'table5_its_summary.csv', index=False)
            rows.append({'Section': 'ITS Summary', 'Details': 'Saved table5_its_summary.csv with level and slope changes by group'})

    # Robustness: Poisson vs NB differences from the appendix comparison table.
    cmp_path = ANALYSIS_DIR / 'appendix_table2_model_comparison.csv'
    if cmp_path.exists():
        cmp = pd.read_csv(cmp_path)
        keep_cols = [c for c in cmp.columns if 'Period' in c or 'Rurality' in c or 'Difference' in c or 'Poisson' in c or 'NB' in c]
        cmp[keep_cols].to_csv(OUT / 'table5_robustness_poisson_vs_nb.csv', index=False)
        rows.append({'Section': 'Model Robustness', 'Details': 'Saved table5_robustness_poisson_vs_nb.csv comparing Poisson vs NB'})

    t5 = pd.DataFrame(rows) if rows else pd.DataFrame([{'Section': 'Note', 'Details': 'No ITS or robustness sources found'}])
    t5.to_csv(OUT / 'table5_overview.csv', index=False)
    return t5
|
||||
|
||||
|
||||
def main():
    """Build all five publication tables and print their output paths."""
    build_table1()
    build_table2()
    build_table3()
    build_table4()
    build_table5()
    print('Built tables:')
    for fname in (
        'table1_data_overview.csv',
        'table2_definitions_and_summary.csv',
        'table3_core_glm_contrasts.csv',
        'table4_predicted_means.csv',
        'table5_overview.csv',
    ):
        print(' -', OUT / fname)
|
||||
258
scripts/make_spill_map.py
Normal file
258
scripts/make_spill_map.py
Normal file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create a production-ready map of Colorado oil & gas spill locations for journal use.
|
||||
Outputs:
|
||||
- out_tables/spill_map_points.png (vector-like high-res)
|
||||
- out_tables/spill_map_hex.pdf (publication PDF)
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
from shapely.geometry import Point
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib_scalebar.scalebar import ScaleBar
|
||||
import matplotlib.patheffects as pe
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
ROOT = Path('/home/dadams/Repos/colorado_spills')
|
||||
ANALYSIS_DIR = ROOT / 'analysis' / 'new analysis Aug 2025'
|
||||
OUT = ROOT / 'out_tables'
|
||||
OUT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
DB_NAME = 'colorado_spills'
|
||||
|
||||
|
||||
def get_engine():
    """Build a SQLAlchemy engine for the spills database from environment
    variables (DB_USER, DB_PASSWORD, DB_HOST, DB_PORT).

    Returns None when any variable is unset or engine creation fails, so
    callers can fall back to file-based data sources.
    """
    credentials = [os.getenv(k) for k in ('DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT')]
    if not all(credentials):
        return None
    user, password, host, port = credentials
    try:
        return create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{DB_NAME}')
    except Exception:
        # Best-effort: any construction failure means "no database available".
        return None
|
||||
|
||||
|
||||
def load_spills() -> gpd.GeoDataFrame:
    """Load spills as a GeoDataFrame, preferring PostGIS, else CSV fallback.

    Resolution order:
      1. PostGIS geometry column (several candidate SQL forms, first that works wins),
      2. plain SQL table + Latitude/Longitude columns,
      3. local ``spills_trimmed.csv`` + Latitude/Longitude columns.

    Raises ValueError if the CSV fallback lacks Latitude/Longitude.
    """
    engine = get_engine()
    if engine is not None:
        # Try multiple geometry options from the PostGIS table; schemas differ
        # between loads (geom vs geometry vs geography casts).
        try_statements = [
            ("SELECT *, geom FROM spills_with_demographics_geog", 'geom'),
            ("SELECT *, geometry FROM spills_with_demographics_geog", 'geometry'),
            ("SELECT *, CAST(geog AS geometry) AS geom FROM spills_with_demographics_geog", 'geom'),
            ("SELECT *, ST_SetSRID(CAST(geog AS geometry), 4326) AS geom FROM spills_with_demographics_geog", 'geom'),
        ]
        for sql, geom_col in try_statements:
            try:
                gdf = gpd.read_postgis(sql, engine, geom_col=geom_col)
                # Ensure CRS; assume WGS84 when the table carries none.
                if gdf.crs is None:
                    gdf.set_crs('EPSG:4326', inplace=True)
                return gdf
            except Exception:
                # Expected for schemas that lack this geometry form — try the next.
                pass
        # Fallback to pandas + lat/lon if present in the DB table.
        try:
            df = pd.read_sql_table('spills_with_demographics_geog', engine)
            if {'Latitude', 'Longitude'}.issubset(df.columns):
                return gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
        except Exception:
            pass

    # CSV fallback (no database available or all DB strategies failed).
    csv = ANALYSIS_DIR / 'spills_trimmed.csv'
    df = pd.read_csv(csv)
    if not {'Latitude', 'Longitude'}.issubset(df.columns):
        raise ValueError('Expected Latitude/Longitude columns in spills_trimmed.csv')
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']), crs='EPSG:4326')
    return gdf
|
||||
|
||||
|
||||
def try_query_boundary(engine) -> gpd.GeoDataFrame | None:
    """Attempt to find a Colorado boundary polygon from common table names in PostGIS.

    Tries each candidate table with each candidate geometry expression; the
    first non-empty result wins. Returns a single-column ('geom') GeoDataFrame
    in EPSG:4326, or None when nothing matches.

    Fix: the original candidate tuples carried a per-table geometry column name
    that was never used (the alt_geoms loop always overrode it) — dropped.
    """
    candidates = [
        # table, where
        ("cb_2018_us_state_500k", "stusps='CO' OR name ILIKE 'Colorado' OR statefp='08'"),
        ("cb_2019_us_state_500k", "stusps='CO' OR name ILIKE 'Colorado' OR statefp='08'"),
        ("tl_2020_us_state", "stusps='CO' OR name ILIKE 'Colorado' OR statefp='08'"),
        ("us_states", "stusps='CO' OR name ILIKE 'Colorado' OR statefp='08'"),
        ("states", "stusps='CO' OR name ILIKE 'Colorado' OR statefp='08'"),
        ("colorado_boundary", "1=1"),
        ("co_boundary", "1=1"),
    ]
    alt_geoms = ["geom", "geometry", "ST_SetSRID(CAST(geog AS geometry), 4326)"]
    for table, where_clause in candidates:
        for gexpr in alt_geoms:
            sql = f"SELECT {gexpr} AS geom FROM {table} WHERE {where_clause}"
            try:
                gdf = gpd.read_postgis(sql, engine, geom_col='geom')
                if not gdf.empty:
                    if gdf.crs is None:
                        # Assume WGS84 when the table carries no CRS.
                        gdf.set_crs('EPSG:4326', inplace=True)
                    return gdf[["geom"]].to_crs(4326)
            except Exception:
                # Missing table/column is expected — move to the next candidate.
                continue
    return None
|
||||
|
||||
|
||||
def try_query_counties(engine) -> gpd.GeoDataFrame | None:
    """Attempt to fetch Colorado counties from PostGIS for boundary overlays.

    Same candidate-probing strategy as try_query_boundary: first table +
    geometry expression yielding a non-empty result wins. Returns a
    single-column ('geom') GeoDataFrame in EPSG:4326, or None.

    Fix: the original candidate tuples carried a per-table geometry column name
    that was never used (the alt_geoms loop always overrode it) — dropped.
    """
    candidates = [
        # table, where
        ("cb_2018_us_county_500k", "statefp='08' OR stusps='CO'"),
        ("cb_2019_us_county_500k", "statefp='08' OR stusps='CO'"),
        ("tl_2020_us_county", "statefp='08' OR stusps='CO'"),
        ("counties", "statefp='08' OR stusps='CO' OR state='CO' OR state_name ILIKE 'Colorado'"),
        ("co_counties", "1=1"),
        ("colorado_counties", "1=1"),
    ]
    alt_geoms = ["geom", "geometry", "ST_SetSRID(CAST(geog AS geometry), 4326)"]
    for table, where_clause in candidates:
        for gexpr in alt_geoms:
            sql = f"SELECT {gexpr} AS geom FROM {table} WHERE {where_clause}"
            try:
                gdf = gpd.read_postgis(sql, engine, geom_col='geom')
                if not gdf.empty:
                    if gdf.crs is None:
                        # Assume WGS84 when the table carries no CRS.
                        gdf.set_crs('EPSG:4326', inplace=True)
                    return gdf[["geom"]].to_crs(4326)
            except Exception:
                # Missing table/column is expected — move to the next candidate.
                continue
    return None
|
||||
|
||||
|
||||
def get_colorado_boundary(spills_gdf: gpd.GeoDataFrame) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame | None]:
    """Return Colorado boundary and optional counties.

    Prefers PostGIS lookups; when no boundary can be fetched, falls back to a
    buffered convex hull around the spill points (counties stay None).
    """
    counties = None
    engine = get_engine()
    if engine is not None:
        boundary = try_query_boundary(engine)
        if boundary is not None and not boundary.empty:
            # Counties are a nice-to-have overlay; None is acceptable.
            return boundary, try_query_counties(engine)
    # Fallback: convex hull of the points, buffered 30 km in Web Mercator.
    hull = spills_gdf.to_crs(3857).unary_union.convex_hull.buffer(30_000)
    fallback = gpd.GeoDataFrame(geometry=[hull], crs='EPSG:3857').to_crs(4326)
    return fallback, counties
|
||||
|
||||
|
||||
def add_north_arrow(ax, x=0.95, y=0.1, size=18):
    """Draw a bold 'N' with an upward arrow at axes-fraction position (x, y).

    A white stroke halo keeps both the letter and the arrow legible over any
    basemap color.
    """
    halo = [pe.withStroke(linewidth=3, foreground='white')]
    arrow_style = dict(
        facecolor='black', width=5, headwidth=15, headlength=18, shrink=0.05,
        path_effects=[pe.withStroke(linewidth=3, foreground='white')],
    )
    ax.annotate(
        'N', xy=(x, y + 0.06), xytext=(x, y), xycoords='axes fraction',
        ha='center', va='bottom', fontsize=size, fontweight='bold',
        color='black', path_effects=halo, arrowprops=arrow_style,
    )
|
||||
|
||||
|
||||
def style_axes(ax):
    """Strip ticks, labels, and frame from *ax* for a clean map panel."""
    ax.set_axis_off()
    # Also hide spines explicitly in case a later call re-enables the axis.
    for spine in ax.spines.values():
        spine.set_visible(False)
|
||||
|
||||
|
||||
def map_points(spills: gpd.GeoDataFrame, boundary: gpd.GeoDataFrame, counties: gpd.GeoDataFrame | None):
    """Render the spill-point map and save it as ``spill_map_points.png``.

    Draws the state boundary, optional county lines, and the spill points
    (projected to Web Mercator), adds scalebar/north arrow, and returns the
    output path.
    """
    # Project to Web Mercator for plotting (meters — required by the scalebar).
    spills_3857 = spills.to_crs(3857)
    boundary_3857 = boundary.to_crs(3857)

    # Figure aesthetics
    fig, ax = plt.subplots(figsize=(7, 9), dpi=300, constrained_layout=True)
    boundary_3857.plot(ax=ax, color='#f5f5f5', edgecolor='#888888', linewidth=0.8)
    if counties is not None and not counties.empty:
        counties_3857 = counties.to_crs(3857)
        counties_3857.boundary.plot(ax=ax, color='#aaaaaa', linewidth=0.3)

    # Plot points with alpha and small size to avoid overplotting
    spills_3857.plot(ax=ax, markersize=1.2, alpha=0.35, color='#2c7fb8')

    # Tighten to boundary
    minx, miny, maxx, maxy = boundary_3857.total_bounds
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)

    # Add scalebar (requires axis in meters)
    ax.add_artist(ScaleBar(1, units='m', location='lower left', box_alpha=0.7))

    add_north_arrow(ax)
    style_axes(ax)
    # Dynamic title from available year fields; fall back to the bare title
    # when no year column exists or it fails to coerce to int.
    title = 'Oil & Gas Spill Reports in Colorado'
    year_cols = [c for c in spills.columns if c.lower() in ('year', 'yr')]
    if year_cols:
        try:
            yrs = spills[year_cols[0]].dropna().astype(int)
            if not yrs.empty:
                title = f"Oil & Gas Spill Reports in Colorado, {yrs.min()}–{yrs.max()}"
        except Exception:
            pass
    ax.set_title(title, fontsize=12, pad=10)

    out_png = OUT / 'spill_map_points.png'
    fig.savefig(out_png, dpi=300)
    plt.close(fig)  # free the figure — this script renders multiple maps
    return out_png
|
||||
|
||||
|
||||
def map_hex(spills: gpd.GeoDataFrame, boundary: gpd.GeoDataFrame, counties: gpd.GeoDataFrame | None):
    """Render a hexbin spill-density map and save it as ``spill_map_hex.pdf``.

    Same basemap as map_points, but aggregates point locations into hexagonal
    density cells with a colorbar. Returns the output path.
    """
    # Project to Web Mercator (meters — required by the scalebar).
    spills_3857 = spills.to_crs(3857)
    boundary_3857 = boundary.to_crs(3857)

    # Build hexbin density using matplotlib hexbin on data coordinates
    x = spills_3857.geometry.x.values
    y = spills_3857.geometry.y.values

    fig, ax = plt.subplots(figsize=(7, 9), dpi=300, constrained_layout=True)
    boundary_3857.plot(ax=ax, color='#f5f5f5', edgecolor='#888888', linewidth=0.8)
    if counties is not None and not counties.empty:
        counties_3857 = counties.to_crs(3857)
        counties_3857.boundary.plot(ax=ax, color='#aaaaaa', linewidth=0.3)

    # Use hexbin; gridsize tuned to CO scale; mincnt to suppress isolated cells
    hb = ax.hexbin(x, y, gridsize=70, mincnt=3, cmap='viridis', alpha=0.9)
    cb = fig.colorbar(hb, ax=ax, fraction=0.028, pad=0.01)
    cb.set_label('Spill count')

    # Scale bar and north arrow
    ax.add_artist(ScaleBar(1, units='m', location='lower left', box_alpha=0.7))
    add_north_arrow(ax)

    # Tighten to boundary
    minx, miny, maxx, maxy = boundary_3857.total_bounds
    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)
    style_axes(ax)
    # Dynamic title from available year fields; fall back to the bare title
    # when no year column exists or it fails to coerce to int.
    title = 'Spill Density (Hexbin)'
    year_cols = [c for c in spills.columns if c.lower() in ('year', 'yr')]
    if year_cols:
        try:
            yrs = spills[year_cols[0]].dropna().astype(int)
            if not yrs.empty:
                title = f"Spill Density (Hexbin), {yrs.min()}–{yrs.max()}"
        except Exception:
            pass
    ax.set_title(title, fontsize=12, pad=10)

    out_pdf = OUT / 'spill_map_hex.pdf'
    fig.savefig(out_pdf)
    plt.close(fig)  # free the figure — this script renders multiple maps
    return out_pdf
|
||||
|
||||
|
||||
def main():
    """Load spill data, derive boundaries, and render both map products."""
    spills = load_spills()
    boundary, counties = get_colorado_boundary(spills)
    outputs = (
        map_points(spills, boundary, counties),
        map_hex(spills, boundary, counties),
    )
    for out_path in outputs:
        print(f"Saved: {out_path}")
|
||||
Reference in New Issue
Block a user