In [2]:
# Importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
In [3]:
## set directory
# All relative paths in this notebook are resolved against the repository root.
# The root can be overridden with the CALIFORNIA_EQUITY_REPO environment
# variable so the notebook also runs on machines other than the author's;
# when the variable is unset the original location is used.
import os
REPO_ROOT = os.environ.get('CALIFORNIA_EQUITY_REPO',
                           '/home/dadams/Repos/california_equity_git')
os.chdir(REPO_ROOT)
In [4]:
# Read the raw programs table; the path is relative to the working directory
# set above. low_memory=False makes pandas parse the file in one pass.
cci_csv_path = 'data_raw/cci_programs_data.csv'
data = pd.read_csv(cci_csv_path, low_memory=False)
In [5]:
# Inspect the column labels of the raw table (bare expression so the notebook displays the Index)
data.columns
Out[5]:
Index(['Project IDNumber', 'Reporting Cycle Name', 'Agency Name',
'Program Name', 'Program Description', 'Sub Program Name',
'Record Type', 'Project Name', 'Project Type', 'Project Description',
...
'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',
'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',
'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',
'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],
dtype='object', length=127)
In [6]:
import geopandas as gpd
# Load the shapefile.
# The path is relative to the working directory chosen in the "set directory"
# cell, the same way the CSV is located, instead of repeating a
# user-specific absolute path.
shapefile_path = 'california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp'
gdf = gpd.read_file(shapefile_path)
# Print the head of the GeoDataFrame
print(gdf.head())
Tract ZIP County ApproxLoc TotPop19 CIscore \
0 6.083002e+09 93454 Santa Barbara Santa Maria 4495 36.019653
1 6.083002e+09 93455 Santa Barbara Santa Maria 13173 37.030667
2 6.083002e+09 93454 Santa Barbara Santa Maria 2398 31.213140
3 6.083002e+09 93455 Santa Barbara Orcutt 4496 6.639331
4 6.083002e+09 93455 Santa Barbara Orcutt 4008 14.022852
CIscoreP Ozone OzoneP PM2_5 ... Elderly65 Hispanic \
0 69.162885 0.034190 10.566273 7.567724 ... 12.5028 68.9210
1 70.637922 0.035217 11.561917 7.624775 ... 5.3519 78.6229
2 61.069087 0.034190 10.566273 7.548835 ... 12.8857 65.7214
3 5.988401 0.036244 13.615432 7.660570 ... 14.4128 22.9537
4 23.121533 0.036244 13.615432 7.663210 ... 18.8872 33.4082
White AfricanAm NativeAm OtherMult Shape_Leng Shape_Area \
0 20.8899 0.4004 0.2670 1.3126 6999.357689 2.847611e+06
1 13.2240 2.5051 0.0000 0.9489 19100.578232 1.635292e+07
2 30.6088 0.9591 0.0000 2.1685 4970.985897 1.352329e+06
3 69.1948 0.9342 0.7117 2.5356 6558.956012 2.417717e+06
4 59.7804 0.6986 1.4721 1.3723 6570.368730 2.608422e+06
AAPI geometry
0 8.2091 POLYGON ((-39795.07 -341919.191, -38126.384 -3...
1 4.6990 POLYGON ((-39795.07 -341919.191, -39803.632 -3...
2 0.5421 POLYGON ((-38115.747 -341130.248, -38126.384 -...
3 3.6699 POLYGON ((-37341.662 -348530.437, -37252.307 -...
4 3.2685 POLYGON ((-39465.107 -348499.262, -38244.305 -...
[5 rows x 67 columns]
In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Basic cleaning: parse the operational date, keep only rows inside the
# study window (both bounds inclusive), then drop rows without GGRF funding.
data['Date Operational'] = pd.to_datetime(data['Date Operational'])
within_study_window = data['Date Operational'].between('2010-01-01', '2024-11-01')
data = (
    data[within_study_window]
    .copy()
    .dropna(subset=['Total Program GGRFFunding'])
)

# Derived columns. A comma in 'County' marks a project spanning several
# counties; the number of counties is the number of commas plus one
# (a missing County counts as a single county).
county_field = data['County']
data['Year'] = data['Date Operational'].dt.year
data['is_multi_county'] = county_field.str.contains(',', na=False)
data['partnership_size'] = county_field.str.count(',').fillna(0) + 1

# Quick validation
print(f"Total GGRF Funding: ${data['Total Program GGRFFunding'].sum()/1e9:.2f}B")
print(f"Number of projects: {len(data)}")
Total GGRF Funding: $8.13B Number of projects: 131428
In [8]:
# Temporal analysis of GGRF funding: one row of summary statistics per year
yearly_metric_spec = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
    'is_multi_county': ['count', 'mean'],
    'partnership_size': 'mean',
}
temporal = data.groupby('Year').agg(yearly_metric_spec).round(2)

# Visualize key metrics on a 2x2 grid; every panel is a line over the years
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))
yearly_panels = [
    # (axis, yearly values, title)
    (ax1, temporal[('Total Program GGRFFunding', 'count')],
     'Number of GGRF Projects by Year'),
    (ax2, temporal[('Total Program GGRFFunding', 'mean')]/1e6,
     'Average GGRF Funding per Project (Millions $)'),
    (ax3, temporal[('Is Benefit Disadvantaged Communities', 'mean')],
     'DAC Benefit Rate'),
    (ax4, temporal[('partnership_size', 'mean')],
     'Average Number of Partner Counties'),
]
for panel_axis, panel_values, panel_title in yearly_panels:
    panel_axis.plot(temporal.index, panel_values, marker='o', linewidth=2)
    panel_axis.set_title(panel_title)
    panel_axis.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("Key metrics by period:")
print(temporal)
Key metrics by period:
Total Program GGRFFunding \
count sum mean
Year
2011 1 11500 11500.00
2012 98 2328417 23759.36
2013 3379 50726172 15012.18
2014 7281 76042854 10444.01
2015 6235 61034095 9788.95
2016 8001 105685277 13209.01
2017 12745 368260901 28894.54
2018 18071 641426028 35494.77
2019 21131 1131373505 53540.94
2020 18281 1555581678 85092.81
2021 15957 1147697486 71924.39
2022 12906 1187728536 92029.18
2023 6825 1114799921 163340.65
2024 517 686268968 1327406.13
Is Benefit Disadvantaged Communities is_multi_county \
mean count mean
Year
2011 1.00 1 0.00
2012 0.29 98 0.00
2013 0.28 3379 0.00
2014 0.40 7281 0.00
2015 0.48 6235 0.01
2016 0.59 8001 0.00
2017 0.45 12745 0.00
2018 0.44 18071 0.00
2019 0.35 21131 0.00
2020 0.30 18281 0.01
2021 0.27 15957 0.01
2022 0.23 12906 0.01
2023 0.00 6825 0.01
2024 0.00 517 0.07
partnership_size
mean
Year
2011 1.00
2012 1.00
2013 1.00
2014 1.00
2015 1.02
2016 1.00
2017 1.01
2018 1.01
2019 1.01
2020 1.06
2021 1.02
2022 1.03
2023 1.03
2024 1.32
In [9]:
# Look only at projects whose operational date falls in 2024
projects_2024 = data[data['Year'] == 2024]

# Count, total and mean funding for each program
print("2024 Projects by Program:")
funding_by_program_2024 = projects_2024.groupby('Program Name').agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean']
}).round(2)
print(funding_by_program_2024)

# The five individual projects with the most funding
print("\nLargest 2024 Projects:")
largest_project_columns = ['Program Name', 'County', 'Total Program GGRFFunding', 'Date Operational']
print(projects_2024.nlargest(5, 'Total Program GGRFFunding')[largest_project_columns])
2024 Projects by Program:
Total Program GGRFFunding \
count
Program Name
Affordable Housing and Sustainable Communities ... 8
Climate Adaptation and Resiliency Program 1
Community Air Protection 220
Fire Prevention Program 12
Fluorinated Gases Emission Reduction Incentives 15
Food Production Investment Program 30
Forest Carbon Plan Implementation 5
Forest Health Program 20
Low Carbon Transit Operations Program 18
Low Carbon Transportation 4
Safe and Affordable Drinking Water Fund 10
Transformative Climate Communities 10
Transit and Intercity Rail Capital Program 29
Urban and Community Forestry Program 132
Waste Diversion 1
Wetlands and Watershed Restoration 2
sum mean
Program Name
Affordable Housing and Sustainable Communities ... 176615877 22076984.62
Climate Adaptation and Resiliency Program 299000 299000.00
Community Air Protection 80955408 367979.13
Fire Prevention Program 7806649 650554.08
Fluorinated Gases Emission Reduction Incentives 1000001 66666.73
Food Production Investment Program 70824290 2360809.67
Forest Carbon Plan Implementation 1108131 221626.20
Forest Health Program 57230331 2861516.55
Low Carbon Transit Operations Program 11165512 620306.22
Low Carbon Transportation 10744732 2686183.00
Safe and Affordable Drinking Water Fund 10457866 1045786.60
Transformative Climate Communities 38277301 3827730.10
Transit and Intercity Rail Capital Program 189696000 6541241.38
Urban and Community Forestry Program 18561997 140621.19
Waste Diversion 3950527 3950527.00
Wetlands and Watershed Restoration 7575346 3787673.00
Largest 2024 Projects:
Program Name County \
117879 Transit and Intercity Rail Capital Program Alameda
90922 Transit and Intercity Rail Capital Program Los Angeles
136661 Affordable Housing and Sustainable Communities... Los Angeles
141400 Affordable Housing and Sustainable Communities... San Francisco
100763 Affordable Housing and Sustainable Communities... San Francisco
Total Program GGRFFunding Date Operational
117879 107100000 2024-03-01
90922 40000000 2024-09-23
136661 29889806 2024-04-01
141400 29269952 2024-08-01
100763 25424799 2024-01-01
In [10]:
# Compare 2023 vs 2024 by program
projects_2023_2024 = data[data['Year'].isin([2023, 2024])]
years_comparison = (
    projects_2023_2024
    .groupby(['Year', 'Program Name'])
    .agg({
        'Total Program GGRFFunding': ['count', 'sum'],
        'Is Benefit Disadvantaged Communities': 'mean'
    })
    .round(2)
)
print("2023 vs 2024 Program Comparison:")
print(years_comparison)

# Calculate percent changes in key metrics.
# The same whole-year aggregation is applied to each of the two years.
print("\nPercent Changes 2023-2024:")
overall_metric_spec = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean'
}
metrics_2023, metrics_2024 = [
    data[data['Year'] == year].agg(overall_metric_spec)
    for year in (2023, 2024)
]
relative_change = (metrics_2024 - metrics_2023) / metrics_2023
pct_change = (relative_change * 100).round(2)
print(pct_change)
2023 vs 2024 Program Comparison:
Total Program GGRFFunding \
count
Year Program Name
2023 Affordable Housing and Sustainable Communities ... 20
Climate Adaptation and Resiliency Program 6
Climate Change Research Program 3
Climate Ready Program 4
Climate Resilience Planning 36
Climate Smart Agriculture 363
Community Air Protection 1926
Fire Prevention Program 11
Food Production Investment Program 20
Forest Carbon Plan Implementation 83
Forest Health Program 27
Funding Agricultural Replacement Measures for E... 539
Low Carbon Transit Operations Program 99
Low Carbon Transportation 3187
Low-Income Weatherization Program 26
SB 1383 Local Assistance Grant Program 12
Safe and Affordable Drinking Water Fund 22
Sustainable Agricultural Lands Conservation Pro... 15
Training and Workforce Development Program 136
Transformative Climate Communities 142
Transit and Intercity Rail Capital Program 14
Urban Greening Program 42
Waste Diversion 18
Water-Energy Efficiency 51
Wetlands and Watershed Restoration 1
Woodsmoke Reduction Program 22
2024 Affordable Housing and Sustainable Communities ... 8
Climate Adaptation and Resiliency Program 1
Community Air Protection 220
Fire Prevention Program 12
Fluorinated Gases Emission Reduction Incentives 15
Food Production Investment Program 30
Forest Carbon Plan Implementation 5
Forest Health Program 20
Low Carbon Transit Operations Program 18
Low Carbon Transportation 4
Safe and Affordable Drinking Water Fund 10
Transformative Climate Communities 10
Transit and Intercity Rail Capital Program 29
Urban and Community Forestry Program 132
Waste Diversion 1
Wetlands and Watershed Restoration 2
\
sum
Year Program Name
2023 Affordable Housing and Sustainable Communities ... 334283890
Climate Adaptation and Resiliency Program 3374200
Climate Change Research Program 396119
Climate Ready Program 4368244
Climate Resilience Planning 5041046
Climate Smart Agriculture 43068601
Community Air Protection 45727103
Fire Prevention Program 9456431
Food Production Investment Program 42815128
Forest Carbon Plan Implementation 22524961
Forest Health Program 37902075
Funding Agricultural Replacement Measures for E... 40805691
Low Carbon Transit Operations Program 114507056
Low Carbon Transportation 169989768
Low-Income Weatherization Program 4226240
SB 1383 Local Assistance Grant Program 5969666
Safe and Affordable Drinking Water Fund 11621901
Sustainable Agricultural Lands Conservation Pro... 24711311
Training and Workforce Development Program 7332437
Transformative Climate Communities 40263894
Transit and Intercity Rail Capital Program 118568000
Urban Greening Program 26722800
Waste Diversion 213843
Water-Energy Efficiency 73165
Wetlands and Watershed Restoration 743216
Woodsmoke Reduction Program 93135
2024 Affordable Housing and Sustainable Communities ... 176615877
Climate Adaptation and Resiliency Program 299000
Community Air Protection 80955408
Fire Prevention Program 7806649
Fluorinated Gases Emission Reduction Incentives 1000001
Food Production Investment Program 70824290
Forest Carbon Plan Implementation 1108131
Forest Health Program 57230331
Low Carbon Transit Operations Program 11165512
Low Carbon Transportation 10744732
Safe and Affordable Drinking Water Fund 10457866
Transformative Climate Communities 38277301
Transit and Intercity Rail Capital Program 189696000
Urban and Community Forestry Program 18561997
Waste Diversion 3950527
Wetlands and Watershed Restoration 7575346
Is Benefit Disadvantaged Communities
mean
Year Program Name
2023 Affordable Housing and Sustainable Communities ... 0.00
Climate Adaptation and Resiliency Program 0.00
Climate Change Research Program 0.00
Climate Ready Program 0.00
Climate Resilience Planning 0.00
Climate Smart Agriculture 0.00
Community Air Protection 0.00
Fire Prevention Program 0.00
Food Production Investment Program 0.00
Forest Carbon Plan Implementation 0.00
Forest Health Program 0.00
Funding Agricultural Replacement Measures for E... 0.00
Low Carbon Transit Operations Program 0.00
Low Carbon Transportation 0.00
Low-Income Weatherization Program 0.00
SB 1383 Local Assistance Grant Program 0.00
Safe and Affordable Drinking Water Fund 0.00
Sustainable Agricultural Lands Conservation Pro... 0.00
Training and Workforce Development Program 0.00
Transformative Climate Communities 0.00
Transit and Intercity Rail Capital Program 0.36
Urban Greening Program 0.00
Waste Diversion 0.00
Water-Energy Efficiency 0.06
Wetlands and Watershed Restoration 0.00
Woodsmoke Reduction Program 0.00
2024 Affordable Housing and Sustainable Communities ... 0.00
Climate Adaptation and Resiliency Program 0.00
Community Air Protection 0.00
Fire Prevention Program 0.00
Fluorinated Gases Emission Reduction Incentives 0.00
Food Production Investment Program 0.00
Forest Carbon Plan Implementation 0.00
Forest Health Program 0.00
Low Carbon Transit Operations Program 0.00
Low Carbon Transportation 0.00
Safe and Affordable Drinking Water Fund 0.00
Transformative Climate Communities 0.00
Transit and Intercity Rail Capital Program 0.00
Urban and Community Forestry Program 0.00
Waste Diversion 0.00
Wetlands and Watershed Restoration 0.00
Percent Changes 2023-2024:
Total Program GGRFFunding Is Benefit Disadvantaged Communities
count -92.42 NaN
sum -38.44 NaN
mean 712.66 -100.0
In [11]:
# Filter out Low Carbon Transportation
is_low_carbon_transportation = data['Program Name'] == 'Low Carbon Transportation'
data_filtered = data[~is_low_carbon_transportation].copy()

# Recalculate temporal analysis on the filtered projects
filtered_yearly_spec = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
    'is_multi_county': ['count', 'mean'],
    'partnership_size': 'mean',
}
temporal_filtered = data_filtered.groupby('Year').agg(filtered_yearly_spec).round(2)

# Compare 2023-2024 without LCT (per-program table; computed but not printed in this cell)
filtered_2023_2024 = data_filtered[data_filtered['Year'].isin([2023, 2024])]
years_comparison = filtered_2023_2024.groupby(['Year', 'Program Name']).agg({
    'Total Program GGRFFunding': ['count', 'sum'],
    'Is Benefit Disadvantaged Communities': 'mean'
}).round(2)

# Side-by-side yearly metrics for the two most recent years
print("2023-2024 Changes (excluding Low Carbon Transportation):")
yearly_row_2023 = temporal_filtered.loc[2023]
yearly_row_2024 = temporal_filtered.loc[2024]
changes = pd.DataFrame({'2023': yearly_row_2023, '2024': yearly_row_2024})
print(changes)
2023-2024 Changes (excluding Low Carbon Transportation):
2023 2024
Total Program GGRFFunding count 3.638000e+03 5.130000e+02
sum 9.448102e+08 6.755242e+08
mean 2.597059e+05 1.316811e+06
Is Benefit Disadvantaged Communities mean 0.000000e+00 0.000000e+00
is_multi_county count 3.638000e+03 5.130000e+02
mean 2.000000e-02 7.000000e-02
partnership_size mean 1.050000e+00 1.320000e+00
In [12]:
# 2x2 overview of the yearly metrics computed without Low Carbon Transportation
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))
filtered_panels = [
    # (axis, yearly values, title)
    (ax1, temporal_filtered[('Total Program GGRFFunding', 'count')],
     'Number of Projects (Excluding LCT)'),       # projects count
    (ax2, temporal_filtered[('Total Program GGRFFunding', 'mean')]/1e6,
     'Average Project Funding (Millions $)'),     # average funding
    (ax3, temporal_filtered[('Total Program GGRFFunding', 'sum')]/1e9,
     'Total GGRF Funding (Billions $)'),          # total funding
    (ax4, temporal_filtered[('partnership_size', 'mean')],
     'Average Number of Partner Counties'),       # partnership size
]
for panel_axis, panel_values, panel_title in filtered_panels:
    panel_axis.plot(temporal_filtered.index, panel_values, marker='o', linewidth=2)
    panel_axis.set_title(panel_title)
    panel_axis.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
- Project Volume Evolution
- Growth phase: 2014-2019 (from ~0 to 5000+ projects)
- Plateau: 2019-2022 (~4000-5000 projects)
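- Sharp decline: 2023-2024 (down to ~500 projects; note that 2024 is a partial year because the data are cut off at 2024-11-01, so part of this drop may reflect incomplete data rather than a real decline)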
- Average Project Size
- Relatively stable 2014-2023 ($0.1-0.3M per project)
- Dramatic increase in 2024 (to ~$1.3M per project)
- Suggests shift to fewer but larger projects
- Total GGRF Funding
- Steady increase: 2014-2020 (reaching ~$1.1B)
- Recent decline: 2020-2024 (down to ~$0.67B)
- More stable pattern than project counts
- Partnership Trends
- Generally stable at 1.0-1.1 partners until 2020
- Spike in 2020 (~1.24 partners)
- New peak in 2024 (~1.32 partners)
- Suggests increasing regional collaboration
In [13]:
# 1. Program Scale Analysis
# One row per program: project count, total and mean funding, mean DAC
# benefit flag, and summed GHG reductions.
program_scale = (
    data_filtered
    .groupby('Program Name')
    .agg({
        'Total Program GGRFFunding': ['count', 'sum', 'mean'],
        'Is Benefit Disadvantaged Communities': 'mean',
        'Total Project GHGReductions': 'sum'
    })
    .round(2)
)

# Replace the two-level aggregate headers with flat names (same order as above)
program_scale.columns = [
    'project_count',
    'total_funding',
    'avg_funding',
    'dac_rate',
    'total_ghg',
]

# GHG efficiency: total reductions divided by total funding, per program
program_scale['ghg_per_dollar'] = program_scale['total_ghg'].div(program_scale['total_funding'])
# Categorize programs by size
def categorize_program_size(mean_funding):
    """Map a program's mean project funding (dollars) to a size label.

    The thresholds are strict lower bounds: above 10M is 'Mega', above 1M
    is 'Large', above 500K is 'Medium'. Anything else, including values
    that compare greater than no threshold, is 'Small'.
    """
    # Ordered from the largest floor down so the first match wins.
    size_floors = (
        (10e6, 'Mega'),     # 10M
        (1e6, 'Large'),     # 1M
        (500e3, 'Medium'),  # 500K
    )
    for floor, label in size_floors:
        if mean_funding > floor:
            return label
    return 'Small'
# Label every program with its size category and show how many fall in each
program_scale['size_category'] = program_scale['avg_funding'].apply(categorize_program_size)
print("Program Scale Distribution:")
print(program_scale['size_category'].value_counts())

# 2. Geographic Analysis
# One row per distinct value of 'County' (multi-county strings form their own rows)
geographic_dist = (
    data_filtered
    .groupby('County')
    .agg({
        'Total Program GGRFFunding': ['count', 'sum'],
        'Is Benefit Disadvantaged Communities': 'mean',
        'Total Project GHGReductions': 'sum'
    })
)
geographic_dist.columns = ['project_count', 'total_funding', 'dac_rate', 'total_ghg']

# Concentration metric: share of all funding held by the five largest rows
funding_by_county = geographic_dist['total_funding']
total_funding = funding_by_county.sum()
top_5_counties = funding_by_county.nlargest(5)
concentration = (top_5_counties.sum() / total_funding) * 100
print("\nGeographic Concentration:")
print(f"Top 5 counties account for {concentration:.1f}% of funding")

# 3. Print key findings
print("\nProgram Categories by Total Funding (Billions $):")
funding_by_size = program_scale.groupby('size_category')['total_funding'].sum()
size_summary = funding_by_size.sort_values(ascending=False)/1e9
print(size_summary.round(2))

# Show largest programs
print("\nLargest Programs (by total funding):")
largest_program_columns = ['total_funding', 'project_count', 'avg_funding', 'dac_rate', 'ghg_per_dollar']
print(program_scale.nlargest(5, 'total_funding')[largest_program_columns].round(2))
Program Scale Distribution:
size_category
Small 21
Large 11
Medium 5
Mega 1
Name: count, dtype: int64
Geographic Concentration:
Top 5 counties account for 39.3% of funding
Program Categories by Total Funding (Billions $):
size_category
Large 2.19
Small 2.00
Mega 1.19
Medium 0.78
Name: total_funding, dtype: float64
Largest Programs (by total funding):
total_funding \
Program Name
Affordable Housing and Sustainable Communities ... 1192203125
Low Carbon Transit Operations Program 775906434
Transit and Intercity Rail Capital Program 771556000
Fire Prevention Program 596274123
Community Air Protection 529523228
project_count \
Program Name
Affordable Housing and Sustainable Communities ... 93
Low Carbon Transit Operations Program 766
Transit and Intercity Rail Capital Program 135
Fire Prevention Program 600
Community Air Protection 5187
avg_funding dac_rate \
Program Name
Affordable Housing and Sustainable Communities ... 12819388.44 0.16
Low Carbon Transit Operations Program 1012932.68 0.09
Transit and Intercity Rail Capital Program 5715229.63 0.19
Fire Prevention Program 993790.20 0.00
Community Air Protection 102086.61 0.00
ghg_per_dollar
Program Name
Affordable Housing and Sustainable Communities ... 0.00
Low Carbon Transit Operations Program 0.01
Transit and Intercity Rail Capital Program 0.01
Fire Prevention Program 0.00
Community Air Protection 0.00
- Program Scale Distribution
- Most programs (21) are "Small" scale
- 11 "Large" programs
- Only 1 "Mega" program (Affordable Housing at $1.19B)
- More balanced distribution than when including transportation subsidies
- Funding Allocation
- Large programs: $2.19B total
- Small programs: $2.00B total
- Mega programs: $1.19B total
- Medium programs: $0.78B total
- Total GGRF funding: ~$6.16B
- Top Programs by Funding
- Affordable Housing: $1.19B (93 projects)
- Low Carbon Transit Operations Program: $776M (766 projects)
- Transit/Rail Capital: $772M (135 projects)
- Fire Prevention: $596M (600 projects)
- Community Air Protection: $530M (5,187 projects)
- Program Characteristics
- Wide range in project counts (93 to 5,187)
- Average project sizes vary significantly:
- Affordable Housing: $12.8M/project
- Transit/Rail: $5.7M/project
- Community Air Protection: $102K/project
- Geographic Distribution
- Less concentrated than before
- Top 5 counties: 39.3% of funding (vs. previous 75.6%)
- Suggests more equitable geographic distribution
In [14]:
# 1. DAC Benefits by Program Size
# Spread of program-level DAC rates within each size category, plus the
# funding and project totals of that category.
dac_by_size_spec = {
    'dac_rate': ['mean', 'min', 'max'],
    'total_funding': 'sum',
    'project_count': 'sum',
}
dac_by_size = program_scale.groupby('size_category').agg(dac_by_size_spec).round(3)
# 2. Geographic Analysis
# Add region classification
def classify_region(county):
    """Return 'Urban', 'Central Valley' or 'Other' for a county value.

    A string with several comma-separated counties is classified by the
    first county listed (surrounding whitespace ignored). Any value that
    is in neither list, including non-string values, is 'Other'.
    """
    if isinstance(county, str):
        # Multi-county case: keep only the first county in the list
        county = county.split(',')[0].strip()
    region_members = {
        'Urban': ['Los Angeles', 'San Francisco', 'Alameda', 'San Diego', 'Orange'],
        'Central Valley': ['Fresno', 'Kern', 'Kings', 'Madera', 'Merced',
                           'San Joaquin', 'Stanislaus', 'Tulare'],
    }
    for region_name, member_counties in region_members.items():
        if county in member_counties:
            return region_name
    return 'Other'
# Tag every row of the county table with its region, then roll up by region
geographic_dist['region'] = geographic_dist.index.map(classify_region)
regional_rollup_spec = {
    'total_funding': 'sum',
    'project_count': 'sum',
    'dac_rate': 'mean',
    'total_ghg': 'sum',
}
regional_metrics = geographic_dist.groupby('region').agg(regional_rollup_spec).round(3)

# 3. Project Size vs GHG Efficiency
# Scatter of the program table: one point per program
plt.figure(figsize=(12, 8))
avg_size_millions = program_scale['avg_funding']/1e6
plt.scatter(avg_size_millions, program_scale['ghg_per_dollar'], alpha=0.6)
plt.xlabel('Average Project Size (Millions $)')
plt.ylabel('GHG Reduction per Dollar')
plt.title('Project Size vs. GHG Efficiency')

# 4. Multi-county Analysis
# A comma in 'County' marks a multi-county project; everything else
# (including a missing County) is treated as single-county.
county_has_comma = data_filtered['County'].str.contains(',', na=False)
multi_county_data = data_filtered[county_has_comma]
single_county_data = data_filtered[~county_has_comma]


def _summarize_project_group(projects):
    """Return the comparison metrics for one group of project rows."""
    funding = projects['Total Program GGRFFunding']
    funding_total = funding.sum()
    return {
        'project_count': len(projects),
        'total_funding': funding_total,
        'avg_funding': funding.mean(),
        'dac_rate': projects['Is Benefit Disadvantaged Communities'].mean(),
        'ghg_per_dollar': (projects['Total Project GHGReductions'].sum() /
                           funding_total)
    }


multi_vs_single = pd.DataFrame({
    'Multi-County': _summarize_project_group(multi_county_data),
    'Single-County': _summarize_project_group(single_county_data)
})

print("1. DAC Benefits by Program Size:")
print(dac_by_size)
print("\n2. Regional Distribution:")
print(regional_metrics)
print("\n4. Multi-County vs Single-County Projects:")
print(multi_vs_single.round(3))
plt.show()
1. DAC Benefits by Program Size:
dac_rate total_funding project_count
mean min max sum sum
size_category
Large 0.087 0.00 0.67 2185763970 1314
Medium 0.036 0.00 0.18 779274523 831
Mega 0.160 0.16 0.16 1192203125 93
Small 0.084 0.00 0.92 2003043460 28913
2. Regional Distribution:
total_funding project_count dac_rate total_ghg
region
Central Valley 1387932197 14496 0.104 25320641
Other 2289435192 9195 0.101 24839824
Urban 2482917689 7460 0.161 15727243
4. Multi-County vs Single-County Projects:
Multi-County Single-County
project_count 7.520000e+02 3.039900e+04
total_funding 5.210746e+08 5.639210e+09
avg_funding 6.929184e+05 1.855064e+05
dac_rate 1.090000e-01 2.730000e-01
ghg_per_dollar 1.200000e-02 1.100000e-02
- Efficiency Sweet Spot
- Highest GHG efficiency (0.12-0.13 reduction per dollar) occurs in programs whose average project size is around $2M (each point in the scatter is a program, not an individual project)
- Secondary peak (0.06) in smaller projects under $1M
- Suggests optimal scale for GHG reduction isn't necessarily larger projects
- Size-Efficiency Relationship
- No clear linear relationship
- Most projects cluster in lower ranges (both size and efficiency)
- Largest projects ($12M+) show relatively low GHG efficiency
- Suggests diminishing returns as projects scale up
- Distribution Pattern
- Dense cluster of projects under $2M with varying efficiency
- Sparse distribution in higher project sizes
- Few projects achieve both large scale and high efficiency
Let's identify:
- What programs occupy that sweet spot around $2M with high efficiency?
- What characteristics do those efficient projects share?
- Are there geographic patterns in the more efficient projects?
In [15]:
# Identify the most GHG-efficient programs and analyze their characteristics.
# Programs are split into four equal-sized tiers by GHG reduction per dollar.
tier_labels = ['Low', 'Medium-Low', 'Medium-High', 'High']
program_scale['efficiency_tier'] = pd.qcut(program_scale['ghg_per_dollar'], q=4, labels=tier_labels)

print("1. Most GHG-Efficient Programs:")
in_top_tier = program_scale['efficiency_tier'] == 'High'
high_efficiency = program_scale[in_top_tier].sort_values('ghg_per_dollar', ascending=False)
print(high_efficiency[['total_funding', 'avg_funding', 'ghg_per_dollar', 'dac_rate']].round(3))

# Analyze characteristics of high-efficiency programs
print("\n2. Characteristics of High-Efficiency Programs:")
print("\nSize Distribution:")
print(high_efficiency['size_category'].value_counts())

# Geographic analysis of high-efficiency programs
print("\n3. Geographic Distribution of High-Efficiency Projects:")
# Keep only the project rows that belong to a top-tier program
belongs_to_top_program = data_filtered['Program Name'].isin(high_efficiency.index)
high_eff_projects = data_filtered[belongs_to_top_program]

# NOTE(review): this rebinds `geographic_dist`, which earlier held the
# all-program county table with flat column names. After this cell it has
# two-level columns, so re-running the earlier regional roll-up fails until
# the county table is rebuilt. Consider a distinct name.
geographic_dist = (
    high_eff_projects
    .groupby('County')
    .agg({
        'Total Program GGRFFunding': ['count', 'sum'],
        'Total Project GHGReductions': 'sum'
    })
    .round(2)
)

# GHG reductions per dollar for each county row
county_ghg_total = geographic_dist[('Total Project GHGReductions', 'sum')]
county_funding_total = geographic_dist[('Total Program GGRFFunding', 'sum')]
geographic_dist['ghg_per_dollar'] = county_ghg_total / county_funding_total
print(geographic_dist.nlargest(5, 'ghg_per_dollar'))
# Two side-by-side scatters for the top-tier programs
plt.figure(figsize=(15, 8))
top_tier_efficiency = high_efficiency['ghg_per_dollar']

# Left panel: size vs efficiency for high performers
plt.subplot(1, 2, 1)
plt.scatter(high_efficiency['avg_funding']/1e6, top_tier_efficiency, alpha=0.6, s=100)
plt.xlabel('Average Project Size (Millions $)')
plt.ylabel('GHG Reduction per Dollar')
plt.title('High-Efficiency Programs: Size vs Performance')
# Label each point with its program name, truncated to 20 characters
for program_name, program_row in high_efficiency.iterrows():
    if len(program_name) > 20:
        point_label = program_name[:20] + '...'
    else:
        point_label = program_name
    point_xy = (program_row['avg_funding']/1e6, program_row['ghg_per_dollar'])
    plt.annotate(point_label, point_xy, xytext=(5, 5), textcoords='offset points')

# Right panel: DAC rate vs efficiency
plt.subplot(1, 2, 2)
plt.scatter(high_efficiency['dac_rate'], top_tier_efficiency, alpha=0.6, s=100)
plt.xlabel('DAC Benefit Rate')
plt.ylabel('GHG Reduction per Dollar')
plt.title('High-Efficiency Programs: Equity vs Performance')

plt.tight_layout()
plt.show()
1. Most GHG-Efficient Programs:
total_funding \
Program Name
Sustainable Agricultural Lands Conservation Pro... 122424176
Climate Smart Agriculture 338161549
Fluorinated Gases Emission Reduction Incentives 1000001
Low-Carbon Fuels Production Program 12500000
Food Production Investment Program 117791478
Wetlands and Watershed Restoration 16171301
Forest Health Program 207621340
Waste Diversion 118571417
Renewable Energy for Agriculture Program 9500000
Woodsmoke Reduction Program 7895909
avg_funding \
Program Name
Sustainable Agricultural Lands Conservation Pro... 1275251.83
Climate Smart Agriculture 257941.68
Fluorinated Gases Emission Reduction Incentives 66666.73
Low-Carbon Fuels Production Program 3125000.00
Food Production Investment Program 2103419.25
Wetlands and Watershed Restoration 2021412.62
Forest Health Program 1701814.26
Waste Diversion 463169.60
Renewable Energy for Agriculture Program 211111.11
Woodsmoke Reduction Program 8233.48
ghg_per_dollar dac_rate
Program Name
Sustainable Agricultural Lands Conservation Pro... 0.123 0.01
Climate Smart Agriculture 0.062 0.05
Fluorinated Gases Emission Reduction Incentives 0.037 0.00
Low-Carbon Fuels Production Program 0.036 0.00
Food Production Investment Program 0.025 0.00
Wetlands and Watershed Restoration 0.025 0.00
Forest Health Program 0.023 0.00
Waste Diversion 0.016 0.09
Renewable Energy for Agriculture Program 0.013 0.00
Woodsmoke Reduction Program 0.013 0.00
2. Characteristics of High-Efficiency Programs:
Size Distribution:
size_category
Large 5
Small 5
Name: count, dtype: int64
3. Geographic Distribution of High-Efficiency Projects:
Total Program GGRFFunding \
count sum
County
Sierra 5 485667
Mono 10 2946879
Lassen 28 5510913
Calaveras 32 9678108
Los Angeles, Solano 1 212629
Total Project GHGReductions ghg_per_dollar
sum
County
Sierra 730033 1.503155
Mono 1570341 0.532883
Lassen 2089977 0.379243
Calaveras 1857223 0.191899
Los Angeles, Solano 23680 0.111368
In [16]:
# Define urban/rural classification
def classify_urban_rural(county):
    """Map a county label to a coarse region type.

    Parameters
    ----------
    county : str or missing
        County name as it appears in the 'County' column. Comma-separated
        multi-county entries (e.g. 'Los Angeles, Solano') are classified by
        the first county listed.

    Returns
    -------
    str or None
        'Urban', 'Central Valley' or 'Rural'. None is returned when the
        county is missing, not a string, or blank, so that such rows drop out
        of groupby aggregations instead of being counted as 'Rural'.
    """
    urban_counties = {'Los Angeles', 'San Francisco', 'Alameda', 'San Diego', 'Orange', 'Santa Clara'}
    central_valley = {'Fresno', 'Kern', 'Kings', 'Madera', 'Merced', 'San Joaquin', 'Stanislaus', 'Tulare'}

    # A missing county (NaN/None) carries no location information; labelling
    # it 'Rural' would silently inflate the Rural group.
    if not isinstance(county, str) or not county.strip():
        return None

    # Only the first-listed county decides the region of a multi-county project.
    primary_county = county.split(',')[0].strip()

    if primary_county in urban_counties:
        return 'Urban'
    if primary_county in central_valley:
        return 'Central Valley'
    # Everything that is neither in the urban list nor in the Central Valley list.
    return 'Rural'
# Tag every project row with its region type (derived from the 'County' column)
data_filtered['region_type'] = data_filtered['County'].map(classify_urban_rural)

# Per-region aggregates: funding count/sum/mean, GHG sum/mean, share benefiting DACs
region_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': ['sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
}
region_analysis = data_filtered.groupby('region_type').agg(region_metrics).round(3)

# GHG reductions achieved per dollar of funding, per region
ghg_efficiency = region_analysis[('Total Project GHGReductions', 'sum')].div(
    region_analysis[('Total Program GGRFFunding', 'sum')]
)

# Compact summary table: funding in billions, average project size in millions
summary_df = pd.DataFrame({
    'Project Count': region_analysis[('Total Program GGRFFunding', 'count')],
    'Total Funding (B)': region_analysis[('Total Program GGRFFunding', 'sum')] / 1e9,
    'Avg Project Size (M)': region_analysis[('Total Program GGRFFunding', 'mean')] / 1e6,
    'GHG Efficiency': ghg_efficiency,
    'DAC Rate': region_analysis[('Is Benefit Disadvantaged Communities', 'mean')],
})
print("Urban/Rural Analysis Summary:")
print(summary_df.round(3))

# 2x2 grid of bar charts, one summary column per panel
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
region_panels = [
    # (axes, summary column, panel title, y-axis label)
    (ax1, 'Total Funding (B)', 'Total Funding by Region (Billions $)', 'Funding (Billions $)'),
    (ax2, 'Avg Project Size (M)', 'Average Project Size by Region (Millions $)', 'Average Size (Millions $)'),
    (ax3, 'GHG Efficiency', 'GHG Reduction Efficiency by Region', 'GHG Reduction per Dollar'),
    (ax4, 'DAC Rate', 'DAC Benefit Rate by Region', 'Proportion Benefiting DACs'),
]
for panel_ax, summary_col, panel_title, panel_ylabel in region_panels:
    panel_ax.bar(summary_df.index, summary_df[summary_col], color='skyblue')
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(panel_ylabel)
plt.tight_layout()
plt.show()

# Funding (in millions of dollars) per program and region; show the ten
# programs with the largest Urban funding
print("\nTop Programs by Region (Funding in Millions $):")
program_by_region = pd.crosstab(
    data_filtered['Program Name'],
    data_filtered['region_type'],
    values=data_filtered['Total Program GGRFFunding'],
    aggfunc='sum'
).div(1e6)
top_urban_programs = program_by_region.sort_values('Urban', ascending=False).head(10)
print(top_urban_programs.round(2))
Urban/Rural Analysis Summary:
Project Count Total Funding (B) Avg Project Size (M) \
region_type
Central Valley 14496 1.388 0.096
Rural 8474 2.183 0.258
Urban 8181 2.589 0.316
GHG Efficiency DAC Rate
region_type
Central Valley 0.018 0.181
Rural 0.011 0.233
Urban 0.007 0.462
Top Programs by Region (Funding in Millions $): region_type Central Valley Rural \ Program Name Transit and Intercity Rail Capital Program 18.28 86.89 Affordable Housing and Sustainable Communities ... 205.61 321.80 Low Carbon Transit Operations Program 52.88 178.45 Community Air Protection 144.75 145.80 Transformative Climate Communities 69.17 62.67 Urban Greening Program 18.91 58.50 Fire Prevention Program 51.09 493.55 Waste Diversion 22.71 50.54 Low-Income Weatherization Program 62.87 57.02 Urban and Community Forestry Program 7.55 13.80 region_type Urban Program Name Transit and Intercity Rail Capital Program 666.38 Affordable Housing and Sustainable Communities ... 664.79 Low Carbon Transit Operations Program 544.58 Community Air Protection 238.97 Transformative Climate Communities 70.89 Urban Greening Program 61.36 Fire Prevention Program 51.63 Waste Diversion 45.32 Low-Income Weatherization Program 39.08 Urban and Community Forestry Program 37.84
- GHG Efficiency Gap
- Central Valley is ~3x more efficient than Urban areas (0.0182 vs 0.0066 GHG reduction per dollar)
- Rural areas also outperform Urban (0.0108 vs 0.0066)
- Suggests climate dollars go further in non-urban areas
- Inverse DAC Pattern
- Urban areas have highest DAC benefit rate (~0.46)
- Rural areas at ~0.23 DAC rate
- Central Valley lowest at ~0.18
- Investment Distribution
- Urban areas get most funding ($2.6B)
- Rural second ($2.2B)
- Central Valley least ($1.4B)
- Average project sizes follow same pattern
In [17]:
# Total funding and GHG reductions for every (program, region) pair, then the
# GHG-per-dollar ratio for each pair
print("Program Types and Efficiency by Region:")
program_totals_spec = {
    'Total Program GGRFFunding': 'sum',
    'Total Project GHGReductions': 'sum',
}
program_efficiency = (
    data_filtered
    .groupby(['Program Name', 'region_type'])
    .agg(program_totals_spec)
    .reset_index()
)
program_efficiency['efficiency'] = program_efficiency['Total Project GHGReductions'].div(
    program_efficiency['Total Program GGRFFunding']
)

# For each region, list the five programs with the highest efficiency ratio
report_columns = ['Program Name', 'Total Program GGRFFunding', 'efficiency']
for region in ['Urban', 'Rural', 'Central Valley']:
    print(f"\n{region} Most Efficient Programs:")
    in_region = program_efficiency['region_type'] == region
    region_data = program_efficiency[in_region]
    top_five = region_data.nlargest(5, 'efficiency')
    print(top_five[report_columns].round(4))
Program Types and Efficiency by Region:
Urban Most Efficient Programs:
Program Name \
100 Woodsmoke Reduction Program
29 Fluorinated Gases Emission Reduction Incentives
61 Renewable Energy for Agriculture Program
89 Waste Diversion
32 Food Production Investment Program
Total Program GGRFFunding efficiency
100 808 0.1027
29 530239 0.0380
61 42551 0.0290
89 45316021 0.0203
32 13221462 0.0129
Rural Most Efficient Programs:
Program Name \
69 Sustainable Agricultural Lands Conservation Pr...
49 Low-Carbon Fuels Production Program
28 Fluorinated Gases Emission Reduction Incentives
31 Food Production Investment Program
37 Forest Health Program
Total Program GGRFFunding efficiency
69 108577417 0.1273
49 5000000 0.0905
28 469762 0.0352
31 32710900 0.0298
37 163034942 0.0285
Central Valley Most Efficient Programs:
Program Name \
68 Sustainable Agricultural Lands Conservation Pr...
14 Climate Smart Agriculture
30 Food Production Investment Program
87 Waste Diversion
59 Renewable Energy for Agriculture Program
Total Program GGRFFunding efficiency
68 9744397 0.1253
14 274120208 0.0751
30 71859116 0.0255
87 22712521 0.0252
59 3761222 0.0180
This breakdown is really revealing about what works where:
- Urban Efficiency Leaders
- Woodsmoke Reduction (0.1027)
- Fluorinated Gases (0.0380)
- Renewable Energy for Agriculture (0.0290)
- But notice: Top performers have relatively small funding amounts (except Waste Diversion at $45M)
- Rural Efficiency Champions
- Sustainable Agricultural Lands (0.1273, $108M)
- Low-Carbon Fuels (0.0905)
- Larger programs achieving high efficiency
- Strong agriculture and land-use focus
- Central Valley Success Stories
- Sustainable Agricultural Lands (0.1253)
- Climate Smart Agriculture (0.0751, $274M)
- High efficiency with substantial funding amounts
Key Patterns:
- Agricultural and land-use programs dominate efficiency in rural/CV
- Urban areas achieve efficiency through smaller, targeted programs
- Central Valley shows ability to maintain efficiency at scale
- Similar program types (e.g., Food Production) perform differently by region
Possible follow-up analyses:
- Analyze the characteristics of the most efficient programs in each region?
- Look at how program size relates to efficiency within each region?
- Examine whether these patterns change over time?
- Investigate if multi-county collaborations affect these regional differences?
In [18]:
# Flag projects whose 'County' entry lists more than one county (comma-separated);
# rows with a missing county are treated as single-county
data_filtered['is_multi_county'] = data_filtered['County'].str.contains(',', na=False)

# Funding, GHG reductions and DAC share for each (region, multi-county flag) pair
collab_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean',
}
regional_collab = (
    data_filtered
    .groupby(['region_type', 'is_multi_county'])
    .agg(collab_metrics)
    .round(3)
)

# GHG reductions per dollar of funding for each group
collab_ghg_total = regional_collab[('Total Project GHGReductions', 'sum')]
collab_funding_total = regional_collab[('Total Program GGRFFunding', 'sum')]
regional_collab['ghg_per_dollar'] = collab_ghg_total / collab_funding_total

print("Multi-County vs Single-County by Region:")
print(regional_collab)
Multi-County vs Single-County by Region:
Total Program GGRFFunding \
count sum
region_type is_multi_county
Central Valley False 14312 1314145988
True 184 73786209
Rural False 8194 2060718991
True 280 122681272
Urban False 7893 2264345480
True 288 324607138
Total Project GHGReductions \
mean sum
region_type is_multi_county
Central Valley False 91821.268 24949448
True 401012.005 371193
Rural False 251491.212 22645122
True 438147.400 906509
Urban False 286880.208 12069861
True 1127108.118 4945575
Is Benefit Disadvantaged Communities \
mean
region_type is_multi_county
Central Valley False 0.182
True 0.103
Rural False 0.238
True 0.086
Urban False 0.474
True 0.135
ghg_per_dollar
region_type is_multi_county
Central Valley False 0.018985
True 0.005031
Rural False 0.010989
True 0.007389
Urban False 0.005330
True 0.015236
This intersection of multi-county and regional patterns reveals:
- Project Size Patterns
- Average project size is consistently larger in multi-county projects across all regions:
- Urban: $1.13M vs $287K
- Rural: $438K vs $251K
- Central Valley: $401K vs $92K
- GHG Efficiency Varies By Region
- Single-county efficiency:
- Central Valley leads (0.019)
- Rural second (0.011)
- Urban lowest (0.005)
- Multi-county efficiency shows different pattern:
- Urban leads (0.015)
- Rural and Central Valley lower (0.007 and 0.005)
- DAC Benefits Trade-off
- Single-county DAC rates:
- Urban highest (47.4%)
- Rural (23.8%)
- Central Valley (18.2%)
- Multi-county shows lower DAC rates across all regions:
- Urban (13.5%)
- Central Valley (10.3%)
- Rural lowest (8.6%)
Key Findings:
- Multi-county collaborations most successful in urban areas for GHG efficiency
- Single-county projects better at reaching DACs across all regions
- Central Valley efficiency advantage disappears in multi-county projects
- Urban areas seem to benefit most from regional coordination
This suggests different collaboration strategies might be needed for different regions.
In [19]:
# List the column names of the shapefile GeoDataFrame (shown as the cell output)
gdf.columns
Out[19]:
Index(['Tract', 'ZIP', 'County', 'ApproxLoc', 'TotPop19', 'CIscore',
'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5', 'PM2_5_P', 'DieselPM',
'DieselPM_P', 'Pesticide', 'PesticideP', 'Tox_Rel', 'Tox_Rel_P',
'Traffic', 'TrafficP', 'DrinkWat', 'DrinkWatP', 'Lead', 'Lead_P',
'Cleanup', 'CleanupP', 'GWThreat', 'GWThreatP', 'HazWaste', 'HazWasteP',
'ImpWatBod', 'ImpWatBodP', 'SolWaste', 'SolWasteP', 'PollBurd',
'PolBurdSc', 'PolBurdP', 'Asthma', 'AsthmaP', 'LowBirtWt', 'LowBirWP',
'Cardiovas', 'CardiovasP', 'Educatn', 'EducatP', 'Ling_Isol',
'Ling_IsolP', 'Poverty', 'PovertyP', 'Unempl', 'UnemplP', 'HousBurd',
'HousBurdP', 'PopChar', 'PopCharSc', 'PopCharP', 'Child_10',
'Pop_10_64', 'Elderly65', 'Hispanic', 'White', 'AfricanAm', 'NativeAm',
'OtherMult', 'Shape_Leng', 'Shape_Area', 'AAPI', 'geometry'],
dtype='object')
In [22]:
# Clean census tract IDs in both dataframes
def standardize_tract(tract):
    """Normalise a census tract identifier to a canonical digit string.

    The two datasets store tract IDs differently (text in one, float64 in the
    other), so every representation of the same tract must collapse to the
    same key before merging.

    Parameters
    ----------
    tract : int, float, str or missing
        Raw tract identifier.

    Returns
    -------
    str or None
        The ID as a string of digits with no leading zeros and no decimal
        suffix (e.g. 6083002103.0, '6083002103.0' and '06083002103' all give
        '6083002103'). Non-numeric text is returned stripped. None is
        returned for missing values and blank strings.
    """
    if pd.isna(tract):
        return None
    if isinstance(tract, (int, float)):
        # float64 IDs such as 6.083002e+09 become '6083002103'
        return str(int(tract))

    text = str(tract).strip()
    if not text:
        # A blank ID is missing data, not a key to join on.
        return None

    # Numeric text may carry a leading zero ('06083002103') or a float suffix
    # ('6083002103.0'); route it through int() so it matches the numeric path.
    whole, dot, fraction = text.partition('.')
    if whole.isascii() and whole.isdigit() and (not dot or fraction.strip('0') == ''):
        return str(int(whole))
    return text
# Check the data first: the two sources store tract IDs with different dtypes
print("\nCES Tract Examples:")
print(gdf['Tract'].head())
print("\nCCI Tract Examples:")
print(data_filtered['Census Tract'].head())

# Clean and standardize so both sides share one string key
gdf['Tract_clean'] = gdf['Tract'].apply(standardize_tract)
data_filtered['Tract_clean'] = data_filtered['Census Tract'].apply(standardize_tract)

# Check for missing tracts
print("\nMissing Tract Counts:")
print("CES missing tracts:", gdf['Tract_clean'].isna().sum())
print("CCI missing tracts:", data_filtered['Tract_clean'].isna().sum())

# Merge datasets, excluding null tracts. The inner join keeps only projects
# whose tract also appears in the CES data.
merged_data = pd.merge(
    data_filtered[data_filtered['Tract_clean'].notna()],
    gdf[['Tract_clean', 'CIscore', 'CIscoreP', 'PollBurd', 'Poverty', 'TotPop19']],
    on='Tract_clean',
    how='inner'
)
print("\nMerged Data Summary:")
print("Total rows:", len(merged_data))
print("Unique tracts:", merged_data['Tract_clean'].nunique())

# Basic analysis of funding by CES score.
# The quintiles are cut over merged project rows, so each bin holds roughly
# the same number of projects (not the same number of tracts).
# NOTE(review): if CIscore uses a negative sentinel for "no score", those rows
# would land in 'Lowest Burden' -- confirm against the CES data dictionary.
print("\nFunding Distribution by CalEnviroScreen Score Quintiles:")
merged_data['CES_quintile'] = pd.qcut(merged_data['CIscore'], q=5,
                                      labels=['Lowest Burden', 'Low', 'Medium', 'High', 'Highest Burden'])

# CES_quintile is categorical; passing observed explicitly keeps the current
# behaviour (all categories reported) and silences the pandas FutureWarning
# about the changing default.
ces_analysis = merged_data.groupby('CES_quintile', observed=False).agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean'
}).round(2)
print(ces_analysis)
CES Tract Examples:
0 6083002103
1 6083002402
2 6083002102
3 6083002010
4 6083002009
Name: Tract, dtype: object
CCI Tract Examples:
16067 NaN
25098 6.001402e+09
25099 6.001402e+09
25100 6.001406e+09
25101 6.001406e+09
Name: Census Tract, dtype: float64
Missing Tract Counts:
CES missing tracts: 0
CCI missing tracts: 17967
Merged Data Summary:
Total rows: 13184
Unique tracts: 3665
Funding Distribution by CalEnviroScreen Score Quintiles:
Total Program GGRFFunding \
count sum mean
CES_quintile
Lowest Burden 2638 39278353 14889.44
Low 2642 39059436 14784.04
Medium 2636 61809955 23448.39
High 2643 50317266 19037.94
Highest Burden 2625 37787591 14395.27
Total Project GHGReductions \
sum
CES_quintile
Lowest Burden 2270359
Low 202457
Medium 265818
High 210633
Highest Burden 159776
Is Benefit Disadvantaged Communities
mean
CES_quintile
Lowest Burden 0.04
Low 0.55
Medium 0.84
High 0.87
Highest Burden 0.72
/tmp/ipykernel_30782/1664997265.py:41: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
ces_analysis = merged_data.groupby('CES_quintile').agg({
In [23]:
import matplotlib.pyplot as plt

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# CES_quintile is categorical. Build the groupby once with observed=False so
# every quintile is kept (current behaviour) and pandas no longer emits a
# FutureWarning for each of the four panels.
by_quintile = merged_data.groupby('CES_quintile', observed=False)

# Average Funding by CES Score
by_quintile['Total Program GGRFFunding'].mean().plot(
    kind='bar', ax=ax1, color='skyblue')
ax1.set_title('Average Funding by Environmental Burden')
ax1.set_ylabel('Average Funding ($)')

# GHG Reductions
by_quintile['Total Project GHGReductions'].sum().plot(
    kind='bar', ax=ax2, color='skyblue')
ax2.set_title('Total GHG Reductions by Environmental Burden')
ax2.set_ylabel('GHG Reductions')

# DAC Benefit Rate
by_quintile['Is Benefit Disadvantaged Communities'].mean().plot(
    kind='bar', ax=ax3, color='skyblue')
ax3.set_title('DAC Benefit Rate by Environmental Burden')
ax3.set_ylabel('Proportion Benefiting DACs')

# Project Counts (rows with a non-null funding value)
by_quintile['Total Program GGRFFunding'].count().plot(
    kind='bar', ax=ax4, color='skyblue')
ax4.set_title('Number of Projects by Environmental Burden')
ax4.set_ylabel('Number of Projects')

plt.tight_layout()
plt.show()

# Also examine program types by CES quintile: funding per quintile and program
print("\nProgram Types by Environmental Burden:")
program_dist = pd.crosstab(
    merged_data['CES_quintile'],
    merged_data['Program Name'],
    values=merged_data['Total Program GGRFFunding'],
    aggfunc='sum'
)/1e6  # Convert to millions
print(program_dist.round(2))
/tmp/ipykernel_30782/541290860.py:6: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
merged_data.groupby('CES_quintile')['Total Program GGRFFunding'].mean().plot(
/tmp/ipykernel_30782/541290860.py:12: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
merged_data.groupby('CES_quintile')['Total Project GHGReductions'].sum().plot(
/tmp/ipykernel_30782/541290860.py:18: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
merged_data.groupby('CES_quintile')['Is Benefit Disadvantaged Communities'].mean().plot(
/tmp/ipykernel_30782/541290860.py:24: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
merged_data.groupby('CES_quintile')['Total Program GGRFFunding'].count().plot(
Program Types by Environmental Burden: Program Name Low-Income Weatherization Program \ CES_quintile Lowest Burden 3.69 Low 28.77 Medium 50.45 High 45.19 Highest Burden 30.88 Program Name Sustainable Agricultural Lands Conservation Program \ CES_quintile Lowest Burden 22.05 Low 0.00 Medium 0.00 High 0.00 Highest Burden 0.00 Program Name Transformative Climate Communities Waste Diversion \ CES_quintile Lowest Burden 0.00 0.34 Low 0.48 0.23 Medium 0.94 0.15 High 0.33 0.12 Highest Burden 3.14 0.14 Program Name Water-Energy Efficiency Woodsmoke Reduction Program \ CES_quintile Lowest Burden 7.13 6.00 Low 8.02 1.50 Medium 9.63 0.36 High 4.31 0.01 Highest Burden 3.38 0.03 Program Name Workforce Training and Development CES_quintile Lowest Burden 0.07 Low 0.07 Medium 0.29 High 0.36 Highest Burden 0.21
In [24]:
# Create meaningful CES score categories
def categorize_ces(score):
    """Bucket a CalEnviroScreen percentile into one of four burden bands.

    Parameters
    ----------
    score : float or missing
        Percentile value, expected in the range 0-100.

    Returns
    -------
    str or None
        'Highest Burden (75-100%)', 'High Burden (50-75%)',
        'Moderate Burden (25-50%)' or 'Low Burden (0-25%)'. None is returned
        for missing (None/NaN) or out-of-range values so they are not counted
        as 'Low Burden'.
    """
    # NaN fails both comparisons, so this one test rejects missing values as
    # well as anything outside the valid percentile range.
    # NOTE(review): the CES shapefile may encode "no score" as a negative
    # sentinel (e.g. -999) -- confirm against the data dictionary.
    if score is None or not (0 <= score <= 100):
        return None

    if score >= 75:  # Top 25% - most burdened
        return 'Highest Burden (75-100%)'
    if score >= 50:
        return 'High Burden (50-75%)'
    if score >= 25:
        return 'Moderate Burden (25-50%)'
    # Bottom 25% - least burdened
    return 'Low Burden (0-25%)'
# Label each merged row with a burden band derived from its CIscoreP value
merged_data['CES_category'] = merged_data['CIscoreP'].apply(categorize_ces)

# Funding, GHG reductions and DAC share per burden band
band_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean',
}
by_category = merged_data.groupby('CES_category')
ces_analysis_new = by_category.agg(band_metrics).round(2)
print("Distribution using CES Percentile Thresholds:")
print(ces_analysis_new)

# 2x2 grid of bar charts, one aggregate per panel (bands appear in the
# groupby's sorted label order)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
band_panels = [
    # (axes, source column, aggregation, panel title, y-axis label)
    (ax1, 'Total Program GGRFFunding', 'mean',
     'Average Funding by Environmental Burden', 'Average Funding ($)'),
    (ax2, 'Total Project GHGReductions', 'sum',
     'Total GHG Reductions by Environmental Burden', 'GHG Reductions'),
    (ax3, 'Is Benefit Disadvantaged Communities', 'mean',
     'DAC Benefit Rate by Environmental Burden', 'Proportion Benefiting DACs'),
    (ax4, 'Total Program GGRFFunding', 'count',
     'Number of Projects by Environmental Burden', 'Number of Projects'),
]
for panel_ax, source_col, how, panel_title, panel_ylabel in band_panels:
    by_category[source_col].agg(how).plot(kind='bar', ax=panel_ax, color='skyblue')
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel(panel_ylabel)

plt.tight_layout()
plt.show()
Distribution using CES Percentile Thresholds:
Total Program GGRFFunding \
count sum mean
CES_category
High Burden (50-75%) 2932 42831569 14608.31
Highest Burden (75-100%) 7661 146493459 19121.98
Low Burden (0-25%) 1182 7475213 6324.21
Moderate Burden (25-50%) 1409 31452360 22322.47
Total Project GHGReductions \
sum
CES_category
High Burden (50-75%) 218725
Highest Burden (75-100%) 622363
Low Burden (0-25%) 163482
Moderate Burden (25-50%) 2104473
Is Benefit Disadvantaged Communities
mean
CES_category
High Burden (50-75%) 0.56
Highest Burden (75-100%) 0.81
Low Burden (0-25%) 0.01
Moderate Burden (25-50%) 0.07
In [25]:
# Define category order (least to most burdened) for the x-axis
category_order = [
    'Low Burden (0-25%)',
    'Moderate Burden (25-50%)',
    'High Burden (50-75%)',
    'Highest Burden (75-100%)'
]

# Create visualizations with ordered categories
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
by_category = merged_data.groupby('CES_category')

# reindex() is used instead of series[category_order]: it imposes the same
# order, but a band with no rows becomes an empty slot rather than raising
# KeyError.

# Average Funding
by_category['Total Program GGRFFunding'].mean().reindex(category_order).plot(
    kind='bar', ax=ax1, color='skyblue')
ax1.set_title('Average Funding by Environmental Burden')
ax1.set_ylabel('Average Funding ($)')

# GHG Reductions
by_category['Total Project GHGReductions'].sum().reindex(category_order).plot(
    kind='bar', ax=ax2, color='skyblue')
ax2.set_title('Total GHG Reductions by Environmental Burden')
ax2.set_ylabel('GHG Reductions')

# DAC Benefit Rate
by_category['Is Benefit Disadvantaged Communities'].mean().reindex(category_order).plot(
    kind='bar', ax=ax3, color='skyblue')
ax3.set_title('DAC Benefit Rate by Environmental Burden')
ax3.set_ylabel('Proportion Benefiting DACs')

# Project Counts
by_category['Total Program GGRFFunding'].count().reindex(category_order).plot(
    kind='bar', ax=ax4, color='skyblue')
ax4.set_title('Number of Projects by Environmental Burden')
ax4.set_ylabel('Number of Projects')

# Rotate x-labels for better readability
for ax in [ax1, ax2, ax3, ax4]:
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.show()
In [26]:
# Agency Performance Analysis: funding, GHG reductions and DAC share per agency
agency_performance = data_filtered.groupby('Agency Name').agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean'
}).round(3)
print("Agency Performance Overview:")
print(agency_performance)

# Add efficiency metrics (GHG reductions per dollar). Assigning with a plain
# string key on MultiIndex columns stores the column as ('ghg_per_dollar', '').
agency_performance['ghg_per_dollar'] = (
    agency_performance[('Total Project GHGReductions', 'sum')] /
    agency_performance[('Total Program GGRFFunding', 'sum')]
)

# Look at agency patterns in multi-county projects (comma-separated 'County')
multi_county = data_filtered[data_filtered['County'].str.contains(',', na=False)]
multi_agency_patterns = multi_county.groupby('Agency Name').agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean'
}).round(3)
print("\nAgency Performance in Multi-County Projects:")
print(multi_agency_patterns)

# Visualize agency performance metrics
plt.figure(figsize=(15, 10))

# Pull the plotted columns out with their full tuple keys so each is a plain
# Series of scalars.
agency_efficiency = agency_performance[('ghg_per_dollar', '')]
agency_dac_rate = agency_performance[('Is Benefit Disadvantaged Communities', 'mean')]

# Plot GHG efficiency vs DAC benefit rate by agency
plt.scatter(
    agency_efficiency,
    agency_dac_rate,
    s=agency_performance[('Total Program GGRFFunding', 'sum')]/1e6,  # Size by total funding
    alpha=0.6
)

# Add agency labels. Iterating over scalar values avoids row['ghg_per_dollar'],
# which on a MultiIndex row is a one-element Series and triggers the
# "Calling float on a single element Series" FutureWarning in matplotlib.
for agency, x_val, y_val in zip(agency_performance.index, agency_efficiency, agency_dac_rate):
    label = agency[:20] + '...' if len(agency) > 20 else agency
    plt.annotate(label, (x_val, y_val), xytext=(5, 5), textcoords='offset points')

plt.xlabel('GHG Reduction per Dollar')
plt.ylabel('DAC Benefit Rate')
plt.title('Agency Performance: Climate Impact vs Equity\nSize = Total Funding')
plt.tight_layout()
plt.show()
Agency Performance Overview:
Total Program GGRFFunding \
count
Agency Name
California Air Resources Board 12329
California Coastal Commission 17
California Conservation Corps 766
California Department of Community Services and... 6331
California Department of Fish and Wildlife 8
California Department of Food and Agriculture 1311
California Department of Forestry and Fire Prot... 2047
California Department of Resources Recycling an... 723
California Department of Transportation 769
California Department of Water Resources 4887
California Energy Commission 119
California Environmental Protection Agency 2
California Governors Office of Emergency Services 63
California Natural Resources Agency 213
California State Coastal Conservancy 10
California State Transportation Agency 135
California State Water Resources Control Board 100
California Strategic Growth Council 1200
California Wildlife Conservation Board 17
California Workforce Development Board 30
San Francisco Bay Conservation and Development ... 74
\
sum mean
Agency Name
California Air Resources Board 796368463 64593.111
California Coastal Commission 2667413 156906.647
California Conservation Corps 47161363 61568.359
California Department of Community Services and... 158967758 25109.423
California Department of Fish and Wildlife 16171301 2021412.625
California Department of Food and Agriculture 338161549 257941.685
California Department of Forestry and Fire Prot... 967562562 472673.455
California Department of Resources Recycling an... 175129441 242226.059
California Department of Transportation 785906434 1021984.960
California Department of Water Resources 52470432 10736.737
California Energy Commission 140791478 1183121.664
California Environmental Protection Agency 2560940 1280470.000
California Governors Office of Emergency Services 31037358 492656.476
California Natural Resources Agency 145023190 680860.047
California State Coastal Conservancy 7218244 721824.400
California State Transportation Agency 771556000 5715229.630
California State Water Resources Control Board 118601405 1186014.050
California Strategic Growth Council 1557246299 1297705.249
California Wildlife Conservation Board 12574800 739694.118
California Workforce Development Board 24428357 814278.567
San Francisco Bay Conservation and Development ... 8680291 117301.230
Total Project GHGReductions \
sum
Agency Name
California Air Resources Board 509888
California Coastal Commission 0
California Conservation Corps 292164
California Department of Community Services and... 583978
California Department of Fish and Wildlife 405413
California Department of Food and Agriculture 21019827
California Department of Forestry and Fire Prot... 5176685
California Department of Resources Recycling an... 1884352
California Department of Transportation 5709880
California Department of Water Resources 373494
California Energy Commission 3553812
California Environmental Protection Agency 0
California Governors Office of Emergency Services 0
California Natural Resources Agency 53327
California State Coastal Conservancy 0
California State Transportation Agency 9076036
California State Water Resources Control Board -3620
California Strategic Growth Council 17127711
California Wildlife Conservation Board 124761
California Workforce Development Board 0
San Francisco Bay Conservation and Development ... 0
Is Benefit Disadvantaged Communities
mean
Agency Name
California Air Resources Board 0.000
California Coastal Commission 0.000
California Conservation Corps 0.000
California Department of Community Services and... 0.917
California Department of Fish and Wildlife 0.000
California Department of Food and Agriculture 0.053
California Department of Forestry and Fire Prot... 0.084
California Department of Resources Recycling an... 0.032
California Department of Transportation 0.095
California Department of Water Resources 0.441
California Energy Commission 0.000
California Environmental Protection Agency 0.000
California Governors Office of Emergency Services 0.000
California Natural Resources Agency 0.150
California State Coastal Conservancy 0.000
California State Transportation Agency 0.193
California State Water Resources Control Board 0.000
California Strategic Growth Council 0.016
California Wildlife Conservation Board 0.000
California Workforce Development Board 0.000
San Francisco Bay Conservation and Development ... 0.000
Agency Performance in Multi-County Projects:
Total Program GGRFFunding \
count
Agency Name
California Air Resources Board 25
California Conservation Corps 9
California Department of Food and Agriculture 41
California Department of Forestry and Fire Prot... 339
California Department of Resources Recycling an... 23
California Department of Transportation 158
California Energy Commission 9
California Natural Resources Agency 7
California State Transportation Agency 57
California State Water Resources Control Board 45
California Strategic Growth Council 32
California Wildlife Conservation Board 7
\
sum mean
Agency Name
California Air Resources Board 2104048 84161.920
California Conservation Corps 1248353 138705.889
California Department of Food and Agriculture 6296871 153582.220
California Department of Forestry and Fire Prot... 32351508 95432.177
California Department of Resources Recycling an... 6962204 302704.522
California Department of Transportation 162505329 1028514.741
California Energy Commission 12379666 1375518.444
California Natural Resources Agency 2879148 411306.857
California State Transportation Agency 232590000 4080526.316
California State Water Resources Control Board 46265001 1028111.133
California Strategic Growth Council 11948291 373384.094
California Wildlife Conservation Board 3544200 506314.286
Total Project GHGReductions \
sum
Agency Name
California Air Resources Board -413
California Conservation Corps 842
California Department of Food and Agriculture 42951
California Department of Forestry and Fire Prot... 185424
California Department of Resources Recycling an... 35434
California Department of Transportation 2460280
California Energy Commission 501007
California Natural Resources Agency 1191
California State Transportation Agency 2748646
California State Water Resources Control Board -3234
California Strategic Growth Council 251149
California Wildlife Conservation Board 0
Is Benefit Disadvantaged Communities
mean
Agency Name
California Air Resources Board 0.000
California Conservation Corps 0.000
California Department of Food and Agriculture 0.000
California Department of Forestry and Fire Prot... 0.168
California Department of Resources Recycling an... 0.130
California Department of Transportation 0.089
California Energy Commission 0.000
California Natural Resources Agency 0.000
California State Transportation Agency 0.105
California State Water Resources Control Board 0.000
California Strategic Growth Council 0.062
California Wildlife Conservation Board 0.000
/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/matplotlib/text.py:1465: FutureWarning: Calling float on a single element Series is deprecated and will raise a TypeError in the future. Use float(ser.iloc[0]) instead x = float(self.convert_xunits(x))
In [27]:
plt.figure(figsize=(15, 10))

# Create shorter but still meaningful agency names.
# Keys are matched as prefixes of the full agency name. The long department
# names were previously keyed by their display-truncated form ending in '...',
# which never equals the full value in the index, so those agencies fell back
# to the generic truncated label.
agency_shortnames = {
    'California Air Resources Board': 'CARB',
    'California Department of Community Services and': 'Community Services',
    'California Department of Food and Agriculture': 'CDFA',
    'California Department of Forestry and Fire Prot': 'CalFire',
    'California Department of Resources Recycling an': 'CalRecycle',
    'California Department of Transportation': 'Caltrans',
    'California Department of Water Resources': 'Water Resources',
    'California Energy Commission': 'Energy Commission',
    'California State Transportation Agency': 'CalSTA',
    'California Strategic Growth Council': 'SGC',
    # Add other agencies as needed
}


def _short_agency_name(full_name):
    """Return the short label for an agency, or a 15-character truncation."""
    for prefix, short in agency_shortnames.items():
        if full_name.startswith(prefix):
            return short
    # Only mark the label as truncated when something was actually cut off.
    return full_name if len(full_name) <= 15 else full_name[:15] + '...'


# Pull the plotted columns out with their full tuple keys so each is a plain
# Series of scalars.
agency_efficiency = agency_performance[('ghg_per_dollar', '')]
agency_dac_rate = agency_performance[('Is Benefit Disadvantaged Communities', 'mean')]

# Plot GHG efficiency vs DAC benefit rate by agency
plt.scatter(
    agency_efficiency,
    agency_dac_rate,
    s=agency_performance[('Total Program GGRFFunding', 'sum')]/1e6,  # Size by total funding
    alpha=0.6
)

# Add agency labels with shorter names. Scalars are passed to annotate();
# row['ghg_per_dollar'] on a MultiIndex row would be a one-element Series.
for agency, x_val, y_val in zip(agency_performance.index, agency_efficiency, agency_dac_rate):
    plt.annotate(_short_agency_name(agency), (x_val, y_val),
                 xytext=(5, 5), textcoords='offset points')

plt.xlabel('GHG Reduction per Dollar')
plt.ylabel('DAC Benefit Rate')
plt.title('Agency Performance: Climate Impact vs Equity\nSize = Total Funding')

# Add legend for bubble size. Marker area is funding / 1e6, so s=100 stands
# for $0.1B, s=500 for $0.5B and s=1000 for $1.0B.
legend_elements = [plt.scatter([], [], s=s, label=f'${l}B', alpha=0.6, color='blue')
                   for s, l in zip([100, 500, 1000], ['0.1', '0.5', '1.0'])]
plt.legend(handles=legend_elements, title='Total Funding',
           bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
- Strong GHG Performers
- California Department of Food and Agriculture shows highest GHG efficiency (~0.06 reductions per dollar)
- California Energy Commission also performs well (~0.03)
- However, both show low DAC benefit rates (0-5%)
- Strong Equity Performers
- California Department of Community Services leads in DAC benefits (91.7%)
- Department of Water Resources shows good equity performance (44.1%)
- But these have lower GHG efficiency
- Large Scale Implementers (shown by bubble size)
- Strategic Growth Council ($1.56B)
- CalSTA ($772M)
- CARB ($796M)
- Department of Forestry and Fire Protection ($968M)
- Multi-County Implementation
- Transportation agencies lead in multi-county projects (CalSTA: 57 projects, $233M)
- CalFire shows significant multi-county work (339 projects)
- Strategic Growth Council's multi-county projects show lower DAC rates than their overall portfolio
In [28]:
# Analyze agency performance across CES score categories:
# per (agency, burden category) project count, total/mean funding,
# total GHG reductions and the mean of the DAC-benefit flag.
merged_agency_ces = merged_data.groupby(['Agency Name', 'CES_category']).agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean'
}).round(3)

# Total funding per agency (rows) and CES burden category (columns);
# combinations with no projects are filled with 0.
agency_funding = pd.pivot_table(
    merged_data,
    values='Total Program GGRFFunding',
    index='Agency Name',
    columns='CES_category',
    aggfunc='sum'
).fillna(0)

# Convert each row to percentages of that agency's total funding
agency_funding_pct = agency_funding.div(agency_funding.sum(axis=1), axis=0) * 100
print("Percentage of Agency Funding by Environmental Burden Category:")
print(agency_funding_pct.round(2))

# Visualize distribution patterns.
# Create the axes explicitly and hand them to DataFrame.plot: calling
# plt.figure() first and then DataFrame.plot() without ax= leaves an empty
# 15x8 figure behind ("<Figure ... with 0 Axes>") and draws the bars on a
# second, default-sized figure.
fig, ax = plt.subplots(figsize=(15, 8))

# Stack the segments from lowest to highest burden for the plot only
# (the pivot's own column order is alphabetical). Any category label not
# listed here is kept and appended at the end.
_burden_order = [
    'Low Burden (0-25%)',
    'Moderate Burden (25-50%)',
    'High Burden (50-75%)',
    'Highest Burden (75-100%)',
]
_plot_cols = [c for c in _burden_order if c in agency_funding_pct.columns]
_plot_cols += [c for c in agency_funding_pct.columns if c not in _plot_cols]

agency_funding_pct[_plot_cols].plot(kind='barh', stacked=True, ax=ax)
ax.set_title('Distribution of Agency Funding Across Environmental Burden Categories')
ax.set_xlabel('Percentage of Agency Funding')
fig.tight_layout()
plt.show()

# Look at average project size by burden category for each agency (in $ millions)
avg_project_size = merged_data.groupby(['Agency Name', 'CES_category'])['Total Program GGRFFunding'].mean()/1e6
avg_project_size = avg_project_size.unstack()
print("\nAverage Project Size (Millions $) by Environmental Burden Category:")
print(avg_project_size.round(2))
Percentage of Agency Funding by Environmental Burden Category: CES_category High Burden (50-75%) \ Agency Name California Air Resources Board 20.81 California Department of Community Services and... 19.77 California Department of Resources Recycling an... 26.02 California Department of Water Resources 27.36 California Energy Commission 14.29 California Strategic Growth Council 1.78 CES_category Highest Burden (75-100%) \ Agency Name California Air Resources Board 4.92 California Department of Community Services and... 77.98 California Department of Resources Recycling an... 41.17 California Department of Water Resources 50.91 California Energy Commission 78.57 California Strategic Growth Council 16.38 CES_category Low Burden (0-25%) \ Agency Name California Air Resources Board 36.65 California Department of Community Services and... 0.29 California Department of Resources Recycling an... 21.83 California Department of Water Resources 9.21 California Energy Commission 7.14 California Strategic Growth Council 3.15 CES_category Moderate Burden (25-50%) Agency Name California Air Resources Board 37.63 California Department of Community Services and... 1.96 California Department of Resources Recycling an... 10.98 California Department of Water Resources 12.51 California Energy Commission 0.00 California Strategic Growth Council 78.69
<Figure size 1500x800 with 0 Axes>
Average Project Size (Millions $) by Environmental Burden Category: CES_category High Burden (50-75%) \ Agency Name California Air Resources Board 0.01 California Department of Community Services and... 0.03 California Department of Resources Recycling an... 0.01 California Department of Water Resources 0.01 California Energy Commission 0.07 California Strategic Growth Council 0.00 CES_category Highest Burden (75-100%) \ Agency Name California Air Resources Board 0.01 California Department of Community Services and... 0.02 California Department of Resources Recycling an... 0.01 California Department of Water Resources 0.01 California Energy Commission 0.07 California Strategic Growth Council 0.01 CES_category Low Burden (0-25%) \ Agency Name California Air Resources Board 0.01 California Department of Community Services and... 0.07 California Department of Resources Recycling an... 0.01 California Department of Water Resources 0.00 California Energy Commission 0.07 California Strategic Growth Council 0.85 CES_category Moderate Burden (25-50%) Agency Name California Air Resources Board 0.01 California Department of Community Services and... 0.03 California Department of Resources Recycling an... 0.01 California Department of Water Resources 0.00 California Energy Commission NaN California Strategic Growth Council 1.93
This data reveals fascinating patterns in how different agencies serve communities with varying environmental burdens:
- Strong Environmental Justice Focus
- Community Services: 78% of funding to highest burden areas
- Energy Commission: 79% to highest burden areas
- Water Resources: 51% to highest burden areas
- CalRecycle: 41% to highest burden areas
- More Dispersed Distribution
- CARB spreads funding fairly evenly across the low, moderate, and high burden categories, with little reaching the highest burden areas:
- 37.6% Moderate burden
- 36.7% Low burden
- 20.8% High burden
- Only 4.9% to highest burden areas
- Unique Strategic Growth Council Pattern
- Heavy focus on moderate burden areas (78.7%)
- Limited focus on highest burden areas (16.4%)
- Largest average project sizes:
- $1.93M in moderate burden areas
- $0.85M in low burden areas
- Much smaller in high burden areas
- Project Size Variations
- Most agencies maintain consistent project sizes across burden categories
- Notable exceptions:
- Strategic Growth Council: Much larger projects in moderate burden areas
- Community Services: Larger projects in low burden areas
This suggests:
- Different agency missions lead to different environmental justice approaches
- Some agencies specifically target high-burden areas
- Project size might be influenced by community capacity
- Need for better coordination in highest burden areas
In [29]:
# Outcomes (GHG reductions and DAC benefit) for every agency within each
# environmental burden category.
_outcome_spec = {
    'Total Program GGRFFunding': ['count', 'sum'],
    'Total Project GHGReductions': ['sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
}
outcome_analysis = (
    merged_data
    .groupby(['Agency Name', 'CES_category'])
    .agg(_outcome_spec)
    .round(3)
)

# GHG efficiency: total reductions divided by total funding per group
_ghg_sum = outcome_analysis[('Total Project GHGReductions', 'sum')]
_funding_sum = outcome_analysis[('Total Program GGRFFunding', 'sum')]
outcome_analysis['ghg_per_dollar'] = _ghg_sum / _funding_sum


def _burden_pivot(value_col, how):
    """Pivot merged_data to an agency x CES_category table of `value_col`."""
    return pd.pivot_table(
        merged_data,
        values=value_col,
        index='Agency Name',
        columns='CES_category',
        aggfunc=how,
    )


# Build the four agency x burden-category tables shown in the figure
ghg_by_burden = _burden_pivot('Total Project GHGReductions', 'sum')
efficiency_by_burden = outcome_analysis['ghg_per_dollar'].unstack()
dac_by_burden = _burden_pivot('Is Benefit Disadvantaged Communities', 'mean')
size_by_burden = _burden_pivot('Total Program GGRFFunding', 'mean') / 1e6  # millions of $

# 2x2 grid: one horizontal bar chart per table
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))
_panels = [
    (ax1, ghg_by_burden,
     'Total GHG Reductions by Environmental Burden Category',
     'GHG Reductions'),
    (ax2, efficiency_by_burden,
     'GHG Reduction Efficiency by Environmental Burden Category',
     'GHG Reductions per Dollar'),
    (ax3, dac_by_burden,
     'DAC Benefit Rate by Environmental Burden Category',
     'Proportion Benefiting DACs'),
    (ax4, size_by_burden,
     'Average Project Size by Environmental Burden Category',
     'Average Project Size (Millions $)'),
]
for _axis, _table, _title, _xlabel in _panels:
    _table.plot(kind='barh', ax=_axis)
    _axis.set_title(_title)
    _axis.set_xlabel(_xlabel)

plt.tight_layout()
plt.show()

# Text summary of the two outcome tables
print("\nOutcome Analysis by Environmental Burden Category:")
print("\nGHG Efficiency (Reductions per Dollar):")
print(efficiency_by_burden.round(4))
print("\nDAC Benefit Rate:")
print(dac_by_burden.round(4))
Outcome Analysis by Environmental Burden Category: GHG Efficiency (Reductions per Dollar): CES_category High Burden (50-75%) \ Agency Name California Air Resources Board 0.0092 California Department of Community Services and... 0.0034 California Department of Resources Recycling an... 0.0023 California Department of Water Resources 0.0106 California Energy Commission 0.0000 California Strategic Growth Council 0.0021 CES_category Highest Burden (75-100%) \ Agency Name California Air Resources Board 0.0084 California Department of Community Services and... 0.0037 California Department of Resources Recycling an... 0.0026 California Department of Water Resources 0.0088 California Energy Commission 0.0000 California Strategic Growth Council 0.0022 CES_category Low Burden (0-25%) \ Agency Name California Air Resources Board 0.0163 California Department of Community Services and... 0.0034 California Department of Resources Recycling an... 0.0013 California Department of Water Resources 0.0122 California Energy Commission 0.0000 California Strategic Growth Council 0.0918 CES_category Moderate Burden (25-50%) Agency Name California Air Resources Board 0.0114 California Department of Community Services and... 0.0038 California Department of Resources Recycling an... 0.0017 California Department of Water Resources 0.0148 California Energy Commission NaN California Strategic Growth Council 0.0943 DAC Benefit Rate: CES_category High Burden (50-75%) \ Agency Name California Air Resources Board 0.0000 California Department of Community Services and... 0.8851 California Department of Resources Recycling an... 0.0000 California Department of Water Resources 0.4012 California Energy Commission 0.0000 California Strategic Growth Council 0.0000 CES_category Highest Burden (75-100%) \ Agency Name California Air Resources Board 0.0000 California Department of Community Services and... 0.9344 California Department of Resources Recycling an... 
0.0000 California Department of Water Resources 0.8456 California Energy Commission 0.0000 California Strategic Growth Council 0.0000 CES_category Low Burden (0-25%) \ Agency Name California Air Resources Board 0.0000 California Department of Community Services and... 0.1429 California Department of Resources Recycling an... 0.0000 California Department of Water Resources 0.0147 California Energy Commission 0.0000 California Strategic Growth Council 0.0000 CES_category Moderate Burden (25-50%) Agency Name California Air Resources Board 0.0000 California Department of Community Services and... 0.4835 California Department of Resources Recycling an... 0.0000 California Department of Water Resources 0.0565 California Energy Commission NaN California Strategic Growth Council 0.0000
- GHG Efficiency Paradox
- Highest efficiency in lower burden areas:
- Strategic Growth Council: 0.0918 (low burden) vs 0.0022 (highest burden)
- CARB: 0.0163 (low burden) vs 0.0084 (highest burden)
- Water Resources: 0.0122 (low burden) vs 0.0088 (highest burden)
- Suggests potential infrastructure or implementation challenges in high-burden areas
- DAC Benefit Distribution
- Strong targeting by some agencies:
- Community Services: 93% DAC benefit in highest burden areas
- Water Resources: 85% DAC benefit in highest burden areas
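- Others show a 0% DAC benefit rate in every burden category with data (a uniform 0% may reflect how the DAC-benefit flag is recorded for these programs and should be verified):
- CARB
- Energy Commission
- Strategic Growth Council
- CalRecycle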
- Efficiency-Equity Trade-off
- Apparent inverse relationship between GHG efficiency and DAC benefits for most agencies (Water Resources is an exception)
- Community Services shows high DAC benefits but lower GHG efficiency
- Strategic Growth Council achieves high GHG efficiency but low DAC benefits
- Agency Performance Patterns
- Community Services has the highest DAC benefit rate at every burden level, ranging from 14% (low burden) to 93% (highest burden)
- Water Resources shows good balance between efficiency and equity
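- Strategic Growth Council is the most GHG-efficient agency in low and moderate burden areas (~0.09) but among the least efficient in high and highest burden areas (~0.002), and reports no DAC benefits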
Key Policy Implications:
- Need for targeted capacity building in high-burden areas
- Potential for program design improvements to balance outcomes
- Opportunity for cross-agency learning on successful approaches