+
+
+
+
+
+
+
+Data: postgis db
+Project: California Equity Research¶
Data: PostGIS database calif_equity with California Climate Investments (CCI) and CalEnviroScreen data¶
Goal: Analyze the relationship between climate investment and environmental justice in California¶
This notebook: second take¶
Author: dpadams¶
Date: 2024-11-24¶
+
+
+
+
+
+
+
+In [2]:
+
+
+
+
+
+# Importing the necessary libraries
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+
+
+
+
+
+
+
+In [3]:
+
+
+
+
+
# Make the repository root the working directory so that relative paths are
# resolved against it.
import os

repo_root = '/home/dadams/Repos/california_equity_git'
os.chdir(repo_root)
+
+
+
+
+
+
+
+In [4]:
+
+
+
+
+
# Load the raw programs CSV (path is relative to the working directory).
# low_memory=False asks pandas to parse the file in a single pass instead of
# in internal chunks.
cci_csv_path = 'data_raw/cci_programs_data.csv'
data = pd.read_csv(cci_csv_path, low_memory=False)
+
+
+
+
+
+
+
+In [5]:
+
+
+
+
+
+data.columns
+
+
+
+
+
+
+
+
+Out[5]:
+
+
+Index(['Project IDNumber', 'Reporting Cycle Name', 'Agency Name', + 'Program Name', 'Program Description', 'Sub Program Name', + 'Record Type', 'Project Name', 'Project Type', 'Project Description', + ... + 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months', + 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE', + 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE', + 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'], + dtype='object', length=127)+
+
+
+
+
+
+
+In [6]:
+
+
+
+
+
import geopandas as gpd

# Location of the CES4 shapefile on disk (absolute path).
shapefile_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp'

# Read the shapefile into a GeoDataFrame and preview its first rows.
gdf = gpd.read_file(shapefile_path)
preview = gdf.head()
print(preview)
+
+
+
+
+
+
+
+
+
+
+
+Tract ZIP County ApproxLoc TotPop19 CIscore \ +0 6.083002e+09 93454 Santa Barbara Santa Maria 4495 36.019653 +1 6.083002e+09 93455 Santa Barbara Santa Maria 13173 37.030667 +2 6.083002e+09 93454 Santa Barbara Santa Maria 2398 31.213140 +3 6.083002e+09 93455 Santa Barbara Orcutt 4496 6.639331 +4 6.083002e+09 93455 Santa Barbara Orcutt 4008 14.022852 + + CIscoreP Ozone OzoneP PM2_5 ... Elderly65 Hispanic \ +0 69.162885 0.034190 10.566273 7.567724 ... 12.5028 68.9210 +1 70.637922 0.035217 11.561917 7.624775 ... 5.3519 78.6229 +2 61.069087 0.034190 10.566273 7.548835 ... 12.8857 65.7214 +3 5.988401 0.036244 13.615432 7.660570 ... 14.4128 22.9537 +4 23.121533 0.036244 13.615432 7.663210 ... 18.8872 33.4082 + + White AfricanAm NativeAm OtherMult Shape_Leng Shape_Area \ +0 20.8899 0.4004 0.2670 1.3126 6999.357689 2.847611e+06 +1 13.2240 2.5051 0.0000 0.9489 19100.578232 1.635292e+07 +2 30.6088 0.9591 0.0000 2.1685 4970.985897 1.352329e+06 +3 69.1948 0.9342 0.7117 2.5356 6558.956012 2.417717e+06 +4 59.7804 0.6986 1.4721 1.3723 6570.368730 2.608422e+06 + + AAPI geometry +0 8.2091 POLYGON ((-39795.07 -341919.191, -38126.384 -3... +1 4.6990 POLYGON ((-39795.07 -341919.191, -39803.632 -3... +2 0.5421 POLYGON ((-38115.747 -341130.248, -38126.384 -... +3 3.6699 POLYGON ((-37341.662 -348530.437, -37252.307 -... +4 3.2685 POLYGON ((-39465.107 -348499.262, -38244.305 -... + +[5 rows x 67 columns] ++
+
+
+
+
+
+
+In [7]:
+
+
+
+
+
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- Basic cleaning ---
# Parse the operational date, then keep only rows whose date lies inside the
# study window. Both endpoints are inclusive; rows with an unparseable or
# missing date fail the comparison and are dropped.
data['Date Operational'] = pd.to_datetime(data['Date Operational'])
within_window = data['Date Operational'].between('2010-01-01', '2024-11-01')
data = data[within_window].copy()

# Discard rows that report no GGRF funding amount.
data = data.dropna(subset=['Total Program GGRFFunding'])

# --- Derived columns ---
# 'County' may hold a comma-separated list; the number of commas plus one is
# used as the number of counties. A missing county counts as size 1.
commas_per_row = data['County'].str.count(',')
data['Year'] = data['Date Operational'].dt.year
data['is_multi_county'] = data['County'].str.contains(',', na=False)
data['partnership_size'] = commas_per_row.fillna(0) + 1

# --- Quick validation ---
print(f"Total GGRF Funding: ${data['Total Program GGRFFunding'].sum()/1e9:.2f}B")
print(f"Number of projects: {len(data)}")
+
+
+
+
+
+
+
+
+
+
+
+Total GGRF Funding: $8.13B +Number of projects: 131428 ++
+
+
+
+
+
+
+In [8]:
+
+
+
+
+
# Temporal analysis of GGRF funding: one summary row per operational year.
yearly_aggregations = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
    'is_multi_county': ['count', 'mean'],
    'partnership_size': 'mean',
}
temporal = data.groupby('Year').agg(yearly_aggregations).round(2)

# Visualize key metrics. Each entry describes one panel:
# (column key in `temporal`, divisor for the y-values or None, panel title).
panel_specs = [
    (('Total Program GGRFFunding', 'count'), None,
     'Number of GGRF Projects by Year'),
    (('Total Program GGRFFunding', 'mean'), 1e6,
     'Average GGRF Funding per Project (Millions $)'),
    (('Is Benefit Disadvantaged Communities', 'mean'), None,
     'DAC Benefit Rate'),
    (('partnership_size', 'mean'), None,
     'Average Number of Partner Counties'),
]

fig, axes = plt.subplots(2, 2, figsize=(20, 15))
(ax1, ax2), (ax3, ax4) = axes

# axes.flat walks the grid row by row, matching the order of panel_specs.
for ax, (stat_key, divisor, title) in zip(axes.flat, panel_specs):
    values = temporal[stat_key]
    if divisor is not None:
        values = values / divisor
    ax.plot(temporal.index, values, marker='o', linewidth=2)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Key metrics by period:")
print(temporal)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Key metrics by period: + Total Program GGRFFunding \ + count sum mean +Year +2011 1 11500 11500.00 +2012 98 2328417 23759.36 +2013 3379 50726172 15012.18 +2014 7281 76042854 10444.01 +2015 6235 61034095 9788.95 +2016 8001 105685277 13209.01 +2017 12745 368260901 28894.54 +2018 18071 641426028 35494.77 +2019 21131 1131373505 53540.94 +2020 18281 1555581678 85092.81 +2021 15957 1147697486 71924.39 +2022 12906 1187728536 92029.18 +2023 6825 1114799921 163340.65 +2024 517 686268968 1327406.13 + + Is Benefit Disadvantaged Communities is_multi_county \ + mean count mean +Year +2011 1.00 1 0.00 +2012 0.29 98 0.00 +2013 0.28 3379 0.00 +2014 0.40 7281 0.00 +2015 0.48 6235 0.01 +2016 0.59 8001 0.00 +2017 0.45 12745 0.00 +2018 0.44 18071 0.00 +2019 0.35 21131 0.00 +2020 0.30 18281 0.01 +2021 0.27 15957 0.01 +2022 0.23 12906 0.01 +2023 0.00 6825 0.01 +2024 0.00 517 0.07 + + partnership_size + mean +Year +2011 1.00 +2012 1.00 +2013 1.00 +2014 1.00 +2015 1.02 +2016 1.00 +2017 1.01 +2018 1.01 +2019 1.01 +2020 1.06 +2021 1.02 +2022 1.03 +2023 1.03 +2024 1.32 ++
+
+
+
+
+
+
+In [9]:
+
+
+
+
+
# Rows whose operational year is 2024; reused by both summaries below.
projects_2024 = data[data['Year'] == 2024]

# Per-program project count, total funding and mean funding for 2024.
print("2024 Projects by Program:")
funding_by_program = projects_2024.groupby('Program Name').agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean']
}).round(2)
print(funding_by_program)

# The five 2024 rows with the largest funding amounts.
print("\nLargest 2024 Projects:")
columns_to_show = ['Program Name', 'County', 'Total Program GGRFFunding', 'Date Operational']
largest_2024 = projects_2024.nlargest(5, 'Total Program GGRFFunding')
print(largest_2024[columns_to_show])
+
+
+
+
+
+
+
+
+
+
+
+2024 Projects by Program: + Total Program GGRFFunding \ + count +Program Name +Affordable Housing and Sustainable Communities ... 8 +Climate Adaptation and Resiliency Program 1 +Community Air Protection 220 +Fire Prevention Program 12 +Fluorinated Gases Emission Reduction Incentives 15 +Food Production Investment Program 30 +Forest Carbon Plan Implementation 5 +Forest Health Program 20 +Low Carbon Transit Operations Program 18 +Low Carbon Transportation 4 +Safe and Affordable Drinking Water Fund 10 +Transformative Climate Communities 10 +Transit and Intercity Rail Capital Program 29 +Urban and Community Forestry Program 132 +Waste Diversion 1 +Wetlands and Watershed Restoration 2 + + + sum mean +Program Name +Affordable Housing and Sustainable Communities ... 176615877 22076984.62 +Climate Adaptation and Resiliency Program 299000 299000.00 +Community Air Protection 80955408 367979.13 +Fire Prevention Program 7806649 650554.08 +Fluorinated Gases Emission Reduction Incentives 1000001 66666.73 +Food Production Investment Program 70824290 2360809.67 +Forest Carbon Plan Implementation 1108131 221626.20 +Forest Health Program 57230331 2861516.55 +Low Carbon Transit Operations Program 11165512 620306.22 +Low Carbon Transportation 10744732 2686183.00 +Safe and Affordable Drinking Water Fund 10457866 1045786.60 +Transformative Climate Communities 38277301 3827730.10 +Transit and Intercity Rail Capital Program 189696000 6541241.38 +Urban and Community Forestry Program 18561997 140621.19 +Waste Diversion 3950527 3950527.00 +Wetlands and Watershed Restoration 7575346 3787673.00 + +Largest 2024 Projects: + Program Name County \ +117879 Transit and Intercity Rail Capital Program Alameda +90922 Transit and Intercity Rail Capital Program Los Angeles +136661 Affordable Housing and Sustainable Communities... Los Angeles +141400 Affordable Housing and Sustainable Communities... San Francisco +100763 Affordable Housing and Sustainable Communities... 
San Francisco + + Total Program GGRFFunding Date Operational +117879 107100000 2024-03-01 +90922 40000000 2024-09-23 +136661 29889806 2024-04-01 +141400 29269952 2024-08-01 +100763 25424799 2024-01-01 ++
+
+
+
+
+
+
+In [10]:
+
+
+
+
+
# Compare 2023 vs 2024 by program: one row per (year, program) pair.
in_comparison_years = data['Year'].isin([2023, 2024])
years_comparison = (
    data[in_comparison_years]
    .groupby(['Year', 'Program Name'])
    .agg({
        'Total Program GGRFFunding': ['count', 'sum'],
        'Is Benefit Disadvantaged Communities': 'mean',
    })
    .round(2)
)

print("2023 vs 2024 Program Comparison:")
print(years_comparison)

# Calculate percent changes in key metrics.
print("\nPercent Changes 2023-2024:")
overall_aggregations = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
}
metrics_2023 = data[data['Year'] == 2023].agg(overall_aggregations)
metrics_2024 = data[data['Year'] == 2024].agg(overall_aggregations)

# Only 'mean' is requested for the DAC column, so its 'count' and 'sum' cells
# are NaN in both tables and stay NaN in the percent change.
pct_change = ((metrics_2024 - metrics_2023) / metrics_2023 * 100).round(2)
print(pct_change)
+
+
+
+
+
+
+
+
+
+
+
+2023 vs 2024 Program Comparison: + Total Program GGRFFunding \ + count +Year Program Name +2023 Affordable Housing and Sustainable Communities ... 20 + Climate Adaptation and Resiliency Program 6 + Climate Change Research Program 3 + Climate Ready Program 4 + Climate Resilience Planning 36 + Climate Smart Agriculture 363 + Community Air Protection 1926 + Fire Prevention Program 11 + Food Production Investment Program 20 + Forest Carbon Plan Implementation 83 + Forest Health Program 27 + Funding Agricultural Replacement Measures for E... 539 + Low Carbon Transit Operations Program 99 + Low Carbon Transportation 3187 + Low-Income Weatherization Program 26 + SB 1383 Local Assistance Grant Program 12 + Safe and Affordable Drinking Water Fund 22 + Sustainable Agricultural Lands Conservation Pro... 15 + Training and Workforce Development Program 136 + Transformative Climate Communities 142 + Transit and Intercity Rail Capital Program 14 + Urban Greening Program 42 + Waste Diversion 18 + Water-Energy Efficiency 51 + Wetlands and Watershed Restoration 1 + Woodsmoke Reduction Program 22 +2024 Affordable Housing and Sustainable Communities ... 8 + Climate Adaptation and Resiliency Program 1 + Community Air Protection 220 + Fire Prevention Program 12 + Fluorinated Gases Emission Reduction Incentives 15 + Food Production Investment Program 30 + Forest Carbon Plan Implementation 5 + Forest Health Program 20 + Low Carbon Transit Operations Program 18 + Low Carbon Transportation 4 + Safe and Affordable Drinking Water Fund 10 + Transformative Climate Communities 10 + Transit and Intercity Rail Capital Program 29 + Urban and Community Forestry Program 132 + Waste Diversion 1 + Wetlands and Watershed Restoration 2 + + \ + sum +Year Program Name +2023 Affordable Housing and Sustainable Communities ... 
334283890 + Climate Adaptation and Resiliency Program 3374200 + Climate Change Research Program 396119 + Climate Ready Program 4368244 + Climate Resilience Planning 5041046 + Climate Smart Agriculture 43068601 + Community Air Protection 45727103 + Fire Prevention Program 9456431 + Food Production Investment Program 42815128 + Forest Carbon Plan Implementation 22524961 + Forest Health Program 37902075 + Funding Agricultural Replacement Measures for E... 40805691 + Low Carbon Transit Operations Program 114507056 + Low Carbon Transportation 169989768 + Low-Income Weatherization Program 4226240 + SB 1383 Local Assistance Grant Program 5969666 + Safe and Affordable Drinking Water Fund 11621901 + Sustainable Agricultural Lands Conservation Pro... 24711311 + Training and Workforce Development Program 7332437 + Transformative Climate Communities 40263894 + Transit and Intercity Rail Capital Program 118568000 + Urban Greening Program 26722800 + Waste Diversion 213843 + Water-Energy Efficiency 73165 + Wetlands and Watershed Restoration 743216 + Woodsmoke Reduction Program 93135 +2024 Affordable Housing and Sustainable Communities ... 176615877 + Climate Adaptation and Resiliency Program 299000 + Community Air Protection 80955408 + Fire Prevention Program 7806649 + Fluorinated Gases Emission Reduction Incentives 1000001 + Food Production Investment Program 70824290 + Forest Carbon Plan Implementation 1108131 + Forest Health Program 57230331 + Low Carbon Transit Operations Program 11165512 + Low Carbon Transportation 10744732 + Safe and Affordable Drinking Water Fund 10457866 + Transformative Climate Communities 38277301 + Transit and Intercity Rail Capital Program 189696000 + Urban and Community Forestry Program 18561997 + Waste Diversion 3950527 + Wetlands and Watershed Restoration 7575346 + + Is Benefit Disadvantaged Communities + mean +Year Program Name +2023 Affordable Housing and Sustainable Communities ... 
0.00 + Climate Adaptation and Resiliency Program 0.00 + Climate Change Research Program 0.00 + Climate Ready Program 0.00 + Climate Resilience Planning 0.00 + Climate Smart Agriculture 0.00 + Community Air Protection 0.00 + Fire Prevention Program 0.00 + Food Production Investment Program 0.00 + Forest Carbon Plan Implementation 0.00 + Forest Health Program 0.00 + Funding Agricultural Replacement Measures for E... 0.00 + Low Carbon Transit Operations Program 0.00 + Low Carbon Transportation 0.00 + Low-Income Weatherization Program 0.00 + SB 1383 Local Assistance Grant Program 0.00 + Safe and Affordable Drinking Water Fund 0.00 + Sustainable Agricultural Lands Conservation Pro... 0.00 + Training and Workforce Development Program 0.00 + Transformative Climate Communities 0.00 + Transit and Intercity Rail Capital Program 0.36 + Urban Greening Program 0.00 + Waste Diversion 0.00 + Water-Energy Efficiency 0.06 + Wetlands and Watershed Restoration 0.00 + Woodsmoke Reduction Program 0.00 +2024 Affordable Housing and Sustainable Communities ... 0.00 + Climate Adaptation and Resiliency Program 0.00 + Community Air Protection 0.00 + Fire Prevention Program 0.00 + Fluorinated Gases Emission Reduction Incentives 0.00 + Food Production Investment Program 0.00 + Forest Carbon Plan Implementation 0.00 + Forest Health Program 0.00 + Low Carbon Transit Operations Program 0.00 + Low Carbon Transportation 0.00 + Safe and Affordable Drinking Water Fund 0.00 + Transformative Climate Communities 0.00 + Transit and Intercity Rail Capital Program 0.00 + Urban and Community Forestry Program 0.00 + Waste Diversion 0.00 + Wetlands and Watershed Restoration 0.00 + +Percent Changes 2023-2024: + Total Program GGRFFunding Is Benefit Disadvantaged Communities +count -92.42 NaN +sum -38.44 NaN +mean 712.66 -100.0 ++
+
+
+
+
+
+
+In [11]:
+
+
+
+
+
# Drop every row of the 'Low Carbon Transportation' program and work on an
# independent copy.
is_lct = data['Program Name'] == 'Low Carbon Transportation'
data_filtered = data[~is_lct].copy()

# Recalculate the yearly summary on the filtered rows.
yearly_aggregations_filtered = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
    'is_multi_county': ['count', 'mean'],
    'partnership_size': 'mean',
}
temporal_filtered = (
    data_filtered.groupby('Year').agg(yearly_aggregations_filtered).round(2)
)

# Compare 2023-2024 without LCT, one row per (year, program) pair.
in_comparison_years_filtered = data_filtered['Year'].isin([2023, 2024])
years_comparison = (
    data_filtered[in_comparison_years_filtered]
    .groupby(['Year', 'Program Name'])
    .agg({
        'Total Program GGRFFunding': ['count', 'sum'],
        'Is Benefit Disadvantaged Communities': 'mean',
    })
    .round(2)
)

# Show the 2023 and 2024 rows of the yearly summary side by side.
print("2023-2024 Changes (excluding Low Carbon Transportation):")
row_2023 = temporal_filtered.loc[2023]
row_2024 = temporal_filtered.loc[2024]
changes = pd.DataFrame({'2023': row_2023, '2024': row_2024})
print(changes)
+
+
+
+
+
+
+
+
+
+
+
+2023-2024 Changes (excluding Low Carbon Transportation): + 2023 2024 +Total Program GGRFFunding count 3.638000e+03 5.130000e+02 + sum 9.448102e+08 6.755242e+08 + mean 2.597059e+05 1.316811e+06 +Is Benefit Disadvantaged Communities mean 0.000000e+00 0.000000e+00 +is_multi_county count 3.638000e+03 5.130000e+02 + mean 2.000000e-02 7.000000e-02 +partnership_size mean 1.050000e+00 1.320000e+00 ++
+
+
+
+
+
+
+
+In [12]:
+
+
+
+
+
# Four yearly trend panels drawn from `temporal_filtered`. Each entry is
# (column key, divisor for the y-values or None, panel title).
filtered_panel_specs = [
    (('Total Program GGRFFunding', 'count'), None,
     'Number of Projects (Excluding LCT)'),
    (('Total Program GGRFFunding', 'mean'), 1e6,
     'Average Project Funding (Millions $)'),
    (('Total Program GGRFFunding', 'sum'), 1e9,
     'Total GGRF Funding (Billions $)'),
    (('partnership_size', 'mean'), None,
     'Average Number of Partner Counties'),
]

fig, axes = plt.subplots(2, 2, figsize=(20, 15))
(ax1, ax2), (ax3, ax4) = axes

# axes.flat yields the panels row by row: top-left, top-right,
# bottom-left, bottom-right.
for ax, (stat_key, divisor, title) in zip(axes.flat, filtered_panel_specs):
    values = temporal_filtered[stat_key]
    if divisor is not None:
        values = values / divisor
    ax.plot(temporal_filtered.index, values, marker='o', linewidth=2)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+-
+
- Project Volume Evolution +
-
+
- Growth phase: 2014-2019 (from ~0 to 5000+ projects) +
- Plateau: 2019-2022 (~4000-5000 projects) +
- Sharp decline: 2023-2024 (down to ~500 projects) +
-
+
- Average Project Size +
-
+
- Relatively stable 2014-2023 ($0.1-0.3M per project) +
- Dramatic increase in 2024 (to ~$1.3M per project) +
- Suggests shift to fewer but larger projects +
-
+
- Total GGRF Funding +
-
+
- Steady increase: 2014-2020 (reaching ~$1.1B) +
- Recent decline: 2020-2024 (down to ~$0.67B) +
- More stable pattern than project counts +
-
+
- Partnership Trends +
-
+
- Generally stable at 1.0-1.1 partners until 2020 +
- Spike in 2020 (~1.24 partners) +
- New peak in 2024 (~1.32 partners) +
- Suggests increasing regional collaboration +
+
+
+
+
+
+
+
+In [13]:
+
+
+
+
+
# 1. Program Scale Analysis: one row per program.
program_scale = (
    data_filtered
    .groupby('Program Name')
    .agg({
        'Total Program GGRFFunding': ['count', 'sum', 'mean'],
        'Is Benefit Disadvantaged Communities': 'mean',
        'Total Project GHGReductions': 'sum',
    })
    .round(2)
)

# Replace the two-level column index with flat names, in the same order as
# the aggregations above.
flat_column_names = ['project_count', 'total_funding', 'avg_funding',
                     'dac_rate', 'total_ghg']
program_scale.columns = flat_column_names

# GHG efficiency: total reported GHG reductions divided by total funding.
program_scale['ghg_per_dollar'] = program_scale['total_ghg'].div(
    program_scale['total_funding']
)
+# Categorize programs by size
def categorize_program_size(mean_funding):
    """Return the size label for a program's mean funding per project.

    The comparison is strict: a value must exceed a threshold to earn its
    label, so exactly 10e6 is 'Large', not 'Mega'. Any value that exceeds no
    threshold is 'Small'; that includes NaN, for which every comparison is
    false.
    """
    # Thresholds in dollars, checked from largest to smallest.
    thresholds = (
        (10e6, 'Mega'),     # above 10M
        (1e6, 'Large'),     # above 1M
        (500e3, 'Medium'),  # above 500K
    )
    for lower_bound, label in thresholds:
        if mean_funding > lower_bound:
            return label
    return 'Small'
+
# Label every program by its average funding per project.
avg_funding_by_program = program_scale['avg_funding']
program_scale['size_category'] = avg_funding_by_program.apply(categorize_program_size)

print("Program Scale Distribution:")
print(program_scale['size_category'].value_counts())

# 2. Geographic Analysis: one row per distinct value of 'County'
# (a multi-county string forms its own row).
geographic_dist = data_filtered.groupby('County').agg({
    'Total Program GGRFFunding': ['count', 'sum'],
    'Is Benefit Disadvantaged Communities': 'mean',
    'Total Project GHGReductions': 'sum',
})
geographic_dist.columns = ['project_count', 'total_funding', 'dac_rate', 'total_ghg']

# Concentration metric: share of all funding held by the five largest rows.
county_funding = geographic_dist['total_funding']
total_funding = county_funding.sum()
top_5_counties = county_funding.nlargest(5)
concentration = (top_5_counties.sum() / total_funding) * 100

print("\nGeographic Concentration:")
print(f"Top 5 counties account for {concentration:.1f}% of funding")

# 3. Print key findings: total funding per size category, in billions.
print("\nProgram Categories by Total Funding (Billions $):")
funding_by_category = program_scale.groupby('size_category')['total_funding'].sum()
size_summary = funding_by_category.sort_values(ascending=False) / 1e9
print(size_summary.round(2))

# Show the five programs with the most total funding.
print("\nLargest Programs (by total funding):")
summary_columns = ['total_funding', 'project_count', 'avg_funding',
                   'dac_rate', 'ghg_per_dollar']
largest_programs = program_scale.nlargest(5, 'total_funding')
print(largest_programs[summary_columns].round(2))
+
+
+
+
+
+
+
+
+
+
+
+Program Scale Distribution: +size_category +Small 21 +Large 11 +Medium 5 +Mega 1 +Name: count, dtype: int64 + +Geographic Concentration: +Top 5 counties account for 39.3% of funding + +Program Categories by Total Funding (Billions $): +size_category +Large 2.19 +Small 2.00 +Mega 1.19 +Medium 0.78 +Name: total_funding, dtype: float64 + +Largest Programs (by total funding): + total_funding \ +Program Name +Affordable Housing and Sustainable Communities ... 1192203125 +Low Carbon Transit Operations Program 775906434 +Transit and Intercity Rail Capital Program 771556000 +Fire Prevention Program 596274123 +Community Air Protection 529523228 + + project_count \ +Program Name +Affordable Housing and Sustainable Communities ... 93 +Low Carbon Transit Operations Program 766 +Transit and Intercity Rail Capital Program 135 +Fire Prevention Program 600 +Community Air Protection 5187 + + avg_funding dac_rate \ +Program Name +Affordable Housing and Sustainable Communities ... 12819388.44 0.16 +Low Carbon Transit Operations Program 1012932.68 0.09 +Transit and Intercity Rail Capital Program 5715229.63 0.19 +Fire Prevention Program 993790.20 0.00 +Community Air Protection 102086.61 0.00 + + ghg_per_dollar +Program Name +Affordable Housing and Sustainable Communities ... 0.00 +Low Carbon Transit Operations Program 0.01 +Transit and Intercity Rail Capital Program 0.01 +Fire Prevention Program 0.00 +Community Air Protection 0.00 ++
+
+
+
+
+
+
+
+
+-
+
- Program Scale Distribution +
-
+
- Most programs (21) are "Small" scale +
- 11 "Large" programs and 5 "Medium" programs +
- Only 1 "Mega" program (Affordable Housing at $1.19B) +
- More balanced distribution than when including transportation subsidies +
-
+
- Funding Allocation +
-
+
- Large programs: $2.19B total +
- Small programs: $2.00B total +
- Mega programs: $1.19B total +
- Medium programs: $0.78B total +
- Total GGRF funding: ~$6.16B +
-
+
- Top Programs by Funding +
-
+
- Affordable Housing: $1.19B (93 projects) +
- Low Carbon Transit Operations Program: $776M (766 projects) +
- Transit/Rail Capital: $772M (135 projects) +
- Fire Prevention: $596M (600 projects) +
- Community Air Protection: $530M (5,187 projects) +
-
+
- Program Characteristics +
-
+
- Wide range in project counts (93 to 5,187) +
- Average project sizes vary significantly:
-
+
- Affordable Housing: $12.8M/project +
- Transit/Rail: $5.7M/project +
- Community Air Protection: $102K/project +
+
-
+
- Geographic Distribution +
-
+
- Less concentrated than before +
- Top 5 counties: 39.3% of funding (vs. previous 75.6%) +
- Suggests more equitable geographic distribution +
+
+
+
+
+
+
+
+In [14]:
+
+
+
+
+
# 1. DAC Benefits by Program Size
# program_scale has one row per program, so the 'mean' of dac_rate here is an
# unweighted average of per-program rates, not a project-weighted rate.
size_level_aggregations = {
    'dac_rate': ['mean', 'min', 'max'],
    'total_funding': 'sum',
    'project_count': 'sum',
}
dac_by_size = (
    program_scale
    .groupby('size_category')
    .agg(size_level_aggregations)
    .round(3)
)
+
+# 2. Geographic Analysis
+# Add region classification
def classify_region(county):
    """Map a county value to 'Urban', 'Central Valley' or 'Other'.

    A string is split on commas and only its first entry, stripped of
    surrounding whitespace, is classified. Anything not found in either
    list, including non-string values, is 'Other'.

    NOTE(review): classify_urban_rural also treats 'Santa Clara' as urban,
    while this function does not; confirm which list is intended.
    """
    if isinstance(county, str):
        # Multi-county case: classify by the first county listed.
        county = county.split(',')[0].strip()

    # Checked in insertion order: urban first, then Central Valley.
    region_members = {
        'Urban': ['Los Angeles', 'San Francisco', 'Alameda', 'San Diego', 'Orange'],
        'Central Valley': ['Fresno', 'Kern', 'Kings', 'Madera', 'Merced',
                           'San Joaquin', 'Stanislaus', 'Tulare'],
    }
    for region, members in region_members.items():
        if county in members:
            return region
    return 'Other'
+
# Tag each row of the county table with its region, then aggregate by region.
# dac_rate is averaged across county rows (unweighted); the rest are summed.
geographic_dist['region'] = geographic_dist.index.map(classify_region)
regional_metrics = (
    geographic_dist
    .groupby('region')
    .agg({
        'total_funding': 'sum',
        'project_count': 'sum',
        'dac_rate': 'mean',
        'total_ghg': 'sum',
    })
    .round(3)
)

# 3. Project Size vs GHG Efficiency
# One point per program: average funding per project (in millions) against
# GHG reductions per dollar. The figure is displayed by plt.show() at the end.
plt.figure(figsize=(12, 8))
avg_size_millions = program_scale['avg_funding'] / 1e6
plt.scatter(avg_size_millions, program_scale['ghg_per_dollar'], alpha=0.6)
plt.xlabel('Average Project Size (Millions $)')
plt.ylabel('GHG Reduction per Dollar')
plt.title('Project Size vs. GHG Efficiency')

# 4. Multi-county Analysis
# A comma in 'County' marks a multi-county row; missing counties are treated
# as single-county.
lists_several_counties = data_filtered['County'].str.contains(',', na=False)
multi_county_data = data_filtered[lists_several_counties]
single_county_data = data_filtered[~lists_several_counties]


def _summarize_projects(projects):
    """Return the comparison metrics for one subset of project rows."""
    funding = projects['Total Program GGRFFunding']
    return {
        'project_count': len(projects),
        'total_funding': funding.sum(),
        'avg_funding': funding.mean(),
        'dac_rate': projects['Is Benefit Disadvantaged Communities'].mean(),
        'ghg_per_dollar': (projects['Total Project GHGReductions'].sum() /
                           funding.sum()),
    }


multi_vs_single = pd.DataFrame({
    'Multi-County': _summarize_projects(multi_county_data),
    'Single-County': _summarize_projects(single_county_data),
})

print("1. DAC Benefits by Program Size:")
print(dac_by_size)

print("\n2. Regional Distribution:")
print(regional_metrics)

print("\n4. Multi-County vs Single-County Projects:")
print(multi_vs_single.round(3))

plt.show()
+
+
+
+
+
+
+
+
+
+
+
+1. DAC Benefits by Program Size: + dac_rate total_funding project_count + mean min max sum sum +size_category +Large 0.087 0.00 0.67 2185763970 1314 +Medium 0.036 0.00 0.18 779274523 831 +Mega 0.160 0.16 0.16 1192203125 93 +Small 0.084 0.00 0.92 2003043460 28913 + +2. Regional Distribution: + total_funding project_count dac_rate total_ghg +region +Central Valley 1387932197 14496 0.104 25320641 +Other 2289435192 9195 0.101 24839824 +Urban 2482917689 7460 0.161 15727243 + +4. Multi-County vs Single-County Projects: + Multi-County Single-County +project_count 7.520000e+02 3.039900e+04 +total_funding 5.210746e+08 5.639210e+09 +avg_funding 6.929184e+05 1.855064e+05 +dac_rate 1.090000e-01 2.730000e-01 +ghg_per_dollar 1.200000e-02 1.100000e-02 ++
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+-
+
- Efficiency Sweet Spot +
-
+
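- Highest GHG efficiency (0.12-0.13 reduction per dollar) occurs in a program whose average project size is roughly $1-2M (each point in the scatter plot is a program, not an individual project) +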
- Secondary peak (0.06) in smaller projects under $1M +
- Suggests optimal scale for GHG reduction isn't necessarily larger projects +
-
+
- Size-Efficiency Relationship +
-
+
- No clear linear relationship +
- Most programs cluster in the lower ranges of both average project size and efficiency +
- The program with the largest average project size ($12M+) shows relatively low GHG efficiency +
- Suggests diminishing returns as projects scale up +
-
+
- Distribution Pattern +
-
+
- Dense cluster of projects under $2M with varying efficiency +
- Sparse distribution in higher project sizes +
- Few projects achieve both large scale and high efficiency +
Let's identify:
+-
+
- What programs occupy that sweet spot around $2M with high efficiency? +
- What characteristics do those efficient projects share? +
- Are there geographic patterns in the more efficient projects? +
+
+
+
+
+
+
+In [15]:
+
+
+
+
+
# Identify the most GHG-efficient programs and look at their characteristics.

# Split programs into four equal-sized tiers by GHG reductions per dollar.
tier_labels = ['Low', 'Medium-Low', 'Medium-High', 'High']
program_scale['efficiency_tier'] = pd.qcut(
    program_scale['ghg_per_dollar'], q=4, labels=tier_labels
)

print("1. Most GHG-Efficient Programs:")
in_top_tier = program_scale['efficiency_tier'] == 'High'
high_efficiency = program_scale[in_top_tier].sort_values(
    'ghg_per_dollar', ascending=False
)
overview_columns = ['total_funding', 'avg_funding', 'ghg_per_dollar', 'dac_rate']
print(high_efficiency[overview_columns].round(3))

# Analyze characteristics of high-efficiency programs
print("\n2. Characteristics of High-Efficiency Programs:")
print("\nSize Distribution:")
print(high_efficiency['size_category'].value_counts())

# Geographic analysis of high-efficiency programs
print("\n3. Geographic Distribution of High-Efficiency Projects:")
# Keep only the rows that belong to a top-tier program.
belongs_to_top_program = data_filtered['Program Name'].isin(high_efficiency.index)
high_eff_projects = data_filtered[belongs_to_top_program]

# NOTE: this rebinds `geographic_dist`; the result keeps two-level column
# names, unlike the flattened county table built earlier in the notebook.
geographic_dist = high_eff_projects.groupby('County').agg({
    'Total Program GGRFFunding': ['count', 'sum'],
    'Total Project GHGReductions': 'sum'
}).round(2)

# GHG reductions per dollar for each 'County' value.
geographic_dist['ghg_per_dollar'] = (
    geographic_dist[('Total Project GHGReductions', 'sum')] /
    geographic_dist[('Total Program GGRFFunding', 'sum')]
)

print(geographic_dist.nlargest(5, 'ghg_per_dollar'))

# Create visualization of efficiency patterns: two side-by-side scatter plots.
plt.figure(figsize=(15, 8))

# Left panel: size vs efficiency for the top-tier programs.
plt.subplot(1, 2, 1)
size_in_millions = high_efficiency['avg_funding'] / 1e6
plt.scatter(size_in_millions,
            high_efficiency['ghg_per_dollar'],
            alpha=0.6,
            s=100)
plt.xlabel('Average Project Size (Millions $)')
plt.ylabel('GHG Reduction per Dollar')
plt.title('High-Efficiency Programs: Size vs Performance')

# Label each point with its program name, cut to 20 characters plus '...'
# when the name is longer than that.
for program_name, program_row in high_efficiency.iterrows():
    if len(program_name) > 20:
        point_label = program_name[:20] + '...'
    else:
        point_label = program_name
    point_xy = (program_row['avg_funding'] / 1e6, program_row['ghg_per_dollar'])
    plt.annotate(point_label, point_xy,
                 xytext=(5, 5), textcoords='offset points')

# Right panel: DAC benefit rate vs efficiency.
plt.subplot(1, 2, 2)
plt.scatter(high_efficiency['dac_rate'],
            high_efficiency['ghg_per_dollar'],
            alpha=0.6,
            s=100)
plt.xlabel('DAC Benefit Rate')
plt.ylabel('GHG Reduction per Dollar')
plt.title('High-Efficiency Programs: Equity vs Performance')

plt.tight_layout()
plt.show()
+
+
+
+
+
+
+
+
+
+
+
+1. Most GHG-Efficient Programs: + total_funding \ +Program Name +Sustainable Agricultural Lands Conservation Pro... 122424176 +Climate Smart Agriculture 338161549 +Fluorinated Gases Emission Reduction Incentives 1000001 +Low-Carbon Fuels Production Program 12500000 +Food Production Investment Program 117791478 +Wetlands and Watershed Restoration 16171301 +Forest Health Program 207621340 +Waste Diversion 118571417 +Renewable Energy for Agriculture Program 9500000 +Woodsmoke Reduction Program 7895909 + + avg_funding \ +Program Name +Sustainable Agricultural Lands Conservation Pro... 1275251.83 +Climate Smart Agriculture 257941.68 +Fluorinated Gases Emission Reduction Incentives 66666.73 +Low-Carbon Fuels Production Program 3125000.00 +Food Production Investment Program 2103419.25 +Wetlands and Watershed Restoration 2021412.62 +Forest Health Program 1701814.26 +Waste Diversion 463169.60 +Renewable Energy for Agriculture Program 211111.11 +Woodsmoke Reduction Program 8233.48 + + ghg_per_dollar dac_rate +Program Name +Sustainable Agricultural Lands Conservation Pro... 0.123 0.01 +Climate Smart Agriculture 0.062 0.05 +Fluorinated Gases Emission Reduction Incentives 0.037 0.00 +Low-Carbon Fuels Production Program 0.036 0.00 +Food Production Investment Program 0.025 0.00 +Wetlands and Watershed Restoration 0.025 0.00 +Forest Health Program 0.023 0.00 +Waste Diversion 0.016 0.09 +Renewable Energy for Agriculture Program 0.013 0.00 +Woodsmoke Reduction Program 0.013 0.00 + +2. Characteristics of High-Efficiency Programs: + +Size Distribution: +size_category +Large 5 +Small 5 +Name: count, dtype: int64 + +3. 
Geographic Distribution of High-Efficiency Projects: + Total Program GGRFFunding \ + count sum +County +Sierra 5 485667 +Mono 10 2946879 +Lassen 28 5510913 +Calaveras 32 9678108 +Los Angeles, Solano 1 212629 + + Total Project GHGReductions ghg_per_dollar + sum +County +Sierra 730033 1.503155 +Mono 1570341 0.532883 +Lassen 2089977 0.379243 +Calaveras 1857223 0.191899 +Los Angeles, Solano 23680 0.111368 ++
+
+
+
+
+
+
+
+
+
+
+
+
+
+In [16]:
+
+
+
+
+
# Define urban/rural classification
# County groupings used by classify_urban_rural; any other named county is
# treated as 'Rural'.
_URBAN_COUNTIES = frozenset({
    'Los Angeles', 'San Francisco', 'Alameda', 'San Diego', 'Orange', 'Santa Clara',
})
_CENTRAL_VALLEY_COUNTIES = frozenset({
    'Fresno', 'Kern', 'Kings', 'Madera', 'Merced', 'San Joaquin', 'Stanislaus', 'Tulare',
})


def classify_urban_rural(county):
    """Classify a project's county as 'Urban', 'Central Valley' or 'Rural'.

    Args:
        county: County name, or a comma-separated list of county names for a
            multi-county project. Only the first listed county is used.

    Returns:
        'Urban', 'Central Valley' or 'Rural' for a named county, or None when
        the value is not a string (e.g. NaN) or the first name is blank.
    """
    # A missing county must not be counted as 'Rural'; returning None lets
    # groupby/crosstab drop the row instead of inflating the rural totals.
    if not isinstance(county, str):
        return None

    # Multi-county projects are classified by the first county listed.
    primary = county.split(',')[0].strip()
    if not primary:
        return None

    if primary in _URBAN_COUNTIES:
        return 'Urban'
    if primary in _CENTRAL_VALLEY_COUNTIES:
        return 'Central Valley'
    return 'Rural'
+
# Tag every project with its region type
data_filtered['region_type'] = data_filtered['County'].map(classify_urban_rural)

# Per-region aggregates: project count, funding, GHG reductions and DAC share
region_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': ['sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
}
region_analysis = data_filtered.groupby('region_type').agg(region_metrics).round(3)

# GHG reductions achieved per dollar of funding, by region
ghg_efficiency = region_analysis[('Total Project GHGReductions', 'sum')].div(
    region_analysis[('Total Program GGRFFunding', 'sum')]
)

print("Urban/Rural Analysis Summary:")
summary_df = pd.DataFrame({
    'Project Count': region_analysis[('Total Program GGRFFunding', 'count')],
    'Total Funding (B)': region_analysis[('Total Program GGRFFunding', 'sum')].div(1e9),
    'Avg Project Size (M)': region_analysis[('Total Program GGRFFunding', 'mean')].div(1e6),
    'GHG Efficiency': ghg_efficiency,
    'DAC Rate': region_analysis[('Is Benefit Disadvantaged Communities', 'mean')],
})
print(summary_df.round(3))

# One bar chart per summary metric on a 2x2 grid
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
panels = [
    (ax1, 'Total Funding (B)',
     'Total Funding by Region (Billions $)', 'Funding (Billions $)'),
    (ax2, 'Avg Project Size (M)',
     'Average Project Size by Region (Millions $)', 'Average Size (Millions $)'),
    (ax3, 'GHG Efficiency',
     'GHG Reduction Efficiency by Region', 'GHG Reduction per Dollar'),
    (ax4, 'DAC Rate',
     'DAC Benefit Rate by Region', 'Proportion Benefiting DACs'),
]
for axis, column, title, ylabel in panels:
    axis.bar(summary_df.index, summary_df[column], color='skyblue')
    axis.set_title(title)
    axis.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

# Funding per program and region (in millions of dollars)
print("\nTop Programs by Region (Funding in Millions $):")
program_by_region = pd.crosstab(
    index=data_filtered['Program Name'],
    columns=data_filtered['region_type'],
    values=data_filtered['Total Program GGRFFunding'],
    aggfunc='sum',
).div(1e6)

# Ten programs with the most urban funding
print(
    program_by_region
    .sort_values('Urban', ascending=False)
    .head(10)
    .round(2)
)
+
+
+
+
+
+
+
+
+
+
+
+Urban/Rural Analysis Summary: + Project Count Total Funding (B) Avg Project Size (M) \ +region_type +Central Valley 14496 1.388 0.096 +Rural 8474 2.183 0.258 +Urban 8181 2.589 0.316 + + GHG Efficiency DAC Rate +region_type +Central Valley 0.018 0.181 +Rural 0.011 0.233 +Urban 0.007 0.462 ++
+
+
+
+
+
+
+
+
+
+
++Top Programs by Region (Funding in Millions $): +region_type Central Valley Rural \ +Program Name +Transit and Intercity Rail Capital Program 18.28 86.89 +Affordable Housing and Sustainable Communities ... 205.61 321.80 +Low Carbon Transit Operations Program 52.88 178.45 +Community Air Protection 144.75 145.80 +Transformative Climate Communities 69.17 62.67 +Urban Greening Program 18.91 58.50 +Fire Prevention Program 51.09 493.55 +Waste Diversion 22.71 50.54 +Low-Income Weatherization Program 62.87 57.02 +Urban and Community Forestry Program 7.55 13.80 + +region_type Urban +Program Name +Transit and Intercity Rail Capital Program 666.38 +Affordable Housing and Sustainable Communities ... 664.79 +Low Carbon Transit Operations Program 544.58 +Community Air Protection 238.97 +Transformative Climate Communities 70.89 +Urban Greening Program 61.36 +Fire Prevention Program 51.63 +Waste Diversion 45.32 +Low-Income Weatherization Program 39.08 +Urban and Community Forestry Program 37.84 ++
+
+
+
+
+
+
+
+
+-
+
- GHG Efficiency Gap +
-
+
- Central Valley is ~2.8x more efficient than Urban areas (0.018 vs 0.007 GHG reduction per dollar, rounded) +
- Rural areas also outperform Urban (0.011 vs 0.007) +
- Suggests climate dollars go further in non-urban areas +
-
+
- Inverse DAC Pattern +
-
+
- Urban areas have highest DAC benefit rate (~0.46) +
- Rural areas at ~0.23 DAC rate +
- Central Valley lowest at ~0.18 +
-
+
- Investment Distribution +
-
+
- Urban areas get most funding ($2.6B) +
- Rural second ($2.2B) +
- Central Valley least ($1.4B) +
- Average project sizes follow same pattern +
+
+
+
+
+
+
+
+In [17]:
+
+
+
+
+
# Calculate efficiency ratios and examine program types
print("Program Types and Efficiency by Region:")

# Total funding and total GHG reductions for every (program, region) pair
program_efficiency = (
    data_filtered
    .groupby(['Program Name', 'region_type'])
    .agg({
        'Total Program GGRFFunding': 'sum',
        'Total Project GHGReductions': 'sum',
    })
    .reset_index()
)

# GHG reductions per dollar. A group whose total funding is zero would divide
# to +/-inf and float to the top of the ranking below, so the zero denominator
# is masked to NaN (nlargest does not rank NaN as a largest value).
program_efficiency['efficiency'] = (
    program_efficiency['Total Project GHGReductions']
    / program_efficiency['Total Program GGRFFunding'].replace(0, np.nan)
)

# Show top efficient programs by region
for region in ['Urban', 'Rural', 'Central Valley']:
    print(f"\n{region} Most Efficient Programs:")
    region_data = program_efficiency[program_efficiency['region_type'] == region]
    print(region_data.nlargest(5, 'efficiency')[
        ['Program Name', 'Total Program GGRFFunding', 'efficiency']
    ].round(4))
+
+
+
+
+
+
+
+
+
+
+
+Program Types and Efficiency by Region: + +Urban Most Efficient Programs: + Program Name \ +100 Woodsmoke Reduction Program +29 Fluorinated Gases Emission Reduction Incentives +61 Renewable Energy for Agriculture Program +89 Waste Diversion +32 Food Production Investment Program + + Total Program GGRFFunding efficiency +100 808 0.1027 +29 530239 0.0380 +61 42551 0.0290 +89 45316021 0.0203 +32 13221462 0.0129 + +Rural Most Efficient Programs: + Program Name \ +69 Sustainable Agricultural Lands Conservation Pr... +49 Low-Carbon Fuels Production Program +28 Fluorinated Gases Emission Reduction Incentives +31 Food Production Investment Program +37 Forest Health Program + + Total Program GGRFFunding efficiency +69 108577417 0.1273 +49 5000000 0.0905 +28 469762 0.0352 +31 32710900 0.0298 +37 163034942 0.0285 + +Central Valley Most Efficient Programs: + Program Name \ +68 Sustainable Agricultural Lands Conservation Pr... +14 Climate Smart Agriculture +30 Food Production Investment Program +87 Waste Diversion +59 Renewable Energy for Agriculture Program + + Total Program GGRFFunding efficiency +68 9744397 0.1253 +14 274120208 0.0751 +30 71859116 0.0255 +87 22712521 0.0252 +59 3761222 0.0180 ++
+
+
+
+
+
+
+
+
+This breakdown is really revealing about what works where:
+-
+
- Urban Efficiency Leaders +
-
+
- Woodsmoke Reduction (0.1027) +
- Fluorinated Gases (0.0380) +
- Renewable Energy for Agriculture (0.0290) +
- But notice: Top performers have relatively small funding amounts (except Waste Diversion at $45M) +
-
+
- Rural Efficiency Champions +
-
+
- Sustainable Agricultural Lands (0.1273, $108M) +
- Low-Carbon Fuels (0.0905) +
- Larger programs achieving high efficiency +
- Strong agriculture and land-use focus +
-
+
- Central Valley Success Stories +
-
+
- Sustainable Agricultural Lands (0.1253) +
- Climate Smart Agriculture (0.0751, $274M) +
- High efficiency with substantial funding amounts +
Key Patterns:
+-
+
- Agricultural and land-use programs dominate efficiency in rural/CV +
- Urban areas achieve efficiency through smaller, targeted programs +
- Central Valley shows ability to maintain efficiency at scale +
- Similar program types (e.g., Food Production) perform differently by region +
Would you like to:
+-
+
- Analyze the characteristics of the most efficient programs in each region? +
- Look at how program size relates to efficiency within each region? +
- Examine whether these patterns change over time? +
- Investigate if multi-county collaborations affect these regional differences? +
+
+
+
+
+
+
+
+In [18]:
+
+
+
+
+
# Flag projects whose County field lists more than one county (comma-separated)
data_filtered['is_multi_county'] = data_filtered['County'].str.contains(',', na=False)

# Funding, GHG and DAC aggregates for each (region, single/multi-county) pair
collab_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean',
}
regional_collab = (
    data_filtered
    .groupby(['region_type', 'is_multi_county'])
    .agg(collab_metrics)
    .round(3)
)

# GHG reductions per dollar of funding
regional_collab['ghg_per_dollar'] = regional_collab[
    ('Total Project GHGReductions', 'sum')
].div(regional_collab[('Total Program GGRFFunding', 'sum')])

print("Multi-County vs Single-County by Region:")
print(regional_collab)
+
+
+
+
+
+
+
+
+
+
+
+Multi-County vs Single-County by Region: + Total Program GGRFFunding \ + count sum +region_type is_multi_county +Central Valley False 14312 1314145988 + True 184 73786209 +Rural False 8194 2060718991 + True 280 122681272 +Urban False 7893 2264345480 + True 288 324607138 + + Total Project GHGReductions \ + mean sum +region_type is_multi_county +Central Valley False 91821.268 24949448 + True 401012.005 371193 +Rural False 251491.212 22645122 + True 438147.400 906509 +Urban False 286880.208 12069861 + True 1127108.118 4945575 + + Is Benefit Disadvantaged Communities \ + mean +region_type is_multi_county +Central Valley False 0.182 + True 0.103 +Rural False 0.238 + True 0.086 +Urban False 0.474 + True 0.135 + + ghg_per_dollar + +region_type is_multi_county +Central Valley False 0.018985 + True 0.005031 +Rural False 0.010989 + True 0.007389 +Urban False 0.005330 + True 0.015236 ++
+
+
+
+
+
+
+
+
+This intersection of multi-county and regional patterns reveals:
+-
+
- Project Size Patterns +
-
+
- Average project size is consistently larger in multi-county projects across all regions:
-
+
- Urban: $1.13M vs $287K +
- Rural: $438K vs $251K +
- Central Valley: $401K vs $92K +
+
-
+
- GHG Efficiency Varies By Region +
-
+
- Single-county efficiency:
-
+
- Central Valley leads (0.019) +
- Rural second (0.011) +
- Urban lowest (0.005) +
+ - Multi-county efficiency shows different pattern:
-
+
- Urban leads (0.015) +
- Rural and Central Valley lower (0.007 and 0.005) +
+
-
+
- DAC Benefits Trade-off +
-
+
- Single-county DAC rates:
-
+
- Urban highest (47.4%) +
- Rural (23.8%) +
- Central Valley (18.2%) +
+ - Multi-county shows lower DAC rates across all regions:
-
+
- Urban (13.5%) +
- Central Valley (10.3%) +
- Rural lowest (8.6%) +
+
Key Findings:
+-
+
- Multi-county collaborations most successful in urban areas for GHG efficiency +
- Single-county projects better at reaching DACs across all regions +
- Central Valley efficiency advantage disappears in multi-county projects +
- Urban areas seem to benefit most from regional coordination +
This suggests different collaboration strategies might be needed for different regions.
+
+
+
+
+
+
+
+In [19]:
+
+
+
+
+
+gdf.columns  # list the columns of the GeoDataFrame loaded from the CES4 shapefile
+
+
+
+
+
+
+
+
+Out[19]:
+
+
+Index(['Tract', 'ZIP', 'County', 'ApproxLoc', 'TotPop19', 'CIscore', + 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5', 'PM2_5_P', 'DieselPM', + 'DieselPM_P', 'Pesticide', 'PesticideP', 'Tox_Rel', 'Tox_Rel_P', + 'Traffic', 'TrafficP', 'DrinkWat', 'DrinkWatP', 'Lead', 'Lead_P', + 'Cleanup', 'CleanupP', 'GWThreat', 'GWThreatP', 'HazWaste', 'HazWasteP', + 'ImpWatBod', 'ImpWatBodP', 'SolWaste', 'SolWasteP', 'PollBurd', + 'PolBurdSc', 'PolBurdP', 'Asthma', 'AsthmaP', 'LowBirtWt', 'LowBirWP', + 'Cardiovas', 'CardiovasP', 'Educatn', 'EducatP', 'Ling_Isol', + 'Ling_IsolP', 'Poverty', 'PovertyP', 'Unempl', 'UnemplP', 'HousBurd', + 'HousBurdP', 'PopChar', 'PopCharSc', 'PopCharP', 'Child_10', + 'Pop_10_64', 'Elderly65', 'Hispanic', 'White', 'AfricanAm', 'NativeAm', + 'OtherMult', 'Shape_Leng', 'Shape_Area', 'AAPI', 'geometry'], + dtype='object')+
+
+
+
+
+
+
+In [22]:
+
+
+
+
+
# Clean census tract IDs in both dataframes
def standardize_tract(tract):
    """Return a canonical string key for a census tract ID, or None if missing.

    The two datasets store tract IDs differently (one as text, one as float),
    so every value is reduced to the plain decimal digits of the integer ID.

    Args:
        tract: Tract ID as an int, float, string, or a missing value.

    Returns:
        The ID as a digit string without leading zeros or a trailing '.0';
        None for missing values and blank strings. Strings that are not a
        plain integer (optionally followed by '.0...') are returned stripped
        but otherwise unchanged.
    """
    if pd.isna(tract):
        return None
    if isinstance(tract, (int, float)):
        return str(int(tract))

    text = str(tract).strip()
    if not text:
        # A blank ID is a missing ID; None lets callers filter it with notna().
        return None

    # Numeric text such as '06083002103' or '6083002103.0' must produce the
    # same key as the numeric value 6083002103, otherwise the merge silently
    # drops the row.
    whole, _, fraction = text.partition('.')
    if whole.isascii() and whole.isdigit() and not fraction.strip('0'):
        return str(int(whole))
    return text
+
# Check the data first
print("\nCES Tract Examples:")
print(gdf['Tract'].head())
print("\nCCI Tract Examples:")
print(data_filtered['Census Tract'].head())

# Clean and standardize so both frames share one string join key
gdf['Tract_clean'] = gdf['Tract'].apply(standardize_tract)
data_filtered['Tract_clean'] = data_filtered['Census Tract'].apply(standardize_tract)

# Check for missing tracts
print("\nMissing Tract Counts:")
print("CES missing tracts:", gdf['Tract_clean'].isna().sum())
print("CCI missing tracts:", data_filtered['Tract_clean'].isna().sum())

# Merge datasets, excluding null tracts. The inner join keeps only projects
# whose tract ID is present in the CalEnviroScreen table.
merged_data = pd.merge(
    data_filtered[data_filtered['Tract_clean'].notna()],
    gdf[['Tract_clean', 'CIscore', 'CIscoreP', 'PollBurd', 'Poverty', 'TotPop19']],
    on='Tract_clean',
    how='inner'
)

print("\nMerged Data Summary:")
print("Total rows:", len(merged_data))
print("Unique tracts:", merged_data['Tract_clean'].nunique())

# Basic analysis of funding by CES score.
# Quintiles are cut over the merged project rows, so each bin holds roughly
# the same number of projects (not the same number of tracts).
print("\nFunding Distribution by CalEnviroScreen Score Quintiles:")
merged_data['CES_quintile'] = pd.qcut(merged_data['CIscore'], q=5,
                                      labels=['Lowest Burden', 'Low', 'Medium', 'High', 'Highest Burden'])

# CES_quintile is categorical: pass observed explicitly to keep the current
# behaviour and avoid the pandas FutureWarning about the changing default.
ces_analysis = merged_data.groupby('CES_quintile', observed=False).agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean'
}).round(2)

print(ces_analysis)
+
+
+
+
+
+
+
+
+
+
+
++CES Tract Examples: +0 6083002103 +1 6083002402 +2 6083002102 +3 6083002010 +4 6083002009 +Name: Tract, dtype: object + +CCI Tract Examples: +16067 NaN +25098 6.001402e+09 +25099 6.001402e+09 +25100 6.001406e+09 +25101 6.001406e+09 +Name: Census Tract, dtype: float64 + +Missing Tract Counts: +CES missing tracts: 0 +CCI missing tracts: 17967 + +Merged Data Summary: +Total rows: 13184 +Unique tracts: 3665 + +Funding Distribution by CalEnviroScreen Score Quintiles: + Total Program GGRFFunding \ + count sum mean +CES_quintile +Lowest Burden 2638 39278353 14889.44 +Low 2642 39059436 14784.04 +Medium 2636 61809955 23448.39 +High 2643 50317266 19037.94 +Highest Burden 2625 37787591 14395.27 + + Total Project GHGReductions \ + sum +CES_quintile +Lowest Burden 2270359 +Low 202457 +Medium 265818 +High 210633 +Highest Burden 159776 + + Is Benefit Disadvantaged Communities + mean +CES_quintile +Lowest Burden 0.04 +Low 0.55 +Medium 0.84 +High 0.87 +Highest Burden 0.72 ++
+
+
+
+
+/tmp/ipykernel_30782/1664997265.py:41: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
+ ces_analysis = merged_data.groupby('CES_quintile').agg({
+
+
+
+
+
+
+
+
+In [23]:
+
+
+
+
+
import matplotlib.pyplot as plt

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# CES_quintile is categorical: group once and pass observed explicitly, which
# keeps the current behaviour and avoids one pandas FutureWarning per plot.
by_quintile = merged_data.groupby('CES_quintile', observed=False)

# Average Funding by CES Score
by_quintile['Total Program GGRFFunding'].mean().plot(
    kind='bar', ax=ax1, color='skyblue')
ax1.set_title('Average Funding by Environmental Burden')
ax1.set_ylabel('Average Funding ($)')

# GHG Reductions
by_quintile['Total Project GHGReductions'].sum().plot(
    kind='bar', ax=ax2, color='skyblue')
ax2.set_title('Total GHG Reductions by Environmental Burden')
ax2.set_ylabel('GHG Reductions')

# DAC Benefit Rate
by_quintile['Is Benefit Disadvantaged Communities'].mean().plot(
    kind='bar', ax=ax3, color='skyblue')
ax3.set_title('DAC Benefit Rate by Environmental Burden')
ax3.set_ylabel('Proportion Benefiting DACs')

# Project Counts
by_quintile['Total Program GGRFFunding'].count().plot(
    kind='bar', ax=ax4, color='skyblue')
ax4.set_title('Number of Projects by Environmental Burden')
ax4.set_ylabel('Number of Projects')

plt.tight_layout()
plt.show()

# Also examine program types by CES quintile
print("\nProgram Types by Environmental Burden:")
program_dist = pd.crosstab(
    merged_data['CES_quintile'],
    merged_data['Program Name'],
    values=merged_data['Total Program GGRFFunding'],
    aggfunc='sum'
)/1e6  # Convert to millions

print(program_dist.round(2))
+
+
+
+
+
+
+
+
+
+
+
+/tmp/ipykernel_30782/541290860.py:6: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
+ merged_data.groupby('CES_quintile')['Total Program GGRFFunding'].mean().plot(
+/tmp/ipykernel_30782/541290860.py:12: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
+ merged_data.groupby('CES_quintile')['Total Project GHGReductions'].sum().plot(
+/tmp/ipykernel_30782/541290860.py:18: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
+ merged_data.groupby('CES_quintile')['Is Benefit Disadvantaged Communities'].mean().plot(
+/tmp/ipykernel_30782/541290860.py:24: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
+ merged_data.groupby('CES_quintile')['Total Program GGRFFunding'].count().plot(
+
+
+
+
+
+
+
+
+
+
+
+
++Program Types by Environmental Burden: +Program Name Low-Income Weatherization Program \ +CES_quintile +Lowest Burden 3.69 +Low 28.77 +Medium 50.45 +High 45.19 +Highest Burden 30.88 + +Program Name Sustainable Agricultural Lands Conservation Program \ +CES_quintile +Lowest Burden 22.05 +Low 0.00 +Medium 0.00 +High 0.00 +Highest Burden 0.00 + +Program Name Transformative Climate Communities Waste Diversion \ +CES_quintile +Lowest Burden 0.00 0.34 +Low 0.48 0.23 +Medium 0.94 0.15 +High 0.33 0.12 +Highest Burden 3.14 0.14 + +Program Name Water-Energy Efficiency Woodsmoke Reduction Program \ +CES_quintile +Lowest Burden 7.13 6.00 +Low 8.02 1.50 +Medium 9.63 0.36 +High 4.31 0.01 +Highest Burden 3.38 0.03 + +Program Name Workforce Training and Development +CES_quintile +Lowest Burden 0.07 +Low 0.07 +Medium 0.29 +High 0.36 +Highest Burden 0.21 ++
+
+
+
+
+
+
+In [24]:
+
+
+
+
+
# Create meaningful CES score categories
def categorize_ces(score):
    """Map a CalEnviroScreen percentile to a burden category label.

    Args:
        score: Percentile value, expected on a 0-100 scale.

    Returns:
        One of 'Low Burden (0-25%)', 'Moderate Burden (25-50%)',
        'High Burden (50-75%)' or 'Highest Burden (75-100%)'; None when the
        score is NaN or negative.
    """
    # NaN fails every comparison, so this guard rejects both missing values
    # and negative sentinels instead of labelling them 'Low Burden'.
    # NOTE(review): CalEnviroScreen shapefiles reportedly code missing values
    # as -999 -- confirm against the data dictionary.
    if not score >= 0:
        return None

    if score >= 75:  # Top 25% - most burdened
        return 'Highest Burden (75-100%)'
    if score >= 50:
        return 'High Burden (50-75%)'
    if score >= 25:
        return 'Moderate Burden (25-50%)'
    return 'Low Burden (0-25%)'  # Bottom 25% - least burdened
+
# Label every merged project row with its CES percentile band
merged_data['CES_category'] = merged_data['CIscoreP'].apply(categorize_ces)

# Funding, GHG and DAC aggregates per percentile band
category_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean',
}
ces_analysis_new = merged_data.groupby('CES_category').agg(category_metrics).round(2)

print("Distribution using CES Percentile Thresholds:")
print(ces_analysis_new)

# One bar chart per metric on a 2x2 grid
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
by_category = merged_data.groupby('CES_category')

# (axis, source column, aggregation, title, y-label) for each panel
category_panels = [
    (ax1, 'Total Program GGRFFunding', 'mean',
     'Average Funding by Environmental Burden', 'Average Funding ($)'),
    (ax2, 'Total Project GHGReductions', 'sum',
     'Total GHG Reductions by Environmental Burden', 'GHG Reductions'),
    (ax3, 'Is Benefit Disadvantaged Communities', 'mean',
     'DAC Benefit Rate by Environmental Burden', 'Proportion Benefiting DACs'),
    (ax4, 'Total Program GGRFFunding', 'count',
     'Number of Projects by Environmental Burden', 'Number of Projects'),
]
for axis, column, statistic, title, ylabel in category_panels:
    by_category[column].agg(statistic).plot.bar(ax=axis, color='skyblue')
    axis.set_title(title)
    axis.set_ylabel(ylabel)

plt.tight_layout()
plt.show()
+
+
+
+
+
+
+
+
+
+
+
+Distribution using CES Percentile Thresholds: + Total Program GGRFFunding \ + count sum mean +CES_category +High Burden (50-75%) 2932 42831569 14608.31 +Highest Burden (75-100%) 7661 146493459 19121.98 +Low Burden (0-25%) 1182 7475213 6324.21 +Moderate Burden (25-50%) 1409 31452360 22322.47 + + Total Project GHGReductions \ + sum +CES_category +High Burden (50-75%) 218725 +Highest Burden (75-100%) 622363 +Low Burden (0-25%) 163482 +Moderate Burden (25-50%) 2104473 + + Is Benefit Disadvantaged Communities + mean +CES_category +High Burden (50-75%) 0.56 +Highest Burden (75-100%) 0.81 +Low Burden (0-25%) 0.01 +Moderate Burden (25-50%) 0.07 ++
+
+
+
+
+
+
+
+
+
+
+
+
+In [25]:
+
+
+
+
+
# Burden bands from least to most burdened, used to order the bars
category_order = [
    'Low Burden (0-25%)',
    'Moderate Burden (25-50%)',
    'High Burden (50-75%)',
    'Highest Burden (75-100%)',
]

# Same four panels as before, with the bars in burden order
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
by_category = merged_data.groupby('CES_category')

# (axis, source column, aggregation, title, y-label) for each panel
ordered_panels = [
    (ax1, 'Total Program GGRFFunding', 'mean',
     'Average Funding by Environmental Burden', 'Average Funding ($)'),
    (ax2, 'Total Project GHGReductions', 'sum',
     'Total GHG Reductions by Environmental Burden', 'GHG Reductions'),
    (ax3, 'Is Benefit Disadvantaged Communities', 'mean',
     'DAC Benefit Rate by Environmental Burden', 'Proportion Benefiting DACs'),
    (ax4, 'Total Program GGRFFunding', 'count',
     'Number of Projects by Environmental Burden', 'Number of Projects'),
]
for ax, column, statistic, title, ylabel in ordered_panels:
    ordered_values = by_category[column].agg(statistic).loc[category_order]
    ordered_values.plot.bar(ax=ax, color='skyblue')
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    # Rotate x-labels for better readability
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.show()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+In [26]:
+
+
+
+
+
# Agency Performance Analysis
# Aggregates reported per agency: project count / total / mean funding,
# total GHG reductions, and the share of projects flagged as benefiting
# disadvantaged communities.
agency_metrics = {
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean'
}
agency_performance = data_filtered.groupby('Agency Name').agg(agency_metrics).round(3)

print("Agency Performance Overview:")
print(agency_performance)

# Add efficiency metrics (GHG reductions per dollar of funding)
ghg_per_dollar = (
    agency_performance[('Total Project GHGReductions', 'sum')] /
    agency_performance[('Total Program GGRFFunding', 'sum')]
)
agency_performance['ghg_per_dollar'] = ghg_per_dollar
dac_rate = agency_performance[('Is Benefit Disadvantaged Communities', 'mean')]

# Look at agency patterns in multi-county projects
# (a comma in the County field marks a multi-county project)
multi_county = data_filtered[data_filtered['County'].str.contains(',', na=False)]
multi_agency_patterns = multi_county.groupby('Agency Name').agg(agency_metrics).round(3)

print("\nAgency Performance in Multi-County Projects:")
print(multi_agency_patterns)

# Visualize agency performance metrics
plt.figure(figsize=(15, 10))

# Plot GHG efficiency vs DAC benefit rate by agency
plt.scatter(
    ghg_per_dollar,
    dac_rate,
    s=agency_performance[('Total Program GGRFFunding', 'sum')]/1e6,  # Size by total funding
    alpha=0.6
)

# Add agency labels. The columns are a MultiIndex, so row['ghg_per_dollar']
# inside iterrows() would be a one-element Series (matplotlib then warns when
# converting it to float); iterate over the plain Series to pass scalars.
for agency, x_value, y_value in zip(agency_performance.index, ghg_per_dollar, dac_rate):
    label = agency[:20] + '...' if len(agency) > 20 else agency
    plt.annotate(label, (x_value, y_value),
                 xytext=(5, 5), textcoords='offset points')

plt.xlabel('GHG Reduction per Dollar')
plt.ylabel('DAC Benefit Rate')
plt.title('Agency Performance: Climate Impact vs Equity\nSize = Total Funding')

plt.tight_layout()
plt.show()
+
+
+
+
+
+
+
+
+
+
+
+Agency Performance Overview: + Total Program GGRFFunding \ + count +Agency Name +California Air Resources Board 12329 +California Coastal Commission 17 +California Conservation Corps 766 +California Department of Community Services and... 6331 +California Department of Fish and Wildlife 8 +California Department of Food and Agriculture 1311 +California Department of Forestry and Fire Prot... 2047 +California Department of Resources Recycling an... 723 +California Department of Transportation 769 +California Department of Water Resources 4887 +California Energy Commission 119 +California Environmental Protection Agency 2 +California Governors Office of Emergency Services 63 +California Natural Resources Agency 213 +California State Coastal Conservancy 10 +California State Transportation Agency 135 +California State Water Resources Control Board 100 +California Strategic Growth Council 1200 +California Wildlife Conservation Board 17 +California Workforce Development Board 30 +San Francisco Bay Conservation and Development ... 74 + + \ + sum mean +Agency Name +California Air Resources Board 796368463 64593.111 +California Coastal Commission 2667413 156906.647 +California Conservation Corps 47161363 61568.359 +California Department of Community Services and... 158967758 25109.423 +California Department of Fish and Wildlife 16171301 2021412.625 +California Department of Food and Agriculture 338161549 257941.685 +California Department of Forestry and Fire Prot... 967562562 472673.455 +California Department of Resources Recycling an... 
175129441 242226.059 +California Department of Transportation 785906434 1021984.960 +California Department of Water Resources 52470432 10736.737 +California Energy Commission 140791478 1183121.664 +California Environmental Protection Agency 2560940 1280470.000 +California Governors Office of Emergency Services 31037358 492656.476 +California Natural Resources Agency 145023190 680860.047 +California State Coastal Conservancy 7218244 721824.400 +California State Transportation Agency 771556000 5715229.630 +California State Water Resources Control Board 118601405 1186014.050 +California Strategic Growth Council 1557246299 1297705.249 +California Wildlife Conservation Board 12574800 739694.118 +California Workforce Development Board 24428357 814278.567 +San Francisco Bay Conservation and Development ... 8680291 117301.230 + + Total Project GHGReductions \ + sum +Agency Name +California Air Resources Board 509888 +California Coastal Commission 0 +California Conservation Corps 292164 +California Department of Community Services and... 583978 +California Department of Fish and Wildlife 405413 +California Department of Food and Agriculture 21019827 +California Department of Forestry and Fire Prot... 5176685 +California Department of Resources Recycling an... 1884352 +California Department of Transportation 5709880 +California Department of Water Resources 373494 +California Energy Commission 3553812 +California Environmental Protection Agency 0 +California Governors Office of Emergency Services 0 +California Natural Resources Agency 53327 +California State Coastal Conservancy 0 +California State Transportation Agency 9076036 +California State Water Resources Control Board -3620 +California Strategic Growth Council 17127711 +California Wildlife Conservation Board 124761 +California Workforce Development Board 0 +San Francisco Bay Conservation and Development ... 
0 + + Is Benefit Disadvantaged Communities + mean +Agency Name +California Air Resources Board 0.000 +California Coastal Commission 0.000 +California Conservation Corps 0.000 +California Department of Community Services and... 0.917 +California Department of Fish and Wildlife 0.000 +California Department of Food and Agriculture 0.053 +California Department of Forestry and Fire Prot... 0.084 +California Department of Resources Recycling an... 0.032 +California Department of Transportation 0.095 +California Department of Water Resources 0.441 +California Energy Commission 0.000 +California Environmental Protection Agency 0.000 +California Governors Office of Emergency Services 0.000 +California Natural Resources Agency 0.150 +California State Coastal Conservancy 0.000 +California State Transportation Agency 0.193 +California State Water Resources Control Board 0.000 +California Strategic Growth Council 0.016 +California Wildlife Conservation Board 0.000 +California Workforce Development Board 0.000 +San Francisco Bay Conservation and Development ... 0.000 + +Agency Performance in Multi-County Projects: + Total Program GGRFFunding \ + count +Agency Name +California Air Resources Board 25 +California Conservation Corps 9 +California Department of Food and Agriculture 41 +California Department of Forestry and Fire Prot... 339 +California Department of Resources Recycling an... 23 +California Department of Transportation 158 +California Energy Commission 9 +California Natural Resources Agency 7 +California State Transportation Agency 57 +California State Water Resources Control Board 45 +California Strategic Growth Council 32 +California Wildlife Conservation Board 7 + + \ + sum mean +Agency Name +California Air Resources Board 2104048 84161.920 +California Conservation Corps 1248353 138705.889 +California Department of Food and Agriculture 6296871 153582.220 +California Department of Forestry and Fire Prot... 
32351508 95432.177 +California Department of Resources Recycling an... 6962204 302704.522 +California Department of Transportation 162505329 1028514.741 +California Energy Commission 12379666 1375518.444 +California Natural Resources Agency 2879148 411306.857 +California State Transportation Agency 232590000 4080526.316 +California State Water Resources Control Board 46265001 1028111.133 +California Strategic Growth Council 11948291 373384.094 +California Wildlife Conservation Board 3544200 506314.286 + + Total Project GHGReductions \ + sum +Agency Name +California Air Resources Board -413 +California Conservation Corps 842 +California Department of Food and Agriculture 42951 +California Department of Forestry and Fire Prot... 185424 +California Department of Resources Recycling an... 35434 +California Department of Transportation 2460280 +California Energy Commission 501007 +California Natural Resources Agency 1191 +California State Transportation Agency 2748646 +California State Water Resources Control Board -3234 +California Strategic Growth Council 251149 +California Wildlife Conservation Board 0 + + Is Benefit Disadvantaged Communities + mean +Agency Name +California Air Resources Board 0.000 +California Conservation Corps 0.000 +California Department of Food and Agriculture 0.000 +California Department of Forestry and Fire Prot... 0.168 +California Department of Resources Recycling an... 0.130 +California Department of Transportation 0.089 +California Energy Commission 0.000 +California Natural Resources Agency 0.000 +California State Transportation Agency 0.105 +California State Water Resources Control Board 0.000 +California Strategic Growth Council 0.062 +California Wildlife Conservation Board 0.000 ++
+
+
+
+
+/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/matplotlib/text.py:1465: FutureWarning: Calling float on a single element Series is deprecated and will raise a TypeError in the future. Use float(ser.iloc[0]) instead + x = float(self.convert_xunits(x)) ++
+
+
+
+
+
+
+
+
+
+
+
+
+
+In [27]:
+
+
+
+
+
# Bubble chart of agency performance: GHG reductions per dollar (x) against
# DAC benefit rate (y), with bubble area proportional to total GGRF funding.
plt.figure(figsize=(15, 10))

# Short display labels for agencies. Keys ending in '...' are prefixes copied
# from pandas' truncated display output; they are matched by prefix (see
# shorten_agency_name) because the index is expected to hold the full names.
agency_shortnames = {
    'California Air Resources Board': 'CARB',
    'California Conservation Corps': 'CCC',
    'California Department of Community Services and...': 'Community Services',
    'California Department of Food and Agriculture': 'CDFA',
    'California Department of Forestry and Fire Prot...': 'CalFire',
    'California Department of Resources Recycling an...': 'CalRecycle',
    'California Department of Transportation': 'Caltrans',
    'California Department of Water Resources': 'Water Resources',
    'California Energy Commission': 'Energy Commission',
    'California Natural Resources Agency': 'CNRA',
    'California State Transportation Agency': 'CalSTA',
    'California State Water Resources Control Board': 'Water Board',
    'California Strategic Growth Council': 'SGC',
    'California Wildlife Conservation Board': 'WCB',
}


def shorten_agency_name(name, max_len=15):
    """Return a short plot label for an agency name.

    Lookup order: exact key in ``agency_shortnames``; then any key that ends
    in '...' whose text before the ellipsis is a prefix of ``name``; finally
    the name itself, truncated to ``max_len`` characters plus '...' only when
    it is actually longer than ``max_len``.
    """
    if name in agency_shortnames:
        return agency_shortnames[name]
    for key, short in agency_shortnames.items():
        if key.endswith('...') and name.startswith(key[:-3]):
            return short
    return name if len(name) <= max_len else name[:max_len] + '...'


# agency_performance is indexed with two-level column labels, so a row taken
# from iterrows() returns a one-element Series for row['ghg_per_dollar'];
# passing that to annotate() triggers matplotlib's "Calling float on a single
# element Series" FutureWarning. Extract each plotted column once as a flat
# array so every coordinate is a plain scalar.
ghg_efficiency = agency_performance['ghg_per_dollar'].to_numpy().ravel()
dac_benefit_rate = agency_performance[
    ('Is Benefit Disadvantaged Communities', 'mean')
].to_numpy()
funding_millions = agency_performance[
    ('Total Program GGRFFunding', 'sum')
].to_numpy() / 1e6

# One colour for both the bubbles and the legend handles so they match.
bubble_color = 'C0'

# Plot GHG efficiency vs DAC benefit rate by agency
plt.scatter(
    ghg_efficiency,
    dac_benefit_rate,
    s=funding_millions,  # marker area = total funding in $ millions
    alpha=0.6,
    color=bubble_color,
)

# Label every bubble with its short agency name
for agency, x, y in zip(agency_performance.index, ghg_efficiency, dac_benefit_rate):
    plt.annotate(
        shorten_agency_name(agency),
        (x, y),
        xytext=(5, 5),
        textcoords='offset points',
    )

plt.xlabel('GHG Reduction per Dollar')
plt.ylabel('DAC Benefit Rate')
plt.title('Agency Performance: Climate Impact vs Equity\nSize = Total Funding')

# Legend for bubble size: marker area is in $ millions, so 100 -> $0.1B, etc.
legend_elements = [
    plt.scatter([], [], s=size, label=f'${billions}B', alpha=0.6, color=bubble_color)
    for size, billions in zip([100, 500, 1000], ['0.1', '0.5', '1.0'])
]
plt.legend(
    handles=legend_elements,
    title='Total Funding',
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
)

plt.tight_layout()
plt.show()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+-
+
- Strong GHG Performers +
-
+
- California Department of Food and Agriculture shows highest GHG efficiency (~0.06 reductions per dollar) +
- California Energy Commission also performs well (~0.03) +
- However, both show low DAC benefit rates (0-5%) +
-
+
- Strong Equity Performers +
-
+
- California Department of Community Services leads in DAC benefits (91.7%) +
- Department of Water Resources shows good equity performance (44.1%) +
- But these have lower GHG efficiency +
-
+
- Large Scale Implementers (shown by bubble size) +
-
+
- Strategic Growth Council ($1.56B) +
- CalSTA ($772M) +
- CARB ($796M) +
- Department of Forestry and Fire Protection ($968M) +
-
+
- Multi-County Implementation +
-
+
- Transportation agencies lead in multi-county projects (CalSTA: 57 projects, $233M) +
- CalFire shows significant multi-county work (339 projects) +
- Strategic Growth Council's multi-county projects show lower DAC rates than its overall portfolio +
+
+
+
+
+
+
+
+In [28]:
+
+
+
+
+
# How each agency's funding is distributed across CalEnviroScreen (CES)
# burden categories: summary table, percentage shares, a stacked bar chart,
# and the average funding per record in each category.

# Per-agency, per-category summary (record count, total/mean funding,
# total GHG reductions, share of records flagged as benefiting DACs).
merged_agency_ces = merged_data.groupby(['Agency Name', 'CES_category']).agg({
    'Total Program GGRFFunding': ['count', 'sum', 'mean'],
    'Total Project GHGReductions': 'sum',
    'Is Benefit Disadvantaged Communities': 'mean',
}).round(3)

# Total funding per agency (rows) and CES category (columns); combinations
# with no records become 0 so the row totals below are well defined.
agency_funding = pd.pivot_table(
    merged_data,
    values='Total Program GGRFFunding',
    index='Agency Name',
    columns='CES_category',
    aggfunc='sum',
).fillna(0)

# Convert each row to percentages of that agency's total funding.
agency_funding_pct = agency_funding.div(agency_funding.sum(axis=1), axis=0) * 100

print("Percentage of Agency Funding by Environmental Burden Category:")
print(agency_funding_pct.round(2))

# Stack the bar segments from lowest to highest burden instead of the
# alphabetical column order; any unexpected category is kept at the end.
burden_order = [
    'Low Burden (0-25%)',
    'Moderate Burden (25-50%)',
    'High Burden (50-75%)',
    'Highest Burden (75-100%)',
]
plot_columns = [c for c in burden_order if c in agency_funding_pct.columns]
plot_columns += [c for c in agency_funding_pct.columns if c not in plot_columns]

# DataFrame.plot() opens its own figure unless it is given an Axes, which
# left the figure created by plt.figure(figsize=(15, 8)) empty. Create the
# figure and Axes together and draw onto that Axes.
fig, ax = plt.subplots(figsize=(15, 8))
agency_funding_pct[plot_columns].plot(kind='barh', stacked=True, ax=ax)
ax.set_title('Distribution of Agency Funding Across Environmental Burden Categories')
ax.set_xlabel('Percentage of Agency Funding')
plt.tight_layout()
plt.show()

# Mean funding per record (in $ millions) by agency and burden category.
avg_project_size = (
    merged_data
    .groupby(['Agency Name', 'CES_category'])['Total Program GGRFFunding']
    .mean()
    / 1e6
)
avg_project_size = avg_project_size.unstack()
print("\nAverage Project Size (Millions $) by Environmental Burden Category:")
print(avg_project_size.round(2))
+
+
+
+
+
+
+
+
+
+
+
+Percentage of Agency Funding by Environmental Burden Category: +CES_category High Burden (50-75%) \ +Agency Name +California Air Resources Board 20.81 +California Department of Community Services and... 19.77 +California Department of Resources Recycling an... 26.02 +California Department of Water Resources 27.36 +California Energy Commission 14.29 +California Strategic Growth Council 1.78 + +CES_category Highest Burden (75-100%) \ +Agency Name +California Air Resources Board 4.92 +California Department of Community Services and... 77.98 +California Department of Resources Recycling an... 41.17 +California Department of Water Resources 50.91 +California Energy Commission 78.57 +California Strategic Growth Council 16.38 + +CES_category Low Burden (0-25%) \ +Agency Name +California Air Resources Board 36.65 +California Department of Community Services and... 0.29 +California Department of Resources Recycling an... 21.83 +California Department of Water Resources 9.21 +California Energy Commission 7.14 +California Strategic Growth Council 3.15 + +CES_category Moderate Burden (25-50%) +Agency Name +California Air Resources Board 37.63 +California Department of Community Services and... 1.96 +California Department of Resources Recycling an... 10.98 +California Department of Water Resources 12.51 +California Energy Commission 0.00 +California Strategic Growth Council 78.69 ++
+
+
+
+
+<Figure size 1500x800 with 0 Axes>+
+
+
+
+
+
+
+
+
+
+
++Average Project Size (Millions $) by Environmental Burden Category: +CES_category High Burden (50-75%) \ +Agency Name +California Air Resources Board 0.01 +California Department of Community Services and... 0.03 +California Department of Resources Recycling an... 0.01 +California Department of Water Resources 0.01 +California Energy Commission 0.07 +California Strategic Growth Council 0.00 + +CES_category Highest Burden (75-100%) \ +Agency Name +California Air Resources Board 0.01 +California Department of Community Services and... 0.02 +California Department of Resources Recycling an... 0.01 +California Department of Water Resources 0.01 +California Energy Commission 0.07 +California Strategic Growth Council 0.01 + +CES_category Low Burden (0-25%) \ +Agency Name +California Air Resources Board 0.01 +California Department of Community Services and... 0.07 +California Department of Resources Recycling an... 0.01 +California Department of Water Resources 0.00 +California Energy Commission 0.07 +California Strategic Growth Council 0.85 + +CES_category Moderate Burden (25-50%) +Agency Name +California Air Resources Board 0.01 +California Department of Community Services and... 0.03 +California Department of Resources Recycling an... 0.01 +California Department of Water Resources 0.00 +California Energy Commission NaN +California Strategic Growth Council 1.93 ++
+
+
+
+
+
+
+
+
+These results show how differently agencies distribute funding across communities with varying environmental burdens:
+-
+
- Strong Environmental Justice Focus +
-
+
- Community Services: 78% of funding to highest burden areas +
- Energy Commission: 79% to highest burden areas +
- Water Resources: 51% to highest burden areas +
- CalRecycle: 41% to highest burden areas +
-
+
- More Dispersed Distribution +
-
+
- CARB spreads funding across the three lower burden categories, with little reaching the highest burden areas:
-
+
- 37.6% Moderate burden +
- 36.7% Low burden +
- 20.8% High burden +
- Only 4.9% to highest burden areas +
+
-
+
- Unique Strategic Growth Council Pattern +
-
+
- Heavy focus on moderate burden areas (78.7%) +
- Limited focus on highest burden areas (16.4%) +
- Largest average project sizes:
-
+
- $1.93M in moderate burden areas +
- $0.85M in low burden areas +
- Much smaller in high and highest burden areas ($0.01M or less) +
+
-
+
- Project Size Variations +
-
+
- Most agencies maintain consistent project sizes across burden categories +
- Notable exceptions:
-
+
- Strategic Growth Council: Much larger projects in moderate burden areas +
- Community Services: Larger projects in low burden areas +
+
This suggests:
+-
+
- Different agency missions lead to different environmental justice approaches +
- Some agencies specifically target high-burden areas +
- Project size might be influenced by community capacity +
- Need for better coordination in highest burden areas +
+
+
+
+
+
+
+
+In [29]:
+
+
+
+
+
# Outcome analysis: GHG reductions and DAC benefit for every agency within
# each CES burden category, shown as a 2x2 grid of horizontal bar charts and
# followed by printed summary tables.

_outcome_aggregations = {
    'Total Program GGRFFunding': ['count', 'sum'],
    'Total Project GHGReductions': ['sum', 'mean'],
    'Is Benefit Disadvantaged Communities': 'mean',
}
outcome_analysis = (
    merged_data
    .groupby(['Agency Name', 'CES_category'])
    .agg(_outcome_aggregations)
    .round(3)
)

# GHG efficiency = total reductions divided by total funding for the group.
_total_reductions = outcome_analysis[('Total Project GHGReductions', 'sum')]
_total_funding = outcome_analysis[('Total Program GGRFFunding', 'sum')]
outcome_analysis['ghg_per_dollar'] = _total_reductions / _total_funding


def _agency_by_burden(value_column, how):
    """Pivot merged_data to agencies (rows) x CES categories (columns)."""
    return pd.pivot_table(
        merged_data,
        values=value_column,
        index='Agency Name',
        columns='CES_category',
        aggfunc=how,
    )


# The four tables behind the four panels.
ghg_by_burden = _agency_by_burden('Total Project GHGReductions', 'sum')
efficiency_by_burden = outcome_analysis['ghg_per_dollar'].unstack()
dac_by_burden = _agency_by_burden('Is Benefit Disadvantaged Communities', 'mean')
size_by_burden = _agency_by_burden('Total Program GGRFFunding', 'mean') / 1e6  # millions

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))

# (axes, table, title, x-axis label) for each panel, in drawing order.
_panels = [
    (ax1, ghg_by_burden,
     'Total GHG Reductions by Environmental Burden Category',
     'GHG Reductions'),
    (ax2, efficiency_by_burden,
     'GHG Reduction Efficiency by Environmental Burden Category',
     'GHG Reductions per Dollar'),
    (ax3, dac_by_burden,
     'DAC Benefit Rate by Environmental Burden Category',
     'Proportion Benefiting DACs'),
    (ax4, size_by_burden,
     'Average Project Size by Environmental Burden Category',
     'Average Project Size (Millions $)'),
]
for _axis, _table, _title, _xlabel in _panels:
    _table.plot(kind='barh', ax=_axis)
    _axis.set_title(_title)
    _axis.set_xlabel(_xlabel)

plt.tight_layout()
plt.show()

# Printed summary tables
print("\nOutcome Analysis by Environmental Burden Category:")
print("\nGHG Efficiency (Reductions per Dollar):")
print(efficiency_by_burden.round(4))

print("\nDAC Benefit Rate:")
print(dac_by_burden.round(4))
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
++Outcome Analysis by Environmental Burden Category: + +GHG Efficiency (Reductions per Dollar): +CES_category High Burden (50-75%) \ +Agency Name +California Air Resources Board 0.0092 +California Department of Community Services and... 0.0034 +California Department of Resources Recycling an... 0.0023 +California Department of Water Resources 0.0106 +California Energy Commission 0.0000 +California Strategic Growth Council 0.0021 + +CES_category Highest Burden (75-100%) \ +Agency Name +California Air Resources Board 0.0084 +California Department of Community Services and... 0.0037 +California Department of Resources Recycling an... 0.0026 +California Department of Water Resources 0.0088 +California Energy Commission 0.0000 +California Strategic Growth Council 0.0022 + +CES_category Low Burden (0-25%) \ +Agency Name +California Air Resources Board 0.0163 +California Department of Community Services and... 0.0034 +California Department of Resources Recycling an... 0.0013 +California Department of Water Resources 0.0122 +California Energy Commission 0.0000 +California Strategic Growth Council 0.0918 + +CES_category Moderate Burden (25-50%) +Agency Name +California Air Resources Board 0.0114 +California Department of Community Services and... 0.0038 +California Department of Resources Recycling an... 0.0017 +California Department of Water Resources 0.0148 +California Energy Commission NaN +California Strategic Growth Council 0.0943 + +DAC Benefit Rate: +CES_category High Burden (50-75%) \ +Agency Name +California Air Resources Board 0.0000 +California Department of Community Services and... 0.8851 +California Department of Resources Recycling an... 0.0000 +California Department of Water Resources 0.4012 +California Energy Commission 0.0000 +California Strategic Growth Council 0.0000 + +CES_category Highest Burden (75-100%) \ +Agency Name +California Air Resources Board 0.0000 +California Department of Community Services and... 
0.9344 +California Department of Resources Recycling an... 0.0000 +California Department of Water Resources 0.8456 +California Energy Commission 0.0000 +California Strategic Growth Council 0.0000 + +CES_category Low Burden (0-25%) \ +Agency Name +California Air Resources Board 0.0000 +California Department of Community Services and... 0.1429 +California Department of Resources Recycling an... 0.0000 +California Department of Water Resources 0.0147 +California Energy Commission 0.0000 +California Strategic Growth Council 0.0000 + +CES_category Moderate Burden (25-50%) +Agency Name +California Air Resources Board 0.0000 +California Department of Community Services and... 0.4835 +California Department of Resources Recycling an... 0.0000 +California Department of Water Resources 0.0565 +California Energy Commission NaN +California Strategic Growth Council 0.0000 ++
+
+
+
+
+
+
+
+
+
+-
+
- GHG Efficiency Paradox +
-
+
- Highest efficiency in lower burden areas:
-
+
- Strategic Growth Council: 0.0918 (low burden) vs 0.0022 (highest burden) +
- CARB: 0.0163 (low burden) vs 0.0084 (highest burden) +
- Water Resources: 0.0122 (low burden) vs 0.0088 (highest burden) +
+ - Suggests potential infrastructure or implementation challenges in high-burden areas +
-
+
- DAC Benefit Distribution +
-
+
- Strong targeting by some agencies:
-
+
- Community Services: 93% DAC benefit in highest burden areas +
- Water Resources: 85% DAC benefit in highest burden areas +
+ - Others show no DAC benefits across categories (as does CalRecycle, at 0% in every category):
-
+
- CARB +
- Energy Commission +
- Strategic Growth Council +
+
-
+
- Efficiency-Equity Trade-off +
-
+
- Apparent inverse relationship between GHG efficiency and DAC benefits for most agencies (Water Resources is an exception) +
- Community Services shows high DAC benefits but lower GHG efficiency +
- Strategic Growth Council achieves high GHG efficiency but low DAC benefits +
-
+
- Agency Performance Patterns +
-
+
- Community Services has the highest DAC benefit rate at every burden level (14% in low burden to 93% in highest burden areas) +
- Water Resources shows good balance between efficiency and equity +
- Strategic Growth Council most efficient but least equitable +
Key Policy Implications:
+-
+
- Need for targeted capacity building in high-burden areas +
- Potential for program design improvements to balance outcomes +
- Opportunity for cross-agency learning on successful approaches +
+
+
+
+
+
+
+
+
+
+