Files
california-equity-git/initial_view/secondtake.ipynb
2024-11-24 21:48:46 -08:00

105 lines
4.4 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Project: California Equity Research\n",
"### Data: postgis db `calif_equity` with california climate investment and california enviroscreen data\n",
"#### Goal: Analyze the relationship between climate investment and environmental justice in California\n",
"#### This notebook: second take \n",
"##### Author: [dpadams](dpadams@fullerton.edu)\n",
"##### Date: 2024-11-24"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'scipy'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mseaborn\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msns\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m stats\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Read the data (I see you already have this loaded as 'data')\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# 1. First, let's create our core analytical metrics\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcalculate_program_metrics\u001b[39m(df):\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'scipy'"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy import stats\n",
"\n",
"# Read the data (I see you already have this loaded as 'data')\n",
"\n",
"# 1. First, let's create our core analytical metrics\n",
"def calculate_program_metrics(df):\n",
" metrics = df.groupby('Program Name').agg({\n",
" 'Total Project Cost': ['count', 'sum', 'mean'],\n",
" 'Total Project GHGReductions': ['sum', 'mean'],\n",
" 'Total GGRFDisadvantaged Community Funding': ['sum', 'mean'],\n",
" 'Is Benefit Disadvantaged Communities': 'mean',\n",
" 'Is Low Income Communities': 'mean'\n",
" }).round(2)\n",
" \n",
" # Add efficiency metrics\n",
" metrics['GHG_per_dollar'] = (metrics[('Total Project GHGReductions', 'sum')] / \n",
" metrics[('Total Project Cost', 'sum')]).round(4)\n",
" \n",
" metrics['DAC_funding_ratio'] = (metrics[('Total GGRFDisadvantaged Community Funding', 'sum')] / \n",
" metrics[('Total Project Cost', 'sum')]).round(4)\n",
" \n",
" return metrics\n",
"\n",
"program_metrics = calculate_program_metrics(data)\n",
"\n",
"# Display top programs by different metrics\n",
"print(\"\\nTop 5 Programs by Total Investment:\")\n",
"print(program_metrics.sort_values(('Total Project Cost', 'sum'), ascending=False).head())\n",
"\n",
"print(\"\\nTop 5 Programs by GHG Reduction Efficiency:\")\n",
"print(program_metrics.sort_values('GHG_per_dollar', ascending=False).head())\n",
"\n",
"print(\"\\nTop 5 Programs by DAC Funding Ratio:\")\n",
"print(program_metrics.sort_values('DAC_funding_ratio', ascending=False).head())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}