{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Project: California Equity Research\n", "### Data: PostGIS database `calif_equity` with California Climate Investments and CalEnviroScreen data\n", "#### Goal: Analyze the relationship between climate investment and environmental justice in California\n", "#### This notebook: second take\n", "##### Author: [dpadams](mailto:dpadams@fullerton.edu)\n", "##### Date: 2024-11-24" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'scipy'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mseaborn\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msns\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m stats\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Read the data (I see you already have this loaded as 'data')\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# 1. 
First, let's create our core analytical metrics\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcalculate_program_metrics\u001b[39m(df):\n", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'scipy'" ] } ], "source": [
 "import pandas as pd\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
 "import seaborn as sns\n",
 "# NOTE(review): `stats` is not used in this cell, and the stored output shows\n",
 "# scipy missing from the kernel environment; install it or drop this import.\n",
 "from scipy import stats\n",
 "\n",
 "# NOTE(review): `data` is never created in this notebook. Load the project\n",
 "# table into a DataFrame named `data` in an earlier cell; on a fresh kernel\n",
 "# the call below raises NameError.\n",
 "\n",
 "# 1. Core analytical metrics\n",
 "def calculate_program_metrics(df):\n",
 "    \"\"\"Summarise cost, GHG reductions and DAC funding per 'Program Name'.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    df : pandas.DataFrame\n",
 "        Project-level rows containing 'Program Name' and the columns\n",
 "        aggregated below.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    pandas.DataFrame\n",
 "        One row per program with two-level columns (source column,\n",
 "        statistic) rounded to 2 decimals, plus 'GHG_per_dollar' and\n",
 "        'DAC_funding_ratio' rounded to 4 decimals. Both ratios are NaN\n",
 "        for a program whose summed 'Total Project Cost' is 0.\n",
 "    \"\"\"\n",
 "    aggregations = {\n",
 "        'Total Project Cost': ['count', 'sum', 'mean'],\n",
 "        'Total Project GHGReductions': ['sum', 'mean'],\n",
 "        'Total GGRFDisadvantaged Community Funding': ['sum', 'mean'],\n",
 "        'Is Benefit Disadvantaged Communities': 'mean',\n",
 "        'Is Low Income Communities': 'mean',\n",
 "    }\n",
 "    totals = df.groupby('Program Name').agg(aggregations)\n",
 "    metrics = totals.round(2)\n",
 "\n",
 "    # Mask zero totals so the ratios become NaN instead of inf; sort_values\n",
 "    # places NaN last, so zero-cost programs cannot top an efficiency ranking.\n",
 "    cost_sum = totals[('Total Project Cost', 'sum')]\n",
 "    cost_sum = cost_sum.where(cost_sum != 0)\n",
 "\n",
 "    # Ratios use the unrounded sums so the 2-decimal rounding does not leak in.\n",
 "    metrics['GHG_per_dollar'] = (\n",
 "        totals[('Total Project GHGReductions', 'sum')] / cost_sum\n",
 "    ).round(4)\n",
 "    metrics['DAC_funding_ratio'] = (\n",
 "        totals[('Total GGRFDisadvantaged Community Funding', 'sum')] / cost_sum\n",
 "    ).round(4)\n",
 "\n",
 "    return metrics\n",
 "\n",
 "program_metrics = calculate_program_metrics(data)\n",
 "\n",
 "# Display top programs by different metrics.\n",
 "# Derived columns sit under (name, '') in the two-level column header.\n",
 "print(\"\\nTop 5 Programs by Total Investment:\")\n",
 "print(program_metrics.sort_values(('Total Project Cost', 'sum'), ascending=False).head())\n",
 "\n",
 "print(\"\\nTop 5 Programs by GHG Reduction Efficiency:\")\n",
 "print(program_metrics.sort_values(('GHG_per_dollar', ''), ascending=False).head())\n",
 "\n",
 "print(\"\\nTop 5 Programs by DAC Funding Ratio:\")\n",
 "print(program_metrics.sort_values(('DAC_funding_ratio', ''), ascending=False).head())"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { 
"display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.6" } }, "nbformat": 4, "nbformat_minor": 2 }