file organizations

2025-04-09 20:26:45 -07:00
parent 3dd1bd6dee
commit c7c3d75ec9
26 changed files with 141960 additions and 466099 deletions
--- a/archive/analysis/collab_patterns_outcomes.ipynb
+++ b/archive/analysis/collab_patterns_outcomes.ipynb
--- a/archive/analysis/collab_patterns_outcomes_2.ipynb
+++ b/archive/analysis/collab_patterns_outcomes_2.ipynb
--- a/archive/analysis/database_setup.ipynb
+++ b/archive/analysis/database_setup.ipynb
@@ -0,0 +1,261 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully connected to the database!\n",
+      "Dropping existing database objects...\n",
+      "Processing CalEnviroScreen data...\n",
+      "Loading CES data to database...\n",
+      "Processing CCI project data...\n",
+      "Loading CCI data to database...\n",
+      "Creating analysis views...\n",
+      "Data loading completed successfully!\n",
+      "\n",
+      "Record counts:\n",
+      "CES data: 8035 records\n",
+      "CCI projects: 120715 records\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import os\n",
+    "import geopandas as gpd\n",
+    "from sqlalchemy import create_engine, text\n",
+    "from datetime import datetime\n",
+    "\n",
+    "# Database configuration\n",
+    "# Non-secret settings fall back to local defaults. The password deliberately has\n",
+    "# no default so that credentials are never stored in the notebook or its git history.\n",
+    "DB_USER = os.getenv('DB_USER', 'postgres')\n",
+    "DB_PASSWORD = os.getenv('DB_PASSWORD')\n",
+    "if DB_PASSWORD is None:\n",
+    "    raise RuntimeError('Set the DB_PASSWORD environment variable before running this notebook')\n",
+    "DB_HOST = os.getenv('DB_HOST', '192.168.0.74')\n",
+    "DB_PORT = os.getenv('DB_PORT', '5432')\n",
+    "DB_NAME = os.getenv('DB_NAME', 'calif_equity')\n",
+    "\n",
+    "# Set working directory\n",
+    "os.chdir('/home/dadams/Repos/california_equity_git')\n",
+    "\n",
+    "# Create database connection\n",
+    "def create_db_engine():\n",
+    "    \"\"\"Build a SQLAlchemy engine for the PostgreSQL database named by the DB_* settings.\n",
+    "\n",
+    "    The user name and password are percent-encoded before being placed in the\n",
+    "    URL, so credentials containing characters such as '@', ':' or '/' do not\n",
+    "    corrupt the connection string.\n",
+    "\n",
+    "    Returns:\n",
+    "        The engine produced by sqlalchemy.create_engine for that URL.\n",
+    "    \"\"\"\n",
+    "    from urllib.parse import quote_plus\n",
+    "\n",
+    "    user = quote_plus(DB_USER)\n",
+    "    password = quote_plus(DB_PASSWORD)\n",
+    "    connection_string = f'postgresql://{user}:{password}@{DB_HOST}:{DB_PORT}/{DB_NAME}'\n",
+    "    return create_engine(connection_string)\n",
+    "\n",
+    "def drop_existing_objects(engine):\n",
+    "    \"\"\"Remove the analysis views and data tables so a load can start clean.\n",
+    "\n",
+    "    Every statement uses IF EXISTS and CASCADE; the statements run on a single\n",
+    "    connection and are committed once at the end.\n",
+    "    \"\"\"\n",
+    "    # Executed in this order: the two views first, then the three tables\n",
+    "    drop_statements = (\n",
+    "        \"DROP VIEW IF EXISTS project_efficiency CASCADE\",\n",
+    "        \"DROP VIEW IF EXISTS regional_collaboration CASCADE\",\n",
+    "        \"DROP TABLE IF EXISTS agency_partnerships CASCADE\",\n",
+    "        \"DROP TABLE IF EXISTS cci_projects CASCADE\",\n",
+    "        \"DROP TABLE IF EXISTS ces_data CASCADE\",\n",
+    "    )\n",
+    "    with engine.connect() as connection:\n",
+    "        for statement in drop_statements:\n",
+    "            connection.execute(text(statement))\n",
+    "        connection.commit()\n",
+    "\n",
+    "def process_ces_data(filepath):\n",
+    "    \"\"\"Read the CalEnviroScreen shapefile and shape it for the ces_data table.\n",
+    "\n",
+    "    Column names are lower-cased with spaces turned into underscores, the tract\n",
+    "    ID is stored as a stripped string, and only the columns listed in the schema\n",
+    "    below are kept, under their database names, with 'geom' set as the geometry.\n",
+    "\n",
+    "    Args:\n",
+    "        filepath: Path handed to geopandas.read_file.\n",
+    "\n",
+    "    Returns:\n",
+    "        The selected, renamed frame after set_geometry('geom').\n",
+    "    \"\"\"\n",
+    "    print(\"Processing CalEnviroScreen data...\")\n",
+    "    tracts = gpd.read_file(filepath)\n",
+    "    \n",
+    "    # Normalise the headers first so the lookups below use predictable names\n",
+    "    tracts.columns = [name.lower().replace(' ', '_') for name in tracts.columns]\n",
+    "    tracts['tract'] = tracts['tract'].astype(str).str.strip()\n",
+    "    \n",
+    "    # Source column -> database column, listed in output order.\n",
+    "    # Entries that map a name to itself are kept under their existing name.\n",
+    "    schema = {\n",
+    "        'tract': 'tract_id',\n",
+    "        'zip': 'zip_code',\n",
+    "        'county': 'county',\n",
+    "        'approxloc': 'approx_loc',\n",
+    "        'totpop19': 'total_pop_19',\n",
+    "        'ciscore': 'ci_score',\n",
+    "        'ciscorep': 'ci_score_pctl',\n",
+    "        'ozone': 'ozone',\n",
+    "        'ozonep': 'ozonep',\n",
+    "        'pm2_5': 'pm25',\n",
+    "        'pm2_5_p': 'pm25_pctl',\n",
+    "        'drinkwat': 'drinking_water',\n",
+    "        'drinkwatp': 'drinking_water_pctl',\n",
+    "        'poverty': 'poverty',\n",
+    "        'povertyp': 'povertyp',\n",
+    "        'unempl': 'unempl',\n",
+    "        'unemplp': 'unemplp',\n",
+    "        'housburd': 'housing_burden',\n",
+    "        'housburdp': 'housing_burden_pctl',\n",
+    "        'geometry': 'geom',\n",
+    "    }\n",
+    "    \n",
+    "    selected = tracts[list(schema)]\n",
+    "    renamed = selected.rename(columns=schema)\n",
+    "    \n",
+    "    # The geometry column was renamed, so point the frame at it explicitly\n",
+    "    return renamed.set_geometry('geom')\n",
+    "\n",
+    "def process_cci_data(filepath):\n",
+    "    print(\"Processing CCI project data...\")\n",
+    "    df = pd.read_csv(filepath, low_memory=False)\n",
+    "    \n",
+    "    # Clean column names\n",
+    "    df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n",
+    "    \n",
+    "    # Convert date columns\n",
+    "    df['date_operational'] = pd.to_datetime(df['date_operational'])\n",
+    "    \n",
+    "    # Filter date range\n",
+    "    df = df[\n",
+    "        (df['date_operational'] >= '2015-01-01') &\n",
+    "        (df['date_operational'] <= '2024-12-31')\n",
+    "    ]\n",
+    "    \n",
+    "    # Process project partners\n",
+    "    df['project_partners'] = df['project_partners'].fillna('')\n",
+    "    \n",
+    "    # Select and prepare relevant columns\n",
+    "    cci_data = df[[\n",
+    "        'project_idnumber', 'reporting_cycle_name', 'agency_name',\n",
+    "        'program_name', 'program_description', 'project_name',\n",
+    "        'project_type', 'project_description', 'date_operational',\n",
+    "        'census_tract', 'county', 'total_program_ggrffunding',\n",
+    "        'total_project_ghgreductions', 'is_benefit_disadvantaged_communities',\n",
+    "        'project_partners'\n",
+    "    ]]\n",
+    "    \n",
+    "    # Rename columns to match schema\n",
+    "    column_map = {\n",
+    "        'project_idnumber': 'project_id',\n",
+    "        'reporting_cycle_name': 'reporting_cycle',\n",
+    "        'total_program_ggrffunding': 'total_funding',\n",
+    "        'total_project_ghgreductions': 'ghg_reduction',\n",
+    "        'is_benefit_disadvantaged_communities': 'dac_benefit'\n",
+    "    }\n",
+    "    cci_data = cci_data.rename(columns=column_map)\n",
+    "    \n",
+    "    # Convert boolean columns\n",
+    "    cci_data['dac_benefit'] = cci_data['dac_benefit'].astype(bool)\n",
+    "    \n",
+    "    return cci_data\n",
+    "\n",
+    "def create_views(engine):\n",
+    "    with engine.connect() as connection:\n",
+    "        # Project efficiency view\n",
+    "        connection.execute(text(\"\"\"\n",
+    "            CREATE VIEW project_efficiency AS\n",
+    "            SELECT \n",
+    "                p.project_id,\n",
+    "                p.program_name,\n",
+    "                p.agency_name,\n",
+    "                p.total_funding,\n",
+    "                p.ghg_reduction,\n",
+    "                p.dac_benefit,\n",
+    "                CASE \n",
+    "                    WHEN p.total_funding > 0 THEN p.ghg_reduction / p.total_funding \n",
+    "                    ELSE 0 \n",
+    "                END as ghg_efficiency,\n",
+    "                c.ci_score as ces_score,\n",
+    "                CASE \n",
+    "                    WHEN p.project_partners = '' THEN 0\n",
+    "                    ELSE (length(p.project_partners) - length(replace(p.project_partners, ',', '')) + 1)\n",
+    "                END as partner_count\n",
+    "            FROM cci_projects p\n",
+    "            LEFT JOIN ces_data c ON cast(p.census_tract as text) = cast(c.tract_id as text)\n",
+    "        \"\"\"))\n",
+    "        \n",
+    "        # Regional collaboration view\n",
+    "        connection.execute(text(\"\"\"\n",
+    "            CREATE VIEW regional_collaboration AS\n",
+    "            SELECT \n",
+    "                county,\n",
+    "                COUNT(DISTINCT project_id) as project_count,\n",
+    "                AVG(CASE \n",
+    "                    WHEN project_partners = '' THEN 0\n",
+    "                    ELSE (length(project_partners) - length(replace(project_partners, ',', '')) + 1)\n",
+    "                END) as avg_partners,\n",
+    "                SUM(total_funding) as total_funding,\n",
+    "                SUM(CASE WHEN dac_benefit THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as dac_rate,\n",
+    "                SUM(ghg_reduction) / NULLIF(SUM(total_funding), 0) as region_efficiency\n",
+    "            FROM cci_projects\n",
+    "            GROUP BY county\n",
+    "        \"\"\"))\n",
+    "        \n",
+    "        connection.commit()\n",
+    "\n",
+    "def load_data_to_db(\n",
+    "    ces_path='/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp',\n",
+    "    cci_path='/home/dadams/Repos/california_equity_git/data_raw/cci_programs_data.csv',\n",
+    "):\n",
+    "    \"\"\"Rebuild the database: drop old objects, load both datasets, create the views.\n",
+    "\n",
+    "    Args:\n",
+    "        ces_path: CalEnviroScreen shapefile passed to process_ces_data.\n",
+    "        cci_path: CCI programs CSV passed to process_cci_data.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with 'ces_records' and 'cci_records', the row counts of the two\n",
+    "        frames that were written.\n",
+    "\n",
+    "    Raises:\n",
+    "        Exception: any failure is printed and then re-raised unchanged.\n",
+    "    \"\"\"\n",
+    "    engine = None\n",
+    "    try:\n",
+    "        engine = create_db_engine()\n",
+    "        \n",
+    "        # Drop existing objects first\n",
+    "        print(\"Dropping existing database objects...\")\n",
+    "        drop_existing_objects(engine)\n",
+    "        \n",
+    "        # Load CES data\n",
+    "        ces_data = process_ces_data(ces_path)\n",
+    "        print(\"Loading CES data to database...\")\n",
+    "        ces_data.to_postgis('ces_data', engine, if_exists='replace', index=False)\n",
+    "        \n",
+    "        # Load CCI data\n",
+    "        cci_data = process_cci_data(cci_path)\n",
+    "        print(\"Loading CCI data to database...\")\n",
+    "        cci_data.to_sql('cci_projects', engine, if_exists='replace', index=False)\n",
+    "        \n",
+    "        print(\"Creating analysis views...\")\n",
+    "        create_views(engine)\n",
+    "        \n",
+    "        print(\"Data loading completed successfully!\")\n",
+    "        \n",
+    "        return {\n",
+    "            'ces_records': len(ces_data),\n",
+    "            'cci_records': len(cci_data)\n",
+    "        }\n",
+    "        \n",
+    "    except Exception as e:\n",
+    "        print(f\"Error loading data: {e}\")\n",
+    "        raise\n",
+    "    finally:\n",
+    "        # Release pooled connections so repeated runs in one kernel do not\n",
+    "        # leave idle connections open on the server\n",
+    "        if engine is not None:\n",
+    "            engine.dispose()\n",
+    "\n",
+    "# Connectivity smoke test: a failure is reported here but does not stop the cell\n",
+    "try:\n",
+    "    engine = create_db_engine()\n",
+    "    with engine.connect() as conn:\n",
+    "        print(\"Successfully connected to the database!\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Error connecting to the database: {str(e)}\")\n",
+    "\n",
+    "# Run the load, then report the row counts it returned\n",
+    "record_counts = load_data_to_db()\n",
+    "print(\n",
+    "    \"\\nRecord counts:\",\n",
+    "    f\"CES data: {record_counts['ces_records']} records\",\n",
+    "    f\"CCI projects: {record_counts['cci_records']} records\",\n",
+    "    sep=\"\\n\",\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/archive/analysis_firstlook/collaboration.ipynb
+++ b/archive/analysis_firstlook/collaboration.ipynb
@@ -0,0 +1,321 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# California Equity\n",
+    "## Looking at collaboration components\n",
+    "Date: 2024-12-19"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Importing the necessary libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## set directory\n",
+    "import os\n",
+    "os.chdir('/home/dadams/Repos/california_equity_git')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('data_raw/cci_programs_data.csv', low_memory=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Project IDNumber', 'Reporting Cycle Name', 'Agency Name',\n",
+       "       'Program Name', 'Program Description', 'Sub Program Name',\n",
+       "       'Record Type', 'Project Name', 'Project Type', 'Project Description',\n",
+       "       ...\n",
+       "       'Net Density  DUA', 'Applicants  Assisted', 'Invasive Cover 12 Months',\n",
+       "       'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
+       "       'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
+       "       'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
+       "      dtype='object', length=127)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import geopandas as gpd\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "def debug_shapefile(shapefile_path):\n",
+    "    \"\"\"\n",
+    "    Debug shapefile reading issues by checking file existence and required components.\n",
+    "    \n",
+    "    Args:\n",
+    "        shapefile_path (str): Path to the .shp file\n",
+    "    \n",
+    "    Returns:\n",
+    "        dict: Dictionary containing debug information\n",
+    "    \"\"\"\n",
+    "    base_path = Path(shapefile_path).parent\n",
+    "    file_name = Path(shapefile_path).stem\n",
+    "    \n",
+    "    # Required shapefile components\n",
+    "    required_extensions = ['.shp', '.shx', '.dbf']\n",
+    "    optional_extensions = ['.prj', '.cpg', '.sbn', '.sbx']\n",
+    "    \n",
+    "    debug_info = {\n",
+    "        'file_exists': os.path.exists(shapefile_path),\n",
+    "        'parent_dir_exists': os.path.exists(base_path),\n",
+    "        'components': {},\n",
+    "        'file_sizes': {},\n",
+    "        'readable': False\n",
+    "    }\n",
+    "    \n",
+    "    # Check for all component files\n",
+    "    for ext in required_extensions + optional_extensions:\n",
+    "        full_path = base_path / f\"{file_name}{ext}\"\n",
+    "        exists = full_path.exists()\n",
+    "        debug_info['components'][ext] = exists\n",
+    "        if exists:\n",
+    "            debug_info['file_sizes'][ext] = os.path.getsize(full_path)\n",
+    "    \n",
+    "    # Try reading with explicit driver\n",
+    "    try:\n",
+    "        gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n",
+    "        debug_info['readable'] = True\n",
+    "        debug_info['num_features'] = len(gdf)\n",
+    "    except Exception as e:\n",
+    "        debug_info['error'] = str(e)\n",
+    "    \n",
+    "    return debug_info\n",
+    "\n",
+    "# Usage example\n",
+    "shapefile_path = \"/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\"\n",
+    "debug_results = debug_shapefile(shapefile_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Checking .shp file: True\n",
+      "File permissions: 644\n",
+      "Checking .shx file: True\n",
+      "File permissions: 644\n",
+      "Checking .dbf file: True\n",
+      "File permissions: 644\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Define base path and file name\n",
+    "base_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape'\n",
+    "file_name = 'CES4 Final Shapefile'\n",
+    "\n",
+    "# Check for existence of all required files\n",
+    "required_files = ['.shp', '.shx', '.dbf']\n",
+    "for ext in required_files:\n",
+    "    full_path = os.path.join(base_path, file_name + ext)\n",
+    "    print(f\"Checking {ext} file: {os.path.exists(full_path)}\")\n",
+    "    if os.path.exists(full_path):\n",
+    "        print(f\"File permissions: {oct(os.stat(full_path).st_mode)[-3:]}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Approach 1 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.\n",
+      "Approach 2 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.\n",
+      "Directory contents: ['CES4 Final Shapefile.sbx', 'CES4 Final Shapefile.shp', 'CES4 Final Shapefile.sbn', 'CES4 Final Shapefile.shp.xml', 'CES4 Final Shapefile.shx', 'CES4 Final Shapefile.prj', 'CES4 Final Shapefile.cpg', 'CES4 Final Shapefile.dbf']\n"
+     ]
+    }
+   ],
+   "source": [
+    "import geopandas as gpd\n",
+    "\n",
+    "# Approach 1: Using absolute path with normalized separators\n",
+    "shapefile_path = Path(base_path) / f\"{file_name}.shp\"\n",
+    "try:\n",
+    "    gdf = gpd.read_file(shapefile_path)\n",
+    "except Exception as e:\n",
+    "    print(f\"Approach 1 failed: {e}\")\n",
+    "\n",
+    "# Approach 2: Using explicit ESRI Shapefile driver\n",
+    "try:\n",
+    "    gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n",
+    "except Exception as e:\n",
+    "    print(f\"Approach 2 failed: {e}\")\n",
+    "\n",
+    "# Approach 3: Check if the directory is readable\n",
+    "try:\n",
+    "    print(f\"Directory contents: {os.listdir(base_path)}\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Cannot list directory: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GeoPandas version: 1.0.1\n",
+      "Fiona version: 1.10.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import geopandas as gpd\n",
+    "import fiona\n",
+    "print(f\"GeoPandas version: {gpd.__version__}\")\n",
+    "print(f\"Fiona version: {fiona.__version__}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "DataSourceError",
+     "evalue": "'/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mDataSourceError\u001b[0m                           Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[30], line 5\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m# Load the shapefile\u001b[39;00m\n\u001b[1;32m      4\u001b[0m shapefile_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 5\u001b[0m gdf \u001b[38;5;241m=\u001b[39m \u001b[43mgpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshapefile_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;66;03m# Print the head of the GeoDataFrame\u001b[39;00m\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28mprint\u001b[39m(gdf\u001b[38;5;241m.\u001b[39mhead())\n",
+      "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:294\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, columns, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m    291\u001b[0m             from_bytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m    293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyogrio\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 294\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read_file_pyogrio\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    295\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrows\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrows\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m    296\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    298\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiona\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    299\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mapi\u001b[38;5;241m.\u001b[39mtypes\u001b[38;5;241m.\u001b[39mis_file_like(filename):\n",
+      "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:547\u001b[0m, in \u001b[0;36m_read_file_pyogrio\u001b[0;34m(path_or_bytes, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m    538\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m    539\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keywords are deprecated, and \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    540\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill be removed in a future release. You can use the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keyword \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    543\u001b[0m         stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m,\n\u001b[1;32m    544\u001b[0m     )\n\u001b[1;32m    545\u001b[0m     kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpyogrio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_bytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/geopandas.py:265\u001b[0m, in \u001b[0;36mread_dataframe\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m    260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_arrow:\n\u001b[1;32m    261\u001b[0m     \u001b[38;5;66;03m# For arrow, datetimes are read as is.\u001b[39;00m\n\u001b[1;32m    262\u001b[0m     \u001b[38;5;66;03m# For numpy IO, datetimes are read as string values to preserve timezone info\u001b[39;00m\n\u001b[1;32m    263\u001b[0m     \u001b[38;5;66;03m# as numpy does not directly support timezones.\u001b[39;00m\n\u001b[1;32m    264\u001b[0m     kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatetime_as_string\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 265\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mread_func\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    266\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    267\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    268\u001b[0m \u001b[43m    \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    269\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    270\u001b[0m \u001b[43m    \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    271\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgdal_force_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    272\u001b[0m \u001b[43m    \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    273\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    274\u001b[0m \u001b[43m    \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    275\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    276\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    277\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    278\u001b[0m \u001b[43m    \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    279\u001b[0m \u001b[43m    \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    280\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfid_as_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    281\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    282\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    284\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_arrow:\n\u001b[1;32m    285\u001b[0m     meta, table \u001b[38;5;241m=\u001b[39m 
result\n",
+      "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/raw.py:198\u001b[0m, in \u001b[0;36mread\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)\u001b[0m\n\u001b[1;32m     59\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Read OGR data source into numpy arrays.\u001b[39;00m\n\u001b[1;32m     60\u001b[0m \n\u001b[1;32m     61\u001b[0m \u001b[38;5;124;03mIMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    194\u001b[0m \n\u001b[1;32m    195\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    196\u001b[0m dataset_kwargs \u001b[38;5;241m=\u001b[39m _preprocess_options_key_value(kwargs) \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m--> 198\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mogr_read\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    199\u001b[0m \u001b[43m    \u001b[49m\u001b[43mget_vsi_path_or_buffer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    200\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    201\u001b[0m \u001b[43m    \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    203\u001b[0m \u001b[43m    \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    204\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    205\u001b[0m \u001b[43m    \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    206\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    207\u001b[0m \u001b[43m    \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    208\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    209\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_mask_to_wkb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    210\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    211\u001b[0m \u001b[43m    \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    212\u001b[0m \u001b[43m    \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    213\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_fids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    214\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdataset_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    215\u001b[0m 
\u001b[43m    \u001b[49m\u001b[43mdatetime_as_string\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdatetime_as_string\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:1240\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_read\u001b[0;34m()\u001b[0m\n",
+      "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:216\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_open\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mDataSourceError\u001b[0m: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import geopandas as gpd\n",
+    "\n",
+    "# Locate the CES4 shapefile under the repository root. The root defaults to the\n",
+    "# original checkout location; set CALIF_EQUITY_REPO to run from another machine.\n",
+    "repo_root = Path(os.getenv('CALIF_EQUITY_REPO', '/home/dadams/Repos/california_equity_git'))\n",
+    "shapefile = repo_root / 'california_enviroscreen' / 'calif_enviroscreen_shape' / 'CES4 Final Shapefile.shp'\n",
+    "shapefile_path = str(shapefile)  # kept as a plain string, as before\n",
+    "\n",
+    "# Fail early with an actionable message instead of the generic\n",
+    "# 'not recognized as being in a supported file format' DataSourceError.\n",
+    "# NOTE(review): a missing .shx index is a common cause of that error for\n",
+    "# shapefiles; confirm whether that is what happened in this checkout.\n",
+    "if not shapefile.exists():\n",
+    "    raise FileNotFoundError(f'Shapefile not found: {shapefile}')\n",
+    "if not any(shapefile.with_suffix(ext).exists() for ext in ('.shx', '.SHX')):\n",
+    "    raise FileNotFoundError(f'Missing .shx index next to: {shapefile}')\n",
+    "\n",
+    "# Load the shapefile\n",
+    "gdf = gpd.read_file(shapefile_path)\n",
+    "\n",
+    "# Preview the first rows of the GeoDataFrame (rich display as the cell result)\n",
+    "gdf.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total GGRF Funding: $8.13B\n",
+      "Number of projects: 131428\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Basic cleaning: parse the operational date and keep rows inside the date window\n",
+    "data['Date Operational'] = pd.to_datetime(data['Date Operational'])\n",
+    "in_date_window = data['Date Operational'].between('2010-01-01', '2024-11-01')\n",
+    "data = data[in_date_window].copy()\n",
+    "\n",
+    "# Remove rows with no GGRF funding\n",
+    "data = data.dropna(subset=['Total Program GGRFFunding'])\n",
+    "\n",
+    "# Add derived columns: rows whose 'County' contains a comma are flagged as\n",
+    "# multi-county, and partnership_size is the comma count plus one (1 when missing)\n",
+    "county_commas = data['County'].str.count(',')\n",
+    "data = data.assign(\n",
+    "    Year=data['Date Operational'].dt.year,\n",
+    "    is_multi_county=data['County'].str.contains(',', na=False),\n",
+    "    partnership_size=county_commas.fillna(0) + 1,\n",
+    ")\n",
+    "\n",
+    "# Quick validation\n",
+    "total_ggrf_billions = data['Total Program GGRFFunding'].sum() / 1e9\n",
+    "print(f\"Total GGRF Funding: ${total_ggrf_billions:.2f}B\")\n",
+    "print(f\"Number of projects: {len(data)}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/archive/analysis_firstlook/spatial_analysis_1.ipynb
+++ b/archive/analysis_firstlook/spatial_analysis_1.ipynb
--- a/archive/analysis_new/collaboration_ghg_cost_fixed.png
+++ b/archive/analysis_new/collaboration_ghg_cost_fixed.png
--- a/archive/analysis_new/dac_benefit_per_program_fixed.png
+++ b/archive/analysis_new/dac_benefit_per_program_fixed.png
--- a/archive/analysis_new/ghg_cost_per_ton_fixed.png
+++ b/archive/analysis_new/ghg_cost_per_ton_fixed.png
--- a/archive/analysis_new/ghg_cost_trends_over_time.png
+++ b/archive/analysis_new/ghg_cost_trends_over_time.png
--- a/archive/analysis_new/new1.ipynb
+++ b/archive/analysis_new/new1.ipynb
--- a/archive/initial_view/overview_hypotesting_20241031.html
+++ b/archive/initial_view/overview_hypotesting_20241031.html
--- a/archive/initial_view/overview_hypotesting_20241031.ipynb
+++ b/archive/initial_view/overview_hypotesting_20241031.ipynb
--- a/archive/initial_view/overview_hypotesting_20241031.pdf
+++ b/archive/initial_view/overview_hypotesting_20241031.pdf
--- a/archive/initial_view/secondtake.html
+++ b/archive/initial_view/secondtake.html
--- a/archive/initial_view/secondtake.ipynb
+++ b/archive/initial_view/secondtake.ipynb
--- a/archive/initial_view/secondtake.pdf
+++ b/archive/initial_view/secondtake.pdf
--- a/archive/initial_view/some_descriptives.ipynb
+++ b/archive/initial_view/some_descriptives.ipynb
--- a/archive/merged_california_climate_investment.csv
+++ b/archive/merged_california_climate_investment.csv
--- a/archive/two_way_table.csv
+++ b/archive/two_way_table.csv