california-equity-git/initial_view/merge_cci_califequity_postgis.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Title: Evaluating Equity and Impact\n",
    "### Author: David P. Adams\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pandas in ./.venv/lib/python3.12/site-packages (2.2.3)\n",
      "Requirement already satisfied: numpy in ./.venv/lib/python3.12/site-packages (2.1.1)\n",
      "Requirement already satisfied: matplotlib in ./.venv/lib/python3.12/site-packages (3.9.2)\n",
      "Requirement already satisfied: seaborn in ./.venv/lib/python3.12/site-packages (0.13.2)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
      "Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas) (2024.2)\n",
      "Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas) (2024.2)\n",
      "Requirement already satisfied: contourpy>=1.0.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (1.3.0)\n",
      "Requirement already satisfied: cycler>=0.10 in ./.venv/lib/python3.12/site-packages (from matplotlib) (0.12.1)\n",
      "Requirement already satisfied: fonttools>=4.22.0 in ./.venv/lib/python3.12/site-packages (from matplotlib) (4.54.1)\n",
      "Requirement already satisfied: kiwisolver>=1.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (1.4.7)\n",
      "Requirement already satisfied: packaging>=20.0 in ./.venv/lib/python3.12/site-packages (from matplotlib) (24.1)\n",
      "Requirement already satisfied: pillow>=8 in ./.venv/lib/python3.12/site-packages (from matplotlib) (10.4.0)\n",
      "Requirement already satisfied: pyparsing>=2.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (3.1.4)\n",
      "Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install pandas numpy matplotlib seaborn\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importing the necessary libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import os\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "## set directory\n",
    "import os\n",
    "os.chdir('/home/dadams/CSU Fullerton Dropbox/David Adams/Research Projects/California Equity/california_equity_git')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read in the data\n",
    "data = pd.read_csv('data_raw/cci_programs_data.csv', low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "columns_data = pd.DataFrame(data.columns)\n",
    "\n",
    "# export the 'DataFrame' to a csv file\n",
    "columns_data.to_csv('data_raw/columns.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check the data types\n",
    "data.dtypes\n",
    "\n",
    "# save the data types to a csv file\n",
    "data.dtypes.to_csv('data_raw/data_types.csv', header = False)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Census Tract</th>\n",
       "      <th>Total Project Cost</th>\n",
       "      <th>Total Program GGRFFunding</th>\n",
       "      <th>Total Project GHGReductions</th>\n",
       "      <th>Annual Project GHGReductions</th>\n",
       "      <th>Project Count</th>\n",
       "      <th>Total GGRFDisadvantaged Community Funding</th>\n",
       "      <th>Funding Benefiting Disadvantaged Communities</th>\n",
       "      <th>Estimated Num Vehicles In Service</th>\n",
       "      <th>Funding Within Disadvantage Communities</th>\n",
       "      <th>...</th>\n",
       "      <th>Indirect Jobs Fte</th>\n",
       "      <th>Induced Jobs Fte</th>\n",
       "      <th>Compost  Produced Tons</th>\n",
       "      <th>Compost  Produced Tons  Yr</th>\n",
       "      <th>Net Density  DUA</th>\n",
       "      <th>Applicants  Assisted</th>\n",
       "      <th>Invasive Cover 12 Months</th>\n",
       "      <th>Invasive Cover 36 Months</th>\n",
       "      <th>Project Acreage</th>\n",
       "      <th>Intermediary Admin Expenses Calc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.193700e+05</td>\n",
       "      <td>1.414290e+05</td>\n",
       "      <td>1.414290e+05</td>\n",
       "      <td>1.414290e+05</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>5.518700e+04</td>\n",
       "      <td>55187.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>5.518700e+04</td>\n",
       "      <td>...</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.0</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>141429.000000</td>\n",
       "      <td>1.414290e+05</td>\n",
       "      <td>1.414290e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>6.053889e+09</td>\n",
       "      <td>9.206412e+05</td>\n",
       "      <td>7.791664e+04</td>\n",
       "      <td>7.717972e+02</td>\n",
       "      <td>0.205389</td>\n",
       "      <td>4.090872</td>\n",
       "      <td>2.736820e+04</td>\n",
       "      <td>110.217551</td>\n",
       "      <td>0.047331</td>\n",
       "      <td>2.030028e+04</td>\n",
       "      <td>...</td>\n",
       "      <td>0.064567</td>\n",
       "      <td>0.117056</td>\n",
       "      <td>0.440977</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.082260</td>\n",
       "      <td>0.019642</td>\n",
       "      <td>0.010656</td>\n",
       "      <td>0.010578</td>\n",
       "      <td>9.771087e+00</td>\n",
       "      <td>1.911114e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.641870e+07</td>\n",
       "      <td>3.736191e+07</td>\n",
       "      <td>1.011645e+06</td>\n",
       "      <td>2.371604e+04</td>\n",
       "      <td>3.361723</td>\n",
       "      <td>18.381861</td>\n",
       "      <td>6.327936e+05</td>\n",
       "      <td>1738.772195</td>\n",
       "      <td>1.972262</td>\n",
       "      <td>5.590536e+05</td>\n",
       "      <td>...</td>\n",
       "      <td>1.444316</td>\n",
       "      <td>2.261146</td>\n",
       "      <td>45.712955</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.382593</td>\n",
       "      <td>1.406914</td>\n",
       "      <td>0.758250</td>\n",
       "      <td>0.757677</td>\n",
       "      <td>3.669526e+03</td>\n",
       "      <td>1.196317e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>6.001400e+09</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-2.785930e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>6.037271e+09</td>\n",
       "      <td>4.000000e+03</td>\n",
       "      <td>3.500000e+03</td>\n",
       "      <td>8.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>6.059022e+09</td>\n",
       "      <td>8.000000e+03</td>\n",
       "      <td>7.500000e+03</td>\n",
       "      <td>1.500000e+01</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.073016e+09</td>\n",
       "      <td>2.150000e+04</td>\n",
       "      <td>1.900000e+04</td>\n",
       "      <td>4.000000e+01</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.500000e+03</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.500000e+03</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>6.115041e+09</td>\n",
       "      <td>5.767700e+09</td>\n",
       "      <td>1.412670e+08</td>\n",
       "      <td>4.748924e+06</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>2072.000000</td>\n",
       "      <td>6.443700e+07</td>\n",
       "      <td>102348.000000</td>\n",
       "      <td>503.000000</td>\n",
       "      <td>6.443700e+07</td>\n",
       "      <td>...</td>\n",
       "      <td>110.170000</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>10365.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>706.000000</td>\n",
       "      <td>320.000000</td>\n",
       "      <td>85.000000</td>\n",
       "      <td>85.000000</td>\n",
       "      <td>1.380000e+06</td>\n",
       "      <td>2.000000e+07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 82 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Census Tract  Total Project Cost  Total Program GGRFFunding  \\\n",
       "count  1.193700e+05        1.414290e+05               1.414290e+05   \n",
       "mean   6.053889e+09        9.206412e+05               7.791664e+04   \n",
       "std    2.641870e+07        3.736191e+07               1.011645e+06   \n",
       "min    6.001400e+09        0.000000e+00               0.000000e+00   \n",
       "25%    6.037271e+09        4.000000e+03               3.500000e+03   \n",
       "50%    6.059022e+09        8.000000e+03               7.500000e+03   \n",
       "75%    6.073016e+09        2.150000e+04               1.900000e+04   \n",
       "max    6.115041e+09        5.767700e+09               1.412670e+08   \n",
       "\n",
       "       Total Project GHGReductions  Annual Project GHGReductions  \\\n",
       "count                 1.414290e+05                 141429.000000   \n",
       "mean                  7.717972e+02                      0.205389   \n",
       "std                   2.371604e+04                      3.361723   \n",
       "min                  -2.785930e+05                      0.000000   \n",
       "25%                   8.000000e+00                      0.000000   \n",
       "50%                   1.500000e+01                      0.000000   \n",
       "75%                   4.000000e+01                      0.000000   \n",
       "max                   4.748924e+06                    336.000000   \n",
       "\n",
       "       Project Count  Total GGRFDisadvantaged Community Funding  \\\n",
       "count  141429.000000                               5.518700e+04   \n",
       "mean        4.090872                               2.736820e+04   \n",
       "std        18.381861                               6.327936e+05   \n",
       "min         0.000000                               0.000000e+00   \n",
       "25%         1.000000                               0.000000e+00   \n",
       "50%         1.000000                               0.000000e+00   \n",
       "75%         3.000000                               5.500000e+03   \n",
       "max      2072.000000                               6.443700e+07   \n",
       "\n",
       "       Funding Benefiting Disadvantaged Communities  \\\n",
       "count                                  55187.000000   \n",
       "mean                                     110.217551   \n",
       "std                                     1738.772195   \n",
       "min                                        0.000000   \n",
       "25%                                        0.000000   \n",
       "50%                                        0.000000   \n",
       "75%                                        0.000000   \n",
       "max                                   102348.000000   \n",
       "\n",
       "       Estimated Num Vehicles In Service  \\\n",
       "count                      141429.000000   \n",
       "mean                            0.047331   \n",
       "std                             1.972262   \n",
       "min                             0.000000   \n",
       "25%                             0.000000   \n",
       "50%                             0.000000   \n",
       "75%                             0.000000   \n",
       "max                           503.000000   \n",
       "\n",
       "       Funding Within Disadvantage Communities  ...  Indirect Jobs Fte  \\\n",
       "count                             5.518700e+04  ...      141429.000000   \n",
       "mean                              2.030028e+04  ...           0.064567   \n",
       "std                               5.590536e+05  ...           1.444316   \n",
       "min                               0.000000e+00  ...           0.000000   \n",
       "25%                               0.000000e+00  ...           0.000000   \n",
       "50%                               0.000000e+00  ...           0.000000   \n",
       "75%                               1.500000e+03  ...           0.000000   \n",
       "max                               6.443700e+07  ...         110.170000   \n",
       "\n",
       "       Induced Jobs Fte  Compost  Produced Tons  Compost  Produced Tons  Yr  \\\n",
       "count     141429.000000           141429.000000                    141429.0   \n",
       "mean           0.117056                0.440977                         0.0   \n",
       "std            2.261146               45.712955                         0.0   \n",
       "min            0.000000                0.000000                         0.0   \n",
       "25%            0.000000                0.000000                         0.0   \n",
       "50%            0.000000                0.000000                         0.0   \n",
       "75%            0.000000                0.000000                         0.0   \n",
       "max          151.000000            10365.000000                         0.0   \n",
       "\n",
       "       Net Density  DUA  Applicants  Assisted  Invasive Cover 12 Months  \\\n",
       "count     141429.000000         141429.000000             141429.000000   \n",
       "mean           0.082260              0.019642                  0.010656   \n",
       "std            4.382593              1.406914                  0.758250   \n",
       "min            0.000000              0.000000                  0.000000   \n",
       "25%            0.000000              0.000000                  0.000000   \n",
       "50%            0.000000              0.000000                  0.000000   \n",
       "75%            0.000000              0.000000                  0.000000   \n",
       "max          706.000000            320.000000                 85.000000   \n",
       "\n",
       "       Invasive Cover 36 Months  Project Acreage  \\\n",
       "count             141429.000000     1.414290e+05   \n",
       "mean                   0.010578     9.771087e+00   \n",
       "std                    0.757677     3.669526e+03   \n",
       "min                    0.000000     0.000000e+00   \n",
       "25%                    0.000000     0.000000e+00   \n",
       "50%                    0.000000     0.000000e+00   \n",
       "75%                    0.000000     0.000000e+00   \n",
       "max                   85.000000     1.380000e+06   \n",
       "\n",
       "       Intermediary Admin Expenses Calc  \n",
       "count                      1.414290e+05  \n",
       "mean                       1.911114e+03  \n",
       "std                        1.196317e+05  \n",
       "min                        0.000000e+00  \n",
       "25%                        0.000000e+00  \n",
       "50%                        0.000000e+00  \n",
       "75%                        0.000000e+00  \n",
       "max                        2.000000e+07  \n",
       "\n",
       "[8 rows x 82 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# describe the data\n",
    "data.describe()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Tract    ZIP         County    ApproxLoc  TotPop19    CIscore  \\\n",
      "0  6.083002e+09  93454  Santa Barbara  Santa Maria      4495  36.019653   \n",
      "1  6.083002e+09  93455  Santa Barbara  Santa Maria     13173  37.030667   \n",
      "2  6.083002e+09  93454  Santa Barbara  Santa Maria      2398  31.213140   \n",
      "3  6.083002e+09  93455  Santa Barbara       Orcutt      4496   6.639331   \n",
      "4  6.083002e+09  93455  Santa Barbara       Orcutt      4008  14.022852   \n",
      "\n",
      "    CIscoreP     Ozone     OzoneP     PM2_5  ...  Elderly65  Hispanic  \\\n",
      "0  69.162885  0.034190  10.566273  7.567724  ...    12.5028   68.9210   \n",
      "1  70.637922  0.035217  11.561917  7.624775  ...     5.3519   78.6229   \n",
      "2  61.069087  0.034190  10.566273  7.548835  ...    12.8857   65.7214   \n",
      "3   5.988401  0.036244  13.615432  7.660570  ...    14.4128   22.9537   \n",
      "4  23.121533  0.036244  13.615432  7.663210  ...    18.8872   33.4082   \n",
      "\n",
      "     White  AfricanAm  NativeAm  OtherMult    Shape_Leng    Shape_Area  \\\n",
      "0  20.8899     0.4004    0.2670     1.3126   6999.357689  2.847611e+06   \n",
      "1  13.2240     2.5051    0.0000     0.9489  19100.578232  1.635292e+07   \n",
      "2  30.6088     0.9591    0.0000     2.1685   4970.985897  1.352329e+06   \n",
      "3  69.1948     0.9342    0.7117     2.5356   6558.956012  2.417717e+06   \n",
      "4  59.7804     0.6986    1.4721     1.3723   6570.368730  2.608422e+06   \n",
      "\n",
      "     AAPI                                           geometry  \n",
      "0  8.2091  POLYGON ((-39795.07 -341919.191, -38126.384 -3...  \n",
      "1  4.6990  POLYGON ((-39795.07 -341919.191, -39803.632 -3...  \n",
      "2  0.5421  POLYGON ((-38115.747 -341130.248, -38126.384 -...  \n",
      "3  3.6699  POLYGON ((-37341.662 -348530.437, -37252.307 -...  \n",
      "4  3.2685  POLYGON ((-39465.107 -348499.262, -38244.305 -...  \n",
      "\n",
      "[5 rows x 67 columns]\n"
     ]
    }
   ],
   "source": [
    "import geopandas as gpd\n",
    "\n",
    "# Read the shapefile\n",
    "calenviroscreen = gpd.read_file(\"california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\")\n",
    "\n",
    "# Check the first few rows\n",
    "print(calenviroscreen.head())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert Census Tract columns to string and ensure they have the same format\n",
    "data['Census Tract'] = data['Census Tract'].astype(str).str.zfill(11)\n",
    "calenviroscreen['Tract'] = calenviroscreen['Tract'].astype(str).str.zfill(11)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge the datasets\n",
    "merged_data = calenviroscreen.merge(data, left_on='Tract', right_on='Census Tract', how='inner')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Or as a CSV if you're not saving spatial data\n",
    "merged_data.drop(columns='geometry').to_csv('merged_california_climate_investment.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "import geopandas as gpd\n",
    "# Ensure merged_data is a GeoDataFrame\n",
    "if not isinstance(merged_data, gpd.GeoDataFrame):\n",
    "    merged_data = gpd.GeoDataFrame(merged_data)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EPSG:3310\n"
     ]
    }
   ],
   "source": [
    "# Check CRS\n",
    "print(merged_data.crs)\n",
    "\n",
    "# Set CRS if necessary\n",
    "if merged_data.crs is None:\n",
    "    merged_data.set_crs(epsg=4326, inplace=True)  # Or use the appropriate EPSG code\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "geometry\n"
     ]
    }
   ],
   "source": [
    "print(merged_data.geometry.name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: geoalchemy2 in ./.venv/lib/python3.12/site-packages (0.15.2)\n",
      "Requirement already satisfied: sqlalchemy in ./.venv/lib/python3.12/site-packages (2.0.35)\n",
      "Requirement already satisfied: packaging in ./.venv/lib/python3.12/site-packages (from geoalchemy2) (24.1)\n",
      "Requirement already satisfied: typing-extensions>=4.6.0 in ./.venv/lib/python3.12/site-packages (from sqlalchemy) (4.12.2)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in ./.venv/lib/python3.12/site-packages (from sqlalchemy) (3.1.1)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install geoalchemy2 sqlalchemy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Tract    ZIP       County_x    ApproxLoc  TotPop19    CIscore  \\\n",
      "0  6083002103.0  93454  Santa Barbara  Santa Maria      4495  36.019653   \n",
      "1  6083002103.0  93454  Santa Barbara  Santa Maria      4495  36.019653   \n",
      "2  6083002103.0  93454  Santa Barbara  Santa Maria      4495  36.019653   \n",
      "3  6083002103.0  93454  Santa Barbara  Santa Maria      4495  36.019653   \n",
      "4  6083002103.0  93454  Santa Barbara  Santa Maria      4495  36.019653   \n",
      "\n",
      "    CIscoreP    Ozone     OzoneP     PM2_5  ...  Net Density  DUA  \\\n",
      "0  69.162885  0.03419  10.566273  7.567724  ...               0.0   \n",
      "1  69.162885  0.03419  10.566273  7.567724  ...               0.0   \n",
      "2  69.162885  0.03419  10.566273  7.567724  ...               0.0   \n",
      "3  69.162885  0.03419  10.566273  7.567724  ...               0.0   \n",
      "4  69.162885  0.03419  10.566273  7.567724  ...               0.0   \n",
      "\n",
      "   Applicants  Assisted  Invasive Cover 12 Months  Invasive Cover 36 Months  \\\n",
      "0                     0                         0                         0   \n",
      "1                     0                         0                         0   \n",
      "2                     0                         0                         0   \n",
      "3                     0                         0                         0   \n",
      "4                     0                         0                         0   \n",
      "\n",
      "   Project Acreage  IS IAE  Intermediary Admin Expenses Calc  \\\n",
      "0                0   False                                 0   \n",
      "1                0   False                                 0   \n",
      "2                0   False                                 0   \n",
      "3                0   False                                 0   \n",
      "4                0   False                                 0   \n",
      "\n",
      "   PRIMARY_FUNDING_RECIPIENT_TYPE  TRIBAL AFFILIATION  PROJECT PARTNERS  \n",
      "0                            None                None              None  \n",
      "1                            None                None              None  \n",
      "2                            None                None              None  \n",
      "3                            None                None              None  \n",
      "4                            None                None              None  \n",
      "\n",
      "[5 rows x 194 columns]\n",
      "Spatial index created successfully.\n",
      "Data uploaded and spatial index created successfully.\n"
     ]
    }
   ],
   "source": [
    "import geopandas as gpd\n",
    "from sqlalchemy import create_engine, text\n",
    "\n",
    "# Step 1: Load your merged GeoDataFrame (assuming it already exists as 'merged_data')\n",
    "# If it's not a GeoDataFrame, convert it to one\n",
    "if not isinstance(merged_data, gpd.GeoDataFrame):\n",
    "    if 'geometry' not in merged_data.columns:\n",
    "        raise ValueError(\"The DataFrame must have a 'geometry' column to be converted into a GeoDataFrame.\")\n",
    "    merged_data = gpd.GeoDataFrame(merged_data, geometry='geometry')\n",
    "\n",
    "# Step 2: Set CRS to EPSG:3110 (California Albers) if not already set\n",
    "if merged_data.crs is None or merged_data.crs.to_string() != 'EPSG:3110':\n",
    "    merged_data = merged_data.set_crs(epsg=4326)  # Set initial CRS if not defined\n",
    "    merged_data = merged_data.to_crs(epsg=3110)\n",
    "\n",
    "# Step 3: Confirm geometry column is named 'geometry'\n",
    "geometry_col = 'geometry'\n",
    "if geometry_col not in merged_data.columns:\n",
    "    raise ValueError(\"GeoDataFrame does not contain a geometry column named 'geometry'.\")\n",
    "\n",
    "# Step 4: Create a connection to your PostGIS database\n",
    "engine = create_engine('postgresql://postgres:MandyLinkToby3@192.168.0.74:5432/calif_equity')\n",
    "\n",
    "# Step 5: Push the data to PostGIS, replacing the existing table if it exists\n",
    "# The `to_postgis()` function automatically recognizes the geometry column\n",
    "try:\n",
    "    merged_data.to_postgis('california_climate_investment', engine, if_exists='replace')\n",
    "except Exception as e:\n",
    "    print(f\"Error uploading data to PostGIS: {e}\")\n",
    "\n",
    "# Step 6: Verify data upload by querying the table (select the first 5 rows)\n",
    "try:\n",
    "    gdf = gpd.read_postgis(f\"SELECT * FROM california_climate_investment LIMIT 5\", engine, geom_col=geometry_col)\n",
    "    print(gdf.head())\n",
    "except Exception as e:\n",
    "    print(f\"Error reading data from PostGIS: {e}\")\n",
    "\n",
    "# Step 7: Create a spatial index on the geometry column (using SQLAlchemy's text() function)\n",
    "try:\n",
    "    with engine.connect() as conn:\n",
    "        conn.execute(text(f\"\"\"\n",
    "            CREATE INDEX IF NOT EXISTS california_climate_investment_geom_idx\n",
    "            ON california_climate_investment\n",
    "            USING GIST ({geometry_col});\n",
    "        \"\"\"))\n",
    "    print(\"Spatial index created successfully.\")\n",
    "except Exception as e:\n",
    "    print(f\"Error creating spatial index: {e}\")\n",
    "\n",
    "print(\"Data uploaded and spatial index created successfully.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EPSG:3110\n",
      "EPSG:3110\n"
     ]
    }
   ],
   "source": [
    "print(merged_data.crs)\n",
    "print(gdf.crs)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Row count of merged_data: 119356\n",
      "Row count of gdf: 5\n",
      "Columns of merged_data: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
      "       'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
      "       ...\n",
      "       'Net Density  DUA', 'Applicants  Assisted', 'Invasive Cover 12 Months',\n",
      "       'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
      "       'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
      "       'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
      "      dtype='object', length=194)\n",
      "Columns of gdf: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
      "       'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
      "       ...\n",
      "       'Net Density  DUA', 'Applicants  Assisted', 'Invasive Cover 12 Months',\n",
      "       'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
      "       'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
      "       'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
      "      dtype='object', length=194)\n"
     ]
    }
   ],
   "source": [
    "print(\"Row count of merged_data:\", len(merged_data))\n",
    "print(\"Row count of gdf:\", len(gdf))\n",
    "print(\"Columns of merged_data:\", merged_data.columns)\n",
    "print(\"Columns of gdf:\", gdf.columns)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'merged_data' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgeopandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mmerged_data\u001b[49m\u001b[38;5;241m.\u001b[39mplot(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m10\u001b[39m), edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblack\u001b[39m\u001b[38;5;124m'\u001b[39m, linewidth\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m)\n\u001b[1;32m      7\u001b[0m \u001b[38;5;66;03m# Add title and labels\u001b[39;00m\n\u001b[1;32m      8\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCensus Tracts\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'merged_data' is not defined"
     ]
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import geopandas as gpd\n",
    "\n",
    "\n",
    "merged_data.plot(figsize=(10, 10), edgecolor='black', linewidth=0.5)\n",
    "\n",
    "# Add title and labels\n",
    "plt.title('Census Tracts')\n",
    "plt.xlabel('Longitude')\n",
    "plt.ylabel('Latitude')\n",
    "\n",
    "# Show the plot\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save the dbf file as a csv\n",
    "merged_data.drop(columns='geometry').to_csv('data_merged/merged_california_climate_investment.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(119356, 194)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data shape and columns\n",
    "merged_data.shape\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "spatial_modeling",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}