870 lines
35 KiB
Plaintext
870 lines
35 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"## Title: Evaluating Equity and Impact\n",
|
||
"### Author: David P. Adams\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: pandas in ./.venv/lib/python3.12/site-packages (2.2.3)\n",
|
||
"Requirement already satisfied: numpy in ./.venv/lib/python3.12/site-packages (2.1.1)\n",
|
||
"Requirement already satisfied: matplotlib in ./.venv/lib/python3.12/site-packages (3.9.2)\n",
|
||
"Requirement already satisfied: seaborn in ./.venv/lib/python3.12/site-packages (0.13.2)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
|
||
"Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas) (2024.2)\n",
|
||
"Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas) (2024.2)\n",
|
||
"Requirement already satisfied: contourpy>=1.0.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (1.3.0)\n",
|
||
"Requirement already satisfied: cycler>=0.10 in ./.venv/lib/python3.12/site-packages (from matplotlib) (0.12.1)\n",
|
||
"Requirement already satisfied: fonttools>=4.22.0 in ./.venv/lib/python3.12/site-packages (from matplotlib) (4.54.1)\n",
|
||
"Requirement already satisfied: kiwisolver>=1.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (1.4.7)\n",
|
||
"Requirement already satisfied: packaging>=20.0 in ./.venv/lib/python3.12/site-packages (from matplotlib) (24.1)\n",
|
||
"Requirement already satisfied: pillow>=8 in ./.venv/lib/python3.12/site-packages (from matplotlib) (10.4.0)\n",
|
||
"Requirement already satisfied: pyparsing>=2.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (3.1.4)\n",
|
||
"Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install pandas numpy matplotlib seaborn\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Importing the necessary libraries\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import seaborn as sns\n",
|
||
"import os\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"## set directory\n",
|
||
"import os\n",
|
||
"os.chdir('/home/dadams/CSU Fullerton Dropbox/David Adams/Research Projects/California Equity/california_equity_git')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# read in the data\n",
|
||
"data = pd.read_csv('data_raw/cci_programs_data.csv', low_memory=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"columns_data = pd.DataFrame(data.columns)\n",
|
||
"\n",
|
||
"# export the 'DataFrame' to a csv file\n",
|
||
"columns_data.to_csv('data_raw/columns.csv', index=False)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# check the data types\n",
|
||
"data.dtypes\n",
|
||
"\n",
|
||
"# save the data types to a csv file\n",
|
||
"data.dtypes.to_csv('data_raw/data_types.csv', header = False)\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Census Tract</th>\n",
|
||
" <th>Total Project Cost</th>\n",
|
||
" <th>Total Program GGRFFunding</th>\n",
|
||
" <th>Total Project GHGReductions</th>\n",
|
||
" <th>Annual Project GHGReductions</th>\n",
|
||
" <th>Project Count</th>\n",
|
||
" <th>Total GGRFDisadvantaged Community Funding</th>\n",
|
||
" <th>Funding Benefiting Disadvantaged Communities</th>\n",
|
||
" <th>Estimated Num Vehicles In Service</th>\n",
|
||
" <th>Funding Within Disadvantage Communities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Indirect Jobs Fte</th>\n",
|
||
" <th>Induced Jobs Fte</th>\n",
|
||
" <th>Compost Produced Tons</th>\n",
|
||
" <th>Compost Produced Tons Yr</th>\n",
|
||
" <th>Net Density DUA</th>\n",
|
||
" <th>Applicants Assisted</th>\n",
|
||
" <th>Invasive Cover 12 Months</th>\n",
|
||
" <th>Invasive Cover 36 Months</th>\n",
|
||
" <th>Project Acreage</th>\n",
|
||
" <th>Intermediary Admin Expenses Calc</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>1.193700e+05</td>\n",
|
||
" <td>1.414290e+05</td>\n",
|
||
" <td>1.414290e+05</td>\n",
|
||
" <td>1.414290e+05</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>5.518700e+04</td>\n",
|
||
" <td>55187.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>5.518700e+04</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.0</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>141429.000000</td>\n",
|
||
" <td>1.414290e+05</td>\n",
|
||
" <td>1.414290e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>6.053889e+09</td>\n",
|
||
" <td>9.206412e+05</td>\n",
|
||
" <td>7.791664e+04</td>\n",
|
||
" <td>7.717972e+02</td>\n",
|
||
" <td>0.205389</td>\n",
|
||
" <td>4.090872</td>\n",
|
||
" <td>2.736820e+04</td>\n",
|
||
" <td>110.217551</td>\n",
|
||
" <td>0.047331</td>\n",
|
||
" <td>2.030028e+04</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.064567</td>\n",
|
||
" <td>0.117056</td>\n",
|
||
" <td>0.440977</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.082260</td>\n",
|
||
" <td>0.019642</td>\n",
|
||
" <td>0.010656</td>\n",
|
||
" <td>0.010578</td>\n",
|
||
" <td>9.771087e+00</td>\n",
|
||
" <td>1.911114e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>2.641870e+07</td>\n",
|
||
" <td>3.736191e+07</td>\n",
|
||
" <td>1.011645e+06</td>\n",
|
||
" <td>2.371604e+04</td>\n",
|
||
" <td>3.361723</td>\n",
|
||
" <td>18.381861</td>\n",
|
||
" <td>6.327936e+05</td>\n",
|
||
" <td>1738.772195</td>\n",
|
||
" <td>1.972262</td>\n",
|
||
" <td>5.590536e+05</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1.444316</td>\n",
|
||
" <td>2.261146</td>\n",
|
||
" <td>45.712955</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4.382593</td>\n",
|
||
" <td>1.406914</td>\n",
|
||
" <td>0.758250</td>\n",
|
||
" <td>0.757677</td>\n",
|
||
" <td>3.669526e+03</td>\n",
|
||
" <td>1.196317e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>6.001400e+09</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>-2.785930e+05</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>6.037271e+09</td>\n",
|
||
" <td>4.000000e+03</td>\n",
|
||
" <td>3.500000e+03</td>\n",
|
||
" <td>8.000000e+00</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>6.059022e+09</td>\n",
|
||
" <td>8.000000e+03</td>\n",
|
||
" <td>7.500000e+03</td>\n",
|
||
" <td>1.500000e+01</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>6.073016e+09</td>\n",
|
||
" <td>2.150000e+04</td>\n",
|
||
" <td>1.900000e+04</td>\n",
|
||
" <td>4.000000e+01</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>3.000000</td>\n",
|
||
" <td>5.500000e+03</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.500000e+03</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>6.115041e+09</td>\n",
|
||
" <td>5.767700e+09</td>\n",
|
||
" <td>1.412670e+08</td>\n",
|
||
" <td>4.748924e+06</td>\n",
|
||
" <td>336.000000</td>\n",
|
||
" <td>2072.000000</td>\n",
|
||
" <td>6.443700e+07</td>\n",
|
||
" <td>102348.000000</td>\n",
|
||
" <td>503.000000</td>\n",
|
||
" <td>6.443700e+07</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>110.170000</td>\n",
|
||
" <td>151.000000</td>\n",
|
||
" <td>10365.000000</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>706.000000</td>\n",
|
||
" <td>320.000000</td>\n",
|
||
" <td>85.000000</td>\n",
|
||
" <td>85.000000</td>\n",
|
||
" <td>1.380000e+06</td>\n",
|
||
" <td>2.000000e+07</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>8 rows × 82 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Census Tract Total Project Cost Total Program GGRFFunding \\\n",
|
||
"count 1.193700e+05 1.414290e+05 1.414290e+05 \n",
|
||
"mean 6.053889e+09 9.206412e+05 7.791664e+04 \n",
|
||
"std 2.641870e+07 3.736191e+07 1.011645e+06 \n",
|
||
"min 6.001400e+09 0.000000e+00 0.000000e+00 \n",
|
||
"25% 6.037271e+09 4.000000e+03 3.500000e+03 \n",
|
||
"50% 6.059022e+09 8.000000e+03 7.500000e+03 \n",
|
||
"75% 6.073016e+09 2.150000e+04 1.900000e+04 \n",
|
||
"max 6.115041e+09 5.767700e+09 1.412670e+08 \n",
|
||
"\n",
|
||
" Total Project GHGReductions Annual Project GHGReductions \\\n",
|
||
"count 1.414290e+05 141429.000000 \n",
|
||
"mean 7.717972e+02 0.205389 \n",
|
||
"std 2.371604e+04 3.361723 \n",
|
||
"min -2.785930e+05 0.000000 \n",
|
||
"25% 8.000000e+00 0.000000 \n",
|
||
"50% 1.500000e+01 0.000000 \n",
|
||
"75% 4.000000e+01 0.000000 \n",
|
||
"max 4.748924e+06 336.000000 \n",
|
||
"\n",
|
||
" Project Count Total GGRFDisadvantaged Community Funding \\\n",
|
||
"count 141429.000000 5.518700e+04 \n",
|
||
"mean 4.090872 2.736820e+04 \n",
|
||
"std 18.381861 6.327936e+05 \n",
|
||
"min 0.000000 0.000000e+00 \n",
|
||
"25% 1.000000 0.000000e+00 \n",
|
||
"50% 1.000000 0.000000e+00 \n",
|
||
"75% 3.000000 5.500000e+03 \n",
|
||
"max 2072.000000 6.443700e+07 \n",
|
||
"\n",
|
||
" Funding Benefiting Disadvantaged Communities \\\n",
|
||
"count 55187.000000 \n",
|
||
"mean 110.217551 \n",
|
||
"std 1738.772195 \n",
|
||
"min 0.000000 \n",
|
||
"25% 0.000000 \n",
|
||
"50% 0.000000 \n",
|
||
"75% 0.000000 \n",
|
||
"max 102348.000000 \n",
|
||
"\n",
|
||
" Estimated Num Vehicles In Service \\\n",
|
||
"count 141429.000000 \n",
|
||
"mean 0.047331 \n",
|
||
"std 1.972262 \n",
|
||
"min 0.000000 \n",
|
||
"25% 0.000000 \n",
|
||
"50% 0.000000 \n",
|
||
"75% 0.000000 \n",
|
||
"max 503.000000 \n",
|
||
"\n",
|
||
" Funding Within Disadvantage Communities ... Indirect Jobs Fte \\\n",
|
||
"count 5.518700e+04 ... 141429.000000 \n",
|
||
"mean 2.030028e+04 ... 0.064567 \n",
|
||
"std 5.590536e+05 ... 1.444316 \n",
|
||
"min 0.000000e+00 ... 0.000000 \n",
|
||
"25% 0.000000e+00 ... 0.000000 \n",
|
||
"50% 0.000000e+00 ... 0.000000 \n",
|
||
"75% 1.500000e+03 ... 0.000000 \n",
|
||
"max 6.443700e+07 ... 110.170000 \n",
|
||
"\n",
|
||
" Induced Jobs Fte Compost Produced Tons Compost Produced Tons Yr \\\n",
|
||
"count 141429.000000 141429.000000 141429.0 \n",
|
||
"mean 0.117056 0.440977 0.0 \n",
|
||
"std 2.261146 45.712955 0.0 \n",
|
||
"min 0.000000 0.000000 0.0 \n",
|
||
"25% 0.000000 0.000000 0.0 \n",
|
||
"50% 0.000000 0.000000 0.0 \n",
|
||
"75% 0.000000 0.000000 0.0 \n",
|
||
"max 151.000000 10365.000000 0.0 \n",
|
||
"\n",
|
||
" Net Density DUA Applicants Assisted Invasive Cover 12 Months \\\n",
|
||
"count 141429.000000 141429.000000 141429.000000 \n",
|
||
"mean 0.082260 0.019642 0.010656 \n",
|
||
"std 4.382593 1.406914 0.758250 \n",
|
||
"min 0.000000 0.000000 0.000000 \n",
|
||
"25% 0.000000 0.000000 0.000000 \n",
|
||
"50% 0.000000 0.000000 0.000000 \n",
|
||
"75% 0.000000 0.000000 0.000000 \n",
|
||
"max 706.000000 320.000000 85.000000 \n",
|
||
"\n",
|
||
" Invasive Cover 36 Months Project Acreage \\\n",
|
||
"count 141429.000000 1.414290e+05 \n",
|
||
"mean 0.010578 9.771087e+00 \n",
|
||
"std 0.757677 3.669526e+03 \n",
|
||
"min 0.000000 0.000000e+00 \n",
|
||
"25% 0.000000 0.000000e+00 \n",
|
||
"50% 0.000000 0.000000e+00 \n",
|
||
"75% 0.000000 0.000000e+00 \n",
|
||
"max 85.000000 1.380000e+06 \n",
|
||
"\n",
|
||
" Intermediary Admin Expenses Calc \n",
|
||
"count 1.414290e+05 \n",
|
||
"mean 1.911114e+03 \n",
|
||
"std 1.196317e+05 \n",
|
||
"min 0.000000e+00 \n",
|
||
"25% 0.000000e+00 \n",
|
||
"50% 0.000000e+00 \n",
|
||
"75% 0.000000e+00 \n",
|
||
"max 2.000000e+07 \n",
|
||
"\n",
|
||
"[8 rows x 82 columns]"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# describe the data\n",
|
||
"data.describe()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Tract ZIP County ApproxLoc TotPop19 CIscore \\\n",
|
||
"0 6.083002e+09 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
|
||
"1 6.083002e+09 93455 Santa Barbara Santa Maria 13173 37.030667 \n",
|
||
"2 6.083002e+09 93454 Santa Barbara Santa Maria 2398 31.213140 \n",
|
||
"3 6.083002e+09 93455 Santa Barbara Orcutt 4496 6.639331 \n",
|
||
"4 6.083002e+09 93455 Santa Barbara Orcutt 4008 14.022852 \n",
|
||
"\n",
|
||
" CIscoreP Ozone OzoneP PM2_5 ... Elderly65 Hispanic \\\n",
|
||
"0 69.162885 0.034190 10.566273 7.567724 ... 12.5028 68.9210 \n",
|
||
"1 70.637922 0.035217 11.561917 7.624775 ... 5.3519 78.6229 \n",
|
||
"2 61.069087 0.034190 10.566273 7.548835 ... 12.8857 65.7214 \n",
|
||
"3 5.988401 0.036244 13.615432 7.660570 ... 14.4128 22.9537 \n",
|
||
"4 23.121533 0.036244 13.615432 7.663210 ... 18.8872 33.4082 \n",
|
||
"\n",
|
||
" White AfricanAm NativeAm OtherMult Shape_Leng Shape_Area \\\n",
|
||
"0 20.8899 0.4004 0.2670 1.3126 6999.357689 2.847611e+06 \n",
|
||
"1 13.2240 2.5051 0.0000 0.9489 19100.578232 1.635292e+07 \n",
|
||
"2 30.6088 0.9591 0.0000 2.1685 4970.985897 1.352329e+06 \n",
|
||
"3 69.1948 0.9342 0.7117 2.5356 6558.956012 2.417717e+06 \n",
|
||
"4 59.7804 0.6986 1.4721 1.3723 6570.368730 2.608422e+06 \n",
|
||
"\n",
|
||
" AAPI geometry \n",
|
||
"0 8.2091 POLYGON ((-39795.07 -341919.191, -38126.384 -3... \n",
|
||
"1 4.6990 POLYGON ((-39795.07 -341919.191, -39803.632 -3... \n",
|
||
"2 0.5421 POLYGON ((-38115.747 -341130.248, -38126.384 -... \n",
|
||
"3 3.6699 POLYGON ((-37341.662 -348530.437, -37252.307 -... \n",
|
||
"4 3.2685 POLYGON ((-39465.107 -348499.262, -38244.305 -... \n",
|
||
"\n",
|
||
"[5 rows x 67 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import geopandas as gpd\n",
|
||
"\n",
|
||
"# Read the shapefile\n",
|
||
"calenviroscreen = gpd.read_file(\"california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\")\n",
|
||
"\n",
|
||
"# Check the first few rows\n",
|
||
"print(calenviroscreen.head())\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Convert Census Tract columns to string and ensure they have the same format\n",
|
||
"data['Census Tract'] = data['Census Tract'].astype(str).str.zfill(11)\n",
|
||
"calenviroscreen['Tract'] = calenviroscreen['Tract'].astype(str).str.zfill(11)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Merge the datasets\n",
|
||
"merged_data = calenviroscreen.merge(data, left_on='Tract', right_on='Census Tract', how='inner')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"# Save the merged attribute table as a CSV (the geometry column is dropped, so no spatial data is written)\n",
|
||
"merged_data.drop(columns='geometry').to_csv('merged_california_climate_investment.csv', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import geopandas as gpd\n",
|
||
"# Ensure merged_data is a GeoDataFrame\n",
|
||
"if not isinstance(merged_data, gpd.GeoDataFrame):\n",
|
||
" merged_data = gpd.GeoDataFrame(merged_data)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"EPSG:3310\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Check CRS\n",
|
||
"print(merged_data.crs)\n",
|
||
"\n",
|
||
"# Set CRS if necessary\n",
|
||
"if merged_data.crs is None:\n",
|
||
" merged_data.set_crs(epsg=4326, inplace=True) # Or use the appropriate EPSG code\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"geometry\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(merged_data.geometry.name)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: geoalchemy2 in ./.venv/lib/python3.12/site-packages (0.15.2)\n",
|
||
"Requirement already satisfied: sqlalchemy in ./.venv/lib/python3.12/site-packages (2.0.35)\n",
|
||
"Requirement already satisfied: packaging in ./.venv/lib/python3.12/site-packages (from geoalchemy2) (24.1)\n",
|
||
"Requirement already satisfied: typing-extensions>=4.6.0 in ./.venv/lib/python3.12/site-packages (from sqlalchemy) (4.12.2)\n",
|
||
"Requirement already satisfied: greenlet!=0.4.17 in ./.venv/lib/python3.12/site-packages (from sqlalchemy) (3.1.1)\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install geoalchemy2 sqlalchemy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" Tract ZIP County_x ApproxLoc TotPop19 CIscore \\\n",
|
||
"0 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
|
||
"1 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
|
||
"2 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
|
||
"3 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
|
||
"4 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
|
||
"\n",
|
||
" CIscoreP Ozone OzoneP PM2_5 ... Net Density DUA \\\n",
|
||
"0 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
|
||
"1 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
|
||
"2 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
|
||
"3 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
|
||
"4 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
|
||
"\n",
|
||
" Applicants Assisted Invasive Cover 12 Months Invasive Cover 36 Months \\\n",
|
||
"0 0 0 0 \n",
|
||
"1 0 0 0 \n",
|
||
"2 0 0 0 \n",
|
||
"3 0 0 0 \n",
|
||
"4 0 0 0 \n",
|
||
"\n",
|
||
" Project Acreage IS IAE Intermediary Admin Expenses Calc \\\n",
|
||
"0 0 False 0 \n",
|
||
"1 0 False 0 \n",
|
||
"2 0 False 0 \n",
|
||
"3 0 False 0 \n",
|
||
"4 0 False 0 \n",
|
||
"\n",
|
||
" PRIMARY_FUNDING_RECIPIENT_TYPE TRIBAL AFFILIATION PROJECT PARTNERS \n",
|
||
"0 None None None \n",
|
||
"1 None None None \n",
|
||
"2 None None None \n",
|
||
"3 None None None \n",
|
||
"4 None None None \n",
|
||
"\n",
|
||
"[5 rows x 194 columns]\n",
|
||
"Spatial index created successfully.\n",
|
||
"Data uploaded and spatial index created successfully.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import geopandas as gpd\n",
|
||
"from sqlalchemy import create_engine, text\n",
|
||
"\n",
|
||
"# Step 1: Load your merged GeoDataFrame (assuming it already exists as 'merged_data')\n",
|
||
"# If it's not a GeoDataFrame, convert it to one\n",
|
||
"if not isinstance(merged_data, gpd.GeoDataFrame):\n",
|
||
" if 'geometry' not in merged_data.columns:\n",
|
||
" raise ValueError(\"The DataFrame must have a 'geometry' column to be converted into a GeoDataFrame.\")\n",
|
||
" merged_data = gpd.GeoDataFrame(merged_data, geometry='geometry')\n",
|
||
"\n",
|
||
"# Step 2: Set CRS to EPSG:3110 (California Albers) if not already set\n",
|
||
"if merged_data.crs is None or merged_data.crs.to_string() != 'EPSG:3110':\n",
|
||
" merged_data = merged_data.set_crs(epsg=4326) # Set initial CRS if not defined\n",
|
||
" merged_data = merged_data.to_crs(epsg=3110)\n",
|
||
"\n",
|
||
"# Step 3: Confirm geometry column is named 'geometry'\n",
|
||
"geometry_col = 'geometry'\n",
|
||
"if geometry_col not in merged_data.columns:\n",
|
||
" raise ValueError(\"GeoDataFrame does not contain a geometry column named 'geometry'.\")\n",
|
||
"\n",
|
||
"# Step 4: Create a connection to your PostGIS database\n",
|
||
"engine = create_engine('postgresql://postgres:MandyLinkToby3@192.168.0.74:5432/calif_equity')\n",
|
||
"\n",
|
||
"# Step 5: Push the data to PostGIS, replacing the existing table if it exists\n",
|
||
"# The `to_postgis()` function automatically recognizes the geometry column\n",
|
||
"try:\n",
|
||
" merged_data.to_postgis('california_climate_investment', engine, if_exists='replace')\n",
|
||
"except Exception as e:\n",
|
||
" print(f\"Error uploading data to PostGIS: {e}\")\n",
|
||
"\n",
|
||
"# Step 6: Verify data upload by querying the table (select the first 5 rows)\n",
|
||
"try:\n",
|
||
" gdf = gpd.read_postgis(f\"SELECT * FROM california_climate_investment LIMIT 5\", engine, geom_col=geometry_col)\n",
|
||
" print(gdf.head())\n",
|
||
"except Exception as e:\n",
|
||
" print(f\"Error reading data from PostGIS: {e}\")\n",
|
||
"\n",
|
||
"# Step 7: Create a spatial index on the geometry column (using SQLAlchemy's text() function)\n",
|
||
"try:\n",
|
||
" with engine.connect() as conn:\n",
|
||
" conn.execute(text(f\"\"\"\n",
|
||
" CREATE INDEX IF NOT EXISTS california_climate_investment_geom_idx\n",
|
||
" ON california_climate_investment\n",
|
||
" USING GIST ({geometry_col});\n",
|
||
" \"\"\"))\n",
|
||
" print(\"Spatial index created successfully.\")\n",
|
||
"except Exception as e:\n",
|
||
" print(f\"Error creating spatial index: {e}\")\n",
|
||
"\n",
|
||
"print(\"Data uploaded and spatial index created successfully.\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"EPSG:3110\n",
|
||
"EPSG:3110\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(merged_data.crs)\n",
|
||
"print(gdf.crs)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Row count of merged_data: 119356\n",
|
||
"Row count of gdf: 5\n",
|
||
"Columns of merged_data: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
|
||
" 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
|
||
" ...\n",
|
||
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
|
||
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
|
||
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
|
||
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
|
||
" dtype='object', length=194)\n",
|
||
"Columns of gdf: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
|
||
" 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
|
||
" ...\n",
|
||
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
|
||
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
|
||
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
|
||
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
|
||
" dtype='object', length=194)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Row count of merged_data:\", len(merged_data))\n",
|
||
"print(\"Row count of gdf:\", len(gdf))\n",
|
||
"print(\"Columns of merged_data:\", merged_data.columns)\n",
|
||
"print(\"Columns of gdf:\", gdf.columns)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "NameError",
|
||
"evalue": "name 'merged_data' is not defined",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||
"Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgeopandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mmerged_data\u001b[49m\u001b[38;5;241m.\u001b[39mplot(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m10\u001b[39m), edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblack\u001b[39m\u001b[38;5;124m'\u001b[39m, linewidth\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Add title and labels\u001b[39;00m\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCensus Tracts\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
||
"\u001b[0;31mNameError\u001b[0m: name 'merged_data' is not defined"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import geopandas as gpd\n",
|
||
"\n",
|
||
"\n",
|
||
"merged_data.plot(figsize=(10, 10), edgecolor='black', linewidth=0.5)\n",
|
||
"\n",
|
||
"# Add title and labels\n",
|
||
"plt.title('Census Tracts')\n",
|
||
"plt.xlabel('Longitude')\n",
|
||
"plt.ylabel('Latitude')\n",
|
||
"\n",
|
||
"# Show the plot\n",
|
||
"plt.show()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# save the merged attribute table (geometry column dropped) as a csv\n",
|
||
"merged_data.drop(columns='geometry').to_csv('data_merged/merged_california_climate_investment.csv', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(119356, 194)"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# data shape: (number of rows, number of columns)\n",
|
||
"merged_data.shape\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "spatial_modeling",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.3"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|