Files
california-equity-git/initial_view/merge_cci_califequity_postgis.ipynb
2024-09-28 23:23:40 -07:00

870 lines
35 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"## Title: Evaluating Equity and Impact\n",
"### Author: David P. Adams\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in ./.venv/lib/python3.12/site-packages (2.2.3)\n",
"Requirement already satisfied: numpy in ./.venv/lib/python3.12/site-packages (2.1.1)\n",
"Requirement already satisfied: matplotlib in ./.venv/lib/python3.12/site-packages (3.9.2)\n",
"Requirement already satisfied: seaborn in ./.venv/lib/python3.12/site-packages (0.13.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in ./.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (1.3.0)\n",
"Requirement already satisfied: cycler>=0.10 in ./.venv/lib/python3.12/site-packages (from matplotlib) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in ./.venv/lib/python3.12/site-packages (from matplotlib) (4.54.1)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (1.4.7)\n",
"Requirement already satisfied: packaging>=20.0 in ./.venv/lib/python3.12/site-packages (from matplotlib) (24.1)\n",
"Requirement already satisfied: pillow>=8 in ./.venv/lib/python3.12/site-packages (from matplotlib) (10.4.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib) (3.1.4)\n",
"Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Pin the versions this notebook was last run with so a fresh environment\n",
"# reproduces the same results.\n",
"%pip install pandas==2.2.3 numpy==2.1.1 matplotlib==3.9.2 seaborn==0.13.2\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Importing the necessary libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import os\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"## set directory\n",
"import os\n",
"\n",
"# Project root: set the CALIF_EQUITY_ROOT environment variable to run the notebook\n",
"# from another machine or checkout; the default keeps the original location.\n",
"PROJECT_ROOT = os.environ.get(\n",
"    'CALIF_EQUITY_ROOT',\n",
"    '/home/dadams/CSU Fullerton Dropbox/David Adams/Research Projects/California Equity/california_equity_git',\n",
")\n",
"os.chdir(PROJECT_ROOT)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# Load the raw CCI program records. low_memory=False lets pandas infer each\n",
"# column's dtype from the whole file instead of chunk by chunk.\n",
"data = pd.read_csv(\n",
"    'data_raw/cci_programs_data.csv',\n",
"    low_memory=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Collect the column labels in a one-column DataFrame and write them to CSV\n",
"columns_data = pd.DataFrame(data.columns)\n",
"columns_data.to_csv(\n",
"    'data_raw/columns.csv',\n",
"    index=False,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Look up the dtype pandas assigned to each column\n",
"column_dtypes = data.dtypes\n",
"\n",
"# Write the column -> dtype listing to CSV (no header row)\n",
"column_dtypes.to_csv('data_raw/data_types.csv', header=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Census Tract</th>\n",
" <th>Total Project Cost</th>\n",
" <th>Total Program GGRFFunding</th>\n",
" <th>Total Project GHGReductions</th>\n",
" <th>Annual Project GHGReductions</th>\n",
" <th>Project Count</th>\n",
" <th>Total GGRFDisadvantaged Community Funding</th>\n",
" <th>Funding Benefiting Disadvantaged Communities</th>\n",
" <th>Estimated Num Vehicles In Service</th>\n",
" <th>Funding Within Disadvantage Communities</th>\n",
" <th>...</th>\n",
" <th>Indirect Jobs Fte</th>\n",
" <th>Induced Jobs Fte</th>\n",
" <th>Compost Produced Tons</th>\n",
" <th>Compost Produced Tons Yr</th>\n",
" <th>Net Density DUA</th>\n",
" <th>Applicants Assisted</th>\n",
" <th>Invasive Cover 12 Months</th>\n",
" <th>Invasive Cover 36 Months</th>\n",
" <th>Project Acreage</th>\n",
" <th>Intermediary Admin Expenses Calc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.193700e+05</td>\n",
" <td>1.414290e+05</td>\n",
" <td>1.414290e+05</td>\n",
" <td>1.414290e+05</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>5.518700e+04</td>\n",
" <td>55187.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>5.518700e+04</td>\n",
" <td>...</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.0</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>141429.000000</td>\n",
" <td>1.414290e+05</td>\n",
" <td>1.414290e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>6.053889e+09</td>\n",
" <td>9.206412e+05</td>\n",
" <td>7.791664e+04</td>\n",
" <td>7.717972e+02</td>\n",
" <td>0.205389</td>\n",
" <td>4.090872</td>\n",
" <td>2.736820e+04</td>\n",
" <td>110.217551</td>\n",
" <td>0.047331</td>\n",
" <td>2.030028e+04</td>\n",
" <td>...</td>\n",
" <td>0.064567</td>\n",
" <td>0.117056</td>\n",
" <td>0.440977</td>\n",
" <td>0.0</td>\n",
" <td>0.082260</td>\n",
" <td>0.019642</td>\n",
" <td>0.010656</td>\n",
" <td>0.010578</td>\n",
" <td>9.771087e+00</td>\n",
" <td>1.911114e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2.641870e+07</td>\n",
" <td>3.736191e+07</td>\n",
" <td>1.011645e+06</td>\n",
" <td>2.371604e+04</td>\n",
" <td>3.361723</td>\n",
" <td>18.381861</td>\n",
" <td>6.327936e+05</td>\n",
" <td>1738.772195</td>\n",
" <td>1.972262</td>\n",
" <td>5.590536e+05</td>\n",
" <td>...</td>\n",
" <td>1.444316</td>\n",
" <td>2.261146</td>\n",
" <td>45.712955</td>\n",
" <td>0.0</td>\n",
" <td>4.382593</td>\n",
" <td>1.406914</td>\n",
" <td>0.758250</td>\n",
" <td>0.757677</td>\n",
" <td>3.669526e+03</td>\n",
" <td>1.196317e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>6.001400e+09</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>-2.785930e+05</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>6.037271e+09</td>\n",
" <td>4.000000e+03</td>\n",
" <td>3.500000e+03</td>\n",
" <td>8.000000e+00</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>6.059022e+09</td>\n",
" <td>8.000000e+03</td>\n",
" <td>7.500000e+03</td>\n",
" <td>1.500000e+01</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>6.073016e+09</td>\n",
" <td>2.150000e+04</td>\n",
" <td>1.900000e+04</td>\n",
" <td>4.000000e+01</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>5.500000e+03</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.500000e+03</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>6.115041e+09</td>\n",
" <td>5.767700e+09</td>\n",
" <td>1.412670e+08</td>\n",
" <td>4.748924e+06</td>\n",
" <td>336.000000</td>\n",
" <td>2072.000000</td>\n",
" <td>6.443700e+07</td>\n",
" <td>102348.000000</td>\n",
" <td>503.000000</td>\n",
" <td>6.443700e+07</td>\n",
" <td>...</td>\n",
" <td>110.170000</td>\n",
" <td>151.000000</td>\n",
" <td>10365.000000</td>\n",
" <td>0.0</td>\n",
" <td>706.000000</td>\n",
" <td>320.000000</td>\n",
" <td>85.000000</td>\n",
" <td>85.000000</td>\n",
" <td>1.380000e+06</td>\n",
" <td>2.000000e+07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 82 columns</p>\n",
"</div>"
],
"text/plain": [
" Census Tract Total Project Cost Total Program GGRFFunding \\\n",
"count 1.193700e+05 1.414290e+05 1.414290e+05 \n",
"mean 6.053889e+09 9.206412e+05 7.791664e+04 \n",
"std 2.641870e+07 3.736191e+07 1.011645e+06 \n",
"min 6.001400e+09 0.000000e+00 0.000000e+00 \n",
"25% 6.037271e+09 4.000000e+03 3.500000e+03 \n",
"50% 6.059022e+09 8.000000e+03 7.500000e+03 \n",
"75% 6.073016e+09 2.150000e+04 1.900000e+04 \n",
"max 6.115041e+09 5.767700e+09 1.412670e+08 \n",
"\n",
" Total Project GHGReductions Annual Project GHGReductions \\\n",
"count 1.414290e+05 141429.000000 \n",
"mean 7.717972e+02 0.205389 \n",
"std 2.371604e+04 3.361723 \n",
"min -2.785930e+05 0.000000 \n",
"25% 8.000000e+00 0.000000 \n",
"50% 1.500000e+01 0.000000 \n",
"75% 4.000000e+01 0.000000 \n",
"max 4.748924e+06 336.000000 \n",
"\n",
" Project Count Total GGRFDisadvantaged Community Funding \\\n",
"count 141429.000000 5.518700e+04 \n",
"mean 4.090872 2.736820e+04 \n",
"std 18.381861 6.327936e+05 \n",
"min 0.000000 0.000000e+00 \n",
"25% 1.000000 0.000000e+00 \n",
"50% 1.000000 0.000000e+00 \n",
"75% 3.000000 5.500000e+03 \n",
"max 2072.000000 6.443700e+07 \n",
"\n",
" Funding Benefiting Disadvantaged Communities \\\n",
"count 55187.000000 \n",
"mean 110.217551 \n",
"std 1738.772195 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 0.000000 \n",
"max 102348.000000 \n",
"\n",
" Estimated Num Vehicles In Service \\\n",
"count 141429.000000 \n",
"mean 0.047331 \n",
"std 1.972262 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 0.000000 \n",
"max 503.000000 \n",
"\n",
" Funding Within Disadvantage Communities ... Indirect Jobs Fte \\\n",
"count 5.518700e+04 ... 141429.000000 \n",
"mean 2.030028e+04 ... 0.064567 \n",
"std 5.590536e+05 ... 1.444316 \n",
"min 0.000000e+00 ... 0.000000 \n",
"25% 0.000000e+00 ... 0.000000 \n",
"50% 0.000000e+00 ... 0.000000 \n",
"75% 1.500000e+03 ... 0.000000 \n",
"max 6.443700e+07 ... 110.170000 \n",
"\n",
" Induced Jobs Fte Compost Produced Tons Compost Produced Tons Yr \\\n",
"count 141429.000000 141429.000000 141429.0 \n",
"mean 0.117056 0.440977 0.0 \n",
"std 2.261146 45.712955 0.0 \n",
"min 0.000000 0.000000 0.0 \n",
"25% 0.000000 0.000000 0.0 \n",
"50% 0.000000 0.000000 0.0 \n",
"75% 0.000000 0.000000 0.0 \n",
"max 151.000000 10365.000000 0.0 \n",
"\n",
" Net Density DUA Applicants Assisted Invasive Cover 12 Months \\\n",
"count 141429.000000 141429.000000 141429.000000 \n",
"mean 0.082260 0.019642 0.010656 \n",
"std 4.382593 1.406914 0.758250 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 \n",
"50% 0.000000 0.000000 0.000000 \n",
"75% 0.000000 0.000000 0.000000 \n",
"max 706.000000 320.000000 85.000000 \n",
"\n",
" Invasive Cover 36 Months Project Acreage \\\n",
"count 141429.000000 1.414290e+05 \n",
"mean 0.010578 9.771087e+00 \n",
"std 0.757677 3.669526e+03 \n",
"min 0.000000 0.000000e+00 \n",
"25% 0.000000 0.000000e+00 \n",
"50% 0.000000 0.000000e+00 \n",
"75% 0.000000 0.000000e+00 \n",
"max 85.000000 1.380000e+06 \n",
"\n",
" Intermediary Admin Expenses Calc \n",
"count 1.414290e+05 \n",
"mean 1.911114e+03 \n",
"std 1.196317e+05 \n",
"min 0.000000e+00 \n",
"25% 0.000000e+00 \n",
"50% 0.000000e+00 \n",
"75% 0.000000e+00 \n",
"max 2.000000e+07 \n",
"\n",
"[8 rows x 82 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# describe the data\n",
"data.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Tract ZIP County ApproxLoc TotPop19 CIscore \\\n",
"0 6.083002e+09 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
"1 6.083002e+09 93455 Santa Barbara Santa Maria 13173 37.030667 \n",
"2 6.083002e+09 93454 Santa Barbara Santa Maria 2398 31.213140 \n",
"3 6.083002e+09 93455 Santa Barbara Orcutt 4496 6.639331 \n",
"4 6.083002e+09 93455 Santa Barbara Orcutt 4008 14.022852 \n",
"\n",
" CIscoreP Ozone OzoneP PM2_5 ... Elderly65 Hispanic \\\n",
"0 69.162885 0.034190 10.566273 7.567724 ... 12.5028 68.9210 \n",
"1 70.637922 0.035217 11.561917 7.624775 ... 5.3519 78.6229 \n",
"2 61.069087 0.034190 10.566273 7.548835 ... 12.8857 65.7214 \n",
"3 5.988401 0.036244 13.615432 7.660570 ... 14.4128 22.9537 \n",
"4 23.121533 0.036244 13.615432 7.663210 ... 18.8872 33.4082 \n",
"\n",
" White AfricanAm NativeAm OtherMult Shape_Leng Shape_Area \\\n",
"0 20.8899 0.4004 0.2670 1.3126 6999.357689 2.847611e+06 \n",
"1 13.2240 2.5051 0.0000 0.9489 19100.578232 1.635292e+07 \n",
"2 30.6088 0.9591 0.0000 2.1685 4970.985897 1.352329e+06 \n",
"3 69.1948 0.9342 0.7117 2.5356 6558.956012 2.417717e+06 \n",
"4 59.7804 0.6986 1.4721 1.3723 6570.368730 2.608422e+06 \n",
"\n",
" AAPI geometry \n",
"0 8.2091 POLYGON ((-39795.07 -341919.191, -38126.384 -3... \n",
"1 4.6990 POLYGON ((-39795.07 -341919.191, -39803.632 -3... \n",
"2 0.5421 POLYGON ((-38115.747 -341130.248, -38126.384 -... \n",
"3 3.6699 POLYGON ((-37341.662 -348530.437, -37252.307 -... \n",
"4 3.2685 POLYGON ((-39465.107 -348499.262, -38244.305 -... \n",
"\n",
"[5 rows x 67 columns]\n"
]
}
],
"source": [
"import geopandas as gpd\n",
"\n",
"# Load the CES4 shapefile (tract attributes plus polygon geometry)\n",
"ces_shapefile = \"california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\"\n",
"calenviroscreen = gpd.read_file(ces_shapefile)\n",
"\n",
"# Preview the first rows to confirm the attributes and geometry loaded\n",
"print(calenviroscreen.head())\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Convert both tract ID columns to the same 11-character, zero-padded text key.\n",
"# The IDs are read as floats, so a plain astype(str) yields '6083002103.0' (which\n",
"# zfill(11) cannot pad) and turns missing IDs into the literal '00000000nan'.\n",
"def _tract_key(tracts):\n",
"    \"\"\"Return tract IDs as zero-padded 11-digit strings; missing IDs stay missing.\"\"\"\n",
"    # Nullable Int64 drops the float '.0' suffix while keeping NaN as <NA>\n",
"    whole_ids = pd.to_numeric(tracts).astype('Int64')\n",
"    return whole_ids.astype('string').str.zfill(11)\n",
"\n",
"\n",
"data['Census Tract'] = _tract_key(data['Census Tract'])\n",
"calenviroscreen['Tract'] = _tract_key(calenviroscreen['Tract'])\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Inner-join the CES4 tracts (left) with the CCI records (right) on the tract ID,\n",
"# keeping only tract IDs present in both tables\n",
"merged_data = calenviroscreen.merge(\n",
"    data,\n",
"    how='inner',\n",
"    left_on='Tract',\n",
"    right_on='Census Tract',\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Write the merged attribute table, without the geometry column, to a CSV file\n",
"merged_attributes = merged_data.drop(columns='geometry')\n",
"merged_attributes.to_csv('merged_california_climate_investment.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"\n",
"# Wrap merged_data in a GeoDataFrame unless it already is one\n",
"already_geo = isinstance(merged_data, gpd.GeoDataFrame)\n",
"merged_data = merged_data if already_geo else gpd.GeoDataFrame(merged_data)\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EPSG:3310\n"
]
}
],
"source": [
"# Check CRS\n",
"print(merged_data.crs)\n",
"\n",
"# The CES4 tract geometries are in California Albers (EPSG:3310): projected\n",
"# metres, not lon/lat degrees. If the CRS label is missing, attach that one;\n",
"# labelling the coordinates as EPSG:4326 would misplace every polygon.\n",
"if merged_data.crs is None:\n",
"    merged_data = merged_data.set_crs(epsg=3310)\n"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"geometry\n"
]
}
],
"source": [
"print(merged_data.geometry.name)\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: geoalchemy2 in ./.venv/lib/python3.12/site-packages (0.15.2)\n",
"Requirement already satisfied: sqlalchemy in ./.venv/lib/python3.12/site-packages (2.0.35)\n",
"Requirement already satisfied: packaging in ./.venv/lib/python3.12/site-packages (from geoalchemy2) (24.1)\n",
"Requirement already satisfied: typing-extensions>=4.6.0 in ./.venv/lib/python3.12/site-packages (from sqlalchemy) (4.12.2)\n",
"Requirement already satisfied: greenlet!=0.4.17 in ./.venv/lib/python3.12/site-packages (from sqlalchemy) (3.1.1)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Pin the versions this notebook was last run with for reproducibility\n",
"%pip install geoalchemy2==0.15.2 sqlalchemy==2.0.35"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Tract ZIP County_x ApproxLoc TotPop19 CIscore \\\n",
"0 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
"1 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
"2 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
"3 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
"4 6083002103.0 93454 Santa Barbara Santa Maria 4495 36.019653 \n",
"\n",
" CIscoreP Ozone OzoneP PM2_5 ... Net Density DUA \\\n",
"0 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
"1 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
"2 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
"3 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
"4 69.162885 0.03419 10.566273 7.567724 ... 0.0 \n",
"\n",
" Applicants Assisted Invasive Cover 12 Months Invasive Cover 36 Months \\\n",
"0 0 0 0 \n",
"1 0 0 0 \n",
"2 0 0 0 \n",
"3 0 0 0 \n",
"4 0 0 0 \n",
"\n",
" Project Acreage IS IAE Intermediary Admin Expenses Calc \\\n",
"0 0 False 0 \n",
"1 0 False 0 \n",
"2 0 False 0 \n",
"3 0 False 0 \n",
"4 0 False 0 \n",
"\n",
" PRIMARY_FUNDING_RECIPIENT_TYPE TRIBAL AFFILIATION PROJECT PARTNERS \n",
"0 None None None \n",
"1 None None None \n",
"2 None None None \n",
"3 None None None \n",
"4 None None None \n",
"\n",
"[5 rows x 194 columns]\n",
"Spatial index created successfully.\n",
"Data uploaded and spatial index created successfully.\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"import geopandas as gpd\n",
"from sqlalchemy import create_engine, text\n",
"from sqlalchemy.engine import URL\n",
"\n",
"TABLE_NAME = 'california_climate_investment'\n",
"TARGET_EPSG = 3310  # NAD83 / California Albers (3110 is a different, non-California CRS)\n",
"\n",
"# Step 1: Load your merged GeoDataFrame (assuming it already exists as 'merged_data')\n",
"# If it's not a GeoDataFrame, convert it to one\n",
"if not isinstance(merged_data, gpd.GeoDataFrame):\n",
"    if 'geometry' not in merged_data.columns:\n",
"        raise ValueError(\"The DataFrame must have a 'geometry' column to be converted into a GeoDataFrame.\")\n",
"    merged_data = gpd.GeoDataFrame(merged_data, geometry='geometry')\n",
"\n",
"# Step 2: Make sure the data is in California Albers.\n",
"# - No CRS: attach the target CRS as a label (set_crs does not move coordinates);\n",
"#   this assumes the coordinates are already Albers, as in the CES4 shapefile.\n",
"# - Different CRS: reproject with to_crs. set_crs is never used to convert, because\n",
"#   calling it with a different code on a frame that already has a CRS raises.\n",
"if merged_data.crs is None:\n",
"    merged_data = merged_data.set_crs(epsg=TARGET_EPSG)\n",
"elif merged_data.crs.to_epsg() != TARGET_EPSG:\n",
"    merged_data = merged_data.to_crs(epsg=TARGET_EPSG)\n",
"\n",
"# Step 3: Confirm geometry column is named 'geometry'\n",
"geometry_col = 'geometry'\n",
"if geometry_col not in merged_data.columns:\n",
"    raise ValueError(\"GeoDataFrame does not contain a geometry column named 'geometry'.\")\n",
"\n",
"# Step 4: Create a connection to your PostGIS database.\n",
"# Connection settings come from the standard PG* environment variables. The password\n",
"# is prompted for when PGPASSWORD is unset, so it is never stored in the notebook.\n",
"# NOTE: the password that used to be hard-coded in this cell should be rotated.\n",
"db_url = URL.create(\n",
"    'postgresql',\n",
"    username=os.environ.get('PGUSER', 'postgres'),\n",
"    password=os.environ.get('PGPASSWORD') or getpass.getpass('PostGIS password: '),\n",
"    host=os.environ.get('PGHOST', '192.168.0.74'),\n",
"    port=int(os.environ.get('PGPORT', '5432')),\n",
"    database=os.environ.get('PGDATABASE', 'calif_equity'),\n",
")\n",
"engine = create_engine(db_url)\n",
"\n",
"# Step 5: Push the data to PostGIS, replacing the existing table if it exists\n",
"# The `to_postgis()` function automatically recognizes the geometry column\n",
"try:\n",
"    merged_data.to_postgis(TABLE_NAME, engine, if_exists='replace')\n",
"except Exception as e:\n",
"    print(f\"Error uploading data to PostGIS: {e}\")\n",
"    raise  # the steps below cannot succeed without the table\n",
"\n",
"# Step 6: Verify data upload by querying the table (select the first 5 rows)\n",
"try:\n",
"    gdf = gpd.read_postgis(f\"SELECT * FROM {TABLE_NAME} LIMIT 5\", engine, geom_col=geometry_col)\n",
"    print(gdf.head())\n",
"except Exception as e:\n",
"    print(f\"Error reading data from PostGIS: {e}\")\n",
"\n",
"# Step 7: Create a spatial index on the geometry column.\n",
"# engine.begin() commits when the block exits cleanly; with engine.connect() the\n",
"# DDL is rolled back on close under SQLAlchemy 2.x unless commit() is called.\n",
"index_created = False\n",
"try:\n",
"    with engine.begin() as conn:\n",
"        conn.execute(text(f\"\"\"\n",
"            CREATE INDEX IF NOT EXISTS {TABLE_NAME}_geom_idx\n",
"            ON {TABLE_NAME}\n",
"            USING GIST ({geometry_col});\n",
"        \"\"\"))\n",
"    index_created = True\n",
"    print(\"Spatial index created successfully.\")\n",
"except Exception as e:\n",
"    print(f\"Error creating spatial index: {e}\")\n",
"\n",
"# Only claim full success when the index step actually succeeded\n",
"if index_created:\n",
"    print(\"Data uploaded and spatial index created successfully.\")\n",
"else:\n",
"    print(\"Data uploaded, but the spatial index was not created.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EPSG:3110\n",
"EPSG:3110\n"
]
}
],
"source": [
"print(merged_data.crs)\n",
"print(gdf.crs)\n"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Row count of merged_data: 119356\n",
"Row count of gdf: 5\n",
"Columns of merged_data: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
" 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
" ...\n",
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
" dtype='object', length=194)\n",
"Columns of gdf: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
" 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
" ...\n",
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
" dtype='object', length=194)\n"
]
}
],
"source": [
"# Row counts: the full local frame versus the sample read back from PostGIS\n",
"local_rows, sample_rows = len(merged_data), len(gdf)\n",
"print(\"Row count of merged_data:\", local_rows)\n",
"print(\"Row count of gdf:\", sample_rows)\n",
"\n",
"# Column labels of both frames, for a side-by-side comparison\n",
"local_columns, sample_columns = merged_data.columns, gdf.columns\n",
"print(\"Columns of merged_data:\", local_columns)\n",
"print(\"Columns of gdf:\", sample_columns)\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'merged_data' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgeopandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mmerged_data\u001b[49m\u001b[38;5;241m.\u001b[39mplot(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m10\u001b[39m), edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblack\u001b[39m\u001b[38;5;124m'\u001b[39m, linewidth\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Add title and labels\u001b[39;00m\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCensus Tracts\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'merged_data' is not defined"
]
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import geopandas as gpd\n",
"\n",
"# Draw the tract polygons on an explicit figure/axes pair\n",
"fig, ax = plt.subplots(figsize=(10, 10))\n",
"merged_data.plot(ax=ax, edgecolor='black', linewidth=0.5)\n",
"\n",
"# Add title and labels. The geometries are in a projected CRS (metres),\n",
"# not lon/lat degrees, so the axes are easting/northing.\n",
"ax.set_title('Census Tracts')\n",
"ax.set_xlabel('Easting (m)')\n",
"ax.set_ylabel('Northing (m)')\n",
"\n",
"# Show the plot\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Save the merged attribute table (geometry dropped) as a CSV. Create the\n",
"# output folder first so the export does not fail when it does not exist yet.\n",
"os.makedirs('data_merged', exist_ok=True)\n",
"merged_data.drop(columns='geometry').to_csv('data_merged/merged_california_climate_investment.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(119356, 194)"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# data shape and columns\n",
"merged_data.shape\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "spatial_modeling",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}