data setup, replace shape files for CES

This commit is contained in:
2025-01-26 19:50:21 -08:00
parent d1dde0dbc6
commit bf428bd14c
9 changed files with 105 additions and 170 deletions

View File

@@ -1,186 +1,66 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pandas\n",
" Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n",
"Collecting geopandas\n",
" Using cached geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)\n",
"Collecting sqlalchemy\n",
" Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
"Collecting psycopg2-binary\n",
" Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
"Requirement already satisfied: numpy>=1.26.0 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.2.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
"Collecting pytz>=2020.1 (from pandas)\n",
" Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n",
"Collecting tzdata>=2022.7 (from pandas)\n",
" Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
"Collecting pyogrio>=0.7.2 (from geopandas)\n",
" Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n",
"Requirement already satisfied: packaging in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from geopandas) (24.2)\n",
"Collecting pyproj>=3.3.0 (from geopandas)\n",
" Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n",
"Collecting shapely>=2.0.0 (from geopandas)\n",
" Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)\n",
"Collecting greenlet!=0.4.17 (from sqlalchemy)\n",
" Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
"Collecting typing-extensions>=4.6.0 (from sqlalchemy)\n",
" Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n",
"Collecting certifi (from pyogrio>=0.7.2->geopandas)\n",
" Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n",
"Requirement already satisfied: six>=1.5 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n",
"Using cached geopandas-1.0.1-py3-none-any.whl (323 kB)\n",
"Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n",
"Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
"Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (613 kB)\n",
"Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl (24.0 MB)\n",
"Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.5 MB)\n",
"Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n",
"Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)\n",
"Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n",
"Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)\n",
"Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n",
"Installing collected packages: pytz, tzdata, typing-extensions, shapely, psycopg2-binary, greenlet, certifi, sqlalchemy, pyproj, pyogrio, pandas, geopandas\n",
"Successfully installed certifi-2024.12.14 geopandas-1.0.1 greenlet-3.1.1 pandas-2.2.3 psycopg2-binary-2.9.10 pyogrio-0.10.0 pyproj-3.7.0 pytz-2024.2 shapely-2.0.6 sqlalchemy-2.0.37 typing-extensions-4.12.2 tzdata-2025.1\n"
]
}
],
"source": [
"# Install into the running kernel's environment (%pip, not !pip) and pin the\n",
"# versions this notebook was last run with so a fresh run is reproducible.\n",
"# geoalchemy2 is needed by GeoDataFrame.to_postgis, which the loader calls.\n",
"%pip install -q pandas==2.2.3 geopandas==1.0.1 sqlalchemy==2.0.37 psycopg2-binary==2.9.10 geoalchemy2"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully connected to the database!\n",
"Dropping existing database objects...\n",
"Processing CalEnviroScreen data...\n",
"Loading CES data to database...\n",
"Processing CCI project data...\n",
"Loading CCI data to database...\n",
"Creating analysis views...\n",
"Data loading completed successfully!\n",
"\n",
"A module that was compiled using NumPy 1.x cannot be run in\n",
"NumPy 2.2.2 as it may crash. To support both 1.x and 2.x\n",
"versions of NumPy, modules must be compiled with NumPy 2.0.\n",
"Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n",
"\n",
"If you are a user of the module, the easiest solution will be to\n",
"downgrade to 'numpy<2' or try to upgrade the affected module.\n",
"We expect that some modules will need time to support NumPy 2.\n",
"\n",
"Traceback (most recent call last): File \"<frozen runpy>\", line 198, in _run_module_as_main\n",
" File \"<frozen runpy>\", line 88, in _run_code\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel_launcher.py\", line 18, in <module>\n",
" app.launch_new_instance()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n",
" app.start()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelapp.py\", line 739, in start\n",
" self.io_loop.start()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/tornado/platform/asyncio.py\", line 205, in start\n",
" self.asyncio_loop.run_forever()\n",
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 641, in run_forever\n",
" self._run_once()\n",
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 1986, in _run_once\n",
" handle._run()\n",
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/events.py\", line 88, in _run\n",
" self._context.run(self._callback, *self._args)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n",
" await self.process_one()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n",
" await dispatch(*args)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n",
" await result\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n",
" await super().execute_request(stream, ident, parent)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n",
" reply_content = await reply_content\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n",
" res = shell.run_cell(\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n",
" return super().run_cell(*args, **kwargs)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3075, in run_cell\n",
" result = self._run_cell(\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3130, in _run_cell\n",
" result = runner(coro)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n",
" coro.send(None)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3334, in run_cell_async\n",
" has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3517, in run_ast_nodes\n",
" if await self.run_code(code, result, async_=asy):\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3577, in run_code\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
" File \"/tmp/ipykernel_794405/3254404226.py\", line 2, in <module>\n",
" import geopandas as gpd\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py\", line 1, in <module>\n",
" from geopandas._config import options\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 109, in <module>\n",
" default_value=_default_use_pygeos(),\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 95, in _default_use_pygeos\n",
" import geopandas._compat as compat\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py\", line 9, in <module>\n",
" import shapely\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py\", line 1, in <module>\n",
" from shapely.lib import GEOSException # NOQA\n"
]
},
{
"ename": "AttributeError",
"evalue": "_ARRAY_API not found",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;31mAttributeError\u001b[0m: _ARRAY_API not found"
]
},
{
"ename": "ImportError",
"evalue": "numpy.core.multiarray failed to import",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msqlalchemy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m create_engine\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m options\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeoseries\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoSeries\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeodataframe\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoDataFrame\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:109\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 104\u001b[0m compat\u001b[38;5;241m.\u001b[39mset_use_pygeos(value)\n\u001b[1;32m 107\u001b[0m use_pygeos \u001b[38;5;241m=\u001b[39m Option(\n\u001b[1;32m 108\u001b[0m key\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_pygeos\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m--> 109\u001b[0m default_value\u001b[38;5;241m=\u001b[39m\u001b[43m_default_use_pygeos\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 110\u001b[0m doc\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhether to use PyGEOS to speed up spatial operations. The default is True \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif PyGEOS is installed, and follows the USE_PYGEOS environment variable \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif set.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 114\u001b[0m ),\n\u001b[1;32m 115\u001b[0m validator\u001b[38;5;241m=\u001b[39m_validate_bool,\n\u001b[1;32m 116\u001b[0m callback\u001b[38;5;241m=\u001b[39m_callback_use_pygeos,\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_validate_io_engine\u001b[39m(value):\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:95\u001b[0m, in \u001b[0;36m_default_use_pygeos\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_default_use_pygeos\u001b[39m():\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m compat\u001b[38;5;241m.\u001b[39mUSE_PYGEOS\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py:9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeos\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# pandas compat\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GEOSException \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Geometry \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m geos_version, geos_version_string \u001b[38;5;66;03m# NOQA\u001b[39;00m\n",
"\u001b[0;31mImportError\u001b[0m: numpy.core.multiarray failed to import"
"Record counts:\n",
"CES data: 8035 records\n",
"CCI projects: 120715 records\n"
]
}
],
"source": [
"import pandas as pd\n",
"import geopandas as gpd\n",
"from sqlalchemy import create_engine\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import os\n",
"import geopandas as gpd\n",
"from sqlalchemy import create_engine, text\n",
"from datetime import datetime\n",
"\n",
"import os\n",
"\n",
"# Database configuration.\n",
"# Credentials are read from the environment only: a password must never be\n",
"# committed as a fallback default.\n",
"# NOTE(review): the password that used to be hard-coded here is in\n",
"# version-control history and should be rotated.\n",
"DB_USER = os.getenv('DB_USER', 'postgres')\n",
"DB_PASSWORD = os.getenv('DB_PASSWORD')\n",
"DB_HOST = os.getenv('DB_HOST', '192.168.0.74')  # site-specific default; override via DB_HOST\n",
"DB_PORT = os.getenv('DB_PORT', '5432')\n",
"DB_NAME = os.getenv('DB_NAME', 'calif_equity')\n",
"\n",
"# Fail early with a clear message instead of building a URL around a missing\n",
"# password. An intentionally empty password can be passed as DB_PASSWORD=''.\n",
"if DB_PASSWORD is None:\n",
"    raise RuntimeError('DB_PASSWORD is not set; export it before running this notebook.')\n",
"\n",
"# Work from the repository root so relative data paths resolve.\n",
"# Set CALIF_EQUITY_ROOT when the checkout lives somewhere else.\n",
"REPO_ROOT = os.getenv('CALIF_EQUITY_ROOT', '/home/dadams/Repos/california_equity_git')\n",
"os.chdir(REPO_ROOT)\n",
"\n",
"# Create database connection\n",
"def create_db_engine():\n",
"    \"\"\"Build a SQLAlchemy engine for the project's PostgreSQL database.\n",
"\n",
"    Connection settings are taken from the module-level DB_USER,\n",
"    DB_PASSWORD, DB_HOST, DB_PORT and DB_NAME values.\n",
"\n",
"    Returns:\n",
"        The engine produced by sqlalchemy.create_engine for DB_NAME.\n",
"    \"\"\"\n",
"    # Imported here so this function does not depend on edits to the\n",
"    # notebook's import cell.\n",
"    from sqlalchemy.engine import URL\n",
"\n",
"    # URL.create escapes special characters ('@', ':', '/') in the\n",
"    # credentials; formatting them into a string by hand would yield a\n",
"    # malformed URL for such passwords.\n",
"    connection_url = URL.create(\n",
"        'postgresql',\n",
"        username=DB_USER,\n",
"        password=DB_PASSWORD,\n",
"        host=DB_HOST,\n",
"        port=int(DB_PORT) if DB_PORT else None,\n",
"        database=DB_NAME,\n",
"    )\n",
"    return create_engine(connection_url)\n",
"\n",
"def drop_existing_objects(engine):\n",
"    \"\"\"Remove the project's views and tables so a load starts from scratch.\n",
"\n",
"    Every statement uses IF EXISTS, so running this against an empty\n",
"    database is harmless. All drops are committed together at the end.\n",
"\n",
"    Args:\n",
"        engine: SQLAlchemy engine used to open the connection.\n",
"    \"\"\"\n",
"    # Order matters for readability only (CASCADE is used throughout):\n",
"    # views are listed before the tables.\n",
"    drop_statements = (\n",
"        \"DROP VIEW IF EXISTS project_efficiency CASCADE\",\n",
"        \"DROP VIEW IF EXISTS regional_collaboration CASCADE\",\n",
"        \"DROP TABLE IF EXISTS agency_partnerships CASCADE\",\n",
"        \"DROP TABLE IF EXISTS cci_projects CASCADE\",\n",
"        \"DROP TABLE IF EXISTS ces_data CASCADE\",\n",
"    )\n",
"    with engine.connect() as conn:\n",
"        for statement in drop_statements:\n",
"            conn.execute(text(statement))\n",
"        conn.commit()\n",
"\n",
"# Load and clean CES data\n",
"def process_ces_data(filepath):\n",
" print(\"Processing CalEnviroScreen data...\")\n",
" gdf = gpd.read_file(filepath)\n",
@@ -216,9 +96,11 @@
" }\n",
" ces_data = ces_data.rename(columns=column_map)\n",
" \n",
" # Set the geometry column explicitly\n",
" ces_data = ces_data.set_geometry('geom')\n",
" \n",
" return ces_data\n",
"\n",
"# Load and clean CCI data\n",
"def process_cci_data(filepath):\n",
" print(\"Processing CCI project data...\")\n",
" df = pd.read_csv(filepath, low_memory=False)\n",
@@ -235,12 +117,8 @@
" (df['date_operational'] <= '2024-12-31')\n",
" ]\n",
" \n",
" # Process project partners into array\n",
" # Process project partners\n",
" df['project_partners'] = df['project_partners'].fillna('')\n",
" df['project_partners'] = df['project_partners'].apply(\n",
" lambda x: '{' + ','.join([p.strip() for p in str(x).split(',')]) + '}'\n",
" if x else '{}'\n",
" )\n",
" \n",
" # Select and prepare relevant columns\n",
" cci_data = df[[\n",
@@ -267,33 +145,90 @@
" \n",
" return cci_data\n",
"\n",
"# Analysis views built on top of the loaded tables\n",
"def create_views(engine):\n",
"    \"\"\"Create the project_efficiency and regional_collaboration views.\n",
"\n",
"    Both statements are plain CREATE VIEW (not CREATE OR REPLACE), and the\n",
"    two views are committed together once both have been issued.\n",
"\n",
"    Args:\n",
"        engine: SQLAlchemy engine used to open the connection.\n",
"    \"\"\"\n",
"    # One row per CCI project joined to its census tract's CES score.\n",
"    # ghg_efficiency is ghg_reduction per unit of total_funding (0 when the\n",
"    # funding is not positive); partner_count is the number of commas in\n",
"    # project_partners plus one, or 0 for an empty string.\n",
"    project_efficiency_sql = \"\"\"\n",
"        CREATE VIEW project_efficiency AS\n",
"        SELECT \n",
"            p.project_id,\n",
"            p.program_name,\n",
"            p.agency_name,\n",
"            p.total_funding,\n",
"            p.ghg_reduction,\n",
"            p.dac_benefit,\n",
"            CASE \n",
"                WHEN p.total_funding > 0 THEN p.ghg_reduction / p.total_funding \n",
"                ELSE 0 \n",
"            END as ghg_efficiency,\n",
"            c.ci_score as ces_score,\n",
"            CASE \n",
"                WHEN p.project_partners = '' THEN 0\n",
"                ELSE (length(p.project_partners) - length(replace(p.project_partners, ',', '')) + 1)\n",
"            END as partner_count\n",
"        FROM cci_projects p\n",
"        LEFT JOIN ces_data c ON cast(p.census_tract as text) = cast(c.tract_id as text)\n",
"    \"\"\"\n",
"\n",
"    # One row per county: project count, average partner count, total\n",
"    # funding, share of rows flagged dac_benefit, and GHG reduction per\n",
"    # unit of funding (NULL when the county's funding sums to 0).\n",
"    regional_collaboration_sql = \"\"\"\n",
"        CREATE VIEW regional_collaboration AS\n",
"        SELECT \n",
"            county,\n",
"            COUNT(DISTINCT project_id) as project_count,\n",
"            AVG(CASE \n",
"                WHEN project_partners = '' THEN 0\n",
"                ELSE (length(project_partners) - length(replace(project_partners, ',', '')) + 1)\n",
"            END) as avg_partners,\n",
"            SUM(total_funding) as total_funding,\n",
"            SUM(CASE WHEN dac_benefit THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as dac_rate,\n",
"            SUM(ghg_reduction) / NULLIF(SUM(total_funding), 0) as region_efficiency\n",
"        FROM cci_projects\n",
"        GROUP BY county\n",
"    \"\"\"\n",
"\n",
"    with engine.connect() as conn:\n",
"        for view_sql in (project_efficiency_sql, regional_collaboration_sql):\n",
"            conn.execute(text(view_sql))\n",
"        conn.commit()\n",
"\n",
"\n",
"def load_data_to_db(ces_path=None, cci_path=None):\n",
"    \"\"\"Rebuild the database from the CalEnviroScreen shapefile and the CCI CSV.\n",
"\n",
"    Drops the existing objects, loads both datasets (each processed exactly\n",
"    once), then creates the analysis views.\n",
"\n",
"    Args:\n",
"        ces_path: Path to the CES shapefile. Defaults to\n",
"            california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp\n",
"            under the repository root.\n",
"        cci_path: Path to the CCI projects CSV. Defaults to\n",
"            data_raw/cci_programs_data.csv under the repository root.\n",
"\n",
"    Returns:\n",
"        dict with 'ces_records' and 'cci_records', the row counts of the\n",
"        two processed datasets.\n",
"\n",
"    Raises:\n",
"        Exception: any error raised while connecting, processing or\n",
"            loading is printed and then re-raised unchanged.\n",
"    \"\"\"\n",
"    # The repository root is configurable through CALIF_EQUITY_ROOT; the\n",
"    # fallback keeps the location the notebook was written against.\n",
"    repo_root = os.getenv('CALIF_EQUITY_ROOT', '/home/dadams/Repos/california_equity_git')\n",
"    if ces_path is None:\n",
"        ces_path = os.path.join(\n",
"            repo_root,\n",
"            'california_enviroscreen',\n",
"            'calif_enviroscreen_shape',\n",
"            'CES4_Final_Shapefile.shp',\n",
"        )\n",
"    if cci_path is None:\n",
"        cci_path = os.path.join(repo_root, 'data_raw', 'cci_programs_data.csv')\n",
"\n",
"    try:\n",
"        engine = create_db_engine()\n",
"\n",
"        # Start from a clean slate so the views can be recreated afterwards\n",
"        print(\"Dropping existing database objects...\")\n",
"        drop_existing_objects(engine)\n",
"\n",
"        # CalEnviroScreen tracts (with geometry) go to a PostGIS table\n",
"        ces_data = process_ces_data(ces_path)\n",
"        print(\"Loading CES data to database...\")\n",
"        ces_data.to_postgis('ces_data', engine, if_exists='replace', index=False)\n",
"\n",
"        # CCI projects go to a regular table\n",
"        cci_data = process_cci_data(cci_path)\n",
"        print(\"Loading CCI data to database...\")\n",
"        cci_data.to_sql('cci_projects', engine, if_exists='replace', index=False)\n",
"\n",
"        # The views read from both tables, so they are created last\n",
"        print(\"Creating analysis views...\")\n",
"        create_views(engine)\n",
"\n",
"        print(\"Data loading completed successfully!\")\n",
"\n",
"        # Row counts for a quick sanity check by the caller\n",
"        return {\n",
"            'ces_records': len(ces_data),\n",
"            'cci_records': len(cci_data)\n",
"        }\n",
"\n",
"    except Exception as e:\n",
"        print(f\"Error loading data: {e}\")\n",
"        raise\n",
"\n",
"# Connectivity smoke test: report the outcome without aborting the cell\n",
"try:\n",
"    engine = create_db_engine()\n",
"    with engine.connect() as conn:\n",
"        print(\"Successfully connected to the database!\")\n",
"except Exception as exc:\n",
"    print(f\"Error connecting to the database: {str(exc)}\")\n",
"\n",
"# Run the full load and keep the returned row counts for reporting\n",
"record_counts = load_data_to_db()\n",
"print(\"\\nRecord counts:\")\n",