From bf428bd14c2fd01101f2551229976196046b3a2a Mon Sep 17 00:00:00 2001 From: dadams Date: Sun, 26 Jan 2025 19:50:21 -0800 Subject: [PATCH] data setup, replace shape files for CES --- analysis/database_setup.ipynb | 275 +++++++----------- ...Shapefile.cpg => CES4_Final_Shapefile.cpg} | 0 ...Shapefile.dbf => CES4_Final_Shapefile.dbf} | 0 ...Shapefile.prj => CES4_Final_Shapefile.prj} | 0 ...Shapefile.sbn => CES4_Final_Shapefile.sbn} | Bin ...Shapefile.sbx => CES4_Final_Shapefile.sbx} | Bin ...Shapefile.shp => CES4_Final_Shapefile.shp} | 0 ...e.shp.xml => CES4_Final_Shapefile.shp.xml} | 0 ...Shapefile.shx => CES4_Final_Shapefile.shx} | 0 9 files changed, 105 insertions(+), 170 deletions(-) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.cpg => CES4_Final_Shapefile.cpg} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.dbf => CES4_Final_Shapefile.dbf} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.prj => CES4_Final_Shapefile.prj} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.sbn => CES4_Final_Shapefile.sbn} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.sbx => CES4_Final_Shapefile.sbx} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.shp => CES4_Final_Shapefile.shp} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.shp.xml => CES4_Final_Shapefile.shp.xml} (100%) rename california_enviroscreen/calif_enviroscreen_shape/{CES4 Final Shapefile.shx => CES4_Final_Shapefile.shx} (100%) diff --git a/analysis/database_setup.ipynb b/analysis/database_setup.ipynb index 874abd7a..0fbfbf83 100644 --- a/analysis/database_setup.ipynb +++ b/analysis/database_setup.ipynb @@ -1,186 +1,66 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting pandas\n", - " Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n", - "Collecting geopandas\n", - " Using cached geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)\n", - "Collecting sqlalchemy\n", - " Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n", - "Collecting psycopg2-binary\n", - " Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", - "Requirement already satisfied: numpy>=1.26.0 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.2.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n", - "Collecting pytz>=2020.1 (from pandas)\n", - " Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting tzdata>=2022.7 (from pandas)\n", - " Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", - "Collecting pyogrio>=0.7.2 (from geopandas)\n", - " Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n", - "Requirement already satisfied: packaging in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from geopandas) (24.2)\n", - "Collecting pyproj>=3.3.0 (from geopandas)\n", - " Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n", - "Collecting shapely>=2.0.0 (from geopandas)\n", - " Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)\n", - "Collecting greenlet!=0.4.17 (from sqlalchemy)\n", - " Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n", - "Collecting typing-extensions>=4.6.0 (from sqlalchemy)\n", - " Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n", - "Collecting certifi (from pyogrio>=0.7.2->geopandas)\n", - " Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n", - "Requirement already satisfied: six>=1.5 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", - "Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n", - "Using cached geopandas-1.0.1-py3-none-any.whl (323 kB)\n", - "Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n", - "Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", - "Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (613 kB)\n", - "Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl (24.0 MB)\n", - "Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.5 MB)\n", - "Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", - "Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)\n", - "Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n", - "Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)\n", - "Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n", - "Installing collected packages: pytz, tzdata, typing-extensions, shapely, psycopg2-binary, greenlet, certifi, sqlalchemy, pyproj, pyogrio, pandas, geopandas\n", - "Successfully installed certifi-2024.12.14 geopandas-1.0.1 greenlet-3.1.1 pandas-2.2.3 psycopg2-binary-2.9.10 pyogrio-0.10.0 pyproj-3.7.0 pytz-2024.2 shapely-2.0.6 sqlalchemy-2.0.37 typing-extensions-4.12.2 tzdata-2025.1\n" - ] - } - ], - "source": [ - "!pip install pandas geopandas sqlalchemy psycopg2-binary" - ] - }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ + "Successfully connected to the database!\n", + "Dropping existing database objects...\n", + "Processing CalEnviroScreen data...\n", + "Loading CES data to database...\n", + "Processing CCI project data...\n", + "Loading CCI data to database...\n", + "Creating analysis views...\n", + "Data loading completed successfully!\n", "\n", - "A module that was compiled using NumPy 1.x cannot be run in\n", - "NumPy 2.2.2 as it may crash. To support both 1.x and 2.x\n", - "versions of NumPy, modules must be compiled with NumPy 2.0.\n", - "Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n", - "\n", - "If you are a user of the module, the easiest solution will be to\n", - "downgrade to 'numpy<2' or try to upgrade the affected module.\n", - "We expect that some modules will need time to support NumPy 2.\n", - "\n", - "Traceback (most recent call last): File \"\", line 198, in _run_module_as_main\n", - " File \"\", line 88, in _run_code\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel_launcher.py\", line 18, in \n", - " app.launch_new_instance()\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n", - " app.start()\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelapp.py\", line 739, in start\n", - " self.io_loop.start()\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/tornado/platform/asyncio.py\", line 205, in start\n", - " self.asyncio_loop.run_forever()\n", - " File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 641, in run_forever\n", - " self._run_once()\n", - " File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 1986, in _run_once\n", - " handle._run()\n", - " File \"/home/dadams/miniconda3/lib/python3.12/asyncio/events.py\", line 88, in _run\n", - " self._context.run(self._callback, *self._args)\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n", - " await self.process_one()\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n", - " await dispatch(*args)\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n", - " await result\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n", - " await super().execute_request(stream, ident, parent)\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n", - " reply_content = await reply_content\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n", - " res = shell.run_cell(\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n", - " return super().run_cell(*args, **kwargs)\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3075, in run_cell\n", - " result = self._run_cell(\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3130, in _run_cell\n", - " result = runner(coro)\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n", - " coro.send(None)\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3334, in run_cell_async\n", - " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3517, in run_ast_nodes\n", - " if await self.run_code(code, result, async_=asy):\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3577, in run_code\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n", - " File \"/tmp/ipykernel_794405/3254404226.py\", line 2, in \n", - " import geopandas as gpd\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py\", line 1, in \n", - " from geopandas._config import options\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 109, in \n", - " default_value=_default_use_pygeos(),\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 95, in _default_use_pygeos\n", - " import geopandas._compat as compat\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py\", line 9, in \n", - " import shapely\n", - " File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py\", line 1, in \n", - " from shapely.lib import GEOSException # NOQA\n" - ] - }, - { - "ename": "AttributeError", - "evalue": "_ARRAY_API not found", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;31mAttributeError\u001b[0m: _ARRAY_API not found" - ] - }, - { - "ename": "ImportError", - "evalue": "numpy.core.multiarray failed to import", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msqlalchemy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m create_engine\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n", - "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m options\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeoseries\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoSeries\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeodataframe\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoDataFrame\n", - "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:109\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 104\u001b[0m compat\u001b[38;5;241m.\u001b[39mset_use_pygeos(value)\n\u001b[1;32m 107\u001b[0m use_pygeos \u001b[38;5;241m=\u001b[39m Option(\n\u001b[1;32m 108\u001b[0m key\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_pygeos\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m--> 109\u001b[0m default_value\u001b[38;5;241m=\u001b[39m\u001b[43m_default_use_pygeos\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 110\u001b[0m doc\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhether to use PyGEOS to speed up spatial operations. The default is True \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif PyGEOS is installed, and follows the USE_PYGEOS environment variable \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif set.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 114\u001b[0m ),\n\u001b[1;32m 115\u001b[0m validator\u001b[38;5;241m=\u001b[39m_validate_bool,\n\u001b[1;32m 116\u001b[0m callback\u001b[38;5;241m=\u001b[39m_callback_use_pygeos,\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_validate_io_engine\u001b[39m(value):\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:95\u001b[0m, in \u001b[0;36m_default_use_pygeos\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_default_use_pygeos\u001b[39m():\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m compat\u001b[38;5;241m.\u001b[39mUSE_PYGEOS\n", - "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py:9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeos\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# pandas compat\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n", - "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GEOSException \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Geometry \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m geos_version, geos_version_string \u001b[38;5;66;03m# NOQA\u001b[39;00m\n", - "\u001b[0;31mImportError\u001b[0m: numpy.core.multiarray failed to import" + "Record counts:\n", + "CES data: 8035 records\n", + "CCI projects: 120715 records\n" ] } ], "source": [ "import pandas as pd\n", - "import geopandas as gpd\n", - "from sqlalchemy import create_engine\n", "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import os\n", + "import geopandas as gpd\n", + "from sqlalchemy import create_engine, text\n", "from datetime import datetime\n", "\n", - "import os\n", - "\n", - "# database variables\n", - "DB_USER = os.getenv('DB_USER')\n", - "DB_PASSWORD = os.getenv('DB_PASSWORD')\n", - "DB_HOST = os.getenv('DB_HOST')\n", - "DB_PORT = os.getenv('DB_PORT')\n", + "# Database configuration\n", + "DB_USER = os.getenv('DB_USER', 'postgres')\n", + "DB_PASSWORD = os.getenv('DB_PASSWORD', 'MandyLinkToby3')\n", + "DB_HOST = os.getenv('DB_HOST', '192.168.0.74')\n", + "DB_PORT = os.getenv('DB_PORT', '5432')\n", + "DB_NAME = 'calif_equity'\n", "\n", + "# Set working directory\n", + "os.chdir('/home/dadams/Repos/california_equity_git')\n", "\n", "# Create database connection\n", "def create_db_engine():\n", - " return create_engine('postgresql://' + DB_USER + ':' + DB_PASSWORD + '@' + DB_HOST + ':' + DB_PORT + '/cci_db')\n", + " connection_string = f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'\n", + " return create_engine(connection_string)\n", + "\n", + "def drop_existing_objects(engine):\n", + " \"\"\"Drop existing database objects in the correct order\"\"\"\n", + " with engine.connect() as connection:\n", + " # Drop views first\n", + " connection.execute(text(\"DROP VIEW IF EXISTS project_efficiency CASCADE\"))\n", + " connection.execute(text(\"DROP VIEW IF EXISTS regional_collaboration CASCADE\"))\n", + " # Then drop tables\n", + " connection.execute(text(\"DROP TABLE IF EXISTS agency_partnerships CASCADE\"))\n", + " connection.execute(text(\"DROP TABLE IF EXISTS cci_projects CASCADE\"))\n", + " connection.execute(text(\"DROP TABLE IF EXISTS ces_data CASCADE\"))\n", + " connection.commit()\n", "\n", - "# Load and clean CES data\n", "def process_ces_data(filepath):\n", " print(\"Processing CalEnviroScreen data...\")\n", " gdf = gpd.read_file(filepath)\n", @@ -216,9 +96,11 @@ " }\n", " ces_data = ces_data.rename(columns=column_map)\n", " \n", + " # Set the geometry column explicitly\n", + " ces_data = ces_data.set_geometry('geom')\n", + " \n", " return ces_data\n", "\n", - "# Load and clean CCI data\n", "def process_cci_data(filepath):\n", " print(\"Processing CCI project data...\")\n", " df = pd.read_csv(filepath, low_memory=False)\n", @@ -235,12 +117,8 @@ " (df['date_operational'] <= '2024-12-31')\n", " ]\n", " \n", - " # Process project partners into array\n", + " # Process project partners\n", " df['project_partners'] = df['project_partners'].fillna('')\n", - " df['project_partners'] = df['project_partners'].apply(\n", - " lambda x: '{' + ','.join([p.strip() for p in str(x).split(',')]) + '}'\n", - " if x else '{}'\n", - " )\n", " \n", " # Select and prepare relevant columns\n", " cci_data = df[[\n", @@ -267,33 +145,90 @@ " \n", " return cci_data\n", "\n", - "# Main loading function\n", + "def create_views(engine):\n", + " with engine.connect() as connection:\n", + " # Project efficiency view\n", + " connection.execute(text(\"\"\"\n", + " CREATE VIEW project_efficiency AS\n", + " SELECT \n", + " p.project_id,\n", + " p.program_name,\n", + " p.agency_name,\n", + " p.total_funding,\n", + " p.ghg_reduction,\n", + " p.dac_benefit,\n", + " CASE \n", + " WHEN p.total_funding > 0 THEN p.ghg_reduction / p.total_funding \n", + " ELSE 0 \n", + " END as ghg_efficiency,\n", + " c.ci_score as ces_score,\n", + " CASE \n", + " WHEN p.project_partners = '' THEN 0\n", + " ELSE (length(p.project_partners) - length(replace(p.project_partners, ',', '')) + 1)\n", + " END as partner_count\n", + " FROM cci_projects p\n", + " LEFT JOIN ces_data c ON cast(p.census_tract as text) = cast(c.tract_id as text)\n", + " \"\"\"))\n", + " \n", + " # Regional collaboration view\n", + " connection.execute(text(\"\"\"\n", + " CREATE VIEW regional_collaboration AS\n", + " SELECT \n", + " county,\n", + " COUNT(DISTINCT project_id) as project_count,\n", + " AVG(CASE \n", + " WHEN project_partners = '' THEN 0\n", + " ELSE (length(project_partners) - length(replace(project_partners, ',', '')) + 1)\n", + " END) as avg_partners,\n", + " SUM(total_funding) as total_funding,\n", + " SUM(CASE WHEN dac_benefit THEN 1 ELSE 0 END)::FLOAT / COUNT(*) as dac_rate,\n", + " SUM(ghg_reduction) / NULLIF(SUM(total_funding), 0) as region_efficiency\n", + " FROM cci_projects\n", + " GROUP BY county\n", + " \"\"\"))\n", + " \n", + " connection.commit()\n", + "\n", "def load_data_to_db():\n", " try:\n", " engine = create_db_engine()\n", " \n", + " # Drop existing objects first\n", + " print(\"Dropping existing database objects...\")\n", + " drop_existing_objects(engine)\n", + " \n", " # Load CES data\n", - " ces_data = process_ces_data('california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp')\n", + " ces_data = process_ces_data('/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp')\n", " print(\"Loading CES data to database...\")\n", " ces_data.to_postgis('ces_data', engine, if_exists='replace', index=False)\n", " \n", " # Load CCI data\n", - " cci_data = process_cci_data('data_raw/cci_programs_data.csv')\n", + " cci_data = process_cci_data('/home/dadams/Repos/california_equity_git/data_raw/cci_programs_data.csv')\n", " print(\"Loading CCI data to database...\")\n", " cci_data.to_sql('cci_projects', engine, if_exists='replace', index=False)\n", " \n", + " print(\"Creating analysis views...\")\n", + " create_views(engine)\n", + " \n", " print(\"Data loading completed successfully!\")\n", " \n", - " # Return sample counts for verification\n", " return {\n", " 'ces_records': len(ces_data),\n", " 'cci_records': len(cci_data)\n", " }\n", " \n", " except Exception as e:\n", - " print(f\"Error loading data: {str(e)}\")\n", + " print(f\"Error loading data: {e}\")\n", " raise\n", "\n", + "# Test database connection\n", + "try:\n", + " engine = create_db_engine()\n", + " with engine.connect() as conn:\n", + " print(\"Successfully connected to the database!\")\n", + "except Exception as e:\n", + " print(f\"Error connecting to the database: {str(e)}\")\n", + "\n", "# Execute loading\n", "record_counts = load_data_to_db()\n", "print(\"\\nRecord counts:\")\n", diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.cpg b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.cpg similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.cpg rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.cpg diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.dbf b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.dbf similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.dbf rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.dbf diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.prj b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.prj similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.prj rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.prj diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.sbn b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.sbn similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.sbn rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.sbn diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.sbx b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.sbx similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.sbx rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.sbx diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp.xml b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp.xml similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp.xml rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shp.xml diff --git a/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shx b/california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shx similarity index 100% rename from california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shx rename to california_enviroscreen/calif_enviroscreen_shape/CES4_Final_Shapefile.shx