This commit is contained in:
2025-01-26 19:24:23 -08:00
parent 32cd60e92b
commit d1dde0dbc6
4155 changed files with 29170 additions and 216373 deletions

View File

@@ -1,321 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# California equity\n",
"## Looking at collaboration components\n",
"Date: 2024-12-19"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Importing the necessary libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Set the working directory to the repository root so that relative data paths\n",
"# resolve. On another machine, set the CALIF_EQUITY_ROOT environment variable\n",
"# instead of editing this cell; the original location remains the default.\n",
"import os\n",
"\n",
"REPO_ROOT = os.environ.get('CALIF_EQUITY_ROOT', '/home/dadams/Repos/california_equity_git')\n",
"os.chdir(REPO_ROOT)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Load the raw CCI programs table; the path is relative to the working directory.\n",
"# low_memory=False makes pandas infer each column's dtype from the whole file\n",
"# rather than chunk by chunk.\n",
"cci_csv_path = 'data_raw/cci_programs_data.csv'\n",
"data = pd.read_csv(cci_csv_path, low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Project IDNumber', 'Reporting Cycle Name', 'Agency Name',\n",
" 'Program Name', 'Program Description', 'Sub Program Name',\n",
" 'Record Type', 'Project Name', 'Project Type', 'Project Description',\n",
" ...\n",
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
" dtype='object', length=127)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"def debug_shapefile(shapefile_path):\n",
"    \"\"\"\n",
"    Debug shapefile reading issues by inspecting the files on disk and attempting a read.\n",
"\n",
"    Args:\n",
"        shapefile_path (str or Path): Path to the .shp file\n",
"\n",
"    Returns:\n",
"        dict: Debug information with these keys:\n",
"            file_exists (bool): whether shapefile_path exists\n",
"            parent_dir_exists (bool): whether its parent directory exists\n",
"            components (dict): extension -> whether that sibling file exists\n",
"            file_sizes (dict): extension -> size in bytes, for existing components\n",
"            shp_header_ok (bool or None): True if the .shp starts with the shapefile\n",
"                file code 9994, False if it does not, None if it could not be opened\n",
"            readable (bool): whether GeoPandas could read the file\n",
"            num_features (int): present only when the read succeeded\n",
"            error, error_type (str): message and exception class name, present\n",
"                only when the read failed\n",
"    \"\"\"\n",
"    shp_path = Path(shapefile_path)\n",
"    base_path = shp_path.parent\n",
"    file_name = shp_path.stem\n",
"\n",
"    # Required shapefile components\n",
"    required_extensions = ['.shp', '.shx', '.dbf']\n",
"    optional_extensions = ['.prj', '.cpg', '.sbn', '.sbx']\n",
"\n",
"    debug_info = {\n",
"        'file_exists': shp_path.exists(),\n",
"        'parent_dir_exists': base_path.exists(),\n",
"        'components': {},\n",
"        'file_sizes': {},\n",
"        'shp_header_ok': None,\n",
"        'readable': False\n",
"    }\n",
"\n",
"    # Check for all component files\n",
"    for ext in required_extensions + optional_extensions:\n",
"        full_path = base_path / f\"{file_name}{ext}\"\n",
"        exists = full_path.exists()\n",
"        debug_info['components'][ext] = exists\n",
"        if exists:\n",
"            debug_info['file_sizes'][ext] = full_path.stat().st_size\n",
"\n",
"    # A real .shp main file begins with the big-endian integer 9994. A file that\n",
"    # exists but holds something else (placeholder, truncated or failed download)\n",
"    # passes every existence check above and still cannot be read.\n",
"    try:\n",
"        with shp_path.open('rb') as shp_file:\n",
"            magic = shp_file.read(4)\n",
"    except OSError:\n",
"        # Missing or unreadable file; 'file_exists' and 'error' already report that.\n",
"        magic = None\n",
"    if magic is not None:\n",
"        debug_info['shp_header_ok'] = int.from_bytes(magic, 'big') == 9994\n",
"\n",
"    # Try reading with explicit driver\n",
"    # NOTE(review): with the pyogrio engine, extra keyword arguments appear to be\n",
"    # forwarded as dataset open options, so driver= may not actually restrict the\n",
"    # driver - confirm against the pyogrio documentation.\n",
"    try:\n",
"        gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n",
"        debug_info['readable'] = True\n",
"        debug_info['num_features'] = len(gdf)\n",
"    except Exception as e:\n",
"        # Deliberately broad: this helper reports failures instead of raising them.\n",
"        debug_info['error'] = str(e)\n",
"        debug_info['error_type'] = type(e).__name__\n",
"\n",
"    return debug_info\n",
"\n",
"# Usage example\n",
"shapefile_path = \"/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\"\n",
"debug_results = debug_shapefile(shapefile_path)\n",
"\n",
"# Leave the result as the cell's last expression so the diagnostics are displayed.\n",
"debug_results"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking .shp file: True\n",
"File permissions: 644\n",
"Checking .shx file: True\n",
"File permissions: 644\n",
"Checking .dbf file: True\n",
"File permissions: 644\n"
]
}
],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"# Directory that holds the shapefile, and the name shared by its component files\n",
"base_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape'\n",
"file_name = 'CES4 Final Shapefile'\n",
"\n",
"# Report, for each mandatory component, whether it exists and its permission bits\n",
"required_files = ['.shp', '.shx', '.dbf']\n",
"for ext in required_files:\n",
"    full_path = os.path.join(base_path, file_name + ext)\n",
"    component_present = os.path.exists(full_path)\n",
"    print(f\"Checking {ext} file: {component_present}\")\n",
"    if component_present:\n",
"        # The last three octal digits of st_mode are the rwx permission bits\n",
"        permission_bits = oct(os.stat(full_path).st_mode)[-3:]\n",
"        print(f\"File permissions: {permission_bits}\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Approach 1 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.\n",
"Approach 2 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.\n",
"Directory contents: ['CES4 Final Shapefile.sbx', 'CES4 Final Shapefile.shp', 'CES4 Final Shapefile.sbn', 'CES4 Final Shapefile.shp.xml', 'CES4 Final Shapefile.shx', 'CES4 Final Shapefile.prj', 'CES4 Final Shapefile.cpg', 'CES4 Final Shapefile.dbf']\n"
]
}
],
"source": [
"import geopandas as gpd\n",
"\n",
"# Build the .shp path from the directory and file stem defined earlier in the notebook\n",
"shapefile_path = Path(base_path) / f\"{file_name}.shp\"\n",
"\n",
"# Approach 1: plain read of the path\n",
"# Approach 2: the same read with the ESRI Shapefile driver keyword\n",
"read_attempts = (\n",
"    (\"Approach 1\", {}),\n",
"    (\"Approach 2\", {'driver': 'ESRI Shapefile'}),\n",
")\n",
"for attempt_label, read_kwargs in read_attempts:\n",
"    try:\n",
"        gdf = gpd.read_file(shapefile_path, **read_kwargs)\n",
"    except Exception as e:\n",
"        print(f\"{attempt_label} failed: {e}\")\n",
"\n",
"# Approach 3: Check if the directory is readable\n",
"try:\n",
"    directory_listing = os.listdir(base_path)\n",
"    print(f\"Directory contents: {directory_listing}\")\n",
"except Exception as e:\n",
"    print(f\"Cannot list directory: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"GeoPandas version: 1.0.1\n",
"Fiona version: 1.10.1\n"
]
}
],
"source": [
"import geopandas as gpd\n",
"import fiona\n",
"# Report the installed versions of the geospatial libraries imported above\n",
"for package_label, package in (('GeoPandas', gpd), ('Fiona', fiona)):\n",
"    print(f\"{package_label} version: {package.__version__}\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"ename": "DataSourceError",
"evalue": "'/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mDataSourceError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[30], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Load the shapefile\u001b[39;00m\n\u001b[1;32m 4\u001b[0m shapefile_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 5\u001b[0m gdf \u001b[38;5;241m=\u001b[39m \u001b[43mgpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshapefile_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Print the head of the GeoDataFrame\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(gdf\u001b[38;5;241m.\u001b[39mhead())\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:294\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, columns, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 291\u001b[0m from_bytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyogrio\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read_file_pyogrio\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrows\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrows\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiona\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mapi\u001b[38;5;241m.\u001b[39mtypes\u001b[38;5;241m.\u001b[39mis_file_like(filename):\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:547\u001b[0m, in \u001b[0;36m_read_file_pyogrio\u001b[0;34m(path_or_bytes, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m 538\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 539\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keywords are deprecated, and \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill be removed in a future release. You can use the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keyword \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 543\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m,\n\u001b[1;32m 544\u001b[0m )\n\u001b[1;32m 545\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpyogrio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_bytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/geopandas.py:265\u001b[0m, in \u001b[0;36mread_dataframe\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_arrow:\n\u001b[1;32m 261\u001b[0m \u001b[38;5;66;03m# For arrow, datetimes are read as is.\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;66;03m# For numpy IO, datetimes are read as string values to preserve timezone info\u001b[39;00m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;66;03m# as numpy does not directly support timezones.\u001b[39;00m\n\u001b[1;32m 264\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatetime_as_string\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 265\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mread_func\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgdal_force_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 273\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfid_as_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_arrow:\n\u001b[1;32m 285\u001b[0m meta, table \u001b[38;5;241m=\u001b[39m result\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/raw.py:198\u001b[0m, in \u001b[0;36mread\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Read OGR data source into numpy arrays.\u001b[39;00m\n\u001b[1;32m 60\u001b[0m \n\u001b[1;32m 61\u001b[0m \u001b[38;5;124;03mIMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 194\u001b[0m \n\u001b[1;32m 195\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 196\u001b[0m dataset_kwargs \u001b[38;5;241m=\u001b[39m _preprocess_options_key_value(kwargs) \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m--> 198\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mogr_read\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43mget_vsi_path_or_buffer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_mask_to_wkb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_fids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdatetime_as_string\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdatetime_as_string\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:1240\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_read\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:216\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_open\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mDataSourceError\u001b[0m: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
]
}
],
"source": [
"import geopandas as gpd\n",
"\n",
"# Load the shapefile\n",
"shapefile_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp'\n",
"gdf = gpd.read_file(shapefile_path)\n",
"\n",
"# Show the first rows; a bare last expression uses the notebook's rich table\n",
"# display instead of the plain-text rendering that print() produces\n",
"gdf.head()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total GGRF Funding: $8.13B\n",
"Number of projects: 131428\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Parse the operational date and keep rows dated 2010-01-01 through 2024-11-01\n",
"# (both ends inclusive; rows whose date is missing are dropped by the comparison)\n",
"data['Date Operational'] = pd.to_datetime(data['Date Operational'])\n",
"in_window = data['Date Operational'].between('2010-01-01', '2024-11-01')\n",
"data = data.loc[in_window].copy()\n",
"\n",
"# Drop rows whose GGRF funding value is missing\n",
"data = data.dropna(subset=['Total Program GGRFFunding'])\n",
"\n",
"# Derived columns: calendar year, plus comma-based measures on the County field\n",
"county = data['County']\n",
"data['Year'] = data['Date Operational'].dt.year\n",
"data['is_multi_county'] = county.str.contains(',', na=False)\n",
"# Number of commas + 1; a missing County counts as 0 commas, giving a size of 1\n",
"data['partnership_size'] = county.str.count(',').fillna(0) + 1\n",
"\n",
"# Quick validation of the cleaned table\n",
"total_ggrf_billions = data['Total Program GGRFFunding'].sum() / 1e9\n",
"print(f\"Total GGRF Funding: ${total_ggrf_billions:.2f}B\")\n",
"print(f\"Number of projects: {len(data)}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,326 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pandas\n",
" Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n",
"Collecting geopandas\n",
" Using cached geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)\n",
"Collecting sqlalchemy\n",
" Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
"Collecting psycopg2-binary\n",
" Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
"Requirement already satisfied: numpy>=1.26.0 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.2.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
"Collecting pytz>=2020.1 (from pandas)\n",
" Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n",
"Collecting tzdata>=2022.7 (from pandas)\n",
" Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
"Collecting pyogrio>=0.7.2 (from geopandas)\n",
" Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n",
"Requirement already satisfied: packaging in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from geopandas) (24.2)\n",
"Collecting pyproj>=3.3.0 (from geopandas)\n",
" Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n",
"Collecting shapely>=2.0.0 (from geopandas)\n",
" Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)\n",
"Collecting greenlet!=0.4.17 (from sqlalchemy)\n",
" Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
"Collecting typing-extensions>=4.6.0 (from sqlalchemy)\n",
" Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n",
"Collecting certifi (from pyogrio>=0.7.2->geopandas)\n",
" Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n",
"Requirement already satisfied: six>=1.5 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
"Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n",
"Using cached geopandas-1.0.1-py3-none-any.whl (323 kB)\n",
"Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n",
"Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
"Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (613 kB)\n",
"Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl (24.0 MB)\n",
"Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.5 MB)\n",
"Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n",
"Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)\n",
"Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n",
"Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)\n",
"Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n",
"Installing collected packages: pytz, tzdata, typing-extensions, shapely, psycopg2-binary, greenlet, certifi, sqlalchemy, pyproj, pyogrio, pandas, geopandas\n",
"Successfully installed certifi-2024.12.14 geopandas-1.0.1 greenlet-3.1.1 pandas-2.2.3 psycopg2-binary-2.9.10 pyogrio-0.10.0 pyproj-3.7.0 pytz-2024.2 shapely-2.0.6 sqlalchemy-2.0.37 typing-extensions-4.12.2 tzdata-2025.1\n"
]
}
],
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"# Versions are pinned so a fresh environment reproduces the same install.\n",
"%pip install pandas==2.2.3 geopandas==1.0.1 sqlalchemy==2.0.37 psycopg2-binary==2.9.10"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"A module that was compiled using NumPy 1.x cannot be run in\n",
"NumPy 2.2.2 as it may crash. To support both 1.x and 2.x\n",
"versions of NumPy, modules must be compiled with NumPy 2.0.\n",
"Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n",
"\n",
"If you are a user of the module, the easiest solution will be to\n",
"downgrade to 'numpy<2' or try to upgrade the affected module.\n",
"We expect that some modules will need time to support NumPy 2.\n",
"\n",
"Traceback (most recent call last): File \"<frozen runpy>\", line 198, in _run_module_as_main\n",
" File \"<frozen runpy>\", line 88, in _run_code\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel_launcher.py\", line 18, in <module>\n",
" app.launch_new_instance()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n",
" app.start()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelapp.py\", line 739, in start\n",
" self.io_loop.start()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/tornado/platform/asyncio.py\", line 205, in start\n",
" self.asyncio_loop.run_forever()\n",
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 641, in run_forever\n",
" self._run_once()\n",
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 1986, in _run_once\n",
" handle._run()\n",
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/events.py\", line 88, in _run\n",
" self._context.run(self._callback, *self._args)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n",
" await self.process_one()\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n",
" await dispatch(*args)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n",
" await result\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n",
" await super().execute_request(stream, ident, parent)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n",
" reply_content = await reply_content\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n",
" res = shell.run_cell(\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n",
" return super().run_cell(*args, **kwargs)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3075, in run_cell\n",
" result = self._run_cell(\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3130, in _run_cell\n",
" result = runner(coro)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n",
" coro.send(None)\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3334, in run_cell_async\n",
" has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3517, in run_ast_nodes\n",
" if await self.run_code(code, result, async_=asy):\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3577, in run_code\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
" File \"/tmp/ipykernel_794405/3254404226.py\", line 2, in <module>\n",
" import geopandas as gpd\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py\", line 1, in <module>\n",
" from geopandas._config import options\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 109, in <module>\n",
" default_value=_default_use_pygeos(),\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 95, in _default_use_pygeos\n",
" import geopandas._compat as compat\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py\", line 9, in <module>\n",
" import shapely\n",
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py\", line 1, in <module>\n",
" from shapely.lib import GEOSException # NOQA\n"
]
},
{
"ename": "AttributeError",
"evalue": "_ARRAY_API not found",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;31mAttributeError\u001b[0m: _ARRAY_API not found"
]
},
{
"ename": "ImportError",
"evalue": "numpy.core.multiarray failed to import",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msqlalchemy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m create_engine\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m options\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeoseries\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoSeries\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeodataframe\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoDataFrame\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:109\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 104\u001b[0m compat\u001b[38;5;241m.\u001b[39mset_use_pygeos(value)\n\u001b[1;32m 107\u001b[0m use_pygeos \u001b[38;5;241m=\u001b[39m Option(\n\u001b[1;32m 108\u001b[0m key\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_pygeos\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m--> 109\u001b[0m default_value\u001b[38;5;241m=\u001b[39m\u001b[43m_default_use_pygeos\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 110\u001b[0m doc\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhether to use PyGEOS to speed up spatial operations. The default is True \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif PyGEOS is installed, and follows the USE_PYGEOS environment variable \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif set.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 114\u001b[0m ),\n\u001b[1;32m 115\u001b[0m validator\u001b[38;5;241m=\u001b[39m_validate_bool,\n\u001b[1;32m 116\u001b[0m callback\u001b[38;5;241m=\u001b[39m_callback_use_pygeos,\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_validate_io_engine\u001b[39m(value):\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:95\u001b[0m, in \u001b[0;36m_default_use_pygeos\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_default_use_pygeos\u001b[39m():\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m compat\u001b[38;5;241m.\u001b[39mUSE_PYGEOS\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py:9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeos\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# pandas compat\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n",
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GEOSException \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Geometry \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m geos_version, geos_version_string \u001b[38;5;66;03m# NOQA\u001b[39;00m\n",
"\u001b[0;31mImportError\u001b[0m: numpy.core.multiarray failed to import"
]
}
],
"source": [
"import pandas as pd\n",
"import geopandas as gpd\n",
"from sqlalchemy import create_engine\n",
"import numpy as np\n",
"from datetime import datetime\n",
"\n",
"import os\n",
"\n",
"# database variables\n",
"DB_USER = os.getenv('DB_USER')\n",
"DB_PASSWORD = os.getenv('DB_PASSWORD')\n",
"DB_HOST = os.getenv('DB_HOST')\n",
"DB_PORT = os.getenv('DB_PORT')\n",
"\n",
"\n",
"# Create database connection\n",
"def create_db_engine():\n",
"    \"\"\"Build a SQLAlchemy engine for the ``cci_db`` PostgreSQL database.\n",
"\n",
"    Connection settings are taken from the module-level DB_USER, DB_PASSWORD,\n",
"    DB_HOST and DB_PORT values.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The object returned by ``sqlalchemy.create_engine``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    EnvironmentError\n",
"        If any setting is None, i.e. its environment variable was not set.\n",
"        Previously this surfaced as an opaque ``TypeError`` from ``str + None``.\n",
"    \"\"\"\n",
"    from urllib.parse import quote\n",
"\n",
"    settings = {\n",
"        'DB_USER': DB_USER,\n",
"        'DB_PASSWORD': DB_PASSWORD,\n",
"        'DB_HOST': DB_HOST,\n",
"        'DB_PORT': DB_PORT,\n",
"    }\n",
"    missing = [name for name, value in settings.items() if value is None]\n",
"    if missing:\n",
"        raise EnvironmentError(\n",
"            'Missing database environment variables: ' + ', '.join(missing)\n",
"        )\n",
"\n",
"    # Percent-encode the credentials so characters such as '@', ':' or '/'\n",
"    # in a password cannot be mistaken for URL delimiters.\n",
"    credentials = quote(DB_USER, safe='') + ':' + quote(DB_PASSWORD, safe='')\n",
"    return create_engine(\n",
"        'postgresql://' + credentials + '@' + DB_HOST + ':' + DB_PORT + '/cci_db'\n",
"    )\n",
"\n",
"# Load and clean CES data\n",
"def process_ces_data(filepath):\n",
"    \"\"\"Read the CalEnviroScreen shapefile and shape it for the ``ces_data`` table.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    filepath : str or path-like\n",
"        Path handed to ``gpd.read_file``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    geopandas.GeoDataFrame\n",
"        The selected columns, renamed via ``column_map``, with ``geom`` as the\n",
"        active geometry column.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If an expected column is absent after the names are normalised.\n",
"    \"\"\"\n",
"    print(\"Processing CalEnviroScreen data...\")\n",
"    gdf = gpd.read_file(filepath)\n",
"\n",
"    # Clean and standardize column names\n",
"    gdf.columns = [col.lower().replace(' ', '_') for col in gdf.columns]\n",
"\n",
"    # Convert tract ID to a clean string. A numeric (float) tract column\n",
"    # stringifies as e.g. '6001400100.0', so the trailing '.0' is removed.\n",
"    # NOTE(review): assumes tract IDs never legitimately end in '.0' - confirm.\n",
"    tract_ids = gdf['tract'].astype(str).str.strip()\n",
"    gdf['tract'] = tract_ids.str.replace(r'\\.0$', '', regex=True)\n",
"\n",
"    # Select relevant columns\n",
"    selected = [\n",
"        'tract', 'zip', 'county', 'approxloc', 'totpop19',\n",
"        'ciscore', 'ciscorep', 'ozone', 'ozonep', 'pm2_5',\n",
"        'pm2_5_p', 'drinkwat', 'drinkwatp', 'poverty',\n",
"        'povertyp', 'unempl', 'unemplp', 'housburd',\n",
"        'housburdp', 'geometry',\n",
"    ]\n",
"\n",
"    # Rename attribute columns to match database schema\n",
"    column_map = {\n",
"        'tract': 'tract_id',\n",
"        'zip': 'zip_code',\n",
"        'approxloc': 'approx_loc',\n",
"        'totpop19': 'total_pop_19',\n",
"        'ciscore': 'ci_score',\n",
"        'ciscorep': 'ci_score_pctl',\n",
"        'pm2_5': 'pm25',\n",
"        'pm2_5_p': 'pm25_pctl',\n",
"        'drinkwat': 'drinking_water',\n",
"        'drinkwatp': 'drinking_water_pctl',\n",
"        'housburd': 'housing_burden',\n",
"        'housburdp': 'housing_burden_pctl',\n",
"    }\n",
"    ces_data = gdf[selected].rename(columns=column_map)\n",
"\n",
"    # The geometry column is renamed with rename_geometry rather than through\n",
"    # column_map: a plain column rename can leave the frame without an active\n",
"    # geometry column, which to_postgis needs.\n",
"    ces_data = ces_data.rename_geometry('geom')\n",
"\n",
"    return ces_data\n",
"\n",
"# Load and clean CCI data\n",
"def process_cci_data(filepath):\n",
" print(\"Processing CCI project data...\")\n",
" df = pd.read_csv(filepath, low_memory=False)\n",
" \n",
" # Clean column names\n",
" df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n",
" \n",
" # Convert date columns\n",
" df['date_operational'] = pd.to_datetime(df['date_operational'])\n",
" \n",
" # Filter date range\n",
" df = df[\n",
" (df['date_operational'] >= '2015-01-01') &\n",
" (df['date_operational'] <= '2024-12-31')\n",
" ]\n",
" \n",
" # Process project partners into array\n",
" df['project_partners'] = df['project_partners'].fillna('')\n",
" df['project_partners'] = df['project_partners'].apply(\n",
" lambda x: '{' + ','.join([p.strip() for p in str(x).split(',')]) + '}'\n",
" if x else '{}'\n",
" )\n",
" \n",
" # Select and prepare relevant columns\n",
" cci_data = df[[\n",
" 'project_idnumber', 'reporting_cycle_name', 'agency_name',\n",
" 'program_name', 'program_description', 'project_name',\n",
" 'project_type', 'project_description', 'date_operational',\n",
" 'census_tract', 'county', 'total_program_ggrffunding',\n",
" 'total_project_ghgreductions', 'is_benefit_disadvantaged_communities',\n",
" 'project_partners'\n",
" ]]\n",
" \n",
" # Rename columns to match schema\n",
" column_map = {\n",
" 'project_idnumber': 'project_id',\n",
" 'reporting_cycle_name': 'reporting_cycle',\n",
" 'total_program_ggrffunding': 'total_funding',\n",
" 'total_project_ghgreductions': 'ghg_reduction',\n",
" 'is_benefit_disadvantaged_communities': 'dac_benefit'\n",
" }\n",
" cci_data = cci_data.rename(columns=column_map)\n",
" \n",
" # Convert boolean columns\n",
" cci_data['dac_benefit'] = cci_data['dac_benefit'].astype(bool)\n",
" \n",
" return cci_data\n",
"\n",
"# Main loading function\n",
"def load_data_to_db(\n",
"    ces_path='california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp',\n",
"    cci_path='data_raw/cci_programs_data.csv',\n",
"):\n",
"    \"\"\"Process both datasets and write them to the database.\n",
"\n",
"    The CES frame replaces the ``ces_data`` table (via ``to_postgis``) and the\n",
"    CCI frame replaces the ``cci_projects`` table (via ``to_sql``).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    ces_path : str, optional\n",
"        Shapefile passed to ``process_ces_data``; defaults to the original path.\n",
"    cci_path : str, optional\n",
"        CSV passed to ``process_cci_data``; defaults to the original path.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        ``{'ces_records': int, 'cci_records': int}`` row counts of the frames\n",
"        that were written.\n",
"\n",
"    Raises\n",
"    ------\n",
"    Exception\n",
"        Any error is printed and then re-raised unchanged.\n",
"    \"\"\"\n",
"    engine = None\n",
"    try:\n",
"        engine = create_db_engine()\n",
"\n",
"        # Load CES data\n",
"        ces_data = process_ces_data(ces_path)\n",
"        print(\"Loading CES data to database...\")\n",
"        ces_data.to_postgis('ces_data', engine, if_exists='replace', index=False)\n",
"\n",
"        # Load CCI data\n",
"        cci_data = process_cci_data(cci_path)\n",
"        print(\"Loading CCI data to database...\")\n",
"        cci_data.to_sql('cci_projects', engine, if_exists='replace', index=False)\n",
"\n",
"        print(\"Data loading completed successfully!\")\n",
"\n",
"        # Return sample counts for verification\n",
"        return {\n",
"            'ces_records': len(ces_data),\n",
"            'cci_records': len(cci_data)\n",
"        }\n",
"\n",
"    except Exception as e:\n",
"        print(f\"Error loading data: {str(e)}\")\n",
"        raise\n",
"    finally:\n",
"        # Release pooled connections whether the load succeeded or failed;\n",
"        # engine is still None if create_db_engine itself raised.\n",
"        if engine is not None:\n",
"            engine.dispose()\n",
"\n",
"# Run the full load, then report how many rows were written per table\n",
"record_counts = load_data_to_db()\n",
"print(\n",
"    \"\\nRecord counts:\",\n",
"    f\"CES data: {record_counts['ces_records']} records\",\n",
"    f\"CCI projects: {record_counts['cci_records']} records\",\n",
"    sep=\"\\n\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long