{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# calif equity\n", "## Looking at collaboration components \n", "Date: 2024-12-19" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Importing the necessary libraries\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import os" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "## set directory\n", "import os\n", "os.chdir('/home/dadams/Repos/california_equity_git')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "data = pd.read_csv('data_raw/cci_programs_data.csv', low_memory=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Project IDNumber', 'Reporting Cycle Name', 'Agency Name',\n", " 'Program Name', 'Program Description', 'Sub Program Name',\n", " 'Record Type', 'Project Name', 'Project Type', 'Project Description',\n", " ...\n", " 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n", " 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n", " 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n", " 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n", " dtype='object', length=127)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.columns" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "import geopandas as gpd\n", "import os\n", "from pathlib import Path\n", "\n", "def debug_shapefile(shapefile_path):\n", " \"\"\"\n", " Debug shapefile reading issues by checking file existence and required components.\n", " \n", " Args:\n", " shapefile_path (str): Path to the .shp file\n", " \n", " Returns:\n", " dict: Dictionary containing debug information\n", " \"\"\"\n", " base_path = Path(shapefile_path).parent\n", " file_name = Path(shapefile_path).stem\n", " \n", " # Required shapefile components\n", " required_extensions = ['.shp', '.shx', '.dbf']\n", " optional_extensions = ['.prj', '.cpg', '.sbn', '.sbx']\n", " \n", " debug_info = {\n", " 'file_exists': os.path.exists(shapefile_path),\n", " 'parent_dir_exists': os.path.exists(base_path),\n", " 'components': {},\n", " 'file_sizes': {},\n", " 'readable': False\n", " }\n", " \n", " # Check for all component files\n", " for ext in required_extensions + optional_extensions:\n", " full_path = base_path / f\"{file_name}{ext}\"\n", " exists = full_path.exists()\n", " debug_info['components'][ext] = exists\n", " if exists:\n", " debug_info['file_sizes'][ext] = os.path.getsize(full_path)\n", " \n", " # Try reading with explicit driver\n", " try:\n", " gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n", " debug_info['readable'] = True\n", " debug_info['num_features'] = len(gdf)\n", " except Exception as e:\n", " debug_info['error'] = str(e)\n", " \n", " return debug_info\n", "\n", "# Usage example\n", "shapefile_path = \"/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\"\n", "debug_results = debug_shapefile(shapefile_path)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Checking .shp file: True\n", "File permissions: 644\n", "Checking .shx file: True\n", "File permissions: 644\n", "Checking .dbf file: True\n", "File permissions: 644\n" ] } ], "source": [ "import os\n", "from pathlib import Path\n", "\n", "# Define base path and file name\n", "base_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape'\n", "file_name = 'CES4 Final Shapefile'\n", "\n", "# Check for existence of all required files\n", "required_files = ['.shp', '.shx', '.dbf']\n", "for ext in required_files:\n", " full_path = os.path.join(base_path, file_name + ext)\n", " print(f\"Checking {ext} file: {os.path.exists(full_path)}\")\n", " if os.path.exists(full_path):\n", " print(f\"File permissions: {oct(os.stat(full_path).st_mode)[-3:]}\")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Approach 1 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with ':', e.g. 'CSV:path'.\n", "Approach 2 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with ':', e.g. 'CSV:path'.\n", "Directory contents: ['CES4 Final Shapefile.sbx', 'CES4 Final Shapefile.shp', 'CES4 Final Shapefile.sbn', 'CES4 Final Shapefile.shp.xml', 'CES4 Final Shapefile.shx', 'CES4 Final Shapefile.prj', 'CES4 Final Shapefile.cpg', 'CES4 Final Shapefile.dbf']\n" ] } ], "source": [ "import geopandas as gpd\n", "\n", "# Approach 1: Using absolute path with normalized separators\n", "shapefile_path = Path(base_path) / f\"{file_name}.shp\"\n", "try:\n", " gdf = gpd.read_file(shapefile_path)\n", "except Exception as e:\n", " print(f\"Approach 1 failed: {e}\")\n", "\n", "# Approach 2: Using explicit ESRI Shapefile driver\n", "try:\n", " gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n", "except Exception as e:\n", " print(f\"Approach 2 failed: {e}\")\n", "\n", "# Approach 3: Check if the directory is readable\n", "try:\n", " print(f\"Directory contents: {os.listdir(base_path)}\")\n", "except Exception as e:\n", " print(f\"Cannot list directory: {e}\")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GeoPandas version: 1.0.1\n", "Fiona version: 1.10.1\n" ] } ], "source": [ "import geopandas as gpd\n", "import fiona\n", "print(f\"GeoPandas version: {gpd.__version__}\")\n", "print(f\"Fiona version: {fiona.__version__}\")" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "ename": "DataSourceError", "evalue": "'/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with ':', e.g. 'CSV:path'.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mDataSourceError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[30], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Load the shapefile\u001b[39;00m\n\u001b[1;32m 4\u001b[0m shapefile_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 5\u001b[0m gdf \u001b[38;5;241m=\u001b[39m \u001b[43mgpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshapefile_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Print the head of the GeoDataFrame\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(gdf\u001b[38;5;241m.\u001b[39mhead())\n", "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:294\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, columns, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 291\u001b[0m from_bytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyogrio\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read_file_pyogrio\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrows\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrows\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiona\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mapi\u001b[38;5;241m.\u001b[39mtypes\u001b[38;5;241m.\u001b[39mis_file_like(filename):\n", "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:547\u001b[0m, in \u001b[0;36m_read_file_pyogrio\u001b[0;34m(path_or_bytes, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m 538\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 539\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keywords are deprecated, and \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill be removed in a future release. You can use the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keyword \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 543\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m,\n\u001b[1;32m 544\u001b[0m )\n\u001b[1;32m 545\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpyogrio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_bytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/geopandas.py:265\u001b[0m, in \u001b[0;36mread_dataframe\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_arrow:\n\u001b[1;32m 261\u001b[0m \u001b[38;5;66;03m# For arrow, datetimes are read as is.\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;66;03m# For numpy IO, datetimes are read as string values to preserve timezone info\u001b[39;00m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;66;03m# as numpy does not directly support timezones.\u001b[39;00m\n\u001b[1;32m 264\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatetime_as_string\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 265\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mread_func\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgdal_force_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 273\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfid_as_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_arrow:\n\u001b[1;32m 285\u001b[0m meta, table \u001b[38;5;241m=\u001b[39m result\n", "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/raw.py:198\u001b[0m, in \u001b[0;36mread\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Read OGR data source into numpy arrays.\u001b[39;00m\n\u001b[1;32m 60\u001b[0m \n\u001b[1;32m 61\u001b[0m \u001b[38;5;124;03mIMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 194\u001b[0m \n\u001b[1;32m 195\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 196\u001b[0m dataset_kwargs \u001b[38;5;241m=\u001b[39m _preprocess_options_key_value(kwargs) \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m--> 198\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mogr_read\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43mget_vsi_path_or_buffer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_mask_to_wkb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_fids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatetime_as_string\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdatetime_as_string\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:1240\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_read\u001b[0;34m()\u001b[0m\n", "File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:216\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_open\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mDataSourceError\u001b[0m: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with ':', e.g. 'CSV:path'." ] } ], "source": [ "import geopandas as gpd\n", "\n", "# Load the shapefile\n", "shapefile_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp'\n", "gdf = gpd.read_file(shapefile_path)\n", "\n", "# Print the head of the GeoDataFrame\n", "print(gdf.head())" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total GGRF Funding: $8.13B\n", "Number of projects: 131428\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "# Basic cleaning\n", "data['Date Operational'] = pd.to_datetime(data['Date Operational'])\n", "data = data[\n", " (data['Date Operational'] >= '2010-01-01') & \n", " (data['Date Operational'] <= '2024-11-01')\n", "].copy()\n", "\n", "# Remove rows with no GGRF funding\n", "data = data.dropna(subset=['Total Program GGRFFunding'])\n", "\n", "# Add derived columns\n", "data['Year'] = data['Date Operational'].dt.year\n", "data['is_multi_county'] = data['County'].str.contains(',', na=False)\n", "data['partnership_size'] = data['County'].str.count(',').fillna(0) + 1\n", "\n", "# Quick validation\n", "print(f\"Total GGRF Funding: ${data['Total Program GGRFFunding'].sum()/1e9:.2f}B\")\n", "print(f\"Number of projects: {len(data)}\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }