venv
This commit is contained in:
@@ -1,321 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# calif equity\n",
|
||||
"## Looking at collaboration components \n",
|
||||
"Date: 2024-12-19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Importing the necessary libraries\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## set directory\n",
|
||||
"import os\n",
|
||||
"os.chdir('/home/dadams/Repos/california_equity_git')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = pd.read_csv('data_raw/cci_programs_data.csv', low_memory=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['Project IDNumber', 'Reporting Cycle Name', 'Agency Name',\n",
|
||||
" 'Program Name', 'Program Description', 'Sub Program Name',\n",
|
||||
" 'Record Type', 'Project Name', 'Project Type', 'Project Description',\n",
|
||||
" ...\n",
|
||||
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
|
||||
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
|
||||
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
|
||||
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
|
||||
" dtype='object', length=127)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import geopandas as gpd\n",
|
||||
"import os\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"def debug_shapefile(shapefile_path):\n",
|
||||
" \"\"\"\n",
|
||||
" Debug shapefile reading issues by checking file existence and required components.\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" shapefile_path (str): Path to the .shp file\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" dict: Dictionary containing debug information\n",
|
||||
" \"\"\"\n",
|
||||
" base_path = Path(shapefile_path).parent\n",
|
||||
" file_name = Path(shapefile_path).stem\n",
|
||||
" \n",
|
||||
" # Required shapefile components\n",
|
||||
" required_extensions = ['.shp', '.shx', '.dbf']\n",
|
||||
" optional_extensions = ['.prj', '.cpg', '.sbn', '.sbx']\n",
|
||||
" \n",
|
||||
" debug_info = {\n",
|
||||
" 'file_exists': os.path.exists(shapefile_path),\n",
|
||||
" 'parent_dir_exists': os.path.exists(base_path),\n",
|
||||
" 'components': {},\n",
|
||||
" 'file_sizes': {},\n",
|
||||
" 'readable': False\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" # Check for all component files\n",
|
||||
" for ext in required_extensions + optional_extensions:\n",
|
||||
" full_path = base_path / f\"{file_name}{ext}\"\n",
|
||||
" exists = full_path.exists()\n",
|
||||
" debug_info['components'][ext] = exists\n",
|
||||
" if exists:\n",
|
||||
" debug_info['file_sizes'][ext] = os.path.getsize(full_path)\n",
|
||||
" \n",
|
||||
" # Try reading with explicit driver\n",
|
||||
" try:\n",
|
||||
" gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n",
|
||||
" debug_info['readable'] = True\n",
|
||||
" debug_info['num_features'] = len(gdf)\n",
|
||||
" except Exception as e:\n",
|
||||
" debug_info['error'] = str(e)\n",
|
||||
" \n",
|
||||
" return debug_info\n",
|
||||
"\n",
|
||||
"# Usage example\n",
|
||||
"shapefile_path = \"/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\"\n",
|
||||
"debug_results = debug_shapefile(shapefile_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Checking .shp file: True\n",
|
||||
"File permissions: 644\n",
|
||||
"Checking .shx file: True\n",
|
||||
"File permissions: 644\n",
|
||||
"Checking .dbf file: True\n",
|
||||
"File permissions: 644\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"# Define base path and file name\n",
|
||||
"base_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape'\n",
|
||||
"file_name = 'CES4 Final Shapefile'\n",
|
||||
"\n",
|
||||
"# Check for existence of all required files\n",
|
||||
"required_files = ['.shp', '.shx', '.dbf']\n",
|
||||
"for ext in required_files:\n",
|
||||
" full_path = os.path.join(base_path, file_name + ext)\n",
|
||||
" print(f\"Checking {ext} file: {os.path.exists(full_path)}\")\n",
|
||||
" if os.path.exists(full_path):\n",
|
||||
" print(f\"File permissions: {oct(os.stat(full_path).st_mode)[-3:]}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Approach 1 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.\n",
|
||||
"Approach 2 failed: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.\n",
|
||||
"Directory contents: ['CES4 Final Shapefile.sbx', 'CES4 Final Shapefile.shp', 'CES4 Final Shapefile.sbn', 'CES4 Final Shapefile.shp.xml', 'CES4 Final Shapefile.shx', 'CES4 Final Shapefile.prj', 'CES4 Final Shapefile.cpg', 'CES4 Final Shapefile.dbf']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import geopandas as gpd\n",
|
||||
"\n",
|
||||
"# Approach 1: Using absolute path with normalized separators\n",
|
||||
"shapefile_path = Path(base_path) / f\"{file_name}.shp\"\n",
|
||||
"try:\n",
|
||||
" gdf = gpd.read_file(shapefile_path)\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Approach 1 failed: {e}\")\n",
|
||||
"\n",
|
||||
"# Approach 2: Using explicit ESRI Shapefile driver\n",
|
||||
"try:\n",
|
||||
" gdf = gpd.read_file(shapefile_path, driver='ESRI Shapefile')\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Approach 2 failed: {e}\")\n",
|
||||
"\n",
|
||||
"# Approach 3: Check if the directory is readable\n",
|
||||
"try:\n",
|
||||
" print(f\"Directory contents: {os.listdir(base_path)}\")\n",
|
||||
"except Exception as e:\n",
|
||||
" print(f\"Cannot list directory: {e}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"GeoPandas version: 1.0.1\n",
|
||||
"Fiona version: 1.10.1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import geopandas as gpd\n",
|
||||
"import fiona\n",
|
||||
"print(f\"GeoPandas version: {gpd.__version__}\")\n",
|
||||
"print(f\"Fiona version: {fiona.__version__}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "DataSourceError",
|
||||
"evalue": "'/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mDataSourceError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[30], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Load the shapefile\u001b[39;00m\n\u001b[1;32m 4\u001b[0m shapefile_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 5\u001b[0m gdf \u001b[38;5;241m=\u001b[39m \u001b[43mgpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mshapefile_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Print the head of the GeoDataFrame\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(gdf\u001b[38;5;241m.\u001b[39mhead())\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:294\u001b[0m, in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, columns, rows, engine, **kwargs)\u001b[0m\n\u001b[1;32m 291\u001b[0m from_bytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyogrio\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read_file_pyogrio\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrows\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrows\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiona\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mapi\u001b[38;5;241m.\u001b[39mtypes\u001b[38;5;241m.\u001b[39mis_file_like(filename):\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/io/file.py:547\u001b[0m, in \u001b[0;36m_read_file_pyogrio\u001b[0;34m(path_or_bytes, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m 538\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 539\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore_fields\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keywords are deprecated, and \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill be removed in a future release. You can use the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m keyword \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 543\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m,\n\u001b[1;32m 544\u001b[0m )\n\u001b[1;32m 545\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minclude_fields\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpyogrio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_bytes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/geopandas.py:265\u001b[0m, in \u001b[0;36mread_dataframe\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, fid_as_index, use_arrow, on_invalid, arrow_to_pandas_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_arrow:\n\u001b[1;32m 261\u001b[0m \u001b[38;5;66;03m# For arrow, datetimes are read as is.\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;66;03m# For numpy IO, datetimes are read as string values to preserve timezone info\u001b[39;00m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;66;03m# as numpy does not directly support timezones.\u001b[39;00m\n\u001b[1;32m 264\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatetime_as_string\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 265\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mread_func\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgdal_force_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 273\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfid_as_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_arrow:\n\u001b[1;32m 285\u001b[0m meta, table \u001b[38;5;241m=\u001b[39m result\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/raw.py:198\u001b[0m, in \u001b[0;36mread\u001b[0;34m(path_or_buffer, layer, encoding, columns, read_geometry, force_2d, skip_features, max_features, where, bbox, mask, fids, sql, sql_dialect, return_fids, datetime_as_string, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Read OGR data source into numpy arrays.\u001b[39;00m\n\u001b[1;32m 60\u001b[0m \n\u001b[1;32m 61\u001b[0m \u001b[38;5;124;03mIMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 194\u001b[0m \n\u001b[1;32m 195\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 196\u001b[0m dataset_kwargs \u001b[38;5;241m=\u001b[39m _preprocess_options_key_value(kwargs) \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m--> 198\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mogr_read\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43mget_vsi_path_or_buffer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mread_geometry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mread_geometry\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mskip_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_features\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbbox\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43mmask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_mask_to_wkb\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmask\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mfids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[43m \u001b[49m\u001b[43msql\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43msql_dialect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msql_dialect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_fids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_fids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[43mdatetime_as_string\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdatetime_as_string\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:1240\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_read\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/pyogrio/_io.pyx:216\u001b[0m, in \u001b[0;36mpyogrio._io.ogr_open\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mDataSourceError\u001b[0m: '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp' not recognized as being in a supported file format. It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import geopandas as gpd\n",
|
||||
"\n",
|
||||
"# Load the shapefile\n",
|
||||
"shapefile_path = '/home/dadams/Repos/california_equity_git/california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp'\n",
|
||||
"gdf = gpd.read_file(shapefile_path)\n",
|
||||
"\n",
|
||||
"# Print the head of the GeoDataFrame\n",
|
||||
"print(gdf.head())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Total GGRF Funding: $8.13B\n",
|
||||
"Number of projects: 131428\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Basic cleaning\n",
|
||||
"data['Date Operational'] = pd.to_datetime(data['Date Operational'])\n",
|
||||
"data = data[\n",
|
||||
" (data['Date Operational'] >= '2010-01-01') & \n",
|
||||
" (data['Date Operational'] <= '2024-11-01')\n",
|
||||
"].copy()\n",
|
||||
"\n",
|
||||
"# Remove rows with no GGRF funding\n",
|
||||
"data = data.dropna(subset=['Total Program GGRFFunding'])\n",
|
||||
"\n",
|
||||
"# Add derived columns\n",
|
||||
"data['Year'] = data['Date Operational'].dt.year\n",
|
||||
"data['is_multi_county'] = data['County'].str.contains(',', na=False)\n",
|
||||
"data['partnership_size'] = data['County'].str.count(',').fillna(0) + 1\n",
|
||||
"\n",
|
||||
"# Quick validation\n",
|
||||
"print(f\"Total GGRF Funding: ${data['Total Program GGRFFunding'].sum()/1e9:.2f}B\")\n",
|
||||
"print(f\"Number of projects: {len(data)}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
326
analysis/database_setup.ipynb
Normal file
326
analysis/database_setup.ipynb
Normal file
@@ -0,0 +1,326 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Collecting pandas\n",
|
||||
" Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)\n",
|
||||
"Collecting geopandas\n",
|
||||
" Using cached geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)\n",
|
||||
"Collecting sqlalchemy\n",
|
||||
" Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n",
|
||||
"Collecting psycopg2-binary\n",
|
||||
" Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
|
||||
"Requirement already satisfied: numpy>=1.26.0 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.2.2)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
|
||||
"Collecting pytz>=2020.1 (from pandas)\n",
|
||||
" Using cached pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n",
|
||||
"Collecting tzdata>=2022.7 (from pandas)\n",
|
||||
" Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
||||
"Collecting pyogrio>=0.7.2 (from geopandas)\n",
|
||||
" Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n",
|
||||
"Requirement already satisfied: packaging in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from geopandas) (24.2)\n",
|
||||
"Collecting pyproj>=3.3.0 (from geopandas)\n",
|
||||
" Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n",
|
||||
"Collecting shapely>=2.0.0 (from geopandas)\n",
|
||||
" Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)\n",
|
||||
"Collecting greenlet!=0.4.17 (from sqlalchemy)\n",
|
||||
" Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (3.8 kB)\n",
|
||||
"Collecting typing-extensions>=4.6.0 (from sqlalchemy)\n",
|
||||
" Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)\n",
|
||||
"Collecting certifi (from pyogrio>=0.7.2->geopandas)\n",
|
||||
" Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n",
|
||||
"Requirement already satisfied: six>=1.5 in /home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
|
||||
"Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)\n",
|
||||
"Using cached geopandas-1.0.1-py3-none-any.whl (323 kB)\n",
|
||||
"Using cached SQLAlchemy-2.0.37-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n",
|
||||
"Using cached psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
|
||||
"Using cached greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (613 kB)\n",
|
||||
"Using cached pyogrio-0.10.0-cp312-cp312-manylinux_2_28_x86_64.whl (24.0 MB)\n",
|
||||
"Using cached pyproj-3.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.5 MB)\n",
|
||||
"Using cached pytz-2024.2-py2.py3-none-any.whl (508 kB)\n",
|
||||
"Using cached shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)\n",
|
||||
"Using cached typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n",
|
||||
"Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)\n",
|
||||
"Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n",
|
||||
"Installing collected packages: pytz, tzdata, typing-extensions, shapely, psycopg2-binary, greenlet, certifi, sqlalchemy, pyproj, pyogrio, pandas, geopandas\n",
|
||||
"Successfully installed certifi-2024.12.14 geopandas-1.0.1 greenlet-3.1.1 pandas-2.2.3 psycopg2-binary-2.9.10 pyogrio-0.10.0 pyproj-3.7.0 pytz-2024.2 shapely-2.0.6 sqlalchemy-2.0.37 typing-extensions-4.12.2 tzdata-2025.1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install pandas geopandas sqlalchemy psycopg2-binary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"A module that was compiled using NumPy 1.x cannot be run in\n",
|
||||
"NumPy 2.2.2 as it may crash. To support both 1.x and 2.x\n",
|
||||
"versions of NumPy, modules must be compiled with NumPy 2.0.\n",
|
||||
"Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.\n",
|
||||
"\n",
|
||||
"If you are a user of the module, the easiest solution will be to\n",
|
||||
"downgrade to 'numpy<2' or try to upgrade the affected module.\n",
|
||||
"We expect that some modules will need time to support NumPy 2.\n",
|
||||
"\n",
|
||||
"Traceback (most recent call last): File \"<frozen runpy>\", line 198, in _run_module_as_main\n",
|
||||
" File \"<frozen runpy>\", line 88, in _run_code\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel_launcher.py\", line 18, in <module>\n",
|
||||
" app.launch_new_instance()\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/traitlets/config/application.py\", line 1075, in launch_instance\n",
|
||||
" app.start()\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelapp.py\", line 739, in start\n",
|
||||
" self.io_loop.start()\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/tornado/platform/asyncio.py\", line 205, in start\n",
|
||||
" self.asyncio_loop.run_forever()\n",
|
||||
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 641, in run_forever\n",
|
||||
" self._run_once()\n",
|
||||
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/base_events.py\", line 1986, in _run_once\n",
|
||||
" handle._run()\n",
|
||||
" File \"/home/dadams/miniconda3/lib/python3.12/asyncio/events.py\", line 88, in _run\n",
|
||||
" self._context.run(self._callback, *self._args)\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 545, in dispatch_queue\n",
|
||||
" await self.process_one()\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 534, in process_one\n",
|
||||
" await dispatch(*args)\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 437, in dispatch_shell\n",
|
||||
" await result\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 362, in execute_request\n",
|
||||
" await super().execute_request(stream, ident, parent)\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/kernelbase.py\", line 778, in execute_request\n",
|
||||
" reply_content = await reply_content\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py\", line 449, in do_execute\n",
|
||||
" res = shell.run_cell(\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell\n",
|
||||
" return super().run_cell(*args, **kwargs)\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3075, in run_cell\n",
|
||||
" result = self._run_cell(\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3130, in _run_cell\n",
|
||||
" result = runner(coro)\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/async_helpers.py\", line 128, in _pseudo_sync_runner\n",
|
||||
" coro.send(None)\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3334, in run_cell_async\n",
|
||||
" has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3517, in run_ast_nodes\n",
|
||||
" if await self.run_code(code, result, async_=asy):\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3577, in run_code\n",
|
||||
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
|
||||
" File \"/tmp/ipykernel_794405/3254404226.py\", line 2, in <module>\n",
|
||||
" import geopandas as gpd\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py\", line 1, in <module>\n",
|
||||
" from geopandas._config import options\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 109, in <module>\n",
|
||||
" default_value=_default_use_pygeos(),\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py\", line 95, in _default_use_pygeos\n",
|
||||
" import geopandas._compat as compat\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py\", line 9, in <module>\n",
|
||||
" import shapely\n",
|
||||
" File \"/home/dadams/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py\", line 1, in <module>\n",
|
||||
" from shapely.lib import GEOSException # NOQA\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "AttributeError",
|
||||
"evalue": "_ARRAY_API not found",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;31mAttributeError\u001b[0m: _ARRAY_API not found"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "ImportError",
|
||||
"evalue": "numpy.core.multiarray failed to import",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msqlalchemy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m create_engine\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m options\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeoseries\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoSeries\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeodataframe\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GeoDataFrame\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:109\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 104\u001b[0m compat\u001b[38;5;241m.\u001b[39mset_use_pygeos(value)\n\u001b[1;32m 107\u001b[0m use_pygeos \u001b[38;5;241m=\u001b[39m Option(\n\u001b[1;32m 108\u001b[0m key\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_pygeos\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m--> 109\u001b[0m default_value\u001b[38;5;241m=\u001b[39m\u001b[43m_default_use_pygeos\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 110\u001b[0m doc\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhether to use PyGEOS to speed up spatial operations. The default is True \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif PyGEOS is installed, and follows the USE_PYGEOS environment variable \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mif set.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 114\u001b[0m ),\n\u001b[1;32m 115\u001b[0m validator\u001b[38;5;241m=\u001b[39m_validate_bool,\n\u001b[1;32m 116\u001b[0m callback\u001b[38;5;241m=\u001b[39m_callback_use_pygeos,\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_validate_io_engine\u001b[39m(value):\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_config.py:95\u001b[0m, in \u001b[0;36m_default_use_pygeos\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_default_use_pygeos\u001b[39m():\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mgeopandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_compat\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mcompat\u001b[39;00m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m compat\u001b[38;5;241m.\u001b[39mUSE_PYGEOS\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/geopandas/_compat.py:9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeos\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# pandas compat\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# -----------------------------------------------------------------------------\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/Repos/california_equity_git/.venv/lib/python3.12/site-packages/shapely/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m GEOSException \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m Geometry \u001b[38;5;66;03m# NOQA\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mshapely\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlib\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m geos_version, geos_version_string \u001b[38;5;66;03m# NOQA\u001b[39;00m\n",
|
||||
"\u001b[0;31mImportError\u001b[0m: numpy.core.multiarray failed to import"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import geopandas as gpd\n",
|
||||
"from sqlalchemy import create_engine\n",
|
||||
"import numpy as np\n",
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# database variables\n",
|
||||
"DB_USER = os.getenv('DB_USER')\n",
|
||||
"DB_PASSWORD = os.getenv('DB_PASSWORD')\n",
|
||||
"DB_HOST = os.getenv('DB_HOST')\n",
|
||||
"DB_PORT = os.getenv('DB_PORT')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Create database connection\n",
|
||||
"def create_db_engine():\n",
|
||||
" return create_engine('postgresql://' + DB_USER + ':' + DB_PASSWORD + '@' + DB_HOST + ':' + DB_PORT + '/cci_db')\n",
|
||||
"\n",
|
||||
"# Load and clean CES data\n",
|
||||
"def process_ces_data(filepath):\n",
|
||||
" print(\"Processing CalEnviroScreen data...\")\n",
|
||||
" gdf = gpd.read_file(filepath)\n",
|
||||
" \n",
|
||||
" # Clean and standardize column names\n",
|
||||
" gdf.columns = [col.lower().replace(' ', '_') for col in gdf.columns]\n",
|
||||
" \n",
|
||||
" # Convert tract ID to string and ensure it's clean\n",
|
||||
" gdf['tract'] = gdf['tract'].astype(str).str.strip()\n",
|
||||
" \n",
|
||||
" # Select and rename relevant columns\n",
|
||||
" ces_data = gdf[['tract', 'zip', 'county', 'approxloc', 'totpop19',\n",
|
||||
" 'ciscore', 'ciscorep', 'ozone', 'ozonep', 'pm2_5',\n",
|
||||
" 'pm2_5_p', 'drinkwat', 'drinkwatp', 'poverty',\n",
|
||||
" 'povertyp', 'unempl', 'unemplp', 'housburd',\n",
|
||||
" 'housburdp', 'geometry']]\n",
|
||||
" \n",
|
||||
" # Rename columns to match database schema\n",
|
||||
" column_map = {\n",
|
||||
" 'tract': 'tract_id',\n",
|
||||
" 'zip': 'zip_code',\n",
|
||||
" 'approxloc': 'approx_loc',\n",
|
||||
" 'totpop19': 'total_pop_19',\n",
|
||||
" 'ciscore': 'ci_score',\n",
|
||||
" 'ciscorep': 'ci_score_pctl',\n",
|
||||
" 'pm2_5': 'pm25',\n",
|
||||
" 'pm2_5_p': 'pm25_pctl',\n",
|
||||
" 'drinkwat': 'drinking_water',\n",
|
||||
" 'drinkwatp': 'drinking_water_pctl',\n",
|
||||
" 'housburd': 'housing_burden',\n",
|
||||
" 'housburdp': 'housing_burden_pctl',\n",
|
||||
" 'geometry': 'geom'\n",
|
||||
" }\n",
|
||||
" ces_data = ces_data.rename(columns=column_map)\n",
|
||||
" \n",
|
||||
" return ces_data\n",
|
||||
"\n",
|
||||
"# Load and clean CCI data\n",
|
||||
"def process_cci_data(filepath):\n",
|
||||
" print(\"Processing CCI project data...\")\n",
|
||||
" df = pd.read_csv(filepath, low_memory=False)\n",
|
||||
" \n",
|
||||
" # Clean column names\n",
|
||||
" df.columns = [col.lower().replace(' ', '_') for col in df.columns]\n",
|
||||
" \n",
|
||||
" # Convert date columns\n",
|
||||
" df['date_operational'] = pd.to_datetime(df['date_operational'])\n",
|
||||
" \n",
|
||||
" # Filter date range\n",
|
||||
" df = df[\n",
|
||||
" (df['date_operational'] >= '2015-01-01') &\n",
|
||||
" (df['date_operational'] <= '2024-12-31')\n",
|
||||
" ]\n",
|
||||
" \n",
|
||||
" # Process project partners into array\n",
|
||||
" df['project_partners'] = df['project_partners'].fillna('')\n",
|
||||
" df['project_partners'] = df['project_partners'].apply(\n",
|
||||
" lambda x: '{' + ','.join([p.strip() for p in str(x).split(',')]) + '}'\n",
|
||||
" if x else '{}'\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Select and prepare relevant columns\n",
|
||||
" cci_data = df[[\n",
|
||||
" 'project_idnumber', 'reporting_cycle_name', 'agency_name',\n",
|
||||
" 'program_name', 'program_description', 'project_name',\n",
|
||||
" 'project_type', 'project_description', 'date_operational',\n",
|
||||
" 'census_tract', 'county', 'total_program_ggrffunding',\n",
|
||||
" 'total_project_ghgreductions', 'is_benefit_disadvantaged_communities',\n",
|
||||
" 'project_partners'\n",
|
||||
" ]]\n",
|
||||
" \n",
|
||||
" # Rename columns to match schema\n",
|
||||
" column_map = {\n",
|
||||
" 'project_idnumber': 'project_id',\n",
|
||||
" 'reporting_cycle_name': 'reporting_cycle',\n",
|
||||
" 'total_program_ggrffunding': 'total_funding',\n",
|
||||
" 'total_project_ghgreductions': 'ghg_reduction',\n",
|
||||
" 'is_benefit_disadvantaged_communities': 'dac_benefit'\n",
|
||||
" }\n",
|
||||
" cci_data = cci_data.rename(columns=column_map)\n",
|
||||
" \n",
|
||||
" # Convert boolean columns\n",
|
||||
" cci_data['dac_benefit'] = cci_data['dac_benefit'].astype(bool)\n",
|
||||
" \n",
|
||||
" return cci_data\n",
|
||||
"\n",
|
||||
"# Main loading function\n",
|
||||
"def load_data_to_db():\n",
|
||||
" try:\n",
|
||||
" engine = create_db_engine()\n",
|
||||
" \n",
|
||||
" # Load CES data\n",
|
||||
" ces_data = process_ces_data('california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp')\n",
|
||||
" print(\"Loading CES data to database...\")\n",
|
||||
" ces_data.to_postgis('ces_data', engine, if_exists='replace', index=False)\n",
|
||||
" \n",
|
||||
" # Load CCI data\n",
|
||||
" cci_data = process_cci_data('data_raw/cci_programs_data.csv')\n",
|
||||
" print(\"Loading CCI data to database...\")\n",
|
||||
" cci_data.to_sql('cci_projects', engine, if_exists='replace', index=False)\n",
|
||||
" \n",
|
||||
" print(\"Data loading completed successfully!\")\n",
|
||||
" \n",
|
||||
" # Return sample counts for verification\n",
|
||||
" return {\n",
|
||||
" 'ces_records': len(ces_data),\n",
|
||||
" 'cci_records': len(cci_data)\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error loading data: {str(e)}\")\n",
|
||||
" raise\n",
|
||||
"\n",
|
||||
"# Execute loading\n",
|
||||
"record_counts = load_data_to_db()\n",
|
||||
"print(\"\\nRecord counts:\")\n",
|
||||
"print(f\"CES data: {record_counts['ces_records']} records\")\n",
|
||||
"print(f\"CCI projects: {record_counts['cci_records']} records\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user