{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# California equity\n",
    "## Looking at collaboration components\n",
    "Date: 2024-12-19"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this one cell so a fresh kernel can Run All top to bottom.\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import geopandas as gpd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Configuration: project root, input paths, and study window\n",
    "# Set the CALIF_EQUITY_ROOT environment variable when the repository lives\n",
    "# somewhere else; the default is the original checkout location.\n",
    "PROJECT_ROOT = Path(os.environ.get('CALIF_EQUITY_ROOT', '/home/dadams/Repos/california_equity_git'))\n",
    "\n",
    "# Kept so that relative paths continue to resolve against the project root.\n",
    "os.chdir(PROJECT_ROOT)\n",
    "\n",
    "CCI_CSV_PATH = PROJECT_ROOT / 'data_raw' / 'cci_programs_data.csv'\n",
    "\n",
    "# Projects are kept only when 'Date Operational' falls inside this window (inclusive).\n",
    "DATE_START = '2010-01-01'\n",
    "DATE_END = '2024-11-01'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw CCI program records. This frame is never modified, so the cleaning\n",
    "# cell below can be re-run without reloading the CSV.\n",
    "data_raw = pd.read_csv(CCI_CSV_PATH, low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_raw.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the CalEnviroScreen (CES4) shapefile\n",
    "shapefile_path = PROJECT_ROOT / 'california_enviroscreen' / 'calif_enviroscreen_shape' / 'CES4 Final Shapefile.shp'\n",
    "gdf = gpd.read_file(shapefile_path)\n",
    "\n",
    "# Preview the first rows (bare expression gives the rich table display)\n",
    "gdf.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean and derive columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic cleaning: parse the operational date, keep projects inside the study\n",
    "# window, and remove rows with no GGRF funding.\n",
    "date_operational = pd.to_datetime(data_raw['Date Operational'])\n",
    "in_window = date_operational.between(DATE_START, DATE_END)  # missing dates compare False\n",
    "\n",
    "data = (\n",
    "    data_raw\n",
    "    .assign(**{'Date Operational': date_operational})\n",
    "    .loc[in_window]\n",
    "    .dropna(subset=['Total Program GGRFFunding'])\n",
    "    .copy()\n",
    ")\n",
    "\n",
    "# Add derived columns\n",
    "data['Year'] = data['Date Operational'].dt.year\n",
    "data['is_multi_county'] = data['County'].str.contains(',', na=False)\n",
    "# Number of comma-separated entries in 'County'; a missing County counts as 1.\n",
    "# Cast to int so the column stays an integer count even when County has gaps.\n",
    "data['partnership_size'] = data['County'].str.count(',').fillna(0).astype(int) + 1\n",
    "\n",
    "# Quick validation\n",
    "print(f\"Total GGRF Funding: ${data['Total Program GGRFFunding'].sum()/1e9:.2f}B\")\n",
    "print(f\"Number of projects: {len(data)}\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}