From 2d459d420f3926fd4b08cc6080db552f4d2bcf34 Mon Sep 17 00:00:00 2001
From: dadams
Date: Thu, 19 Dec 2024 20:09:53 -0800
Subject: [PATCH] python .venv

---
 analysis/collaboration.ipynb | 133 +++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 analysis/collaboration.ipynb

diff --git a/analysis/collaboration.ipynb b/analysis/collaboration.ipynb
new file mode 100644
index 00000000..5d72bf15
--- /dev/null
+++ b/analysis/collaboration.ipynb
@@ -0,0 +1,133 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# calif equity\n",
+    "## Looking at collaboration components \n",
+    "Date: 2024-12-19"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Importing the necessary libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Work from the repo root so the relative data paths in later cells resolve (machine-specific path)\n",
+    "import os\n",
+    "os.chdir('/home/dadams/Repos/california_equity_git')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('data_raw/cci_programs_data.csv', low_memory=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import geopandas as gpd\n",
+    "\n",
+    "# Load the CES4 shapefile; the path is relative to the repo root set by os.chdir above\n",
+    "shapefile_path = 'california_enviroscreen/calif_enviroscreen_shape/CES4 Final Shapefile.shp'\n",
+    "gdf = gpd.read_file(shapefile_path)\n",
+    "\n",
+    "# Print the head of the GeoDataFrame\n",
+    "print(gdf.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "vscode": {
+     "languageId": "python"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Basic cleaning: parse dates, keep rows operational from 2010-01-01 through 2024-11-01 (inclusive)\n",
+    "data['Date Operational'] = pd.to_datetime(data['Date Operational'])\n",
+    "data = data[\n",
+    "    (data['Date Operational'] >= '2010-01-01') & \n",
+    "    (data['Date Operational'] <= '2024-11-01')\n",
+    "].copy()\n",
+    "\n",
+    "# Drop rows where 'Total Program GGRFFunding' is missing\n",
+    "data = data.dropna(subset=['Total Program GGRFFunding'])\n",
+    "\n",
+    "# Add derived columns (a comma in 'County' is treated as a separator between counties)\n",
+    "data['Year'] = data['Date Operational'].dt.year\n",
+    "data['is_multi_county'] = data['County'].str.contains(',', na=False)\n",
+    "data['partnership_size'] = data['County'].str.count(',').fillna(0) + 1\n",
+    "\n",
+    "# Quick validation: total funding in billions of dollars and the remaining row count\n",
+    "print(f\"Total GGRF Funding: ${data['Total Program GGRFFunding'].sum()/1e9:.2f}B\")\n",
+    "print(f\"Number of projects: {len(data)}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}