892 lines
48 KiB
Plaintext
892 lines
48 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6db438e3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import seaborn as sns\n",
|
|
"from pathlib import Path\n",
|
|
"import logging\n",
|
|
"import warnings\n",
|
|
"\n",
|
|
"# Configure basic logging\n",
|
|
"logging.basicConfig(level=logging.INFO, \n",
|
|
" format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n",
|
|
"logger = logging.getLogger(\"cci_analyzer\")\n",
|
|
"\n",
|
|
"# Suppress pandas warnings\n",
|
|
"warnings.filterwarnings('ignore')\n",
|
|
"\n",
|
|
"class CCIDataAnalyzer:\n",
|
|
" \"\"\"Simplified analyzer for California Climate Investments data.\"\"\"\n",
|
|
" \n",
|
|
" def __init__(self, data_path, output_path=\"./output\"):\n",
|
|
" self.data_path = Path(data_path)\n",
|
|
" self.output_path = Path(output_path)\n",
|
|
" self.output_path.mkdir(parents=True, exist_ok=True)\n",
|
|
" self.data = {}\n",
|
|
" \n",
|
|
" logger.info(f\"Initialized with data path: {self.data_path}\")\n",
|
|
" \n",
|
|
" def load_data(self):\n",
|
|
" \"\"\"Load CCI data with special handling for encoding issues.\"\"\"\n",
|
|
" try:\n",
|
|
" logger.info(f\"Loading data from {self.data_path}\")\n",
|
|
" \n",
|
|
" # Read as string to avoid conversion errors\n",
|
|
" df = pd.read_csv(self.data_path, dtype=str)\n",
|
|
" logger.info(f\"Successfully loaded {len(df)} rows with {len(df.columns)} columns\")\n",
|
|
" \n",
|
|
" # Clean and process the data\n",
|
|
" df = self._clean_data(df)\n",
|
|
" \n",
|
|
" # Store in data dictionary\n",
|
|
" self.data['cci_projects'] = df\n",
|
|
" \n",
|
|
" # Create separate datasets for CARB and non-CARB projects\n",
|
|
" self._create_carb_datasets()\n",
|
|
" \n",
|
|
" return True\n",
|
|
" \n",
|
|
" except Exception as e:\n",
|
|
" logger.error(f\"Error loading data: {e}\")\n",
|
|
" return False\n",
|
|
" \n",
|
|
" def _clean_data(self, df):\n",
|
|
" \"\"\"Clean and process the CCI data.\"\"\"\n",
|
|
" try:\n",
|
|
" # 1. Fix column names\n",
|
|
" df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns]\n",
|
|
" \n",
|
|
" # 2. Handle the problematic lat_long column with LibreOffice encoding\n",
|
|
" if 'lat_long' in df.columns:\n",
|
|
" logger.info(\"Processing coordinates with special encoding handling\")\n",
|
|
" \n",
|
|
" # Function to clean LibreOffice encoding\n",
|
|
" def clean_libreoffice_encoding(text):\n",
|
|
" if pd.isna(text):\n",
|
|
" return text\n",
|
|
" \n",
|
|
" # Special LibreOffice character replacements\n",
|
|
" replacements = {\n",
|
|
" '+AC0-': '-', # Minus sign\n",
|
|
" '+ACI-': '\"', # Quote mark\n",
|
|
" }\n",
|
|
" \n",
|
|
" cleaned = str(text)\n",
|
|
" for code, char in replacements.items():\n",
|
|
" cleaned = cleaned.replace(code, char)\n",
|
|
" \n",
|
|
" return cleaned\n",
|
|
" \n",
|
|
" # Clean the lat_long column\n",
|
|
" df['lat_long'] = df['lat_long'].apply(clean_libreoffice_encoding)\n",
|
|
" \n",
|
|
" # Extract latitude and longitude\n",
|
|
" def extract_coords(coord_str):\n",
|
|
" if pd.isna(coord_str):\n",
|
|
" return (np.nan, np.nan)\n",
|
|
" \n",
|
|
" try:\n",
|
|
" # Try to split by comma\n",
|
|
" if ',' in coord_str:\n",
|
|
" parts = coord_str.split(',')\n",
|
|
" if len(parts) >= 2:\n",
|
|
" lon = parts[0].strip()\n",
|
|
" lat = parts[1].strip()\n",
|
|
" return (float(lat), float(lon))\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" \n",
|
|
" return (np.nan, np.nan)\n",
|
|
" \n",
|
|
" # Extract coordinates safely\n",
|
|
" try:\n",
|
|
" coords = df['lat_long'].apply(extract_coords)\n",
|
|
" df['latitude'] = coords.apply(lambda x: x[0])\n",
|
|
" df['longitude'] = coords.apply(lambda x: x[1])\n",
|
|
" except Exception as e:\n",
|
|
" logger.error(f\"Error extracting coordinates: {e}\")\n",
|
|
" \n",
|
|
" # 3. Convert numeric columns\n",
|
|
" numeric_cols = [\n",
|
|
" 'total_project_cost', \n",
|
|
" 'total_program_ggrffunding', \n",
|
|
" 'project_life_years',\n",
|
|
" 'total_project_ghgreductions', \n",
|
|
" 'annual_project_ghgreductions'\n",
|
|
" ]\n",
|
|
" \n",
|
|
" for col in df.columns:\n",
|
|
" # Find matching columns (case insensitive)\n",
|
|
" if any(num_col in col.lower() for num_col in \n",
|
|
" ['cost', 'funding', 'ghg', 'reductions', 'years']):\n",
|
|
" df[col] = pd.to_numeric(df[col], errors='coerce')\n",
|
|
" \n",
|
|
" # 4. Convert date columns\n",
|
|
" date_cols = [col for col in df.columns if 'date' in col.lower()]\n",
|
|
" for col in date_cols:\n",
|
|
" df[col] = pd.to_datetime(df[col], errors='coerce')\n",
|
|
" \n",
|
|
" # 5. Extract funding year\n",
|
|
" fiscal_year_cols = [col for col in df.columns if 'fiscal_year' in col.lower()]\n",
|
|
" if fiscal_year_cols:\n",
|
|
" try:\n",
|
|
" # Handle different possible formats of fiscal year column\n",
|
|
" year_col = fiscal_year_cols[0]\n",
|
|
" # Try multiple approaches to extract year\n",
|
|
" try:\n",
|
|
" # Handle standard fiscal year format like \"2019-20\"\n",
|
|
" df['funding_year'] = df[year_col].astype(str).str.extract(r'(\\d{4})').astype('Int64')\n",
|
|
" except Exception:\n",
|
|
" logger.warning(f\"Could not extract year with regex pattern, trying direct conversion\")\n",
|
|
" # Try direct conversion if it's already a year\n",
|
|
" df['funding_year'] = pd.to_numeric(df[year_col], errors='coerce').astype('Int64')\n",
|
|
" except Exception as e:\n",
|
|
" logger.error(f\"Error extracting funding year: {e}\")\n",
|
|
" \n",
|
|
" # 6. Calculate derived metrics if columns exist\n",
|
|
" funding_col = [col for col in df.columns if 'total_program' in col.lower() and 'funding' in col.lower()]\n",
|
|
" ghg_col = [col for col in df.columns if 'total_project' in col.lower() and 'ghg' in col.lower()]\n",
|
|
" dac_col = [col for col in df.columns if 'funding_benefiting' in col.lower()]\n",
|
|
" \n",
|
|
" if funding_col and ghg_col:\n",
|
|
" df['ghg_efficiency'] = np.where(\n",
|
|
" df[ghg_col[0]] > 0,\n",
|
|
" df[funding_col[0]] / df[ghg_col[0]],\n",
|
|
" np.nan\n",
|
|
" )\n",
|
|
" \n",
|
|
" if funding_col and dac_col:\n",
|
|
" df['dac_benefit_percentage'] = np.where(\n",
|
|
" df[funding_col[0]] > 0,\n",
|
|
" 100 * df[dac_col[0]] / df[funding_col[0]],\n",
|
|
" 0\n",
|
|
" )\n",
|
|
" \n",
|
|
" logger.info(\"Data cleaning and processing complete\")\n",
|
|
" return df\n",
|
|
" \n",
|
|
" except Exception as e:\n",
|
|
" logger.error(f\"Error cleaning data: {e}\")\n",
|
|
" return df\n",
|
|
" \n",
|
|
" def _create_carb_datasets(self):\n",
|
|
" \"\"\"Create separate datasets for CARB and non-CARB projects.\"\"\"\n",
|
|
" if 'cci_projects' not in self.data:\n",
|
|
" logger.error(\"No data available to create CARB datasets\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" df = self.data['cci_projects']\n",
|
|
" \n",
|
|
" try:\n",
|
|
" # Check if agency_name column exists\n",
|
|
" if 'agency_name' not in df.columns:\n",
|
|
" logger.error(\"agency_name column not found\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" # Create CARB dataset\n",
|
|
" carb_mask = df['agency_name'].str.contains('Air Resources Board', case=False, na=False)\n",
|
|
" self.data['carb_projects'] = df[carb_mask].copy()\n",
|
|
" self.data['non_carb_projects'] = df[~carb_mask].copy()\n",
|
|
" \n",
|
|
" logger.info(f\"Created CARB dataset with {len(self.data['carb_projects'])} projects\")\n",
|
|
" logger.info(f\"Created non-CARB dataset with {len(self.data['non_carb_projects'])} projects\")\n",
|
|
" \n",
|
|
" # Identify EV rebate/voucher projects within CARB\n",
|
|
" if len(self.data['carb_projects']) > 0:\n",
|
|
" carb_df = self.data['carb_projects']\n",
|
|
" \n",
|
|
" # Look for EV-related projects using various columns\n",
|
|
" ev_indicators = ['electric vehicle', 'ev ', 'rebate', 'voucher', 'clean vehicle']\n",
|
|
" \n",
|
|
" # Check program name for EV indicators\n",
|
|
" if 'program_name' in carb_df.columns:\n",
|
|
" ev_mask = carb_df['program_name'].str.lower().str.contains('|'.join(ev_indicators), na=False)\n",
|
|
" elif 'sub_program_name' in carb_df.columns:\n",
|
|
" ev_mask = carb_df['sub_program_name'].str.lower().str.contains('|'.join(ev_indicators), na=False)\n",
|
|
" else:\n",
|
|
" # If specific columns not found, try to find any column that might indicate EV projects\n",
|
|
" ev_mask = pd.Series(False, index=carb_df.index)\n",
|
|
" for col in carb_df.columns:\n",
|
|
" if carb_df[col].dtype == 'object':\n",
|
|
" try:\n",
|
|
" ev_mask = ev_mask | carb_df[col].astype(str).str.lower().str.contains('|'.join(ev_indicators), na=False)\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" \n",
|
|
" self.data['ev_projects'] = carb_df[ev_mask].copy()\n",
|
|
" logger.info(f\"Identified {len(self.data['ev_projects'])} potential EV rebate/voucher projects\")\n",
|
|
" \n",
|
|
" except Exception as e:\n",
|
|
" logger.error(f\"Error creating CARB datasets: {e}\")\n",
|
|
" \n",
|
|
" def analyze_data(self, include_carb_breakdown=True):\n",
|
|
" \"\"\"Basic analysis of CCI data with optional CARB breakdown.\"\"\"\n",
|
|
" if 'cci_projects' not in self.data:\n",
|
|
" logger.error(\"No data available for analysis\")\n",
|
|
" return None\n",
|
|
" \n",
|
|
" df = self.data['cci_projects']\n",
|
|
" \n",
|
|
" # Get agency information\n",
|
|
" if 'agency_name' in df.columns:\n",
|
|
" agency_counts = df['agency_name'].value_counts()\n",
|
|
" print(\"\\nAgencies involved in CCI projects:\")\n",
|
|
" for agency, count in agency_counts.head(10).items():\n",
|
|
" print(f\" {agency}: {count} projects\")\n",
|
|
" \n",
|
|
" # Analyze funding distribution\n",
|
|
" funding_col = [col for col in df.columns if 'total_program' in col.lower() and 'funding' in col.lower()]\n",
|
|
" if funding_col:\n",
|
|
" total_funding = df[funding_col[0]].sum()\n",
|
|
" print(f\"\\nTotal CCI funding: ${total_funding:,.2f}\")\n",
|
|
" print(f\"Average project funding: ${df[funding_col[0]].mean():,.2f}\")\n",
|
|
" \n",
|
|
" # Analyze GHG reductions\n",
|
|
" ghg_col = [col for col in df.columns if 'total_project' in col.lower() and 'ghg' in col.lower()]\n",
|
|
" if ghg_col:\n",
|
|
" total_ghg = df[ghg_col[0]].sum()\n",
|
|
" print(f\"\\nTotal GHG reductions: {total_ghg:,.2f} tons\")\n",
|
|
" print(f\"Average GHG reduction per project: {df[ghg_col[0]].mean():,.2f} tons\")\n",
|
|
" \n",
|
|
" # Analyze DAC benefits\n",
|
|
" if 'dac_benefit_percentage' in df.columns:\n",
|
|
" avg_dac = df['dac_benefit_percentage'].mean()\n",
|
|
" print(f\"\\nAverage DAC benefit percentage: {avg_dac:.2f}%\")\n",
|
|
" \n",
|
|
" # CARB vs. Non-CARB Analysis\n",
|
|
" if include_carb_breakdown and 'carb_projects' in self.data and 'non_carb_projects' in self.data:\n",
|
|
" carb_df = self.data['carb_projects']\n",
|
|
" non_carb_df = self.data['non_carb_projects']\n",
|
|
" \n",
|
|
" print(\"\\n--- CARB vs. Non-CARB Analysis ---\")\n",
|
|
" print(f\"CARB projects: {len(carb_df)} ({len(carb_df)/len(df)*100:.1f}% of total)\")\n",
|
|
" print(f\"Non-CARB projects: {len(non_carb_df)} ({len(non_carb_df)/len(df)*100:.1f}% of total)\")\n",
|
|
" \n",
|
|
" if funding_col:\n",
|
|
" carb_funding = carb_df[funding_col[0]].sum()\n",
|
|
" non_carb_funding = non_carb_df[funding_col[0]].sum()\n",
|
|
" print(f\"\\nCARB funding: ${carb_funding:,.2f} ({carb_funding/total_funding*100:.1f}% of total)\")\n",
|
|
" print(f\"Non-CARB funding: ${non_carb_funding:,.2f} ({non_carb_funding/total_funding*100:.1f}% of total)\")\n",
|
|
" print(f\"Average CARB project: ${carb_df[funding_col[0]].mean():,.2f}\")\n",
|
|
" print(f\"Average non-CARB project: ${non_carb_df[funding_col[0]].mean():,.2f}\")\n",
|
|
" \n",
|
|
" if ghg_col:\n",
|
|
" carb_ghg = carb_df[ghg_col[0]].sum()\n",
|
|
" non_carb_ghg = non_carb_df[ghg_col[0]].sum()\n",
|
|
" print(f\"\\nCARB GHG reductions: {carb_ghg:,.2f} tons ({carb_ghg/total_ghg*100:.1f}% of total)\")\n",
|
|
" print(f\"Non-CARB GHG reductions: {non_carb_ghg:,.2f} tons ({non_carb_ghg/total_ghg*100:.1f}% of total)\")\n",
|
|
" \n",
|
|
" # Calculate efficiency\n",
|
|
" if funding_col:\n",
|
|
" carb_efficiency = carb_funding / carb_ghg if carb_ghg > 0 else 0\n",
|
|
" non_carb_efficiency = non_carb_funding / non_carb_ghg if non_carb_ghg > 0 else 0\n",
|
|
" print(f\"\\nCARB efficiency: ${carb_efficiency:.2f} per ton CO2e\")\n",
|
|
" print(f\"Non-CARB efficiency: ${non_carb_efficiency:.2f} per ton CO2e\")\n",
|
|
" \n",
|
|
" # EV Projects Analysis\n",
|
|
" if 'ev_projects' in self.data:\n",
|
|
" ev_df = self.data['ev_projects']\n",
|
|
" print(\"\\n--- Electric Vehicle Projects Analysis ---\")\n",
|
|
" print(f\"EV projects: {len(ev_df)} ({len(ev_df)/len(carb_df)*100:.1f}% of CARB projects)\")\n",
|
|
" \n",
|
|
" if funding_col:\n",
|
|
" ev_funding = ev_df[funding_col[0]].sum()\n",
|
|
" print(f\"EV funding: ${ev_funding:,.2f} ({ev_funding/carb_funding*100:.1f}% of CARB funding)\")\n",
|
|
" print(f\"Average EV project: ${ev_df[funding_col[0]].mean():,.2f}\")\n",
|
|
" \n",
|
|
" if ghg_col:\n",
|
|
" ev_ghg = ev_df[ghg_col[0]].sum()\n",
|
|
" print(f\"EV GHG reductions: {ev_ghg:,.2f} tons ({ev_ghg/carb_ghg*100:.1f}% of CARB reductions)\")\n",
|
|
" \n",
|
|
" # Calculate efficiency\n",
|
|
" if funding_col:\n",
|
|
" ev_efficiency = ev_funding / ev_ghg if ev_ghg > 0 else 0\n",
|
|
" print(f\"EV efficiency: ${ev_efficiency:.2f} per ton CO2e\")\n",
|
|
" \n",
|
|
" return {\n",
|
|
" \"total_projects\": len(df),\n",
|
|
" \"total_funding\": total_funding if funding_col else None,\n",
|
|
" \"total_ghg_reductions\": total_ghg if ghg_col else None,\n",
|
|
" \"carb_projects\": len(self.data['carb_projects']) if 'carb_projects' in self.data else None,\n",
|
|
" \"ev_projects\": len(self.data['ev_projects']) if 'ev_projects' in self.data else None\n",
|
|
" }\n",
|
|
" \n",
|
|
" def plot_agency_comparison(self):\n",
|
|
" \"\"\"Create visualizations comparing agencies.\"\"\"\n",
|
|
" if 'cci_projects' not in self.data:\n",
|
|
" logger.error(\"No data available for visualization\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" df = self.data['cci_projects']\n",
|
|
" \n",
|
|
" # Ensure agency_name column exists\n",
|
|
" if 'agency_name' not in df.columns:\n",
|
|
" logger.error(\"agency_name column not found\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" # Find funding column\n",
|
|
" funding_col = [col for col in df.columns if 'total_program' in col.lower() and 'funding' in col.lower()]\n",
|
|
" if not funding_col:\n",
|
|
" logger.error(\"Funding column not found\")\n",
|
|
" return\n",
|
|
" funding_col = funding_col[0]\n",
|
|
" \n",
|
|
" # Find GHG reduction column\n",
|
|
" ghg_col = [col for col in df.columns if 'total_project' in col.lower() and 'ghg' in col.lower()]\n",
|
|
" if not ghg_col:\n",
|
|
" logger.error(\"GHG reduction column not found\")\n",
|
|
" return\n",
|
|
" ghg_col = ghg_col[0]\n",
|
|
" \n",
|
|
" # Create figure\n",
|
|
" plt.figure(figsize=(15, 12))\n",
|
|
" \n",
|
|
" # 1. Project count by agency\n",
|
|
" plt.subplot(2, 2, 1)\n",
|
|
" agency_counts = df['agency_name'].value_counts().head(10)\n",
|
|
" agency_counts.plot(kind='barh')\n",
|
|
" plt.title('Number of Projects by Agency (Top 10)')\n",
|
|
" plt.xlabel('Number of Projects')\n",
|
|
" \n",
|
|
" # 2. Funding by agency\n",
|
|
" plt.subplot(2, 2, 2)\n",
|
|
" agency_funding = df.groupby('agency_name')[funding_col].sum().sort_values(ascending=False).head(10) / 1_000_000\n",
|
|
" agency_funding.plot(kind='barh')\n",
|
|
" plt.title('Total Funding by Agency ($ Millions)')\n",
|
|
" plt.xlabel('Funding ($ Millions)')\n",
|
|
" \n",
|
|
" # 3. GHG reductions by agency\n",
|
|
" plt.subplot(2, 2, 3)\n",
|
|
" agency_ghg = df.groupby('agency_name')[ghg_col].sum().sort_values(ascending=False).head(10) / 1_000\n",
|
|
" agency_ghg.plot(kind='barh')\n",
|
|
" plt.title('GHG Reductions by Agency (Thousand Tons)')\n",
|
|
" plt.xlabel('GHG Reductions (Thousand Tons)')\n",
|
|
" \n",
|
|
" # 4. Efficiency by agency ($/ton)\n",
|
|
" plt.subplot(2, 2, 4)\n",
|
|
" agency_efficiency = df.groupby('agency_name').apply(\n",
|
|
" lambda x: x[funding_col].sum() / x[ghg_col].sum() if x[ghg_col].sum() > 0 else np.nan\n",
|
|
" ).dropna().sort_values().head(10)\n",
|
|
" agency_efficiency.plot(kind='barh')\n",
|
|
" plt.title('Cost Efficiency by Agency ($ per Ton CO2e)')\n",
|
|
" plt.xlabel('Cost per Ton GHG Reduced ($)')\n",
|
|
" \n",
|
|
" plt.tight_layout()\n",
|
|
" \n",
|
|
" # Save visualization\n",
|
|
" output_file = self.output_path / \"agency_comparison.png\"\n",
|
|
" plt.savefig(output_file, dpi=300, bbox_inches='tight')\n",
|
|
" logger.info(f\"Agency comparison visualization saved to {output_file}\")\n",
|
|
" plt.show()\n",
|
|
" \n",
|
|
" def plot_carb_analysis(self):\n",
|
|
" \"\"\"Create visualizations specifically for CARB vs non-CARB analysis.\"\"\"\n",
|
|
" if 'carb_projects' not in self.data or 'non_carb_projects' not in self.data:\n",
|
|
" logger.error(\"CARB datasets not available\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" # Find funding column\n",
|
|
" funding_col = None\n",
|
|
" ghg_col = None\n",
|
|
" \n",
|
|
" # Check if we have funding data\n",
|
|
" for key in ['carb_projects', 'non_carb_projects']:\n",
|
|
" df = self.data[key]\n",
|
|
" funding_cols = [col for col in df.columns if 'total_program' in col.lower() and 'funding' in col.lower()]\n",
|
|
" if funding_cols:\n",
|
|
" funding_col = funding_cols[0]\n",
|
|
" \n",
|
|
" ghg_cols = [col for col in df.columns if 'total_project' in col.lower() and 'ghg' in col.lower()]\n",
|
|
" if ghg_cols:\n",
|
|
" ghg_col = ghg_cols[0]\n",
|
|
" \n",
|
|
" if not funding_col or not ghg_col:\n",
|
|
" logger.error(\"Required columns not found\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" # Prepare data for comparison\n",
|
|
" carb_df = self.data['carb_projects']\n",
|
|
" non_carb_df = self.data['non_carb_projects']\n",
|
|
" ev_df = self.data.get('ev_projects', pd.DataFrame())\n",
|
|
" \n",
|
|
" # Create figure\n",
|
|
" plt.figure(figsize=(15, 12))\n",
|
|
" \n",
|
|
" # 1. Project count comparison\n",
|
|
" plt.subplot(2, 2, 1)\n",
|
|
" project_counts = pd.Series({\n",
|
|
" 'CARB (non-EV)': len(carb_df) - len(ev_df), \n",
|
|
" 'CARB (EV Projects)': len(ev_df),\n",
|
|
" 'Non-CARB': len(non_carb_df)\n",
|
|
" })\n",
|
|
" project_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
|
|
" plt.title('Distribution of Projects')\n",
|
|
" plt.ylabel('') # Hide ylabel\n",
|
|
" \n",
|
|
" # 2. Funding comparison\n",
|
|
" plt.subplot(2, 2, 2)\n",
|
|
" if funding_col:\n",
|
|
" carb_non_ev_funding = carb_df[~carb_df.index.isin(ev_df.index)][funding_col].sum() if not ev_df.empty else carb_df[funding_col].sum()\n",
|
|
" ev_funding = ev_df[funding_col].sum() if not ev_df.empty else 0\n",
|
|
" non_carb_funding = non_carb_df[funding_col].sum()\n",
|
|
" \n",
|
|
" funding_distribution = pd.Series({\n",
|
|
" 'CARB (non-EV)': carb_non_ev_funding,\n",
|
|
" 'CARB (EV Projects)': ev_funding,\n",
|
|
" 'Non-CARB': non_carb_funding\n",
|
|
" })\n",
|
|
" funding_distribution.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
|
|
" plt.title('Distribution of Funding')\n",
|
|
" plt.ylabel('') # Hide ylabel\n",
|
|
" \n",
|
|
" # 3. GHG reductions comparison\n",
|
|
" plt.subplot(2, 2, 3)\n",
|
|
" if ghg_col:\n",
|
|
" carb_non_ev_ghg = carb_df[~carb_df.index.isin(ev_df.index)][ghg_col].sum() if not ev_df.empty else carb_df[ghg_col].sum()\n",
|
|
" ev_ghg = ev_df[ghg_col].sum() if not ev_df.empty else 0\n",
|
|
" non_carb_ghg = non_carb_df[ghg_col].sum()\n",
|
|
" \n",
|
|
" ghg_distribution = pd.Series({\n",
|
|
" 'CARB (non-EV)': carb_non_ev_ghg,\n",
|
|
" 'CARB (EV Projects)': ev_ghg,\n",
|
|
" 'Non-CARB': non_carb_ghg\n",
|
|
" })\n",
|
|
" ghg_distribution.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
|
|
" plt.title('Distribution of GHG Reductions')\n",
|
|
" plt.ylabel('') # Hide ylabel\n",
|
|
" \n",
|
|
" # 4. Efficiency comparison ($/ton)\n",
|
|
" plt.subplot(2, 2, 4)\n",
|
|
" if funding_col and ghg_col:\n",
|
|
" carb_non_ev_efficiency = carb_non_ev_funding / carb_non_ev_ghg if carb_non_ev_ghg > 0 else 0\n",
|
|
" ev_efficiency = ev_funding / ev_ghg if ev_ghg > 0 else 0\n",
|
|
" non_carb_efficiency = non_carb_funding / non_carb_ghg if non_carb_ghg > 0 else 0\n",
|
|
" \n",
|
|
" efficiency_comparison = pd.Series({\n",
|
|
" 'CARB (non-EV)': carb_non_ev_efficiency,\n",
|
|
" 'CARB (EV Projects)': ev_efficiency,\n",
|
|
" 'Non-CARB': non_carb_efficiency\n",
|
|
" })\n",
|
|
" efficiency_comparison.plot(kind='bar')\n",
|
|
" plt.title('Cost Efficiency ($ per Ton CO2e)')\n",
|
|
" plt.ylabel('Cost per Ton GHG Reduced ($)')\n",
|
|
" plt.xticks(rotation=45)\n",
|
|
" \n",
|
|
" plt.tight_layout()\n",
|
|
" \n",
|
|
" # Save visualization\n",
|
|
" output_file = self.output_path / \"carb_analysis.png\"\n",
|
|
" plt.savefig(output_file, dpi=300, bbox_inches='tight')\n",
|
|
" logger.info(f\"CARB analysis visualization saved to {output_file}\")\n",
|
|
" plt.show()\n",
|
|
" \n",
|
|
" def plot_temporal_analysis(self):\n",
|
|
" \"\"\"Create visualizations showing trends over time.\"\"\"\n",
|
|
" if 'cci_projects' not in self.data:\n",
|
|
" logger.error(\"No data available for visualization\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" df = self.data['cci_projects']\n",
|
|
" \n",
|
|
" # Check if we have year data\n",
|
|
" if 'funding_year' not in df.columns:\n",
|
|
" logger.error(\"funding_year column not found\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" # Find funding and GHG columns\n",
|
|
" funding_col = [col for col in df.columns if 'total_program' in col.lower() and 'funding' in col.lower()]\n",
|
|
" ghg_col = [col for col in df.columns if 'total_project' in col.lower() and 'ghg' in col.lower()]\n",
|
|
" dac_col = [col for col in df.columns if 'dac_benefit_percentage' in col.lower()]\n",
|
|
" \n",
|
|
" if not funding_col or not ghg_col:\n",
|
|
" logger.error(\"Required columns not found\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" funding_col = funding_col[0]\n",
|
|
" ghg_col = ghg_col[0]\n",
|
|
" dac_col = dac_col[0] if dac_col else None\n",
|
|
" \n",
|
|
" # Separate CARB data if available\n",
|
|
" carb_df = self.data.get('carb_projects', None)\n",
|
|
" ev_df = self.data.get('ev_projects', None)\n",
|
|
" \n",
|
|
" # Create figure\n",
|
|
" plt.figure(figsize=(15, 12))\n",
|
|
" \n",
|
|
" # 1. Funding by year\n",
|
|
" plt.subplot(2, 2, 1)\n",
|
|
" yearly_funding = df.groupby('funding_year')[funding_col].sum() / 1_000_000\n",
|
|
" \n",
|
|
" # Add CARB and EV breakdowns if available\n",
|
|
" if carb_df is not None:\n",
|
|
" carb_yearly = carb_df.groupby('funding_year')[funding_col].sum() / 1_000_000\n",
|
|
" non_carb_yearly = yearly_funding - carb_yearly\n",
|
|
" \n",
|
|
" if ev_df is not None:\n",
|
|
" ev_yearly = ev_df.groupby('funding_year')[funding_col].sum() / 1_000_000\n",
|
|
" carb_non_ev_yearly = carb_yearly - ev_yearly\n",
|
|
" \n",
|
|
" # Plot stacked bar chart\n",
|
|
" years = sorted(yearly_funding.index)\n",
|
|
" bottom = np.zeros(len(years))\n",
|
|
" \n",
|
|
" plt.bar(years, non_carb_yearly.reindex(years, fill_value=0), label='Non-CARB', bottom=bottom)\n",
|
|
" bottom += non_carb_yearly.reindex(years, fill_value=0)\n",
|
|
" \n",
|
|
" plt.bar(years, carb_non_ev_yearly.reindex(years, fill_value=0), label='CARB (non-EV)', bottom=bottom)\n",
|
|
" bottom += carb_non_ev_yearly.reindex(years, fill_value=0)\n",
|
|
" \n",
|
|
" plt.bar(years, ev_yearly.reindex(years, fill_value=0), label='CARB (EV Projects)', bottom=bottom)\n",
|
|
" \n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" # Plot CARB vs non-CARB\n",
|
|
" years = sorted(yearly_funding.index)\n",
|
|
" plt.bar(years, non_carb_yearly.reindex(years, fill_value=0), label='Non-CARB')\n",
|
|
" plt.bar(years, carb_yearly.reindex(years, fill_value=0), label='CARB', bottom=non_carb_yearly.reindex(years, fill_value=0))\n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" # Simple yearly plot\n",
|
|
" yearly_funding.plot(kind='bar')\n",
|
|
" \n",
|
|
" plt.title('CCI Funding by Year')\n",
|
|
" plt.xlabel('Funding Year')\n",
|
|
" plt.ylabel('Funding ($ Millions)')\n",
|
|
" plt.xticks(rotation=45)\n",
|
|
" \n",
|
|
" # 2. GHG reductions by year\n",
|
|
" plt.subplot(2, 2, 2)\n",
|
|
" yearly_ghg = df.groupby('funding_year')[ghg_col].sum() / 1_000\n",
|
|
" \n",
|
|
" # Add CARB and EV breakdowns if available\n",
|
|
" if carb_df is not None:\n",
|
|
" carb_yearly_ghg = carb_df.groupby('funding_year')[ghg_col].sum() / 1_000\n",
|
|
" non_carb_yearly_ghg = yearly_ghg - carb_yearly_ghg\n",
|
|
" \n",
|
|
" if ev_df is not None:\n",
|
|
" ev_yearly_ghg = ev_df.groupby('funding_year')[ghg_col].sum() / 1_000\n",
|
|
" carb_non_ev_yearly_ghg = carb_yearly_ghg - ev_yearly_ghg\n",
|
|
" \n",
|
|
" # Plot stacked bar chart\n",
|
|
" years = sorted(yearly_ghg.index)\n",
|
|
" bottom = np.zeros(len(years))\n",
|
|
" \n",
|
|
" plt.bar(years, non_carb_yearly_ghg.reindex(years, fill_value=0), label='Non-CARB', bottom=bottom)\n",
|
|
" bottom += non_carb_yearly_ghg.reindex(years, fill_value=0)\n",
|
|
" \n",
|
|
" plt.bar(years, carb_non_ev_yearly_ghg.reindex(years, fill_value=0), label='CARB (non-EV)', bottom=bottom)\n",
|
|
" bottom += carb_non_ev_yearly_ghg.reindex(years, fill_value=0)\n",
|
|
" \n",
|
|
" plt.bar(years, ev_yearly_ghg.reindex(years, fill_value=0), label='CARB (EV Projects)', bottom=bottom)\n",
|
|
" \n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" # Plot CARB vs non-CARB\n",
|
|
" years = sorted(yearly_ghg.index)\n",
|
|
" plt.bar(years, non_carb_yearly_ghg.reindex(years, fill_value=0), label='Non-CARB')\n",
|
|
" plt.bar(years, carb_yearly_ghg.reindex(years, fill_value=0), label='CARB', bottom=non_carb_yearly_ghg.reindex(years, fill_value=0))\n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" # Simple yearly plot\n",
|
|
" yearly_ghg.plot(kind='bar')\n",
|
|
" \n",
|
|
" plt.title('GHG Reductions by Year')\n",
|
|
" plt.xlabel('Funding Year')\n",
|
|
" plt.ylabel('GHG Reductions (Thousand Tons)')\n",
|
|
" plt.xticks(rotation=45)\n",
|
|
" \n",
|
|
" # 3. Project counts by year\n",
|
|
" plt.subplot(2, 2, 3)\n",
|
|
" yearly_projects = df.groupby('funding_year').size()\n",
|
|
" \n",
|
|
" # Add CARB and EV breakdowns if available\n",
|
|
" if carb_df is not None:\n",
|
|
" carb_yearly_projects = carb_df.groupby('funding_year').size()\n",
|
|
" non_carb_yearly_projects = yearly_projects - carb_yearly_projects\n",
|
|
" \n",
|
|
" if ev_df is not None:\n",
|
|
" ev_yearly_projects = ev_df.groupby('funding_year').size()\n",
|
|
" carb_non_ev_yearly_projects = carb_yearly_projects - ev_yearly_projects\n",
|
|
" \n",
|
|
" # Plot stacked bar chart\n",
|
|
" years = sorted(yearly_projects.index)\n",
|
|
" bottom = np.zeros(len(years))\n",
|
|
" \n",
|
|
" plt.bar(years, non_carb_yearly_projects.reindex(years, fill_value=0), label='Non-CARB', bottom=bottom)\n",
|
|
" bottom += non_carb_yearly_projects.reindex(years, fill_value=0)\n",
|
|
" \n",
|
|
" plt.bar(years, carb_non_ev_yearly_projects.reindex(years, fill_value=0), label='CARB (non-EV)', bottom=bottom)\n",
|
|
" bottom += carb_non_ev_yearly_projects.reindex(years, fill_value=0)\n",
|
|
" \n",
|
|
" plt.bar(years, ev_yearly_projects.reindex(years, fill_value=0), label='CARB (EV Projects)', bottom=bottom)\n",
|
|
" \n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" # Plot CARB vs non-CARB\n",
|
|
" years = sorted(yearly_projects.index)\n",
|
|
" plt.bar(years, non_carb_yearly_projects.reindex(years, fill_value=0), label='Non-CARB')\n",
|
|
" plt.bar(years, carb_yearly_projects.reindex(years, fill_value=0), label='CARB', bottom=non_carb_yearly_projects.reindex(years, fill_value=0))\n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" # Simple yearly plot\n",
|
|
" yearly_projects.plot(kind='bar')\n",
|
|
" \n",
|
|
" plt.title('Number of Projects by Year')\n",
|
|
" plt.xlabel('Funding Year')\n",
|
|
" plt.ylabel('Number of Projects')\n",
|
|
" plt.xticks(rotation=45)\n",
|
|
" \n",
|
|
" # 4. DAC benefit percentage by year\n",
|
|
" plt.subplot(2, 2, 4)\n",
|
|
" if dac_col:\n",
|
|
" yearly_dac = df.groupby('funding_year')[dac_col].mean()\n",
|
|
" \n",
|
|
" # Compare CARB vs non-CARB if available\n",
|
|
" if carb_df is not None:\n",
|
|
" carb_yearly_dac = carb_df.groupby('funding_year')[dac_col].mean()\n",
|
|
" non_carb_yearly_dac = self.data['non_carb_projects'].groupby('funding_year')[dac_col].mean()\n",
|
|
" \n",
|
|
" # Plot lines\n",
|
|
" years = sorted(yearly_dac.index)\n",
|
|
" plt.plot(years, yearly_dac.reindex(years), 'k-', label='Overall', linewidth=2)\n",
|
|
" plt.plot(years, carb_yearly_dac.reindex(years), 'b-', label='CARB', linewidth=1.5)\n",
|
|
" plt.plot(years, non_carb_yearly_dac.reindex(years), 'r-', label='Non-CARB', linewidth=1.5)\n",
|
|
" \n",
|
|
" if ev_df is not None and not ev_df.empty:\n",
|
|
" ev_yearly_dac = ev_df.groupby('funding_year')[dac_col].mean()\n",
|
|
" plt.plot(years, ev_yearly_dac.reindex(years), 'g-', label='EV Projects', linewidth=1.5)\n",
|
|
" \n",
|
|
" plt.legend()\n",
|
|
" else:\n",
|
|
" yearly_dac.plot(kind='line', marker='o')\n",
|
|
" \n",
|
|
" plt.title('DAC Benefit Percentage by Year')\n",
|
|
" plt.xlabel('Funding Year')\n",
|
|
" plt.ylabel('Average DAC Benefit (%)')\n",
|
|
" plt.grid(True, linestyle='--', alpha=0.7)\n",
|
|
" plt.xticks(rotation=45)\n",
|
|
" \n",
|
|
" plt.tight_layout()\n",
|
|
" \n",
|
|
" # Save visualization\n",
|
|
" output_file = self.output_path / \"temporal_analysis.png\"\n",
|
|
" plt.savefig(output_file, dpi=300, bbox_inches='tight')\n",
|
|
" logger.info(f\"Temporal analysis visualization saved to {output_file}\")\n",
|
|
" plt.show()\n",
|
|
" \n",
|
|
" def identify_collaboration_patterns(self):\n",
|
|
" \"\"\"\n",
|
|
" Analyze collaboration patterns in CCI projects to address the research question.\n",
|
|
" This examines how inter-agency collaboration affects outcomes.\n",
|
|
" \"\"\"\n",
|
|
" if 'cci_projects' not in self.data:\n",
|
|
" logger.error(\"No data available for analysis\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" df = self.data['cci_projects']\n",
|
|
" \n",
|
|
" # Check if we can identify collaborative projects\n",
|
|
" collab_indicators = []\n",
|
|
" \n",
|
|
" # Look for program name patterns that might indicate collaboration\n",
|
|
" if 'program_name' in df.columns:\n",
|
|
" collab_indicators.append('program_name')\n",
|
|
" if 'sub_program_name' in df.columns:\n",
|
|
" collab_indicators.append('sub_program_name')\n",
|
|
" if 'agency_name' in df.columns:\n",
|
|
" collab_indicators.append('agency_name')\n",
|
|
" \n",
|
|
" if not collab_indicators:\n",
|
|
" logger.error(\"Could not identify columns for collaboration analysis\")\n",
|
|
" return\n",
|
|
" \n",
|
|
" print(\"\\n--- Collaboration Analysis ---\")\n",
|
|
" \n",
|
|
" try:\n",
|
|
" # Identify unique programs\n",
|
|
" if 'program_name' in df.columns:\n",
|
|
" unique_programs = df['program_name'].nunique()\n",
|
|
" print(f\"Number of unique programs: {unique_programs}\")\n",
|
|
" \n",
|
|
" # Count agencies per program\n",
|
|
" program_agencies = df.groupby('program_name')['agency_name'].nunique().sort_values(ascending=False)\n",
|
|
" multi_agency_programs = program_agencies[program_agencies > 1]\n",
|
|
" \n",
|
|
" print(f\"Programs with multiple agencies: {len(multi_agency_programs)} ({len(multi_agency_programs)/unique_programs*100:.1f}% of programs)\")\n",
|
|
" \n",
|
|
" if len(multi_agency_programs) > 0:\n",
|
|
" print(\"\\nTop multi-agency programs:\")\n",
|
|
" for program, count in multi_agency_programs.head(5).items():\n",
|
|
" print(f\" {program}: {count} agencies\")\n",
|
|
" \n",
|
|
" # Analyze outcomes for multi-agency vs single-agency programs\n",
|
|
" funding_col = [col for col in df.columns if 'total_program' in col.lower() and 'funding' in col.lower()]\n",
|
|
" ghg_col = [col for col in df.columns if 'total_project' in col.lower() and 'ghg' in col.lower()]\n",
|
|
" dac_col = [col for col in df.columns if 'dac_benefit_percentage' in col.lower()]\n",
|
|
" \n",
|
|
" if funding_col and ghg_col:\n",
|
|
" funding_col = funding_col[0]\n",
|
|
" ghg_col = ghg_col[0]\n",
|
|
" \n",
|
|
" # Create multi-agency flag\n",
|
|
" df['multi_agency_program'] = df['program_name'].map(lambda x: program_agencies[x] > 1 if x in program_agencies else False)\n",
|
|
" \n",
|
|
" # Group by multi-agency flag\n",
|
|
" multi_df = df[df['multi_agency_program']].copy()\n",
|
|
" single_df = df[~df['multi_agency_program']].copy()\n",
|
|
" \n",
|
|
" # Compare outcomes\n",
|
|
" print(\"\\nComparison of Multi-agency vs Single-agency Programs:\")\n",
|
|
" print(f\"Multi-agency projects: {len(multi_df)} ({len(multi_df)/len(df)*100:.1f}% of total)\")\n",
|
|
" print(f\"Single-agency projects: {len(single_df)} ({len(single_df)/len(df)*100:.1f}% of total)\")\n",
|
|
" \n",
|
|
" multi_funding = multi_df[funding_col].sum()\n",
|
|
" single_funding = single_df[funding_col].sum()\n",
|
|
" total_funding = df[funding_col].sum()\n",
|
|
" \n",
|
|
" print(f\"\\nMulti-agency funding: ${multi_funding:,.2f} ({multi_funding/total_funding*100:.1f}% of total)\")\n",
|
|
" print(f\"Single-agency funding: ${single_funding:,.2f} ({single_funding/total_funding*100:.1f}% of total)\")\n",
|
|
" \n",
|
|
" multi_ghg = multi_df[ghg_col].sum()\n",
|
|
" single_ghg = single_df[ghg_col].sum()\n",
|
|
" total_ghg = df[ghg_col].sum()\n",
|
|
" \n",
|
|
" print(f\"\\nMulti-agency GHG reductions: {multi_ghg:,.2f} tons ({multi_ghg/total_ghg*100:.1f}% of total)\")\n",
|
|
" print(f\"Single-agency GHG reductions: {single_ghg:,.2f} tons ({single_ghg/total_ghg*100:.1f}% of total)\")\n",
|
|
" \n",
|
|
" # Calculate efficiency\n",
|
|
" multi_efficiency = multi_funding / multi_ghg if multi_ghg > 0 else 0\n",
|
|
" single_efficiency = single_funding / single_ghg if single_ghg > 0 else 0\n",
|
|
" \n",
|
|
" print(f\"\\nMulti-agency efficiency: ${multi_efficiency:.2f} per ton CO2e\")\n",
|
|
" print(f\"Single-agency efficiency: ${single_efficiency:.2f} per ton CO2e\")\n",
|
|
" \n",
|
|
" # DAC benefits\n",
|
|
" if dac_col:\n",
|
|
" dac_col = dac_col[0]\n",
|
|
" multi_dac = multi_df[dac_col].mean()\n",
|
|
" single_dac = single_df[dac_col].mean()\n",
|
|
" \n",
|
|
" print(f\"\\nMulti-agency DAC benefit: {multi_dac:.2f}%\")\n",
|
|
" print(f\"Single-agency DAC benefit: {single_dac:.2f}%\")\n",
|
|
" \n",
|
|
" # Create visualization\n",
|
|
" plt.figure(figsize=(15, 10))\n",
|
|
" \n",
|
|
" # 1. Project distribution\n",
|
|
" plt.subplot(2, 2, 1)\n",
|
|
" project_dist = pd.Series({\n",
|
|
" 'Multi-agency Programs': len(multi_df),\n",
|
|
" 'Single-agency Programs': len(single_df)\n",
|
|
" })\n",
|
|
" project_dist.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
|
|
" plt.title('Distribution of Projects')\n",
|
|
" plt.ylabel('')\n",
|
|
" \n",
|
|
" # 2. Funding distribution\n",
|
|
" plt.subplot(2, 2, 2)\n",
|
|
" funding_dist = pd.Series({\n",
|
|
" 'Multi-agency Programs': multi_funding,\n",
|
|
" 'Single-agency Programs': single_funding\n",
|
|
" })\n",
|
|
" funding_dist.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
|
|
" plt.title('Distribution of Funding')\n",
|
|
" plt.ylabel('')\n",
|
|
" \n",
|
|
" # 3. GHG reduction distribution\n",
|
|
" plt.subplot(2, 2, 3)\n",
|
|
" ghg_dist = pd.Series({\n",
|
|
" 'Multi-agency Programs': multi_ghg,\n",
|
|
" 'Single-agency Programs': single_ghg\n",
|
|
" })\n",
|
|
" ghg_dist.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
|
|
" plt.title('Distribution of GHG Reductions')\n",
|
|
" plt.ylabel('')\n",
|
|
" \n",
|
|
" # 4. Efficiency & DAC comparison\n",
|
|
" plt.subplot(2, 2, 4)\n",
|
|
" metrics = ['Cost Efficiency ($/ton)', 'DAC Benefit (%)']\n",
|
|
" multi_values = [multi_efficiency]\n",
|
|
" single_values = [single_efficiency]\n",
|
|
" \n",
|
|
" if dac_col:\n",
|
|
" multi_values.append(multi_dac)\n",
|
|
" single_values.append(single_dac)\n",
|
|
" \n",
|
|
" x = np.arange(len(metrics))\n",
|
|
" width = 0.35\n",
|
|
" \n",
|
|
" plt.bar(x - width/2, multi_values, width, label='Multi-agency')\n",
|
|
" plt.bar(x + width/2, single_values, width, label='Single-agency')\n",
|
|
" \n",
|
|
" plt.xlabel('Metric')\n",
|
|
" plt.ylabel('Value')\n",
|
|
" plt.title('Performance Comparison')\n",
|
|
" plt.xticks(x, metrics)\n",
|
|
" plt.legend()\n",
|
|
" \n",
|
|
" plt.tight_layout()\n",
|
|
" \n",
|
|
" # Save visualization\n",
|
|
" output_file = self.output_path / \"collaboration_analysis.png\"\n",
|
|
" plt.savefig(output_file, dpi=300, bbox_inches='tight')\n",
|
|
" logger.info(f\"Collaboration analysis visualization saved to {output_file}\")\n",
|
|
" plt.show()\n",
|
|
" \n",
|
|
" except Exception as e:\n",
|
|
" logger.error(f\"Error in collaboration analysis: {e}\")\n",
|
|
"\n",
|
|
"# Usage example\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" analyzer = CCIDataAnalyzer(data_path=\"data/cci_programs_data_reduced.csv\")\n",
|
|
" if analyzer.load_data():\n",
|
|
" print(\"Data loaded successfully!\")\n",
|
|
" results = analyzer.analyze_data()\n",
|
|
" \n",
|
|
" # Run agency comparison analysis\n",
|
|
" analyzer.plot_agency_comparison()\n",
|
|
" \n",
|
|
" # Run CARB vs non-CARB analysis\n",
|
|
" analyzer.plot_carb_analysis()\n",
|
|
" \n",
|
|
" # Run temporal analysis\n",
|
|
" analyzer.plot_temporal_analysis()\n",
|
|
" \n",
|
|
" # Run collaboration analysis\n",
|
|
" analyzer.identify_collaboration_patterns()\n",
|
|
" else:\n",
|
|
" print(\"Failed to load data. Check file path and format.\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|