This commit is contained in:
2025-04-10 00:03:30 -07:00
parent 81ec68b3cc
commit 03ae352949
12 changed files with 150373 additions and 0 deletions

122
run_cci_analysis.py Normal file
View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""
California Climate Investments (CCI) Collaboration Analysis Workflow
This script runs the complete workflow for analyzing collaboration patterns
in California's Climate Investments program and their impact on greenhouse
gas reduction efficiency and equity outcomes.
Usage:
python run_cci_analysis.py --data_path data/cci_programs_data_reduced.csv --output_dir output
"""
import os
import argparse
import logging
from pathlib import Path
# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("cci_workflow")


def _parse_args(argv=None):
    """Parse the workflow's command-line options.

    Args:
        argv: Argument list to parse. ``None`` makes argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description='Run CCI Collaboration Analysis Workflow')
    parser.add_argument('--data_path', type=str, required=True,
                        help='Path to the raw CCI data CSV file')
    parser.add_argument('--output_dir', type=str, default='./output',
                        help='Directory to save all outputs')
    parser.add_argument('--skip_cleaning', action='store_true',
                        help='Skip the data cleaning step')
    parser.add_argument('--skip_analysis', action='store_true',
                        help='Skip the detailed analysis step')
    parser.add_argument('--skip_research', action='store_true',
                        help='Skip the research questions analysis')
    return parser.parse_args(argv)


def _run_cleaning_step(raw_data_path: str, cleaned_data_path: Path,
                       data_summary_path: Path) -> bool:
    """Run step 1 (data cleaning and summary); return True on success."""
    logger.info("Step 1: Cleaning and preparing the CCI data")
    try:
        # Imported inside the step so that an import failure is reported like
        # any other step failure and a skipped step never loads its module.
        from data_cleaning_script import clean_and_prepare_cci_data, generate_data_summary

        cleaned_df = clean_and_prepare_cci_data(raw_data_path, cleaned_data_path)
        generate_data_summary(cleaned_df, data_summary_path)
    except Exception as e:
        # logger.exception keeps the traceback, which a plain error log drops.
        logger.exception("Error in data cleaning step: %s", e)
        return False
    logger.info("Data cleaning complete. Cleaned data saved to %s", cleaned_data_path)
    logger.info("Data summary saved to %s", data_summary_path)
    return True


def _run_analysis_step(cleaned_data_path: Path, analysis_output_dir: Path) -> bool:
    """Run step 2 (detailed collaboration analysis); return True on success."""
    logger.info("Step 2: Running detailed collaboration analysis")
    try:
        from cci_collaboration_analysis import CCICollaborationAnalyzer

        analyzer = CCICollaborationAnalyzer(cleaned_data_path, str(analysis_output_dir))
        analyzer.run_full_analysis()
    except Exception as e:
        logger.exception("Error in detailed analysis step: %s", e)
        return False
    logger.info("Detailed analysis complete. Results saved to %s", analysis_output_dir)
    return True


def _run_research_step(cleaned_data_path: Path, research_output_dir: Path) -> bool:
    """Run step 3 (research questions analysis); return True on success."""
    logger.info("Step 3: Analyzing research questions")
    try:
        from research_analysis_script import analyze_research_questions

        findings = analyze_research_questions(cleaned_data_path, str(research_output_dir))
    except Exception as e:
        logger.exception("Error in research analysis step: %s", e)
        return False
    if not findings:
        # A falsy result is treated as a failed run, not a successful empty one.
        logger.error("Research analysis failed to complete successfully")
        return False
    logger.info("Research analysis complete. Results saved to %s", research_output_dir)
    return True


def main(argv=None) -> int:
    """Run the complete CCI collaboration analysis workflow.

    The workflow has three steps (cleaning, detailed analysis, research
    questions); each can be skipped with its ``--skip_*`` flag. A failure in
    cleaning, or a missing cleaned data file, aborts the run. A failure in the
    detailed analysis is logged and the research step is still attempted.

    Args:
        argv: Optional list of command-line arguments. When ``None`` the
            arguments are taken from ``sys.argv[1:]``.

    Returns:
        ``0`` when every executed step succeeded, ``1`` otherwise.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    args = _parse_args(argv)

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # File paths
    raw_data_path = args.data_path
    cleaned_data_path = output_dir / "cleaned_cci_data.csv"
    data_summary_path = output_dir / "data_summary.json"

    # Create subdirectories for different analysis outputs. "cleaned" is not
    # written to by this script but is still created so the output layout
    # stays the same.
    cleaned_output_dir = output_dir / "cleaned"
    analysis_output_dir = output_dir / "analysis"
    research_output_dir = output_dir / "research"
    for directory in (cleaned_output_dir, analysis_output_dir, research_output_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # Step 1: Clean and prepare the data
    if args.skip_cleaning:
        logger.info("Skipping data cleaning step")
    elif not _run_cleaning_step(raw_data_path, cleaned_data_path, data_summary_path):
        return 1

    # Later steps read the cleaned file, so its absence is fatal.
    if not cleaned_data_path.exists():
        logger.error("Cleaned data file %s not found. Cannot proceed without data.",
                     cleaned_data_path)
        return 1

    all_steps_ok = True

    # Step 2: Run the detailed collaboration analysis
    if args.skip_analysis:
        logger.info("Skipping detailed analysis step")
    elif not _run_analysis_step(cleaned_data_path, analysis_output_dir):
        logger.error("Continuing to research analysis with available data...")
        all_steps_ok = False

    # Step 3: Analyze specific research questions
    if args.skip_research:
        logger.info("Skipping research analysis step")
    elif not _run_research_step(cleaned_data_path, research_output_dir):
        all_steps_ok = False

    if not all_steps_ok:
        # Do not report "complete" when a step has already logged an error.
        logger.error("CCI Collaboration Analysis Workflow finished with errors")
        return 1

    logger.info("CCI Collaboration Analysis Workflow complete!")
    return 0


if __name__ == "__main__":
    # Propagate the workflow status to the shell as the process exit code.
    raise SystemExit(main())