#!/usr/bin/env python3
|
|
"""
|
|
California Climate Investments (CCI) Collaboration Analysis Workflow
|
|
|
|
This script runs the complete workflow for analyzing collaboration patterns
|
|
in California's Climate Investments program and their impact on greenhouse
|
|
gas reduction efficiency and equity outcomes.
|
|
|
|
Usage:
|
|
python run_cci_analysis.py --data_path data/cci_programs_data_reduced.csv --output_dir output
|
|
"""
|
|
|
|
import os
|
|
import argparse
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("cci_workflow")


def main(argv=None):
    """Run the complete CCI collaboration analysis workflow.

    The workflow has three steps, each of which can be skipped with a flag:

    1. Clean the raw CSV and write a data summary (``--skip_cleaning``).
    2. Run the detailed collaboration analysis (``--skip_analysis``).
    3. Analyze the research questions (``--skip_research``).

    Each step's module is imported inside the step, so a skipped step never
    triggers its import and an import failure is reported like any other
    error of that step.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        int: ``0`` when every executed step succeeded, ``1`` when the cleaning
        step failed, the cleaned data file is missing, or a later step
        reported an error.
    """
    parser = argparse.ArgumentParser(description='Run CCI Collaboration Analysis Workflow')
    parser.add_argument('--data_path', type=str, required=True, help='Path to the raw CCI data CSV file')
    parser.add_argument('--output_dir', type=str, default='./output', help='Directory to save all outputs')
    parser.add_argument('--skip_cleaning', action='store_true', help='Skip the data cleaning step')
    parser.add_argument('--skip_analysis', action='store_true', help='Skip the detailed analysis step')
    parser.add_argument('--skip_research', action='store_true', help='Skip the research questions analysis')

    args = parser.parse_args(argv)

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # File paths
    raw_data_path = args.data_path
    cleaned_data_path = output_dir / "cleaned_cci_data.csv"
    data_summary_path = output_dir / "data_summary.json"

    # Create subdirectories for different analysis outputs
    cleaned_output_dir = output_dir / "cleaned"
    analysis_output_dir = output_dir / "analysis"
    research_output_dir = output_dir / "research"

    for directory in (cleaned_output_dir, analysis_output_dir, research_output_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # Steps 2 and 3 are best-effort: a failure is recorded here and reported
    # through the return value instead of aborting the remaining steps.
    had_errors = False

    # Step 1: Clean and prepare the data
    if not args.skip_cleaning:
        logger.info("Step 1: Cleaning and preparing the CCI data")

        try:
            from data_cleaning_script import clean_and_prepare_cci_data, generate_data_summary

            # Clean and prepare the data
            cleaned_df = clean_and_prepare_cci_data(raw_data_path, cleaned_data_path)

            # Generate data summary
            generate_data_summary(cleaned_df, data_summary_path)
        except Exception as e:
            # Later steps need the cleaned data, so stop here. logger.exception
            # keeps the traceback, which the plain error message would lose.
            logger.exception("Error in data cleaning step: %s", e)
            return 1
        else:
            logger.info(f"Data cleaning complete. Cleaned data saved to {cleaned_data_path}")
            logger.info(f"Data summary saved to {data_summary_path}")
    else:
        logger.info("Skipping data cleaning step")
        # Check if cleaned data exists
        if not cleaned_data_path.exists():
            logger.error(f"Cleaned data file {cleaned_data_path} not found. Cannot proceed without data.")
            return 1

    # Step 2: Run the detailed collaboration analysis
    if not args.skip_analysis:
        logger.info("Step 2: Running detailed collaboration analysis")

        try:
            from cci_collaboration_analysis import CCICollaborationAnalyzer

            # Initialize the analyzer
            analyzer = CCICollaborationAnalyzer(cleaned_data_path, str(analysis_output_dir))

            # Run full analysis
            analyzer.run_full_analysis()
        except Exception as e:
            had_errors = True
            logger.exception("Error in detailed analysis step: %s", e)
            logger.error("Continuing to research analysis with available data...")
        else:
            logger.info(f"Detailed analysis complete. Results saved to {analysis_output_dir}")
    else:
        logger.info("Skipping detailed analysis step")

    # Step 3: Analyze specific research questions
    if not args.skip_research:
        logger.info("Step 3: Analyzing research questions")

        try:
            from research_analysis_script import analyze_research_questions

            # Run research analysis
            findings = analyze_research_questions(cleaned_data_path, str(research_output_dir))
        except Exception as e:
            had_errors = True
            logger.exception("Error in research analysis step: %s", e)
        else:
            if findings:
                logger.info(f"Research analysis complete. Results saved to {research_output_dir}")
            else:
                had_errors = True
                logger.error("Research analysis failed to complete successfully")
    else:
        logger.info("Skipping research analysis step")

    if had_errors:
        # Do not claim success when a step above reported a failure.
        logger.warning("CCI Collaboration Analysis Workflow finished with errors")
        return 1

    logger.info("CCI Collaboration Analysis Workflow complete!")
    return 0


if __name__ == "__main__":
    # Hand the workflow status to the shell so a failed run is detectable.
    raise SystemExit(main())