diff --git a/analysis/new analysis Aug 2025/analysis10-2021.ipynb b/analysis/new analysis Aug 2025/analysis10-2021.ipynb index cd45580..4ea297a 100644 --- a/analysis/new analysis Aug 2025/analysis10-2021.ipynb +++ b/analysis/new analysis Aug 2025/analysis10-2021.ipynb @@ -1862,6 +1862,545 @@ "\n", "**Bottom line**: The mission change had **net negative effects on delay times**, with rural communities bearing the brunt of the impact. " ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "433ce72e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Columns in spills_gdf:\n", + "- Document #\n", + "- Report\n", + "- Operator\n", + "- Operator #\n", + "- Tracking #\n", + "- Initial Report Date\n", + "- Date of Discovery\n", + "- spill_type\n", + "- Qtr Qtr\n", + "- Section\n", + "- Township\n", + "- range\n", + "- meridian\n", + "- Latitude\n", + "- Longitude\n", + "- Municipality\n", + "- county\n", + "- Facility Type\n", + "- Facility ID\n", + "- API County Code\n", + "- API Sequence Number\n", + "- Spilled outside of berms\n", + "- More than five barrels spilled\n", + "- Oil Spill Volume\n", + "- Condensate Spill Volume\n", + "- Flow Back Spill Volume\n", + "- Produced Water Spill Volume\n", + "- E&P Waste Spill Volume\n", + "- Other Waste\n", + "- Drilling Fluid Spill Volume\n", + "- Current Land Use\n", + "- Other Land Use\n", + "- Weather Conditions\n", + "- Surface Owner\n", + "- Surface Owner Other\n", + "- Waters of the State\n", + "- Residence / Occupied Structure\n", + "- livestock\n", + "- Public Byway\n", + "- Surface Water Supply Area\n", + "- Spill Description\n", + "- Supplemental Report Date\n", + "- Oil BBLs Spilled\n", + "- Oil BBLs Recovered\n", + "- Oil Unknown\n", + "- Condensate BBLs Spilled\n", + "- Condensate BBLs Recovered\n", + "- Condensate Unknown\n", + "- Produced Water BBLs Spilled\n", + "- Produced Water BBLs Recovered\n", + "- Produced Water Unknown\n", + "- 
Drilling Fluid BBLs Spilled\n", + "- Drilling Fluid BBLs Recovered\n", + "- Drilling Fluid Unknown\n", + "- Flow Back Fluid BBLs Spilled\n", + "- Flow Back Fluid BBLs Recovered\n", + "- Flow Back Fluid Unkown\n", + "- Other E&P Waste BBLS Spilled\n", + "- Other E&P Waste BBLS Recovered\n", + "- Other E&P Waste Unknown\n", + "- Other E&P Waste\n", + "- Spill Contained within Berm\n", + "- Emergency Pit Constructed\n", + "- soil\n", + "- groundwater\n", + "- Surface Water\n", + "- Dry Drainage Feature\n", + "- Surface Area Length\n", + "- Surface Area Width\n", + "- Depth of Impact in Feet\n", + "- Depth of Impact in Inches\n", + "- Area Depth Determined\n", + "- Geology Description\n", + "- Depth to Groundwater\n", + "- Water wells in area\n", + "- Water Wells\n", + "- Water Wells None\n", + "- Surface Water Near\n", + "- Surface Water None\n", + "- Wetlands\n", + "- Wetlands None\n", + "- Springs\n", + "- Springs None\n", + "- Livestock Near\n", + "- Livestock None\n", + "- Occupied Buildings\n", + "- Occupied Buildings None\n", + "- Additional Spill Details\n", + "- Supplemental Report Date CA\n", + "- Human Error\n", + "- Equipment Failure\n", + "- Historical Unkown\n", + "- Other\n", + "- Other Description\n", + "- Root Cause\n", + "- Preventative Measures\n", + "- Soil Excavated\n", + "- Offsite Disposal\n", + "- Onsite Treatment\n", + "- Other Disposition\n", + "- Other Disposition Description\n", + "- Ground Water Removed\n", + "- Surface Water Removed\n", + "- Corrective Actions Completed\n", + "- Approved Form 27\n", + "- Form 27 Project Number\n", + "- GEOID\n", + "- TRACT_NAME\n", + "- total_population\n", + "- white_population\n", + "- hispanic_population\n", + "- median_household_income\n", + "- poverty_population\n", + "- unemployed_population\n", + "- percent_white\n", + "- percent_hispanic\n", + "- percent_poverty\n", + "- unemployment_rate\n", + "- geometry\n", + "- ruca_code\n", + "- ruca_description\n", + "- rurality\n", + "- Report Year\n", + "- 
# Notebook cell 433ce72e: list every column name available on spills_gdf,
# one "- <name>" entry per line, under a short heading.
print("\nColumns in spills_gdf:")
column_lines = [f"- {name}" for name in spills_gdf.columns]
for line in column_lines:
    print(line)
DELAY CHANGES\n", + "-----------------------------------\n", + "Operators with data in both periods: 44\n", + "Operators with size data: 44\n", + "\n", + "🚨 TOP 10 OPERATORS WITH BIGGEST DELAY INCREASES:\n", + " 1. KP KAUFFMAN COMPANY INC | Change: + 15.0 days | Size: Major (50+)\n", + " 2. AKA ENERGY GROUP LLC | Change: + 6.6 days | Size: Medium (6-20)\n", + " 3. HUNTER RIDGE ENERGY SERVICES LLC | Change: + 5.0 days | Size: Small (1-5)\n", + " 4. EXTRACTION OIL & GAS INC | Change: + 5.0 days | Size: Major (50+)\n", + " 5. RED HAWK PETROLEUM LLC | Change: + 3.8 days | Size: Medium (6-20)\n", + " 6. PETRO MEX RESOURCES | Change: + 3.8 days | Size: Medium (6-20)\n", + " 7. DCP OPERATING COMPANY LP | Change: + 3.6 days | Size: Major (50+)\n", + " 8. ENERPLUS RESOURCES (USA) CORPORATION | Change: + 1.9 days | Size: Medium (6-20)\n", + " 9. ANADARKO WATTENBERG OIL COMPLEX LLC | Change: + 1.8 days | Size: Medium (6-20)\n", + "10. GRAND RIVER GATHERING LLC | Change: + 1.7 days | Size: Major (50+)\n", + "\n", + "āœ… TOP 10 OPERATORS WITH BIGGEST IMPROVEMENTS:\n", + " 1. PETRO OPERATING COMPANY LLC | Change: -80.9 days | Size: Medium (6-20)\n", + " 2. FOUNDATION ENERGY MANAGEMENT LLC | Change: -63.0 days | Size: Large (21-50)\n", + " 3. BLUE CHIP OIL INC | Change: -54.1 days | Size: Medium (6-20)\n", + " 4. UTAH GAS OP LTD DBA UTAH GAS CORP | Change: -23.2 days | Size: Major (50+)\n", + " 5. CHEVRON USA INC | Change: -17.3 days | Size: Major (50+)\n", + " 6. BAYSWATER EXPLORATION & PRODUCTION LLC | Change: -9.7 days | Size: Major (50+)\n", + " 7. GREAT WESTERN OPERATING COMPANY LLC | Change: -7.4 days | Size: Major (50+)\n", + " 8. CRESTONE PEAK RESOURCES OPERATING LLC | Change: -3.8 days | Size: Major (50+)\n", + " 9. NOBLE ENERGY INC | Change: -3.3 days | Size: Major (50+)\n", + "10. 
KERR MCGEE GATHERING LLC | Change: -3.0 days | Size: Major (50+)\n", + "\n", + "3ļøāƒ£ OPERATOR SIZE EFFECT ANALYSIS\n", + "----------------------------------------\n", + "Average delay changes by operator size:\n", + " mean count std\n", + "size_category \n", + "Small (1-5) 3.00 2 2.83\n", + "Medium (6-20) -8.61 14 25.61\n", + "Large (21-50) -10.17 6 25.90\n", + "Major (50+) -2.05 22 7.59\n", + "\n", + "šŸ” Large vs Small Operators:\n", + " Large operators (n=28): -3.79 days average change\n", + " Small operators (n=16): -7.16 days average change\n", + " Statistical test: t=0.596, p=0.5543 (not significant)\n", + "\n", + "4ļøāƒ£ SPILL SIZE RESISTANCE ANALYSIS\n", + "----------------------------------------\n", + "Delay changes by spill size:\n", + "spill_size_category\n", + "Small (0-1 bbl) NaN\n", + "Medium (1-10 bbl) NaN\n", + "Large (10-50 bbl) NaN\n", + "Major (50+ bbl) NaN\n", + "Name: change, dtype: float64\n", + "\n", + "5ļøāƒ£ GEOGRAPHIC RESISTANCE PATTERNS\n", + "----------------------------------------\n", + "🚨 COUNTIES WITH BIGGEST DELAY INCREASES (min 20 spills):\n", + " GARFIELD : -0.1 days (1437 spills)\n", + " WELD : -1.3 days (9041 spills)\n", + " RIO BLANCO : -6.2 days (718 spills)\n", + "\n", + "6ļøāƒ£ RESISTANCE HYPOTHESIS ASSESSMENT\n", + "---------------------------------------------\n", + "āœ… Major operators overrepresented in worst performers\n", + "āœ… Large operators had bigger delay increases than small operators\n", + "āŒ Major spills did not have above-average delay increases\n", + "\n", + "šŸ“Š RESISTANCE HYPOTHESIS SCORE: 67% (2/3 tests support)\n", + "āš ļø MODERATE EVIDENCE for industry resistance\n", + "\n", + "āœ… Analysis complete!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1241247/2502002660.py:62: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. 
Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " size_period_summary = operator_period_stats.groupby(['size_category', 'Period']).agg({\n", + "/tmp/ipykernel_1241247/2502002660.py:88: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " complete_operators['delay_change'] = complete_operators[after_period] - complete_operators[before_period]\n", + "/tmp/ipykernel_1241247/2502002660.py:118: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " size_changes = complete_with_size.groupby('size_category')['delay_change'].agg(['mean', 'count', 'std']).round(2)\n", + "/tmp/ipykernel_1241247/2502002660.py:155: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
import statsmodels.formula.api as smf

# ---------------------------------------------------------------------------
# Industry-resistance analysis (notebook cell 98e01f3f)
#
# Compares spill report delays between the two values of spills_gdf['Period']
# at four levels: per operator, per operator-size class, per spill-size class
# and per county, then tallies three simple checks into a summary score.
#
# Sign convention used everywhere below:
#     change = mean delay AFTER  -  mean delay BEFORE
# so a POSITIVE change means reporting got slower in the later period.
#
# Side effects on spills_gdf: adds 'total_volume' and 'spill_size_category'.
# The raw volume columns are read but no longer overwritten.
# ---------------------------------------------------------------------------

# Period labels as they appear in the data. Alphabetical sorting puts
# '2021 and After' ahead of 'Before 2021', which silently flips the sign of
# every before/after difference, so the order is fixed explicitly.
BEFORE_LABEL = 'Before 2021'
AFTER_LABEL = '2021 and After'
MAJOR_OPERATOR_LABEL = 'Major (50+)'
MAJOR_SPILL_LABEL = 'Major (50+ bbl)'

print("🏭 TESTING INDUSTRY RESISTANCE HYPOTHESIS")
print("=" * 60)

# First, let's check what Period values we actually have
print("Checking data structure:")
print("Unique values in Period column:", spills_gdf['Period'].unique())
print("Period value counts:", spills_gdf['Period'].value_counts())
print("Sample of data:")
print(spills_gdf[['Operator', 'Period', 'report_delay']].head())

# 1. OPERATOR-LEVEL RESISTANCE PATTERNS
print("\n1️⃣ OPERATOR-LEVEL RESISTANCE ANALYSIS")
print("-" * 45)

# Calculate operator size (number of spills as proxy for company size)
operator_stats = spills_gdf.groupby('Operator').agg({
    'Document #': 'count',
    'report_delay': 'mean'
}).rename(columns={'Document #': 'total_spills', 'report_delay': 'avg_delay'})

print(f"Total operators: {len(operator_stats)}")
print(f"Spills per operator - Min: {operator_stats['total_spills'].min()}, Max: {operator_stats['total_spills'].max()}")

# Categorize operators by size (right-closed bins: 1-5, 6-20, 21-50, 51+)
operator_stats['size_category'] = pd.cut(
    operator_stats['total_spills'],
    bins=[0, 5, 20, 50, float('inf')],
    labels=['Small (1-5)', 'Medium (6-20)', 'Large (21-50)', MAJOR_OPERATOR_LABEL]
)

print("\nOperator size distribution:")
print(operator_stats['size_category'].value_counts())

# Resolve which period is "before" and which is "after".
# dropna() keeps sorted() from failing on a mix of str and NaN.
period_values = sorted(spills_gdf['Period'].dropna().unique())
if BEFORE_LABEL in period_values and AFTER_LABEL in period_values:
    before_period, after_period = BEFORE_LABEL, AFTER_LABEL
elif len(period_values) >= 2:
    # Unknown labels: fall back to sorted order, but say so, because
    # alphabetical order is not guaranteed to be chronological.
    before_period, after_period = period_values[0], period_values[1]
    print(f"⚠️ Unrecognised Period labels; assuming '{before_period}' precedes '{after_period}'")
else:
    before_period = after_period = None

# Results consumed by the summary section; None means "step did not run".
complete_with_size = None
worst_performers = None
best_performers = None
large_ops = None
small_ops = None
size_period_analysis = None

if before_period is not None:
    print(f"\nAnalyzing periods: '{before_period}' vs '{after_period}'")

    # Calculate average delays by operator and period
    operator_period_stats = (
        spills_gdf.groupby(['Operator', 'Period'])
        .agg({'report_delay': 'mean', 'Document #': 'count'})
        .reset_index()
    )

    print(f"Operator-period combinations: {len(operator_period_stats)}")

    # Add size categories by merging
    operator_period_stats = operator_period_stats.merge(
        operator_stats[['size_category']].reset_index(),
        on='Operator',
        how='left'
    )

    # Summary by size and period.
    # observed=False keeps the current pandas behaviour for the categorical
    # key and silences the FutureWarning about the changing default.
    size_period_summary = (
        operator_period_stats
        .groupby(['size_category', 'Period'], observed=False)
        .agg({'report_delay': ['mean', 'count'], 'Document #': 'sum'})
        .round(2)
    )

    print("\nAverage delays by operator size and period:")
    print(size_period_summary)

    # 2. OPERATOR CHANGES ANALYSIS
    print("\n2️⃣ OPERATOR DELAY CHANGES")
    print("-" * 35)

    # Pivot to get before/after for each operator
    operator_pivot = operator_period_stats.pivot_table(
        index='Operator',
        columns='Period',
        values='report_delay',
        aggfunc='mean'
    )

    # Only keep operators with data in both periods. .copy() makes this an
    # independent frame so the column assignment below is unambiguous
    # (the original triggered SettingWithCopyWarning here).
    complete_operators = operator_pivot.dropna(subset=[before_period, after_period]).copy()
    print(f"Operators with data in both periods: {len(complete_operators)}")

    if len(complete_operators) > 0:
        complete_operators['delay_change'] = (
            complete_operators[after_period] - complete_operators[before_period]
        )

        # Add operator size info
        complete_with_size = complete_operators.merge(
            operator_stats[['size_category', 'total_spills']],
            left_index=True,
            right_index=True,
            how='left'
        )

        print(f"Operators with size data: {len(complete_with_size)}")

        if len(complete_with_size) >= 10:
            # Worst performers: only operators whose delay actually increased,
            # so the heading never labels an improvement as an increase.
            increased = complete_with_size[complete_with_size['delay_change'] > 0]
            worst_performers = increased.nlargest(10, 'delay_change')
            print("\n🚨 TOP 10 OPERATORS WITH BIGGEST DELAY INCREASES:")
            for idx, (operator, data) in enumerate(worst_performers.iterrows(), 1):
                # '+' format flag prints the real sign instead of a hard-coded '+'
                print(f"{idx:2d}. {operator[:50]:50s} | Change: {data['delay_change']:+6.1f} days | Size: {data['size_category']}")

            # Best performers: only operators whose delay actually decreased
            decreased = complete_with_size[complete_with_size['delay_change'] < 0]
            best_performers = decreased.nsmallest(10, 'delay_change')
            print("\n✅ TOP 10 OPERATORS WITH BIGGEST IMPROVEMENTS:")
            for idx, (operator, data) in enumerate(best_performers.iterrows(), 1):
                print(f"{idx:2d}. {operator[:50]:50s} | Change: {data['delay_change']:+6.1f} days | Size: {data['size_category']}")

        # 3. SIZE-BASED ANALYSIS
        print("\n3️⃣ OPERATOR SIZE EFFECT ANALYSIS")
        print("-" * 40)

        size_changes = (
            complete_with_size
            .groupby('size_category', observed=False)['delay_change']
            .agg(['mean', 'count', 'std'])
            .round(2)
        )
        print("Average delay changes by operator size:")
        print(size_changes)

        # Statistical test: Large (Large + Major) vs Small (Small + Medium) operators
        large_ops = complete_with_size[complete_with_size['size_category'].isin(['Large (21-50)', MAJOR_OPERATOR_LABEL])]
        small_ops = complete_with_size[complete_with_size['size_category'].isin(['Small (1-5)', 'Medium (6-20)'])]

        if len(large_ops) > 0 and len(small_ops) > 0:
            t_stat, p_val = stats.ttest_ind(large_ops['delay_change'], small_ops['delay_change'])
            print("\n🔍 Large vs Small Operators:")
            print(f"   Large operators (n={len(large_ops)}): {large_ops['delay_change'].mean():+.2f} days average change")
            print(f"   Small operators (n={len(small_ops)}): {small_ops['delay_change'].mean():+.2f} days average change")
            print(f"   Statistical test: t={t_stat:.3f}, p={p_val:.4f} ({'significant' if p_val < 0.05 else 'not significant'})")

    # 4. SPILL SIZE ANALYSIS
    print("\n4️⃣ SPILL SIZE RESISTANCE ANALYSIS")
    print("-" * 40)

    # Create total volume
    volume_cols = ['Oil Spill Volume', 'Condensate Spill Volume', 'Flow Back Spill Volume',
                   'Produced Water Spill Volume', 'E&P Waste Spill Volume', 'Drilling Fluid Spill Volume']
    available_vol_cols = [col for col in volume_cols if col in spills_gdf.columns]

    if available_vol_cols:
        # Coerce into a temporary frame so the raw columns in spills_gdf keep
        # their original values; entries that do not parse count as 0 in the sum.
        numeric_volumes = spills_gdf[available_vol_cols].apply(pd.to_numeric, errors='coerce')
        spills_gdf['total_volume'] = numeric_volumes.sum(axis=1)

        # Right-closed bins: a total of exactly 0 falls outside every bin and
        # is therefore excluded from the size analysis.
        spills_gdf['spill_size_category'] = pd.cut(
            spills_gdf['total_volume'],
            bins=[0, 1, 10, 50, float('inf')],
            labels=['Small (0-1 bbl)', 'Medium (1-10 bbl)', 'Large (10-50 bbl)', MAJOR_SPILL_LABEL]
        )

        size_period_analysis = (
            spills_gdf
            .groupby(['spill_size_category', 'Period'], observed=False)['report_delay']
            .mean()
            .unstack()
        )

        if before_period in size_period_analysis.columns and after_period in size_period_analysis.columns:
            # Select columns by label; positional iloc order is alphabetical
            # and would compute before - after.
            size_period_analysis['change'] = (
                size_period_analysis[after_period] - size_period_analysis[before_period]
            )
            print("Delay changes by spill size:")
            print(size_period_analysis['change'].sort_values(ascending=False).round(2))

            if not size_period_analysis['change'].notna().any():
                # NOTE(review): the recorded run printed NaN for every size
                # class, which suggests these columns do not hold plain
                # numbers. Confirm their contents; the '... BBLs Spilled'
                # columns may be the numeric ones.
                print("⚠️ No spill has a usable positive total volume; spill-size comparison unavailable")
    else:
        print("⚠️ No spill volume columns found; spill-size comparison unavailable")

    # 5. GEOGRAPHIC PATTERNS
    print("\n5️⃣ GEOGRAPHIC RESISTANCE PATTERNS")
    print("-" * 40)

    county_analysis = spills_gdf.groupby(['county', 'Period'])['report_delay'].mean().unstack()

    if before_period in county_analysis.columns and after_period in county_analysis.columns:
        county_analysis['change'] = county_analysis[after_period] - county_analysis[before_period]
        county_analysis['total_spills'] = spills_gdf.groupby('county').size()

        # Filter for counties with significant data
        significant_counties = county_analysis[county_analysis['total_spills'] >= 20]

        # Keep only real increases so the heading matches what is listed
        worst_counties = significant_counties[significant_counties['change'] > 0].nlargest(10, 'change')

        if len(worst_counties) > 0:
            print("🚨 COUNTIES WITH BIGGEST DELAY INCREASES (min 20 spills):")
            for county, data in worst_counties.iterrows():
                print(f"   {county:25s}: {data['change']:+5.1f} days ({data['total_spills']:3.0f} spills)")
        else:
            print("✅ No county with at least 20 spills shows a delay increase")

    # 6. SUMMARY ASSESSMENT
    print("\n6️⃣ RESISTANCE HYPOTHESIS ASSESSMENT")
    print("-" * 45)

    evidence_count = 0
    total_tests = 0

    # Test 1: Do major operators show up disproportionately in worst performers?
    # "Disproportionately" is measured against their share of all compared
    # operators, not against a fixed count.
    if worst_performers is not None and len(worst_performers) > 0:
        total_tests += 1
        major_share_worst = (worst_performers['size_category'] == MAJOR_OPERATOR_LABEL).mean()
        major_share_all = (complete_with_size['size_category'] == MAJOR_OPERATOR_LABEL).mean()
        if major_share_worst > major_share_all:
            evidence_count += 1
            print("✅ Major operators overrepresented in worst performers")
        else:
            print("❌ Major operators not overrepresented in worst performers")

    # Test 2: Do large operators have bigger average increases?
    if large_ops is not None and small_ops is not None and len(large_ops) > 0 and len(small_ops) > 0:
        total_tests += 1
        if large_ops['delay_change'].mean() > small_ops['delay_change'].mean():
            evidence_count += 1
            print("✅ Large operators had bigger delay increases than small operators")
        else:
            print("❌ Small operators had bigger delay increases than large operators")

    # Test 3: Do larger spills have bigger increases?
    if size_period_analysis is not None and 'change' in size_period_analysis.columns:
        major_change = size_period_analysis['change'].get(MAJOR_SPILL_LABEL, np.nan)
        mean_change = size_period_analysis['change'].mean()
        if pd.notna(major_change) and pd.notna(mean_change):
            total_tests += 1
            if major_change > mean_change:
                evidence_count += 1
                print("✅ Major spills had above-average delay increases")
            else:
                print("❌ Major spills did not have above-average delay increases")
        else:
            # A NaN comparison is always False; do not score missing data
            # as evidence against the hypothesis.
            print("⚠️ Spill-size test skipped (no usable spill-size data)")

    if total_tests > 0:
        resistance_score = (evidence_count / total_tests) * 100
        print(f"\n📊 RESISTANCE HYPOTHESIS SCORE: {resistance_score:.0f}% ({evidence_count}/{total_tests} tests support)")

        # Integer comparisons against 2/3 and 1/3: 2 of 3 is 66.67%, which
        # prints as "67%" but failed the original `>= 67` check.
        if 3 * evidence_count >= 2 * total_tests:
            print("🚨 STRONG EVIDENCE for systematic industry resistance")
        elif 3 * evidence_count >= total_tests:
            print("⚠️ MODERATE EVIDENCE for industry resistance")
        else:
            print("✅ LIMITED EVIDENCE for systematic resistance")
    else:
        print("⚠️ Insufficient data to assess resistance hypothesis")

else:
    print("⚠️ Need at least 2 time periods for analysis")

print("\n✅ Analysis complete!")