analysis 10 has rural and industry hypotheses

2025-08-06 11:58:03 -07:00
parent f4d1166502
commit 389191accd
1 changed files with 539 additions and 0 deletions
--- a/2025/analysis10-2021.ipynb
+++ b/2025/analysis10-2021.ipynb
@@ -1862,6 +1862,545 @@
    "\n",
    "**Bottom line**: The mission change had **net negative effects on delay times**, with rural communities bearing the brunt of the impact. "
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "id": "433ce72e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Columns in spills_gdf:\n",
+      "- Document #\n",
+      "- Report\n",
+      "- Operator\n",
+      "- Operator #\n",
+      "- Tracking #\n",
+      "- Initial Report Date\n",
+      "- Date of Discovery\n",
+      "- spill_type\n",
+      "- Qtr Qtr\n",
+      "- Section\n",
+      "- Township\n",
+      "- range\n",
+      "- meridian\n",
+      "- Latitude\n",
+      "- Longitude\n",
+      "- Municipality\n",
+      "- county\n",
+      "- Facility Type\n",
+      "- Facility ID\n",
+      "- API County Code\n",
+      "- API Sequence Number\n",
+      "- Spilled outside of berms\n",
+      "- More than five barrels spilled\n",
+      "- Oil Spill Volume\n",
+      "- Condensate Spill Volume\n",
+      "- Flow Back Spill Volume\n",
+      "- Produced Water Spill Volume\n",
+      "- E&P Waste Spill Volume\n",
+      "- Other Waste\n",
+      "- Drilling Fluid Spill Volume\n",
+      "- Current Land Use\n",
+      "- Other Land Use\n",
+      "- Weather Conditions\n",
+      "- Surface Owner\n",
+      "- Surface Owner Other\n",
+      "- Waters of the State\n",
+      "- Residence / Occupied Structure\n",
+      "- livestock\n",
+      "- Public Byway\n",
+      "- Surface Water Supply Area\n",
+      "- Spill Description\n",
+      "- Supplemental Report Date\n",
+      "- Oil BBLs Spilled\n",
+      "- Oil BBLs Recovered\n",
+      "- Oil Unknown\n",
+      "- Condensate BBLs Spilled\n",
+      "- Condensate BBLs Recovered\n",
+      "- Condensate Unknown\n",
+      "- Produced Water BBLs Spilled\n",
+      "- Produced Water BBLs Recovered\n",
+      "- Produced Water Unknown\n",
+      "- Drilling Fluid BBLs Spilled\n",
+      "- Drilling Fluid BBLs Recovered\n",
+      "- Drilling Fluid Unknown\n",
+      "- Flow Back Fluid BBLs Spilled\n",
+      "- Flow Back Fluid BBLs Recovered\n",
+      "- Flow Back Fluid Unkown\n",
+      "- Other E&P Waste BBLS Spilled\n",
+      "- Other E&P Waste BBLS Recovered\n",
+      "- Other E&P Waste Unknown\n",
+      "- Other E&P Waste\n",
+      "- Spill Contained within Berm\n",
+      "- Emergency Pit Constructed\n",
+      "- soil\n",
+      "- groundwater\n",
+      "- Surface Water\n",
+      "- Dry Drainage Feature\n",
+      "- Surface Area Length\n",
+      "- Surface Area Width\n",
+      "- Depth of Impact in Feet\n",
+      "- Depth of Impact in Inches\n",
+      "- Area Depth Determined\n",
+      "- Geology Description\n",
+      "- Depth to Groundwater\n",
+      "- Water wells in area\n",
+      "- Water Wells\n",
+      "- Water Wells None\n",
+      "- Surface Water Near\n",
+      "- Surface Water None\n",
+      "- Wetlands\n",
+      "- Wetlands None\n",
+      "- Springs\n",
+      "- Springs None\n",
+      "- Livestock Near\n",
+      "- Livestock None\n",
+      "- Occupied Buildings\n",
+      "- Occupied Buildings None\n",
+      "- Additional Spill Details\n",
+      "- Supplemental Report Date CA\n",
+      "- Human Error\n",
+      "- Equipment Failure\n",
+      "- Historical Unkown\n",
+      "- Other\n",
+      "- Other Description\n",
+      "- Root Cause\n",
+      "- Preventative Measures\n",
+      "- Soil Excavated\n",
+      "- Offsite Disposal\n",
+      "- Onsite Treatment\n",
+      "- Other Disposition\n",
+      "- Other Disposition Description\n",
+      "- Ground Water Removed\n",
+      "- Surface Water Removed\n",
+      "- Corrective Actions Completed\n",
+      "- Approved Form 27\n",
+      "- Form 27 Project Number\n",
+      "- GEOID\n",
+      "- TRACT_NAME\n",
+      "- total_population\n",
+      "- white_population\n",
+      "- hispanic_population\n",
+      "- median_household_income\n",
+      "- poverty_population\n",
+      "- unemployed_population\n",
+      "- percent_white\n",
+      "- percent_hispanic\n",
+      "- percent_poverty\n",
+      "- unemployment_rate\n",
+      "- geometry\n",
+      "- ruca_code\n",
+      "- ruca_description\n",
+      "- rurality\n",
+      "- Report Year\n",
+      "- Period\n",
+      "- report_delay\n",
+      "- report_month\n"
+     ]
+    }
+   ],
+   "source": [
+    "# List every column available in spills_gdf,\n",
+    "# one name per line, under a short heading\n",
+    "print(\"\\nColumns in spills_gdf:\")\n",
+    "for column_name in spills_gdf.columns:\n",
+    "    print(\"- \" + str(column_name))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "98e01f3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "🏭 TESTING INDUSTRY RESISTANCE HYPOTHESIS\n",
+      "============================================================\n",
+      "Checking data structure:\n",
+      "Unique values in Period column: ['2021 and After' 'Before 2021']\n",
+      "Period value counts: Period\n",
+      "2021 and After    7397\n",
+      "Before 2021       3799\n",
+      "Name: count, dtype: int64\n",
+      "Sample of data:\n",
+      "             Operator          Period  report_delay\n",
+      "28     PDC ENERGY INC  2021 and After             1\n",
+      "163    PDC ENERGY INC  2021 and After             3\n",
+      "226  NOBLE ENERGY INC     Before 2021             2\n",
+      "334  NOBLE ENERGY INC     Before 2021             2\n",
+      "342  NOBLE ENERGY INC     Before 2021             0\n",
+      "\n",
+      "1️⃣ OPERATOR-LEVEL RESISTANCE ANALYSIS\n",
+      "---------------------------------------------\n",
+      "Total operators: 117\n",
+      "Spills per operator - Min: 1, Max: 2371\n",
+      "\n",
+      "Operator size distribution:\n",
+      "size_category\n",
+      "Small (1-5)      51\n",
+      "Medium (6-20)    29\n",
+      "Major (50+)      24\n",
+      "Large (21-50)    13\n",
+      "Name: count, dtype: int64\n",
+      "\n",
+      "Analyzing periods: '2021 and After' vs 'Before 2021'\n",
+      "Operator-period combinations: 161\n",
+      "\n",
+      "Average delays by operator size and period:\n",
+      "                             report_delay       Document #\n",
+      "                                     mean count        sum\n",
+      "size_category Period                                      \n",
+      "Small (1-5)   2021 and After        49.30    25         69\n",
+      "              Before 2021            5.85    28         65\n",
+      "Medium (6-20) 2021 and After        10.74    19        137\n",
+      "              Before 2021            4.43    24        216\n",
+      "Large (21-50) 2021 and After         7.99    11        259\n",
+      "              Before 2021            1.89     8        170\n",
+      "Major (50+)   2021 and After         5.19    23       6932\n",
+      "              Before 2021            3.52    23       3348\n",
+      "\n",
+      "2️⃣ OPERATOR DELAY CHANGES\n",
+      "-----------------------------------\n",
+      "Operators with data in both periods: 44\n",
+      "Operators with size data: 44\n",
+      "\n",
+      "🚨 TOP 10 OPERATORS WITH BIGGEST DELAY INCREASES:\n",
+      " 1. KP KAUFFMAN COMPANY INC                            | Change: + 15.0 days | Size: Major (50+)\n",
+      " 2. AKA ENERGY GROUP LLC                               | Change: +  6.6 days | Size: Medium (6-20)\n",
+      " 3. HUNTER RIDGE ENERGY SERVICES LLC                   | Change: +  5.0 days | Size: Small (1-5)\n",
+      " 4. EXTRACTION OIL & GAS INC                           | Change: +  5.0 days | Size: Major (50+)\n",
+      " 5. RED HAWK PETROLEUM LLC                             | Change: +  3.8 days | Size: Medium (6-20)\n",
+      " 6. PETRO MEX RESOURCES                                | Change: +  3.8 days | Size: Medium (6-20)\n",
+      " 7. DCP OPERATING COMPANY LP                           | Change: +  3.6 days | Size: Major (50+)\n",
+      " 8. ENERPLUS RESOURCES (USA) CORPORATION               | Change: +  1.9 days | Size: Medium (6-20)\n",
+      " 9. ANADARKO WATTENBERG OIL COMPLEX LLC                | Change: +  1.8 days | Size: Medium (6-20)\n",
+      "10. GRAND RIVER GATHERING LLC                          | Change: +  1.7 days | Size: Major (50+)\n",
+      "\n",
+      "✅ TOP 10 OPERATORS WITH BIGGEST IMPROVEMENTS:\n",
+      " 1. PETRO OPERATING COMPANY LLC                        | Change: -80.9 days | Size: Medium (6-20)\n",
+      " 2. FOUNDATION ENERGY MANAGEMENT LLC                   | Change: -63.0 days | Size: Large (21-50)\n",
+      " 3. BLUE CHIP OIL INC                                  | Change: -54.1 days | Size: Medium (6-20)\n",
+      " 4. UTAH GAS OP LTD DBA UTAH GAS CORP                  | Change: -23.2 days | Size: Major (50+)\n",
+      " 5. CHEVRON USA INC                                    | Change: -17.3 days | Size: Major (50+)\n",
+      " 6. BAYSWATER EXPLORATION & PRODUCTION LLC             | Change:  -9.7 days | Size: Major (50+)\n",
+      " 7. GREAT WESTERN OPERATING COMPANY LLC                | Change:  -7.4 days | Size: Major (50+)\n",
+      " 8. CRESTONE PEAK RESOURCES OPERATING LLC              | Change:  -3.8 days | Size: Major (50+)\n",
+      " 9. NOBLE ENERGY INC                                   | Change:  -3.3 days | Size: Major (50+)\n",
+      "10. KERR MCGEE GATHERING LLC                           | Change:  -3.0 days | Size: Major (50+)\n",
+      "\n",
+      "3️⃣ OPERATOR SIZE EFFECT ANALYSIS\n",
+      "----------------------------------------\n",
+      "Average delay changes by operator size:\n",
+      "                mean  count    std\n",
+      "size_category                     \n",
+      "Small (1-5)     3.00      2   2.83\n",
+      "Medium (6-20)  -8.61     14  25.61\n",
+      "Large (21-50) -10.17      6  25.90\n",
+      "Major (50+)    -2.05     22   7.59\n",
+      "\n",
+      "🔍 Large vs Small Operators:\n",
+      "   Large operators (n=28): -3.79 days average change\n",
+      "   Small operators (n=16): -7.16 days average change\n",
+      "   Statistical test: t=0.596, p=0.5543 (not significant)\n",
+      "\n",
+      "4️⃣ SPILL SIZE RESISTANCE ANALYSIS\n",
+      "----------------------------------------\n",
+      "Delay changes by spill size:\n",
+      "spill_size_category\n",
+      "Small (0-1 bbl)     NaN\n",
+      "Medium (1-10 bbl)   NaN\n",
+      "Large (10-50 bbl)   NaN\n",
+      "Major (50+ bbl)     NaN\n",
+      "Name: change, dtype: float64\n",
+      "\n",
+      "5️⃣ GEOGRAPHIC RESISTANCE PATTERNS\n",
+      "----------------------------------------\n",
+      "🚨 COUNTIES WITH BIGGEST DELAY INCREASES (min 20 spills):\n",
+      "   GARFIELD                 :  -0.1 days (1437 spills)\n",
+      "   WELD                     :  -1.3 days (9041 spills)\n",
+      "   RIO BLANCO               :  -6.2 days (718 spills)\n",
+      "\n",
+      "6️⃣ RESISTANCE HYPOTHESIS ASSESSMENT\n",
+      "---------------------------------------------\n",
+      "✅ Major operators overrepresented in worst performers\n",
+      "✅ Large operators had bigger delay increases than small operators\n",
+      "❌ Major spills did not have above-average delay increases\n",
+      "\n",
+      "📊 RESISTANCE HYPOTHESIS SCORE: 67% (2/3 tests support)\n",
+      "⚠️  MODERATE EVIDENCE for industry resistance\n",
+      "\n",
+      "✅ Analysis complete!\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_1241247/2502002660.py:62: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  size_period_summary = operator_period_stats.groupby(['size_category', 'Period']).agg({\n",
+      "/tmp/ipykernel_1241247/2502002660.py:88: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  complete_operators['delay_change'] = complete_operators[after_period] - complete_operators[before_period]\n",
+      "/tmp/ipykernel_1241247/2502002660.py:118: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  size_changes = complete_with_size.groupby('size_category')['delay_change'].agg(['mean', 'count', 'std']).round(2)\n",
+      "/tmp/ipykernel_1241247/2502002660.py:155: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+      "  size_period_analysis = spills_gdf.groupby(['spill_size_category', 'Period'])['report_delay'].mean().unstack()\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import numpy as np\n",
+    "from scipy import stats\n",
+    "import statsmodels.formula.api as smf\n",
+    "\n",
+    "print(\"🏭 TESTING INDUSTRY RESISTANCE HYPOTHESIS\")\n",
+    "print(\"=\" * 60)\n",
+    "\n",
+    "# Sanity-check the Period labels and peek at the fields this analysis relies on\n",
+    "print(\"Checking data structure:\")\n",
+    "period_column = spills_gdf['Period']\n",
+    "print(\"Unique values in Period column:\", period_column.unique())\n",
+    "print(\"Period value counts:\", period_column.value_counts())\n",
+    "print(\"Sample of data:\")\n",
+    "print(spills_gdf[['Operator', 'Period', 'report_delay']].head())\n",
+    "\n",
+    "# 1. OPERATOR-LEVEL RESISTANCE PATTERNS\n",
+    "print(\"\\n1️⃣ OPERATOR-LEVEL RESISTANCE ANALYSIS\")\n",
+    "print(\"-\" * 45)\n",
+    "\n",
+    "# Per-operator spill count (used as a proxy for company size) and mean reporting delay\n",
+    "operator_stats = spills_gdf.groupby('Operator').agg(\n",
+    "    total_spills=('Document #', 'count'),\n",
+    "    avg_delay=('report_delay', 'mean'),\n",
+    ")\n",
+    "spills_per_operator = operator_stats['total_spills']\n",
+    "print(f\"Total operators: {len(operator_stats)}\")\n",
+    "print(f\"Spills per operator - Min: {spills_per_operator.min()}, Max: {spills_per_operator.max()}\")\n",
+    "\n",
+    "# Bucket operators by spill count; pd.cut bins are right-closed, so exactly 50 spills is 'Large (21-50)'\n",
+    "size_labels = ['Small (1-5)', 'Medium (6-20)', 'Large (21-50)', 'Major (50+)']\n",
+    "operator_stats['size_category'] = pd.cut(spills_per_operator, bins=[0, 5, 20, 50, float('inf')], labels=size_labels)\n",
+    "\n",
+    "print(\"\\nOperator size distribution:\")\n",
+    "print(operator_stats['size_category'].value_counts())\n",
+    "\n",
+    "# Get period values. The baseline is picked by label because a plain sort puts '2021 and After'\n",
+    "# ahead of 'Before 2021' and would flip the sign of every change (fallback: first sorted value).\n",
+    "period_values = sorted(spills_gdf['Period'].dropna().unique())\n",
+    "before_period = next((p for p in period_values if str(p).lower().startswith('before')), period_values[0] if period_values else None)\n",
+    "\n",
+    "# Reset everything the final assessment reads, so a skipped branch or a variable left in the\n",
+    "# kernel by an earlier run can never be scored as evidence.\n",
+    "worst_performers = large_ops = small_ops = complete_with_size = size_period_analysis = None\n",
+    "\n",
+    "if len(period_values) >= 2:\n",
+    "    after_period = next(p for p in period_values if p != before_period)\n",
+    "    print(f\"\\nAnalyzing periods: '{before_period}' vs '{after_period}'\")\n",
+    "\n",
+    "    # Calculate average delays by operator and period\n",
+    "    operator_period_stats = spills_gdf.groupby(['Operator', 'Period']).agg({'report_delay': 'mean', 'Document #': 'count'}).reset_index()\n",
+    "    print(f\"Operator-period combinations: {len(operator_period_stats)}\")\n",
+    "\n",
+    "    # Add size categories by merging\n",
+    "    operator_period_stats = operator_period_stats.merge(operator_stats[['size_category']].reset_index(), on='Operator', how='left')\n",
+    "\n",
+    "    # Summary by size and period (observed=False states today's pandas default explicitly)\n",
+    "    size_period_summary = operator_period_stats.groupby(['size_category', 'Period'], observed=False).agg({\n",
+    "        'report_delay': ['mean', 'count'],\n",
+    "        'Document #': 'sum'\n",
+    "    }).round(2)\n",
+    "\n",
+    "    print(\"\\nAverage delays by operator size and period:\")\n",
+    "    print(size_period_summary)\n",
+    "\n",
+    "    # 2. OPERATOR CHANGES ANALYSIS\n",
+    "    print(\"\\n2️⃣ OPERATOR DELAY CHANGES\")\n",
+    "    print(\"-\" * 35)\n",
+    "\n",
+    "    # Pivot to get before/after for each operator\n",
+    "    operator_pivot = operator_period_stats.pivot_table(\n",
+    "        index='Operator', columns='Period', values='report_delay', aggfunc='mean'\n",
+    "    )\n",
+    "\n",
+    "    # Only keep operators with data in both periods; .copy() gives an independent frame\n",
+    "    # so adding delay_change below does not raise SettingWithCopyWarning\n",
+    "    complete_operators = operator_pivot.dropna().copy()\n",
+    "    print(f\"Operators with data in both periods: {len(complete_operators)}\")\n",
+    "\n",
+    "    if len(complete_operators) > 0:\n",
+    "        # Positive = slower reporting after the change; negative = faster\n",
+    "        complete_operators['delay_change'] = complete_operators[after_period] - complete_operators[before_period]\n",
+    "\n",
+    "        # Add operator size info\n",
+    "        complete_with_size = complete_operators.merge(\n",
+    "            operator_stats[['size_category', 'total_spills']], left_index=True, right_index=True, how='left'\n",
+    "        )\n",
+    "        print(f\"Operators with size data: {len(complete_with_size)}\")\n",
+    "\n",
+    "        if len(complete_with_size) >= 10:\n",
+    "            # Top 10 worst performers (signed format, so a negative value cannot print as '+-')\n",
+    "            worst_performers = complete_with_size.nlargest(10, 'delay_change')\n",
+    "            print(f\"\\n🚨 TOP 10 OPERATORS WITH BIGGEST DELAY INCREASES:\")\n",
+    "            for idx, (operator, data) in enumerate(worst_performers.iterrows(), 1):\n",
+    "                print(f\"{idx:2d}. {operator[:50]:50s} | Change: {data['delay_change']:+6.1f} days | Size: {data['size_category']}\")\n",
+    "\n",
+    "            # Best performers\n",
+    "            best_performers = complete_with_size.nsmallest(10, 'delay_change')\n",
+    "            print(f\"\\n✅ TOP 10 OPERATORS WITH BIGGEST IMPROVEMENTS:\")\n",
+    "            for idx, (operator, data) in enumerate(best_performers.iterrows(), 1):\n",
+    "                print(f\"{idx:2d}. {operator[:50]:50s} | Change: {data['delay_change']:+6.1f} days | Size: {data['size_category']}\")\n",
+    "\n",
+    "        # 3. SIZE-BASED ANALYSIS\n",
+    "        print(\"\\n3️⃣ OPERATOR SIZE EFFECT ANALYSIS\")\n",
+    "        print(\"-\" * 40)\n",
+    "\n",
+    "        size_changes = complete_with_size.groupby('size_category', observed=False)['delay_change'].agg(['mean', 'count', 'std']).round(2)\n",
+    "        print(\"Average delay changes by operator size:\")\n",
+    "        print(size_changes)\n",
+    "\n",
+    "        # Statistical test: Large vs Small operators\n",
+    "        large_ops = complete_with_size[complete_with_size['size_category'].isin(['Large (21-50)', 'Major (50+)'])]\n",
+    "        small_ops = complete_with_size[complete_with_size['size_category'].isin(['Small (1-5)', 'Medium (6-20)'])]\n",
+    "\n",
+    "        if len(large_ops) > 0 and len(small_ops) > 0:\n",
+    "            t_stat, p_val = stats.ttest_ind(large_ops['delay_change'], small_ops['delay_change'])\n",
+    "            print(f\"\\n🔍 Large vs Small Operators:\")\n",
+    "            print(f\"   Large operators (n={len(large_ops)}): {large_ops['delay_change'].mean():+.2f} days average change\")\n",
+    "            print(f\"   Small operators (n={len(small_ops)}): {small_ops['delay_change'].mean():+.2f} days average change\")\n",
+    "            print(f\"   Statistical test: t={t_stat:.3f}, p={p_val:.4f} ({'significant' if p_val < 0.05 else 'not significant'})\")\n",
+    "\n",
+    "    # 4. SPILL SIZE ANALYSIS\n",
+    "    print(\"\\n4️⃣ SPILL SIZE RESISTANCE ANALYSIS\")\n",
+    "    print(\"-\" * 40)\n",
+    "\n",
+    "    # Create total volume\n",
+    "    volume_cols = ['Oil Spill Volume', 'Condensate Spill Volume', 'Flow Back Spill Volume', \n",
+    "                   'Produced Water Spill Volume', 'E&P Waste Spill Volume', 'Drilling Fluid Spill Volume']\n",
+    "\n",
+    "    # Sum available volume columns\n",
+    "    available_vol_cols = [col for col in volume_cols if col in spills_gdf.columns]\n",
+    "    if available_vol_cols:\n",
+    "        # Parse into a scratch frame so the raw volume columns of spills_gdf stay untouched\n",
+    "        parsed_volumes = spills_gdf[available_vol_cols].apply(pd.to_numeric, errors='coerce')\n",
+    "        # min_count=1: a spill with no parseable volume stays NaN rather than becoming 0 bbl\n",
+    "        spills_gdf['total_volume'] = parsed_volumes.sum(axis=1, min_count=1)\n",
+    "\n",
+    "        # include_lowest=True: true 0 bbl spills belong to the 'Small (0-1 bbl)' bin\n",
+    "        spills_gdf['spill_size_category'] = pd.cut(spills_gdf['total_volume'], \n",
+    "                                                  bins=[0, 1, 10, 50, float('inf')], \n",
+    "                                                  labels=['Small (0-1 bbl)', 'Medium (1-10 bbl)', \n",
+    "                                                        'Large (10-50 bbl)', 'Major (50+ bbl)'],\n",
+    "                                                  include_lowest=True)\n",
+    "\n",
+    "        size_period_analysis = spills_gdf.groupby(['spill_size_category', 'Period'], observed=False)['report_delay'].mean().unstack()\n",
+    "        if before_period in size_period_analysis.columns and after_period in size_period_analysis.columns:\n",
+    "            size_period_analysis['change'] = size_period_analysis[after_period] - size_period_analysis[before_period]\n",
+    "            print(\"Delay changes by spill size:\")\n",
+    "            print(size_period_analysis['change'].sort_values(ascending=False).round(2))\n",
+    "            if size_period_analysis['change'].isna().all():\n",
+    "                print(\"⚠️ No usable spill volumes - check that the volume columns are numeric\")\n",
+    "\n",
+    "    # 5. GEOGRAPHIC PATTERNS\n",
+    "    print(\"\\n5️⃣ GEOGRAPHIC RESISTANCE PATTERNS\")\n",
+    "    print(\"-\" * 40)\n",
+    "\n",
+    "    county_analysis = spills_gdf.groupby(['county', 'Period'])['report_delay'].mean().unstack()\n",
+    "\n",
+    "    if before_period in county_analysis.columns and after_period in county_analysis.columns:\n",
+    "        county_analysis['change'] = county_analysis[after_period] - county_analysis[before_period]\n",
+    "        county_analysis['total_spills'] = spills_gdf.groupby('county').size()\n",
+    "\n",
+    "        # Filter for counties with significant data\n",
+    "        significant_counties = county_analysis[county_analysis['total_spills'] >= 20]\n",
+    "\n",
+    "        if len(significant_counties) > 0:\n",
+    "            worst_counties = significant_counties.nlargest(10, 'change')\n",
+    "            print(\"🚨 COUNTIES WITH BIGGEST DELAY INCREASES (min 20 spills):\")\n",
+    "            for county, data in worst_counties.iterrows():\n",
+    "                print(f\"   {county:25s}: {data['change']:+5.1f} days ({data['total_spills']:3.0f} spills)\")\n",
+    "\n",
+    "    # 6. SUMMARY ASSESSMENT\n",
+    "    print(\"\\n6️⃣ RESISTANCE HYPOTHESIS ASSESSMENT\")\n",
+    "    print(\"-\" * 45)\n",
+    "\n",
+    "    evidence_count = 0\n",
+    "    total_tests = 0\n",
+    "\n",
+    "    # Test 1: are major operators a bigger share of the worst performers than of all compared operators?\n",
+    "    if worst_performers is not None and len(worst_performers) > 0:\n",
+    "        total_tests += 1\n",
+    "        major_share_worst = (worst_performers['size_category'] == 'Major (50+)').mean()\n",
+    "        major_share_all = (complete_with_size['size_category'] == 'Major (50+)').mean()\n",
+    "        if major_share_worst > major_share_all:\n",
+    "            evidence_count += 1\n",
+    "            print(\"✅ Major operators overrepresented in worst performers\")\n",
+    "        else:\n",
+    "            print(\"❌ Major operators not overrepresented in worst performers\")\n",
+    "\n",
+    "    # Test 2: Do large operators have bigger average increases? (compares means only; see p-value above)\n",
+    "    if large_ops is not None and small_ops is not None and len(large_ops) > 0 and len(small_ops) > 0:\n",
+    "        total_tests += 1\n",
+    "        if large_ops['delay_change'].mean() > small_ops['delay_change'].mean():\n",
+    "            evidence_count += 1\n",
+    "            print(\"✅ Large operators had bigger delay increases than small operators\")\n",
+    "        else:\n",
+    "            print(\"❌ Small operators had bigger delay increases than large operators\")\n",
+    "\n",
+    "    # Test 3: Do larger spills have bigger increases? Scored only when both numbers exist,\n",
+    "    # because any comparison with NaN is False and would be logged as a failed test.\n",
+    "    if size_period_analysis is not None and 'change' in size_period_analysis.columns:\n",
+    "        major_spill_change = size_period_analysis['change'].loc['Major (50+ bbl)']\n",
+    "        mean_spill_change = size_period_analysis['change'].mean()\n",
+    "        if pd.isna(major_spill_change) or pd.isna(mean_spill_change):\n",
+    "            print(\"⚠️ Spill size test skipped: no delay change available for major spills\")\n",
+    "        else:\n",
+    "            total_tests += 1\n",
+    "            if major_spill_change > mean_spill_change:\n",
+    "                evidence_count += 1\n",
+    "                print(\"✅ Major spills had above-average delay increases\")\n",
+    "            else:\n",
+    "                print(\"❌ Major spills did not have above-average delay increases\")\n",
+    "\n",
+    "    if total_tests > 0:\n",
+    "        resistance_score = (evidence_count / total_tests) * 100\n",
+    "        print(f\"\\n📊 RESISTANCE HYPOTHESIS SCORE: {resistance_score:.0f}% ({evidence_count}/{total_tests} tests support)\")\n",
+    "        # Compare exact fractions: 2 of 3 tests is 66.67, which prints as 67% yet failed the old '>= 67' cut-off\n",
+    "        if 3 * evidence_count >= 2 * total_tests:\n",
+    "            print(\"🚨 STRONG EVIDENCE for systematic industry resistance\")\n",
+    "        elif 3 * evidence_count >= total_tests:\n",
+    "            print(\"⚠️  MODERATE EVIDENCE for industry resistance\")\n",
+    "        else:\n",
+    "            print(\"✅ LIMITED EVIDENCE for systematic resistance\")\n",
+    "    else:\n",
+    "        print(\"⚠️ Insufficient data to assess resistance hypothesis\")\n",
+    "\n",
+    "else:\n",
+    "    print(\"⚠️ Need at least 2 time periods for analysis\")\n",
+    "\n",
+    "print(f\"\\n✅ Analysis complete!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6b8b35c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {