analysis 10 has rural and industry hypotheses
This commit is contained in:
@@ -1862,6 +1862,545 @@
|
||||
"\n",
|
||||
"**Bottom line**: The mission change had **net negative effects on delay times**, with rural communities bearing the brunt of the impact. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"id": "433ce72e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Columns in spills_gdf:\n",
|
||||
"- Document #\n",
|
||||
"- Report\n",
|
||||
"- Operator\n",
|
||||
"- Operator #\n",
|
||||
"- Tracking #\n",
|
||||
"- Initial Report Date\n",
|
||||
"- Date of Discovery\n",
|
||||
"- spill_type\n",
|
||||
"- Qtr Qtr\n",
|
||||
"- Section\n",
|
||||
"- Township\n",
|
||||
"- range\n",
|
||||
"- meridian\n",
|
||||
"- Latitude\n",
|
||||
"- Longitude\n",
|
||||
"- Municipality\n",
|
||||
"- county\n",
|
||||
"- Facility Type\n",
|
||||
"- Facility ID\n",
|
||||
"- API County Code\n",
|
||||
"- API Sequence Number\n",
|
||||
"- Spilled outside of berms\n",
|
||||
"- More than five barrels spilled\n",
|
||||
"- Oil Spill Volume\n",
|
||||
"- Condensate Spill Volume\n",
|
||||
"- Flow Back Spill Volume\n",
|
||||
"- Produced Water Spill Volume\n",
|
||||
"- E&P Waste Spill Volume\n",
|
||||
"- Other Waste\n",
|
||||
"- Drilling Fluid Spill Volume\n",
|
||||
"- Current Land Use\n",
|
||||
"- Other Land Use\n",
|
||||
"- Weather Conditions\n",
|
||||
"- Surface Owner\n",
|
||||
"- Surface Owner Other\n",
|
||||
"- Waters of the State\n",
|
||||
"- Residence / Occupied Structure\n",
|
||||
"- livestock\n",
|
||||
"- Public Byway\n",
|
||||
"- Surface Water Supply Area\n",
|
||||
"- Spill Description\n",
|
||||
"- Supplemental Report Date\n",
|
||||
"- Oil BBLs Spilled\n",
|
||||
"- Oil BBLs Recovered\n",
|
||||
"- Oil Unknown\n",
|
||||
"- Condensate BBLs Spilled\n",
|
||||
"- Condensate BBLs Recovered\n",
|
||||
"- Condensate Unknown\n",
|
||||
"- Produced Water BBLs Spilled\n",
|
||||
"- Produced Water BBLs Recovered\n",
|
||||
"- Produced Water Unknown\n",
|
||||
"- Drilling Fluid BBLs Spilled\n",
|
||||
"- Drilling Fluid BBLs Recovered\n",
|
||||
"- Drilling Fluid Unknown\n",
|
||||
"- Flow Back Fluid BBLs Spilled\n",
|
||||
"- Flow Back Fluid BBLs Recovered\n",
|
||||
"- Flow Back Fluid Unkown\n",
|
||||
"- Other E&P Waste BBLS Spilled\n",
|
||||
"- Other E&P Waste BBLS Recovered\n",
|
||||
"- Other E&P Waste Unknown\n",
|
||||
"- Other E&P Waste\n",
|
||||
"- Spill Contained within Berm\n",
|
||||
"- Emergency Pit Constructed\n",
|
||||
"- soil\n",
|
||||
"- groundwater\n",
|
||||
"- Surface Water\n",
|
||||
"- Dry Drainage Feature\n",
|
||||
"- Surface Area Length\n",
|
||||
"- Surface Area Width\n",
|
||||
"- Depth of Impact in Feet\n",
|
||||
"- Depth of Impact in Inches\n",
|
||||
"- Area Depth Determined\n",
|
||||
"- Geology Description\n",
|
||||
"- Depth to Groundwater\n",
|
||||
"- Water wells in area\n",
|
||||
"- Water Wells\n",
|
||||
"- Water Wells None\n",
|
||||
"- Surface Water Near\n",
|
||||
"- Surface Water None\n",
|
||||
"- Wetlands\n",
|
||||
"- Wetlands None\n",
|
||||
"- Springs\n",
|
||||
"- Springs None\n",
|
||||
"- Livestock Near\n",
|
||||
"- Livestock None\n",
|
||||
"- Occupied Buildings\n",
|
||||
"- Occupied Buildings None\n",
|
||||
"- Additional Spill Details\n",
|
||||
"- Supplemental Report Date CA\n",
|
||||
"- Human Error\n",
|
||||
"- Equipment Failure\n",
|
||||
"- Historical Unkown\n",
|
||||
"- Other\n",
|
||||
"- Other Description\n",
|
||||
"- Root Cause\n",
|
||||
"- Preventative Measures\n",
|
||||
"- Soil Excavated\n",
|
||||
"- Offsite Disposal\n",
|
||||
"- Onsite Treatment\n",
|
||||
"- Other Disposition\n",
|
||||
"- Other Disposition Description\n",
|
||||
"- Ground Water Removed\n",
|
||||
"- Surface Water Removed\n",
|
||||
"- Corrective Actions Completed\n",
|
||||
"- Approved Form 27\n",
|
||||
"- Form 27 Project Number\n",
|
||||
"- GEOID\n",
|
||||
"- TRACT_NAME\n",
|
||||
"- total_population\n",
|
||||
"- white_population\n",
|
||||
"- hispanic_population\n",
|
||||
"- median_household_income\n",
|
||||
"- poverty_population\n",
|
||||
"- unemployed_population\n",
|
||||
"- percent_white\n",
|
||||
"- percent_hispanic\n",
|
||||
"- percent_poverty\n",
|
||||
"- unemployment_rate\n",
|
||||
"- geometry\n",
|
||||
"- ruca_code\n",
|
||||
"- ruca_description\n",
|
||||
"- rurality\n",
|
||||
"- Report Year\n",
|
||||
"- Period\n",
|
||||
"- report_delay\n",
|
||||
"- report_month\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# print columns of spills_gdf\n",
|
||||
"print(\"\\nColumns in spills_gdf:\")\n",
|
||||
"for col in spills_gdf.columns:\n",
|
||||
" print(f\"- {col}\")\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"id": "98e01f3f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"🏭 TESTING INDUSTRY RESISTANCE HYPOTHESIS\n",
|
||||
"============================================================\n",
|
||||
"Checking data structure:\n",
|
||||
"Unique values in Period column: ['2021 and After' 'Before 2021']\n",
|
||||
"Period value counts: Period\n",
|
||||
"2021 and After 7397\n",
|
||||
"Before 2021 3799\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"Sample of data:\n",
|
||||
" Operator Period report_delay\n",
|
||||
"28 PDC ENERGY INC 2021 and After 1\n",
|
||||
"163 PDC ENERGY INC 2021 and After 3\n",
|
||||
"226 NOBLE ENERGY INC Before 2021 2\n",
|
||||
"334 NOBLE ENERGY INC Before 2021 2\n",
|
||||
"342 NOBLE ENERGY INC Before 2021 0\n",
|
||||
"\n",
|
||||
"1️⃣ OPERATOR-LEVEL RESISTANCE ANALYSIS\n",
|
||||
"---------------------------------------------\n",
|
||||
"Total operators: 117\n",
|
||||
"Spills per operator - Min: 1, Max: 2371\n",
|
||||
"\n",
|
||||
"Operator size distribution:\n",
|
||||
"size_category\n",
|
||||
"Small (1-5) 51\n",
|
||||
"Medium (6-20) 29\n",
|
||||
"Major (50+) 24\n",
|
||||
"Large (21-50) 13\n",
|
||||
"Name: count, dtype: int64\n",
|
||||
"\n",
|
||||
"Analyzing periods: '2021 and After' vs 'Before 2021'\n",
|
||||
"Operator-period combinations: 161\n",
|
||||
"\n",
|
||||
"Average delays by operator size and period:\n",
|
||||
" report_delay Document #\n",
|
||||
" mean count sum\n",
|
||||
"size_category Period \n",
|
||||
"Small (1-5) 2021 and After 49.30 25 69\n",
|
||||
" Before 2021 5.85 28 65\n",
|
||||
"Medium (6-20) 2021 and After 10.74 19 137\n",
|
||||
" Before 2021 4.43 24 216\n",
|
||||
"Large (21-50) 2021 and After 7.99 11 259\n",
|
||||
" Before 2021 1.89 8 170\n",
|
||||
"Major (50+) 2021 and After 5.19 23 6932\n",
|
||||
" Before 2021 3.52 23 3348\n",
|
||||
"\n",
|
||||
"2️⃣ OPERATOR DELAY CHANGES\n",
|
||||
"-----------------------------------\n",
|
||||
"Operators with data in both periods: 44\n",
|
||||
"Operators with size data: 44\n",
|
||||
"\n",
|
||||
"🚨 TOP 10 OPERATORS WITH BIGGEST DELAY INCREASES:\n",
|
||||
" 1. KP KAUFFMAN COMPANY INC | Change: + 15.0 days | Size: Major (50+)\n",
|
||||
" 2. AKA ENERGY GROUP LLC | Change: + 6.6 days | Size: Medium (6-20)\n",
|
||||
" 3. HUNTER RIDGE ENERGY SERVICES LLC | Change: + 5.0 days | Size: Small (1-5)\n",
|
||||
" 4. EXTRACTION OIL & GAS INC | Change: + 5.0 days | Size: Major (50+)\n",
|
||||
" 5. RED HAWK PETROLEUM LLC | Change: + 3.8 days | Size: Medium (6-20)\n",
|
||||
" 6. PETRO MEX RESOURCES | Change: + 3.8 days | Size: Medium (6-20)\n",
|
||||
" 7. DCP OPERATING COMPANY LP | Change: + 3.6 days | Size: Major (50+)\n",
|
||||
" 8. ENERPLUS RESOURCES (USA) CORPORATION | Change: + 1.9 days | Size: Medium (6-20)\n",
|
||||
" 9. ANADARKO WATTENBERG OIL COMPLEX LLC | Change: + 1.8 days | Size: Medium (6-20)\n",
|
||||
"10. GRAND RIVER GATHERING LLC | Change: + 1.7 days | Size: Major (50+)\n",
|
||||
"\n",
|
||||
"✅ TOP 10 OPERATORS WITH BIGGEST IMPROVEMENTS:\n",
|
||||
" 1. PETRO OPERATING COMPANY LLC | Change: -80.9 days | Size: Medium (6-20)\n",
|
||||
" 2. FOUNDATION ENERGY MANAGEMENT LLC | Change: -63.0 days | Size: Large (21-50)\n",
|
||||
" 3. BLUE CHIP OIL INC | Change: -54.1 days | Size: Medium (6-20)\n",
|
||||
" 4. UTAH GAS OP LTD DBA UTAH GAS CORP | Change: -23.2 days | Size: Major (50+)\n",
|
||||
" 5. CHEVRON USA INC | Change: -17.3 days | Size: Major (50+)\n",
|
||||
" 6. BAYSWATER EXPLORATION & PRODUCTION LLC | Change: -9.7 days | Size: Major (50+)\n",
|
||||
" 7. GREAT WESTERN OPERATING COMPANY LLC | Change: -7.4 days | Size: Major (50+)\n",
|
||||
" 8. CRESTONE PEAK RESOURCES OPERATING LLC | Change: -3.8 days | Size: Major (50+)\n",
|
||||
" 9. NOBLE ENERGY INC | Change: -3.3 days | Size: Major (50+)\n",
|
||||
"10. KERR MCGEE GATHERING LLC | Change: -3.0 days | Size: Major (50+)\n",
|
||||
"\n",
|
||||
"3️⃣ OPERATOR SIZE EFFECT ANALYSIS\n",
|
||||
"----------------------------------------\n",
|
||||
"Average delay changes by operator size:\n",
|
||||
" mean count std\n",
|
||||
"size_category \n",
|
||||
"Small (1-5) 3.00 2 2.83\n",
|
||||
"Medium (6-20) -8.61 14 25.61\n",
|
||||
"Large (21-50) -10.17 6 25.90\n",
|
||||
"Major (50+) -2.05 22 7.59\n",
|
||||
"\n",
|
||||
"🔍 Large vs Small Operators:\n",
|
||||
" Large operators (n=28): -3.79 days average change\n",
|
||||
" Small operators (n=16): -7.16 days average change\n",
|
||||
" Statistical test: t=0.596, p=0.5543 (not significant)\n",
|
||||
"\n",
|
||||
"4️⃣ SPILL SIZE RESISTANCE ANALYSIS\n",
|
||||
"----------------------------------------\n",
|
||||
"Delay changes by spill size:\n",
|
||||
"spill_size_category\n",
|
||||
"Small (0-1 bbl) NaN\n",
|
||||
"Medium (1-10 bbl) NaN\n",
|
||||
"Large (10-50 bbl) NaN\n",
|
||||
"Major (50+ bbl) NaN\n",
|
||||
"Name: change, dtype: float64\n",
|
||||
"\n",
|
||||
"5️⃣ GEOGRAPHIC RESISTANCE PATTERNS\n",
|
||||
"----------------------------------------\n",
|
||||
"🚨 COUNTIES WITH BIGGEST DELAY INCREASES (min 20 spills):\n",
|
||||
" GARFIELD : -0.1 days (1437 spills)\n",
|
||||
" WELD : -1.3 days (9041 spills)\n",
|
||||
" RIO BLANCO : -6.2 days (718 spills)\n",
|
||||
"\n",
|
||||
"6️⃣ RESISTANCE HYPOTHESIS ASSESSMENT\n",
|
||||
"---------------------------------------------\n",
|
||||
"✅ Major operators overrepresented in worst performers\n",
|
||||
"✅ Large operators had bigger delay increases than small operators\n",
|
||||
"❌ Major spills did not have above-average delay increases\n",
|
||||
"\n",
|
||||
"📊 RESISTANCE HYPOTHESIS SCORE: 67% (2/3 tests support)\n",
|
||||
"⚠️ MODERATE EVIDENCE for industry resistance\n",
|
||||
"\n",
|
||||
"✅ Analysis complete!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1241247/2502002660.py:62: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
||||
" size_period_summary = operator_period_stats.groupby(['size_category', 'Period']).agg({\n",
|
||||
"/tmp/ipykernel_1241247/2502002660.py:88: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" complete_operators['delay_change'] = complete_operators[after_period] - complete_operators[before_period]\n",
|
||||
"/tmp/ipykernel_1241247/2502002660.py:118: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
||||
" size_changes = complete_with_size.groupby('size_category')['delay_change'].agg(['mean', 'count', 'std']).round(2)\n",
|
||||
"/tmp/ipykernel_1241247/2502002660.py:155: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
|
||||
" size_period_analysis = spills_gdf.groupby(['spill_size_category', 'Period'])['report_delay'].mean().unstack()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"import numpy as np\n",
|
||||
"from scipy import stats\n",
|
||||
"import statsmodels.formula.api as smf\n",
|
||||
"\n",
|
||||
"print(\"🏭 TESTING INDUSTRY RESISTANCE HYPOTHESIS\")\n",
|
||||
"print(\"=\" * 60)\n",
|
||||
"\n",
|
||||
"# First, let's check what Period values we actually have\n",
|
||||
"print(\"Checking data structure:\")\n",
|
||||
"print(\"Unique values in Period column:\", spills_gdf['Period'].unique())\n",
|
||||
"print(\"Period value counts:\", spills_gdf['Period'].value_counts())\n",
|
||||
"print(\"Sample of data:\")\n",
|
||||
"print(spills_gdf[['Operator', 'Period', 'report_delay']].head())\n",
|
||||
"\n",
|
||||
"# 1. OPERATOR-LEVEL RESISTANCE PATTERNS\n",
|
||||
"print(\"\\n1️⃣ OPERATOR-LEVEL RESISTANCE ANALYSIS\")\n",
|
||||
"print(\"-\" * 45)\n",
|
||||
"\n",
|
||||
"# Calculate operator size (number of spills as proxy for company size)\n",
|
||||
"operator_stats = spills_gdf.groupby('Operator').agg({\n",
|
||||
" 'Document #': 'count',\n",
|
||||
" 'report_delay': 'mean'\n",
|
||||
"}).rename(columns={'Document #': 'total_spills', 'report_delay': 'avg_delay'})\n",
|
||||
"\n",
|
||||
"print(f\"Total operators: {len(operator_stats)}\")\n",
|
||||
"print(f\"Spills per operator - Min: {operator_stats['total_spills'].min()}, Max: {operator_stats['total_spills'].max()}\")\n",
|
||||
"\n",
|
||||
"# Categorize operators by size\n",
|
||||
"operator_stats['size_category'] = pd.cut(operator_stats['total_spills'], \n",
|
||||
" bins=[0, 5, 20, 50, float('inf')], \n",
|
||||
" labels=['Small (1-5)', 'Medium (6-20)', 'Large (21-50)', 'Major (50+)'])\n",
|
||||
"\n",
|
||||
"print(\"\\nOperator size distribution:\")\n",
|
||||
"print(operator_stats['size_category'].value_counts())\n",
|
||||
"\n",
|
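||||
"# Get period values. NOTE(review): sorted() orders these alphabetically, so '2021 and After' sorts ahead of 'Before 2021' and is assigned to before_period below (before/after are swapped, which flips the sign of delay_change) - confirm and fix\n",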
|
||||
"period_values = sorted(spills_gdf['Period'].unique())\n",
|
||||
"if len(period_values) >= 2:\n",
|
||||
" before_period = period_values[0] \n",
|
||||
" after_period = period_values[1] \n",
|
||||
" print(f\"\\nAnalyzing periods: '{before_period}' vs '{after_period}'\")\n",
|
||||
" \n",
|
||||
" # Calculate average delays by operator and period\n",
|
||||
" operator_period_stats = spills_gdf.groupby(['Operator', 'Period']).agg({\n",
|
||||
" 'report_delay': 'mean',\n",
|
||||
" 'Document #': 'count'\n",
|
||||
" }).reset_index()\n",
|
||||
" \n",
|
||||
" print(f\"Operator-period combinations: {len(operator_period_stats)}\")\n",
|
||||
" \n",
|
||||
" # Add size categories by merging\n",
|
||||
" operator_period_stats = operator_period_stats.merge(\n",
|
||||
" operator_stats[['size_category']].reset_index(), \n",
|
||||
" on='Operator', \n",
|
||||
" how='left'\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Summary by size and period\n",
|
||||
" size_period_summary = operator_period_stats.groupby(['size_category', 'Period']).agg({\n",
|
||||
" 'report_delay': ['mean', 'count'],\n",
|
||||
" 'Document #': 'sum'\n",
|
||||
" }).round(2)\n",
|
||||
" \n",
|
||||
" print(\"\\nAverage delays by operator size and period:\")\n",
|
||||
" print(size_period_summary)\n",
|
||||
" \n",
|
||||
" # 2. OPERATOR CHANGES ANALYSIS\n",
|
||||
" print(\"\\n2️⃣ OPERATOR DELAY CHANGES\")\n",
|
||||
" print(\"-\" * 35)\n",
|
||||
" \n",
|
||||
" # Pivot to get before/after for each operator\n",
|
||||
" operator_pivot = operator_period_stats.pivot_table(\n",
|
||||
" index='Operator', \n",
|
||||
" columns='Period', \n",
|
||||
" values='report_delay', \n",
|
||||
" aggfunc='mean'\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" print(f\"Operators with data in both periods: {len(operator_pivot.dropna())}\")\n",
|
||||
" \n",
|
||||
" # Only keep operators with data in both periods\n",
|
||||
" complete_operators = operator_pivot.dropna()\n",
|
||||
" \n",
|
||||
" if len(complete_operators) > 0:\n",
|
||||
" complete_operators['delay_change'] = complete_operators[after_period] - complete_operators[before_period]\n",
|
||||
" \n",
|
||||
" # Add operator size info\n",
|
||||
" complete_with_size = complete_operators.merge(\n",
|
||||
" operator_stats[['size_category', 'total_spills']], \n",
|
||||
" left_index=True, \n",
|
||||
" right_index=True, \n",
|
||||
" how='left'\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" print(f\"Operators with size data: {len(complete_with_size)}\")\n",
|
||||
" \n",
|
||||
" # Top 10 worst performers\n",
|
||||
" if len(complete_with_size) >= 10:\n",
|
||||
" worst_performers = complete_with_size.nlargest(10, 'delay_change')\n",
|
||||
" print(f\"\\n🚨 TOP 10 OPERATORS WITH BIGGEST DELAY INCREASES:\")\n",
|
||||
" for idx, (operator, data) in enumerate(worst_performers.iterrows(), 1):\n",
|
||||
" print(f\"{idx:2d}. {operator[:50]:50s} | Change: +{data['delay_change']:5.1f} days | Size: {data['size_category']}\")\n",
|
||||
" \n",
|
||||
" # Best performers\n",
|
||||
" if len(complete_with_size) >= 10:\n",
|
||||
" best_performers = complete_with_size.nsmallest(10, 'delay_change')\n",
|
||||
" print(f\"\\n✅ TOP 10 OPERATORS WITH BIGGEST IMPROVEMENTS:\")\n",
|
||||
" for idx, (operator, data) in enumerate(best_performers.iterrows(), 1):\n",
|
||||
" print(f\"{idx:2d}. {operator[:50]:50s} | Change: {data['delay_change']:5.1f} days | Size: {data['size_category']}\")\n",
|
||||
" \n",
|
||||
" # 3. SIZE-BASED ANALYSIS\n",
|
||||
" print(\"\\n3️⃣ OPERATOR SIZE EFFECT ANALYSIS\")\n",
|
||||
" print(\"-\" * 40)\n",
|
||||
" \n",
|
||||
" size_changes = complete_with_size.groupby('size_category')['delay_change'].agg(['mean', 'count', 'std']).round(2)\n",
|
||||
" print(\"Average delay changes by operator size:\")\n",
|
||||
" print(size_changes)\n",
|
||||
" \n",
|
||||
"        # Statistical test: Large + Major operators vs Small + Medium operators\n",
|
||||
" large_ops = complete_with_size[complete_with_size['size_category'].isin(['Large (21-50)', 'Major (50+)'])]\n",
|
||||
" small_ops = complete_with_size[complete_with_size['size_category'].isin(['Small (1-5)', 'Medium (6-20)'])]\n",
|
||||
" \n",
|
||||
" if len(large_ops) > 0 and len(small_ops) > 0:\n",
|
||||
" t_stat, p_val = stats.ttest_ind(large_ops['delay_change'], small_ops['delay_change'])\n",
|
||||
" print(f\"\\n🔍 Large vs Small Operators:\")\n",
|
||||
" print(f\" Large operators (n={len(large_ops)}): {large_ops['delay_change'].mean():+.2f} days average change\")\n",
|
||||
" print(f\" Small operators (n={len(small_ops)}): {small_ops['delay_change'].mean():+.2f} days average change\")\n",
|
||||
" print(f\" Statistical test: t={t_stat:.3f}, p={p_val:.4f} ({'significant' if p_val < 0.05 else 'not significant'})\")\n",
|
||||
" \n",
|
||||
" # 4. SPILL SIZE ANALYSIS\n",
|
||||
" print(\"\\n4️⃣ SPILL SIZE RESISTANCE ANALYSIS\")\n",
|
||||
" print(\"-\" * 40)\n",
|
||||
" \n",
|
||||
" # Create total volume\n",
|
||||
" volume_cols = ['Oil Spill Volume', 'Condensate Spill Volume', 'Flow Back Spill Volume', \n",
|
||||
" 'Produced Water Spill Volume', 'E&P Waste Spill Volume', 'Drilling Fluid Spill Volume']\n",
|
||||
" \n",
|
||||
" for col in volume_cols:\n",
|
||||
" if col in spills_gdf.columns:\n",
|
||||
" spills_gdf[col] = pd.to_numeric(spills_gdf[col], errors='coerce').fillna(0)\n",
|
||||
" \n",
|
||||
" # Sum available volume columns\n",
|
||||
" available_vol_cols = [col for col in volume_cols if col in spills_gdf.columns]\n",
|
||||
" if available_vol_cols:\n",
|
||||
" spills_gdf['total_volume'] = spills_gdf[available_vol_cols].sum(axis=1)\n",
|
||||
" \n",
|
||||
" spills_gdf['spill_size_category'] = pd.cut(spills_gdf['total_volume'], \n",
|
||||
" bins=[0, 1, 10, 50, float('inf')], \n",
|
||||
" labels=['Small (0-1 bbl)', 'Medium (1-10 bbl)', \n",
|
||||
" 'Large (10-50 bbl)', 'Major (50+ bbl)'])\n",
|
||||
" \n",
|
||||
" size_period_analysis = spills_gdf.groupby(['spill_size_category', 'Period'])['report_delay'].mean().unstack()\n",
|
||||
" \n",
|
||||
" if len(size_period_analysis.columns) == 2:\n",
|
||||
" size_period_analysis['change'] = size_period_analysis.iloc[:, 1] - size_period_analysis.iloc[:, 0]\n",
|
||||
" print(\"Delay changes by spill size:\")\n",
|
||||
" print(size_period_analysis['change'].sort_values(ascending=False).round(2))\n",
|
||||
" \n",
|
||||
" # 5. GEOGRAPHIC PATTERNS\n",
|
||||
" print(\"\\n5️⃣ GEOGRAPHIC RESISTANCE PATTERNS\")\n",
|
||||
" print(\"-\" * 40)\n",
|
||||
" \n",
|
||||
" county_analysis = spills_gdf.groupby(['county', 'Period'])['report_delay'].mean().unstack()\n",
|
||||
" \n",
|
||||
" if len(county_analysis.columns) == 2:\n",
|
||||
" county_analysis['change'] = county_analysis.iloc[:, 1] - county_analysis.iloc[:, 0]\n",
|
||||
" county_analysis['total_spills'] = spills_gdf.groupby('county').size()\n",
|
||||
" \n",
|
||||
" # Filter for counties with significant data\n",
|
||||
" significant_counties = county_analysis[county_analysis['total_spills'] >= 20]\n",
|
||||
" \n",
|
||||
" if len(significant_counties) > 0:\n",
|
||||
" worst_counties = significant_counties.nlargest(10, 'change')\n",
|
||||
" print(\"🚨 COUNTIES WITH BIGGEST DELAY INCREASES (min 20 spills):\")\n",
|
||||
" for county, data in worst_counties.iterrows():\n",
|
||||
" print(f\" {county:25s}: {data['change']:+5.1f} days ({data['total_spills']:3.0f} spills)\")\n",
|
||||
"\n",
|
||||
" # 6. SUMMARY ASSESSMENT\n",
|
||||
" print(\"\\n6️⃣ RESISTANCE HYPOTHESIS ASSESSMENT\")\n",
|
||||
" print(\"-\" * 45)\n",
|
||||
" \n",
|
||||
" evidence_count = 0\n",
|
||||
" total_tests = 0\n",
|
||||
" \n",
|
||||
" # Test 1: Do major operators show up disproportionately in worst performers?\n",
|
||||
" if 'worst_performers' in locals() and len(worst_performers) > 0:\n",
|
||||
" total_tests += 1\n",
|
||||
" major_in_worst = (worst_performers['size_category'] == 'Major (50+)').sum()\n",
|
||||
" if major_in_worst >= 3: # 30% or more\n",
|
||||
" evidence_count += 1\n",
|
||||
" print(\"✅ Major operators overrepresented in worst performers\")\n",
|
||||
" else:\n",
|
||||
" print(\"❌ Major operators not overrepresented in worst performers\")\n",
|
||||
" \n",
|
||||
" # Test 2: Do large operators have bigger average increases?\n",
|
||||
" if 'large_ops' in locals() and 'small_ops' in locals() and len(large_ops) > 0 and len(small_ops) > 0:\n",
|
||||
" total_tests += 1\n",
|
||||
" if large_ops['delay_change'].mean() > small_ops['delay_change'].mean():\n",
|
||||
" evidence_count += 1\n",
|
||||
" print(\"✅ Large operators had bigger delay increases than small operators\")\n",
|
||||
" else:\n",
|
||||
" print(\"❌ Small operators had bigger delay increases than large operators\")\n",
|
||||
" \n",
|
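||||
"    # Test 3: Do larger spills have bigger increases? NOTE(review): when 'change' is NaN (as in the recorded output) the comparison is False, so this is still counted as a failed test - verify the volume columns and bins\n",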
|
||||
" if 'size_period_analysis' in locals() and 'change' in size_period_analysis.columns:\n",
|
||||
" total_tests += 1\n",
|
||||
" if size_period_analysis['change'].loc['Major (50+ bbl)'] > size_period_analysis['change'].mean():\n",
|
||||
" evidence_count += 1\n",
|
||||
" print(\"✅ Major spills had above-average delay increases\")\n",
|
||||
" else:\n",
|
||||
" print(\"❌ Major spills did not have above-average delay increases\")\n",
|
||||
" \n",
|
||||
" if total_tests > 0:\n",
|
||||
" resistance_score = (evidence_count / total_tests) * 100\n",
|
||||
" print(f\"\\n📊 RESISTANCE HYPOTHESIS SCORE: {resistance_score:.0f}% ({evidence_count}/{total_tests} tests support)\")\n",
|
||||
" \n",
|
||||
" if resistance_score >= 67:\n",
|
||||
" print(\"🚨 STRONG EVIDENCE for systematic industry resistance\")\n",
|
||||
" elif resistance_score >= 33:\n",
|
||||
" print(\"⚠️ MODERATE EVIDENCE for industry resistance\")\n",
|
||||
" else:\n",
|
||||
" print(\"✅ LIMITED EVIDENCE for systematic resistance\")\n",
|
||||
" else:\n",
|
||||
" print(\"⚠️ Insufficient data to assess resistance hypothesis\")\n",
|
||||
"\n",
|
||||
"else:\n",
|
||||
" print(\"⚠️ Need at least 2 time periods for analysis\")\n",
|
||||
"\n",
|
||||
"print(f\"\\n✅ Analysis complete!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f6b8b35c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
Reference in New Issue
Block a user