updated analysis with offshore considerations
This commit is contained in:
635
analysis/archive/well_analyzer_templates.ipynb
Normal file
635
analysis/archive/well_analyzer_templates.ipynb
Normal file
@@ -0,0 +1,635 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Well Analyzer Notebook Templates\n",
|
||||
"Use these cells as starting points for future analysis notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Imports & Environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from pathlib import Path\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"repo_root = Path('..').resolve()\n",
|
||||
"if str(repo_root) not in os.sys.path:\n",
|
||||
" os.sys.path.insert(0, str(repo_root))\n",
|
||||
"\n",
|
||||
"from analysis.well_analyzer import WellAnalyzer\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Instantiate the analyzer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-11-08 20:31:14,124 - INFO - Connecting to Postgres\n",
|
||||
"2025-11-08 20:32:36,129 - INFO - Loaded 1010431 wells from public.well_enriched_all_plus\n",
|
||||
"2025-11-08 20:32:55,260 - INFO - Loaded 2151839 inspections from public.inspections\n",
|
||||
"2025-11-08 20:32:58,951 - INFO - Loaded 242899 violations from public.violations\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'total_wells': 1010431,\n",
|
||||
" 'unique_census_tracts': 2981,\n",
|
||||
" 'total_inspections': 2151839,\n",
|
||||
" 'total_violations': 242899}"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"analyzer = WellAnalyzer(chunk_size=50_000)\n",
|
||||
"summary_stats = analyzer.get_summary_stats()\n",
|
||||
"summary_stats\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Summary stats as DataFrame"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<style type=\"text/css\">\n",
|
||||
"</style>\n",
|
||||
"<table id=\"T_fcd18\">\n",
|
||||
" <thead>\n",
|
||||
" <tr>\n",
|
||||
" <th class=\"blank level0\" > </th>\n",
|
||||
" <th id=\"T_fcd18_level0_col0\" class=\"col_heading level0 col0\" >value</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_fcd18_level0_row0\" class=\"row_heading level0 row0\" >total_wells</th>\n",
|
||||
" <td id=\"T_fcd18_row0_col0\" class=\"data row0 col0\" >1,010,431.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_fcd18_level0_row1\" class=\"row_heading level0 row1\" >unique_census_tracts</th>\n",
|
||||
" <td id=\"T_fcd18_row1_col0\" class=\"data row1 col0\" >2,981.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_fcd18_level0_row2\" class=\"row_heading level0 row2\" >total_inspections</th>\n",
|
||||
" <td id=\"T_fcd18_row2_col0\" class=\"data row2 col0\" >2,151,839.00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_fcd18_level0_row3\" class=\"row_heading level0 row3\" >total_violations</th>\n",
|
||||
" <td id=\"T_fcd18_row3_col0\" class=\"data row3 col0\" >242,899.00</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<pandas.io.formats.style.Styler at 0x7f623f6cacf0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pd.DataFrame([summary_stats]).T.rename(columns={0: 'value'}).style.format({'value': '{:,.2f}'})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Inspection analysis helpers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||||
"columns": [
|
||||
{
|
||||
"name": "index",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "value",
|
||||
"rawType": "float64",
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"ref": "648d5bab-d6a9-4e87-95a4-d81a3087ad63",
|
||||
"rows": [
|
||||
[
|
||||
"total_inspections",
|
||||
"2151839.0"
|
||||
],
|
||||
[
|
||||
"unique_wells_inspected",
|
||||
"483352.0"
|
||||
],
|
||||
[
|
||||
"overall_compliance_rate",
|
||||
"89.15035000295096"
|
||||
],
|
||||
[
|
||||
"avg_days_between_inspections",
|
||||
"548.291420910082"
|
||||
],
|
||||
[
|
||||
"median_days_between_inspections",
|
||||
"322.0"
|
||||
]
|
||||
],
|
||||
"shape": {
|
||||
"columns": 1,
|
||||
"rows": 5
|
||||
}
|
||||
},
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>value</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>total_inspections</th>\n",
|
||||
" <td>2.151839e+06</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>unique_wells_inspected</th>\n",
|
||||
" <td>4.833520e+05</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>overall_compliance_rate</th>\n",
|
||||
" <td>8.915035e+01</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>avg_days_between_inspections</th>\n",
|
||||
" <td>5.482914e+02</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>median_days_between_inspections</th>\n",
|
||||
" <td>3.220000e+02</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" value\n",
|
||||
"total_inspections 2.151839e+06\n",
|
||||
"unique_wells_inspected 4.833520e+05\n",
|
||||
"overall_compliance_rate 8.915035e+01\n",
|
||||
"avg_days_between_inspections 5.482914e+02\n",
|
||||
"median_days_between_inspections 3.220000e+02"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"inspection_analysis = analyzer.analyze_inspection_patterns()\n",
|
||||
"pd.Series(inspection_analysis['overall_statistics']).to_frame('value')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Violations slice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||||
"columns": [
|
||||
{
|
||||
"name": "index",
|
||||
"rawType": "int64",
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"name": "canonical_api10",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "violation_disc_date",
|
||||
"rawType": "datetime64[ns]",
|
||||
"type": "datetime"
|
||||
},
|
||||
{
|
||||
"name": "violated_rule",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "major_viol_ind",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"ref": "7a3969db-0981-48f8-846b-31946d7c0e64",
|
||||
"rows": [
|
||||
[
|
||||
"0",
|
||||
"4233530876",
|
||||
"2017-09-19 00:00:00",
|
||||
"SWR 91(d)(1)",
|
||||
"N"
|
||||
],
|
||||
[
|
||||
"1",
|
||||
"4233532284",
|
||||
"2017-07-26 00:00:00",
|
||||
"SWR 91(d)(1)",
|
||||
"N"
|
||||
],
|
||||
[
|
||||
"2",
|
||||
"4233532284",
|
||||
"2017-09-13 00:00:00",
|
||||
"SWR 91(d)(1)",
|
||||
"N"
|
||||
],
|
||||
[
|
||||
"3",
|
||||
"4210300169",
|
||||
"2017-10-25 00:00:00",
|
||||
"SWR 91(d)(1)",
|
||||
"N"
|
||||
],
|
||||
[
|
||||
"4",
|
||||
"4222736906",
|
||||
"2016-02-02 00:00:00",
|
||||
"SWR 91(d)(1)",
|
||||
"N"
|
||||
]
|
||||
],
|
||||
"shape": {
|
||||
"columns": 4,
|
||||
"rows": 5
|
||||
}
|
||||
},
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>canonical_api10</th>\n",
|
||||
" <th>violation_disc_date</th>\n",
|
||||
" <th>violated_rule</th>\n",
|
||||
" <th>major_viol_ind</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>4233530876</td>\n",
|
||||
" <td>2017-09-19</td>\n",
|
||||
" <td>SWR 91(d)(1)</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>4233532284</td>\n",
|
||||
" <td>2017-07-26</td>\n",
|
||||
" <td>SWR 91(d)(1)</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>4233532284</td>\n",
|
||||
" <td>2017-09-13</td>\n",
|
||||
" <td>SWR 91(d)(1)</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4210300169</td>\n",
|
||||
" <td>2017-10-25</td>\n",
|
||||
" <td>SWR 91(d)(1)</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>4222736906</td>\n",
|
||||
" <td>2016-02-02</td>\n",
|
||||
" <td>SWR 91(d)(1)</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" canonical_api10 violation_disc_date violated_rule major_viol_ind\n",
|
||||
"0 4233530876 2017-09-19 SWR 91(d)(1) N\n",
|
||||
"1 4233532284 2017-07-26 SWR 91(d)(1) N\n",
|
||||
"2 4233532284 2017-09-13 SWR 91(d)(1) N\n",
|
||||
"3 4210300169 2017-10-25 SWR 91(d)(1) N\n",
|
||||
"4 4222736906 2016-02-02 SWR 91(d)(1) N"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"violations_df = analyzer.data['violations'][['canonical_api10', 'violation_disc_date', 'violated_rule', 'major_viol_ind']]\n",
|
||||
"violations_df.head()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Environmental-justice aggregation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||||
"columns": [
|
||||
{
|
||||
"name": "index",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "high",
|
||||
"rawType": "float64",
|
||||
"type": "float"
|
||||
},
|
||||
{
|
||||
"name": "low",
|
||||
"rawType": "float64",
|
||||
"type": "float"
|
||||
}
|
||||
],
|
||||
"ref": "5ab7f2d3-3c34-4a70-83f8-23a6929c2e30",
|
||||
"rows": [
|
||||
[
|
||||
"avg_inspections",
|
||||
"5.668602168650754",
|
||||
"5.8424998043529195"
|
||||
],
|
||||
[
|
||||
"avg_violations",
|
||||
"0.6798208564386784",
|
||||
"0.7418603711839766"
|
||||
],
|
||||
[
|
||||
"major_violations",
|
||||
"0.01963439404197698",
|
||||
"0.02503382949932341"
|
||||
],
|
||||
[
|
||||
"avg_compliance_rate",
|
||||
"91.80852389969775",
|
||||
"91.22339687712729"
|
||||
],
|
||||
[
|
||||
"avg_days_between_inspections",
|
||||
"757.1575143021564",
|
||||
"732.3780043474643"
|
||||
],
|
||||
[
|
||||
"reinspection_compliance_rate",
|
||||
"13.868187092556035",
|
||||
"14.88125749212477"
|
||||
],
|
||||
[
|
||||
"wells_in_tract",
|
||||
"307.38253215978335",
|
||||
"374.30311231393773"
|
||||
]
|
||||
],
|
||||
"shape": {
|
||||
"columns": 2,
|
||||
"rows": 7
|
||||
}
|
||||
},
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>high</th>\n",
|
||||
" <th>low</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>avg_inspections</th>\n",
|
||||
" <td>5.668602</td>\n",
|
||||
" <td>5.842500</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>avg_violations</th>\n",
|
||||
" <td>0.679821</td>\n",
|
||||
" <td>0.741860</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>major_violations</th>\n",
|
||||
" <td>0.019634</td>\n",
|
||||
" <td>0.025034</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>avg_compliance_rate</th>\n",
|
||||
" <td>91.808524</td>\n",
|
||||
" <td>91.223397</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>avg_days_between_inspections</th>\n",
|
||||
" <td>757.157514</td>\n",
|
||||
" <td>732.378004</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>reinspection_compliance_rate</th>\n",
|
||||
" <td>13.868187</td>\n",
|
||||
" <td>14.881257</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>wells_in_tract</th>\n",
|
||||
" <td>307.382532</td>\n",
|
||||
" <td>374.303112</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" high low\n",
|
||||
"avg_inspections 5.668602 5.842500\n",
|
||||
"avg_violations 0.679821 0.741860\n",
|
||||
"major_violations 0.019634 0.025034\n",
|
||||
"avg_compliance_rate 91.808524 91.223397\n",
|
||||
"avg_days_between_inspections 757.157514 732.378004\n",
|
||||
"reinspection_compliance_rate 13.868187 14.881257\n",
|
||||
"wells_in_tract 307.382532 374.303112"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ej = analyzer.analyze_environmental_justice()\n",
|
||||
"pd.DataFrame(ej['high_vulnerability_vs_low']).T\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c233c217",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. District comparisons\n",
|
||||
"Group inspections by district (alphanumeric-safe) to see volume + compliance deltas."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4ada35c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"insp = analyzer.data['inspections'].copy()\n",
|
||||
"if 'district' not in insp.columns:\n",
|
||||
" raise KeyError('district column missing in inspections data')\n",
|
||||
"\n",
|
||||
"insp['district_str'] = insp['district'].astype(str).fillna('Unknown')\n",
|
||||
"summary = insp.groupby('district_str').agg(\n",
|
||||
" inspections=('district_str', 'size'),\n",
|
||||
" unique_wells=('canonical_api10', 'nunique'),\n",
|
||||
" compliance_rate=('compliance', lambda x: (x == 'Yes').mean() * 100 if 'Yes' in x.values else float('nan'))\n",
|
||||
")\n",
|
||||
"summary = summary.sort_values('inspections', ascending=False)\n",
|
||||
"summary.head(15)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user