From fff3019e279a283d428f0c0b60a7c3741b8b5623 Mon Sep 17 00:00:00 2001 From: dadams Date: Wed, 25 Feb 2026 12:49:52 -0800 Subject: [PATCH] Extend inspection/violation data through 2025 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SQL year filters: BETWEEN 2016 AND 2025 - is_budget_year flag: year >= 2024 (covers both 2024 budget estimate and 2025 where no budget data exists) — both excluded from regressions - Data & Methods and Analysis cells updated to reflect 2016-2025 data range with 2016-2023 as the regression sample Co-Authored-By: Claude Sonnet 4.6 --- texas_inspection_expenses.ipynb | 1062 +------------------------------ 1 file changed, 16 insertions(+), 1046 deletions(-) diff --git a/texas_inspection_expenses.ipynb b/texas_inspection_expenses.ipynb index 08bb589..d45867f 100644 --- a/texas_inspection_expenses.ipynb +++ b/texas_inspection_expenses.ipynb @@ -247,208 +247,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "43886f13", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Inspections panel: 117 district-year rows | 13 districts\n" - ] - }, - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "district", - "rawType": "object", - "type": "string" - }, - { - "name": "year", - "rawType": "int64", - "type": "integer" - }, - { - "name": "total_inspections", - "rawType": "int64", - "type": "integer" - }, - { - "name": "unique_wells", - "rawType": "int64", - "type": "integer" - }, - { - "name": "compliance_rate", - "rawType": "float64", - "type": "float" - }, - { - "name": "avg_days_between_inspections", - "rawType": "float64", - "type": "float" - } - ], - "ref": "bb0fbbc5-e333-4e3f-8d87-2fbd4cb4ce5c", - "rows": [ - [ - "0", - "01", - "2016", - "13975", - "4055", - "69.42", - "18.9" - ], - [ - "1", - "01", - "2017", - "18022", - "6153", - "83.52", - "56.8" - ], - [ - "2", - "01", - "2018", - "23826", - "9109", - "85.61", - "53.5" - ], - [ - "3", - "01", - "2019", - "19790", - "6447", - "84.97", - "79.8" - ], - [ - "4", - "01", - "2020", - "26006", - "8716", - "85.52", - "122.9" - ] - ], - "shape": { - "columns": 6, - "rows": 5 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
districtyeartotal_inspectionsunique_wellscompliance_rateavg_days_between_inspections
001201613975405569.4218.90
101201718022615383.5256.80
201201823826910985.6153.50
301201919790644784.9779.80
401202026006871685.52122.90
\n", - "
" - ], - "text/plain": [ - " district year total_inspections unique_wells compliance_rate \\\n", - "0 01 2016 13975 4055 69.42 \n", - "1 01 2017 18022 6153 83.52 \n", - "2 01 2018 23826 9109 85.61 \n", - "3 01 2019 19790 6447 84.97 \n", - "4 01 2020 26006 8716 85.52 \n", - "\n", - " avg_days_between_inspections \n", - "0 18.90 \n", - "1 56.80 \n", - "2 53.50 \n", - "3 79.80 \n", - "4 122.90 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# District-year inspection metrics aggregated in SQL.\n", "# LAG() computes days since the previous inspection for the same well (api_norm).\n", @@ -467,7 +269,7 @@ " FROM inspections\n", " WHERE inspection_date IS NOT NULL\n", " AND district IS NOT NULL\n", - " AND EXTRACT(year FROM inspection_date) BETWEEN 2016 AND 2024\n", + " AND EXTRACT(year FROM inspection_date) BETWEEN 2016 AND 2025\n", ")\n", "SELECT\n", " district,\n", @@ -488,247 +290,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "3841e2f5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Violations panel: 117 district-year rows\n" - ] - }, - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "district", - "rawType": "object", - "type": "string" - }, - { - "name": "year", - "rawType": "int64", - "type": "integer" - }, - { - "name": "total_violations", - "rawType": "int64", - "type": "integer" - }, - { - "name": "unique_wells_with_violations", - "rawType": "int64", - "type": "integer" - }, - { - "name": "major_violations", - "rawType": "int64", - "type": "integer" - }, - { - "name": "resolution_rate", - "rawType": "float64", - "type": "float" - }, - { - "name": "enforcement_rate", - "rawType": "float64", - "type": "float" - }, - { - "name": "avg_days_to_enforcement", - "rawType": "float64", - "type": "float" - } - ], - "ref": "dc5b2fc5-adce-4403-adf4-d5c7d1c2caee", - "rows": [ - [ - "0", - "01", - "2016", - "5720", - "1009", - "0", - "21.42", - "100.0", - "198.6" - ], - [ - "1", - "01", - "2017", - "4380", - "767", - "0", - "44.36", - "100.0", - "269.5" - ], - [ - "2", - "01", - "2018", - "5766", - "997", - "0", - "64.46", - "100.0", - "229.0" - ], - [ - "3", - "01", - "2019", - "3593", - "902", - "4", - "49.37", - "100.0", - "239.0" - ], - [ - "4", - "01", - "2020", - "4838", - "1019", - "5", - "27.43", - "100.0", - "402.9" - ] - ], - "shape": { - "columns": 8, - "rows": 5 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
districtyeartotal_violationsunique_wells_with_violationsmajor_violationsresolution_rateenforcement_rateavg_days_to_enforcement
001201657201009021.42100.00198.60
10120174380767044.36100.00269.50
20120185766997064.46100.00229.00
30120193593902449.37100.00239.00
401202048381019527.43100.00402.90
\n", - "
" - ], - "text/plain": [ - " district year total_violations unique_wells_with_violations \\\n", - "0 01 2016 5720 1009 \n", - "1 01 2017 4380 767 \n", - "2 01 2018 5766 997 \n", - "3 01 2019 3593 902 \n", - "4 01 2020 4838 1019 \n", - "\n", - " major_violations resolution_rate enforcement_rate \\\n", - "0 0 21.42 100.00 \n", - "1 0 44.36 100.00 \n", - "2 0 64.46 100.00 \n", - "3 4 49.37 100.00 \n", - "4 5 27.43 100.00 \n", - "\n", - " avg_days_to_enforcement \n", - "0 198.60 \n", - "1 269.50 \n", - "2 229.00 \n", - "3 239.00 \n", - "4 402.90 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# District-year violation metrics. Blank last_enf_action strings treated as no action.\n", "viol_sql = \"\"\"\n", @@ -750,7 +315,7 @@ "FROM violations\n", "WHERE violation_disc_date IS NOT NULL\n", " AND district IS NOT NULL\n", - " AND EXTRACT(year FROM violation_disc_date) BETWEEN 2016 AND 2024\n", + " AND EXTRACT(year FROM violation_disc_date) BETWEEN 2016 AND 2025\n", "GROUP BY district, year\n", "ORDER BY district, year\n", "\"\"\"\n", @@ -1453,609 +1018,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "896d152b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis panel: 117 rows | 13 districts | 9 years\n" - ] - }, - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "district", - "rawType": "object", - "type": "string" - }, - { - "name": "year", - "rawType": "int64", - "type": "integer" - }, - { - "name": "total_inspections", - "rawType": "int64", - "type": "integer" - }, - { - "name": "unique_wells", - "rawType": "int64", - "type": "integer" - }, - { - "name": "compliance_rate", - "rawType": "float64", - "type": "float" - }, - { - "name": "avg_days_between_inspections", - "rawType": "float64", - "type": "float" - }, - { - "name": "total_violations", - "rawType": "int64", - "type": "integer" - }, - { - "name": "unique_wells_with_violations", - "rawType": "int64", - "type": "integer" - }, - { - "name": "major_violations", - "rawType": "int64", - "type": "integer" - }, - { - "name": "resolution_rate", - "rawType": "float64", - "type": "float" - }, - { - "name": "enforcement_rate", - "rawType": "float64", - "type": "float" - }, - { - "name": "avg_days_to_enforcement", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_total_budget", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_salaries", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_other_personnel", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_professional_fees", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_travel", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_other_operating", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_capital_exp", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_fte", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_total_budget", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_salaries", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_other_personnel", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_professional_fees", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_travel", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_other_operating", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_capital_exp", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_fte", - "rawType": "float64", - "type": "float" - }, - { - "name": "violations_per_inspection", - "rawType": "float64", - "type": "float" - }, - { - "name": "ogi_budget_m", - "rawType": "float64", - "type": "float" - }, - { - "name": "erd_budget_m", - "rawType": "float64", - "type": "float" - }, - { - "name": "post_2019", - "rawType": "int64", - "type": "integer" - }, - { - "name": "is_budget_year", - "rawType": "int64", - "type": "integer" - }, - { - "name": "inspection_budget_share", - "rawType": "float64", - "type": "float" - } - ], - "ref": "a03434d6-237f-4e4c-bfd6-df51f8595da7", - "rows": [ - [ - "0", - "01", - "2016", - "13975", - "4055", - "69.42", - "18.9", - "5720", - "1009", - "0", - "21.42", - "100.0", - "198.6", - "18471666.0", - "15080122.0", - "685768.0", - "1546321.0", - "22630.0", - "208311.0", - "121363.0", - "256.7", - "11708475.0", - "7669719.0", - "398589.0", - "3366389.0", - "16477.0", - "210293.0", - "0.0", - "130.6", - "0.40930232558139534", - "18.471666", - "11.708475", - "0", - "0", - "0.612047041132114" - ], - [ - "1", - "01", - "2017", - "18022", - "6153", - "83.52", - "56.8", - "4380", - "767", - "0", - "44.36", - "100.0", - "269.5", - "17204058.0", - "15086262.0", - "686194.0", - "176786.0", - "19654.0", - "230525.0", - "272461.0", - "249.5", - "10911094.0", - "7273775.0", - "389348.0", - "3118066.0", - "6792.0", - "77855.0", - "0.0", - "120.3", - "0.24303628898013538", - "17.204058", - "10.911094", - "0", - "0", - "0.6119141024028609" - ], - [ - "2", - "01", - "2018", - "23826", - "9109", - "85.61", - "53.5", - "5766", - "997", - "0", - "64.46", - "100.0", - "229.0", - "17562431.0", - "13083406.0", - "430429.0", - "1147080.0", - "57312.0", - "1040639.0", - "649172.0", - "229.9", - "9846886.0", - "7292933.0", - "282337.0", - "977645.0", - "28694.0", - "1045727.0", - "0.0", - "131.0", - "0.24200453286325863", - "17.562431", - "9.846886", - "0", - "0", - "0.6407467577539419" - ], - [ - "3", - "01", - "2019", - "19790", - "6447", - "84.97", - "79.8", - "3593", - "902", - "4", - "49.37", - "100.0", - "239.0", - "21951747.0", - "14878875.0", - "340135.0", - "2895436.0", - "187048.0", - "1185772.0", - "1255930.0", - "255.6", - "11123757.0", - "8068497.0", - "217988.0", - "1493755.0", - "73651.0", - "988740.0", - "13232.0", - "137.4", - "0.18155634158665993", - "21.951747", - "11.123757", - "1", - "0", - "0.6636859411121898" - ], - [ - "4", - "01", - "2020", - "26006", - "8716", - "85.52", - "122.9", - "4838", - "1019", - "5", - "27.43", - "100.0", - "402.9", - "26057560.0", - "17228302.0", - "417683.0", - "4822351.0", - "106428.0", - "1398705.0", - "896846.0", - "284.0", - "17280569.0", - "9707894.0", - "236356.0", - "5989236.0", - "41752.0", - "1165481.0", - "54037.0", - "153.4", - "0.18603399215565639", - "26.05756", - "17.280569", - "1", - "0", - "0.6012617665151165" - ] - ], - "shape": { - "columns": 34, - "rows": 5 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
districtyeartotal_inspectionsunique_wellscompliance_rateavg_days_between_inspectionstotal_violationsunique_wells_with_violationsmajor_violationsresolution_rate...erd_travelerd_other_operatingerd_capital_experd_fteviolations_per_inspectionogi_budget_merd_budget_mpost_2019is_budget_yearinspection_budget_share
001201613975405569.4218.9057201009021.42...16,477.00210,293.000.00130.600.4118.4711.71000.61
101201718022615383.5256.804380767044.36...6,792.0077,855.000.00120.300.2417.2010.91000.61
201201823826910985.6153.505766997064.46...28,694.001,045,727.000.00131.000.2417.569.85000.64
301201919790644784.9779.803593902449.37...73,651.00988,740.0013,232.00137.400.1821.9511.12100.66
401202026006871685.52122.9048381019527.43...41,752.001,165,481.0054,037.00153.400.1926.0617.28100.60
\n", - "

5 rows × 34 columns

\n", - "
" - ], - "text/plain": [ - " district year total_inspections unique_wells compliance_rate \\\n", - "0 01 2016 13975 4055 69.42 \n", - "1 01 2017 18022 6153 83.52 \n", - "2 01 2018 23826 9109 85.61 \n", - "3 01 2019 19790 6447 84.97 \n", - "4 01 2020 26006 8716 85.52 \n", - "\n", - " avg_days_between_inspections total_violations \\\n", - "0 18.90 5720 \n", - "1 56.80 4380 \n", - "2 53.50 5766 \n", - "3 79.80 3593 \n", - "4 122.90 4838 \n", - "\n", - " unique_wells_with_violations major_violations resolution_rate ... \\\n", - "0 1009 0 21.42 ... \n", - "1 767 0 44.36 ... \n", - "2 997 0 64.46 ... \n", - "3 902 4 49.37 ... \n", - "4 1019 5 27.43 ... \n", - "\n", - " erd_travel erd_other_operating erd_capital_exp erd_fte \\\n", - "0 16,477.00 210,293.00 0.00 130.60 \n", - "1 6,792.00 77,855.00 0.00 120.30 \n", - "2 28,694.00 1,045,727.00 0.00 131.00 \n", - "3 73,651.00 988,740.00 13,232.00 137.40 \n", - "4 41,752.00 1,165,481.00 54,037.00 153.40 \n", - "\n", - " violations_per_inspection ogi_budget_m erd_budget_m post_2019 \\\n", - "0 0.41 18.47 11.71 0 \n", - "1 0.24 17.20 10.91 0 \n", - "2 0.24 17.56 9.85 0 \n", - "3 0.18 21.95 11.12 1 \n", - "4 0.19 26.06 17.28 1 \n", - "\n", - " is_budget_year inspection_budget_share \n", - "0 0 0.61 \n", - "1 0 0.61 \n", - "2 0 0.64 \n", - "3 0 0.66 \n", - "4 0 0.60 \n", - "\n", - "[5 rows x 34 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# ── Wide budget: one row per year with ogi_ / erd_ prefixed columns ──────────\n", "ogi_wide = ogi.drop(columns=\"strategy\").add_prefix(\"ogi_\")\n", @@ -2080,7 +1046,8 @@ "panel[\"ogi_budget_m\"] = panel[\"ogi_total_budget\"] / 1_000_000 # dollars → millions\n", "panel[\"erd_budget_m\"] = panel[\"erd_total_budget\"] / 1_000_000\n", "panel[\"post_2019\"] = (panel[\"year\"] >= 2019).astype(int)\n", - "panel[\"is_budget_year\"] = (panel[\"year\"] == 2024).astype(int)\n", + "# 2024 = budget estimate; 2025 = no budget data — exclude both from regressions\n", + "panel[\"is_budget_year\"] = (panel[\"year\"] >= 2024).astype(int)\n", "\n", "# Goal ambiguity: share of combined budget going to the inspection mission.\n", "# Higher share = clearer mission focus; lower share = more goal ambiguity.\n", @@ -2233,7 +1200,7 @@ "\n", "This study draws on two primary data sources. The first is the Texas Railroad Commission\n", "(RRC) Oil and Gas Division administrative database, accessed via a PostGIS spatial data\n", - "warehouse. Inspection records span fiscal years 2016–2023 and encompass approximately\n", + "warehouse. Inspection records span fiscal years 2016–2025 and encompass approximately\n", "1.9 million inspection events distributed across 13 RRC administrative districts;\n", "violation records include approximately 193,000 enforcement actions. From the inspections\n", "table, district-year aggregates are constructed for three regulatory output measures:\n", @@ -2259,7 +1226,9 @@ "### Sample and Panel Construction\n", "\n", "The unit of analysis is the **district-year**. The analytic panel contains\n", - "**N = 104 observations** (13 districts × 8 years, 2016–2023). Because RRC budget\n", + "**N = 104 observations** (13 districts × 8 years, 2016–2023; years 2024 and\n", + "2025 are included in descriptive analyses but excluded from regression models\n", + "because budget actuals are unavailable for those years). Because RRC budget\n", "appropriations are reported at the statewide level, budget and FTE variables enter the\n", "panel as year-varying but district-invariant covariates. Identification of budget effects\n", "therefore relies on year-to-year variation in statewide appropriations rather than\n", @@ -2394,7 +1363,8 @@ "variation that could indicate omitted spatial processes or spillovers across district\n", "boundaries.\n", "\n", - "All regressions exclude the fiscal year 2024 observation. Fiscal year 2017 recorded the\n", + "All regressions exclude fiscal years 2024 (budget estimate) and 2025 (no\n", + "budget data), retaining 2016–2023 as the regression sample. Fiscal year 2017 recorded the\n", "lowest OGI budget over the study period ($17.20M) and is retained as a within-sample\n", "data point; the 2017 dip in budget coincides with slightly lower average inspections per\n", "district, consistent with the capacity hypothesis.\n"