Initial commit
This commit is contained in:
607
rebuild/add_shape_layers.ipynb
Normal file
607
rebuild/add_shape_layers.ipynb
Normal file
@@ -0,0 +1,607 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e095f37f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Connected to texas_data at localhost:5432 as postgres\n",
|
||||
"{'well_tables_found': ['well_shape_tract', 'well_shapes']}\n",
|
||||
"{'using_well_table': 'well_shape_tract'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Cell 1: Imports & database connection setup\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import geopandas as gpd\n",
|
||||
"from sqlalchemy import create_engine, text\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"PGHOST = os.getenv(\"PGHOST\", \"localhost\")\n",
|
||||
"PGPORT = os.getenv(\"PGPORT\", \"5432\")\n",
|
||||
"PGUSER = os.getenv(\"PGUSER\", \"postgres\")\n",
|
||||
"PGPASSWORD = os.getenv(\"PGPASSWORD\", \"\")\n",
|
||||
"PGDATABASE = os.getenv(\"PGDATABASE\", \"texas_data\") # default; we may switch if target DB differs\n",
|
||||
"\n",
|
||||
"pg_url = f\"postgresql+psycopg2://{PGUSER}:{PGPASSWORD}@{PGHOST}:{PGPORT}/{PGDATABASE}\"\n",
|
||||
"engine = create_engine(pg_url)\n",
|
||||
"print(f\"Connected to {PGDATABASE} at {PGHOST}:{PGPORT} as {PGUSER}\")\n",
|
||||
"\n",
|
||||
"# Detect candidate well tables\n",
|
||||
"candidate_well_tables = [\"well_shape_tract\", \"well_shapes\", \"wells\", \"rcc_well_api_raw\"]\n",
|
||||
"existing_well_tables = []\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" rows = conn.execute(text(\"\"\"\n",
|
||||
" SELECT table_name\n",
|
||||
" FROM information_schema.tables\n",
|
||||
" WHERE table_schema='public'\n",
|
||||
" \"\"\")).fetchall()\n",
|
||||
" present = {r[0] for r in rows}\n",
|
||||
" for t in candidate_well_tables:\n",
|
||||
" if t in present:\n",
|
||||
" existing_well_tables.append(t)\n",
|
||||
"\n",
|
||||
"print({\"well_tables_found\": existing_well_tables})\n",
|
||||
"# Select preferred well source by priority order\n",
|
||||
"preferred_order = [\"well_shape_tract\", \"well_shapes\", \"wells\", \"rcc_well_api_raw\"]\n",
|
||||
"well_table = None\n",
|
||||
"for t in preferred_order:\n",
|
||||
" if t in existing_well_tables:\n",
|
||||
" well_table = t\n",
|
||||
" break\n",
|
||||
"if well_table is None:\n",
|
||||
" raise RuntimeError(\"No well table found among candidates; ensure wells are loaded first.\")\n",
|
||||
"print({\"using_well_table\": well_table})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "cd8aea17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cell 2: Helper to read and standardize a shapefile layer to EPSG:4326\n",
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"def load_layer(path: Path, layer_name: str, name_field_candidates=None) -> gpd.GeoDataFrame:\n",
|
||||
" if name_field_candidates is None:\n",
|
||||
" name_field_candidates = [\"name\", \"Name\", \"Basin\", \"BASIN\", \"Play\", \"PLAY\", \"Region\", \"REGION\", \"TITLE\"]\n",
|
||||
" if not path.exists():\n",
|
||||
" raise FileNotFoundError(f\"Path does not exist: {path}\")\n",
|
||||
" gdf = gpd.read_file(path)\n",
|
||||
" # Reproject if needed\n",
|
||||
" if gdf.crs is None:\n",
|
||||
" print(f\"[warn] {layer_name} has no CRS; assuming EPSG:4326\")\n",
|
||||
" gdf = gdf.set_crs(4326)\n",
|
||||
" if gdf.crs.to_epsg() != 4326:\n",
|
||||
" gdf = gdf.to_crs(4326)\n",
|
||||
" # Pick a name field\n",
|
||||
" chosen = None\n",
|
||||
" for cand in name_field_candidates:\n",
|
||||
" if cand in gdf.columns:\n",
|
||||
" chosen = cand\n",
|
||||
" break\n",
|
||||
" if chosen is None:\n",
|
||||
" # Create a synthetic name\n",
|
||||
" gdf[\"feature_name\"] = gdf.index.astype(str)\n",
|
||||
" chosen = \"feature_name\"\n",
|
||||
" gdf = gdf.rename(columns={chosen: \"feature_name\"})\n",
|
||||
" gdf = gdf[[\"feature_name\", \"geometry\"]].copy()\n",
|
||||
" gdf[\"feature_name\"] = gdf[\"feature_name\"].astype(str).str.strip()\n",
|
||||
" gdf = gdf[gdf[\"geometry\"].notna()]\n",
|
||||
" gdf[\"feature_name\"] = gdf[\"feature_name\"].replace({\"\": None})\n",
|
||||
" print({\"layer\": layer_name, \"features\": len(gdf)})\n",
|
||||
" return gdf\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6bb77203",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'basin_shp': '/home/dadams/Repos/texas-rebuild-postgis/data/oil_gas_basin_shape/Tx_ShaleBasins_11_USEIA.shp', 'play_shp': '/home/dadams/Repos/texas-rebuild-postgis/data/shale_play_shape/Tx_ShalePlays_11_USEIA.shp', 'texmex_shp': '/home/dadams/Repos/texas-rebuild-postgis/data/texmex_shape/tl_2023_us_internationalboundary.shp'}\n",
|
||||
"{'layer': 'basins', 'features': 9}\n",
|
||||
"{'layer': 'shale_plays', 'features': 10}\n",
|
||||
"{'layer': 'texmex', 'features': 4759}\n",
|
||||
"{'basins_rows': 9, 'plays_rows': 10, 'texmex_rows': 4759}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Cell 3: Load basin, shale play, and texmex regions (robust discovery + diagnostics)\n",
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"base_dir = Path(\"data\")\n",
|
||||
"basin_dir = base_dir / \"oil_gas_basin_shape\"\n",
|
||||
"play_dir = base_dir / \"shale_play_shape\"\n",
|
||||
"texmex_dir = base_dir / \"texmex_shape\" # now pointing to data/texmex_shape\n",
|
||||
"\n",
|
||||
"# Provide direct shapefile paths (user-specified)\n",
|
||||
"DIRECT_BASIN_SHP = Path(\"/home/dadams/Repos/texas-rebuild-postgis/data/oil_gas_basin_shape/Tx_ShaleBasins_11_USEIA.shp\")\n",
|
||||
"DIRECT_PLAY_SHP = Path(\"/home/dadams/Repos/texas-rebuild-postgis/data/shale_play_shape/Tx_ShalePlays_11_USEIA.shp\")\n",
|
||||
"DIRECT_TEXMEX_SHP = Path(\"/home/dadams/Repos/texas-rebuild-postgis/data/texmex_shape/tl_2023_us_internationalboundary.shp\")\n",
|
||||
"\n",
|
||||
"# Utility: list all shapefiles under a directory (one level recursive)\n",
|
||||
"def list_all_shapefiles(root: Path, max_results=50):\n",
|
||||
" if not root.exists():\n",
|
||||
" return []\n",
|
||||
" files = list(root.rglob(\"*.shp\"))\n",
|
||||
" return files[:max_results]\n",
|
||||
"\n",
|
||||
"# Discover a shapefile by directory or accept a direct file path. If missing, print suggestions instead of hard failing.\n",
|
||||
"def find_first_shp(path_candidate: Path, hint: str = \"layer\") -> Path | None:\n",
|
||||
" if path_candidate and path_candidate.is_file() and path_candidate.suffix.lower() == \".shp\":\n",
|
||||
" return path_candidate\n",
|
||||
" candidates = []\n",
|
||||
" if path_candidate and path_candidate.exists() and path_candidate.is_dir():\n",
|
||||
" candidates.extend(path_candidate.rglob(\"*.shp\"))\n",
|
||||
" if base_dir.exists():\n",
|
||||
" candidates.extend([p for p in base_dir.rglob(f\"*{hint}*.shp\")])\n",
|
||||
" seen = set(); deduped = []\n",
|
||||
" for c in candidates:\n",
|
||||
" if c not in seen:\n",
|
||||
" deduped.append(c); seen.add(c)\n",
|
||||
" if deduped:\n",
|
||||
" return deduped[0]\n",
|
||||
" print(f\"[warn] No shapefile found for hint '{hint}'. Listing available shapefiles under {base_dir}:\")\n",
|
||||
" for p in list_all_shapefiles(base_dir):\n",
|
||||
" print(f\" - {p}\")\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
"basin_shp = find_first_shp(DIRECT_BASIN_SHP if DIRECT_BASIN_SHP.exists() else basin_dir, hint=\"basin\")\n",
|
||||
"play_shp = find_first_shp(DIRECT_PLAY_SHP if DIRECT_PLAY_SHP.exists() else play_dir, hint=\"play\")\n",
|
||||
"texmex_shp = find_first_shp(DIRECT_TEXMEX_SHP if DIRECT_TEXMEX_SHP.exists() else texmex_dir, hint=\"texmex\")\n",
|
||||
"\n",
|
||||
"print({\"basin_shp\": str(basin_shp) if basin_shp else None,\n",
|
||||
" \"play_shp\": str(play_shp) if play_shp else None,\n",
|
||||
" \"texmex_shp\": str(texmex_shp) if texmex_shp else None})\n",
|
||||
"\n",
|
||||
"basins_gdf = load_layer(basin_shp, \"basins\") if basin_shp else gpd.GeoDataFrame(columns=[\"feature_name\",\"geometry\"], geometry=\"geometry\", crs=4326)\n",
|
||||
"plays_gdf = load_layer(play_shp, \"shale_plays\") if play_shp else gpd.GeoDataFrame(columns=[\"feature_name\",\"geometry\"], geometry=\"geometry\", crs=4326)\n",
|
||||
"texmex_gdf = load_layer(texmex_shp, \"texmex\") if texmex_shp else gpd.GeoDataFrame(columns=[\"feature_name\",\"geometry\"], geometry=\"geometry\", crs=4326)\n",
|
||||
"\n",
|
||||
"print({\"basins_rows\": len(basins_gdf), \"plays_rows\": len(plays_gdf), \"texmex_rows\": len(texmex_gdf)})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a4d82064",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'table': 'oil_gas_basins', 'rows': 9}\n",
|
||||
"{'table': 'shale_plays', 'rows': 10}\n",
|
||||
"{'table': 'texmex_regions', 'rows': 4759}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Cell 4: Persist shape layers to PostGIS with GIST indexes\n",
|
||||
"layer_specs = [\n",
|
||||
" (basins_gdf, \"oil_gas_basins\", \"basin_name\"),\n",
|
||||
" (plays_gdf, \"shale_plays\", \"play_name\"),\n",
|
||||
" (texmex_gdf, \"texmex_regions\", \"texmex_name\"),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" conn.execute(text(\"CREATE EXTENSION IF NOT EXISTS postgis\"))\n",
|
||||
" for gdf, table, new_name in layer_specs:\n",
|
||||
" df = gdf.rename(columns={\"feature_name\": new_name})\n",
|
||||
" # Write (replace each time); default geometry column name varies by geopandas version (often 'geom')\n",
|
||||
" df.to_postgis(table, conn, if_exists=\"replace\", index=False)\n",
|
||||
" # Normalize geometry column name to 'geometry'\n",
|
||||
" # Try geometry_columns first\n",
|
||||
" geom_col = conn.execute(\n",
|
||||
" text(\"\"\"\n",
|
||||
" SELECT f_geometry_column\n",
|
||||
" FROM geometry_columns\n",
|
||||
" WHERE f_table_schema='public' AND f_table_name=:t\n",
|
||||
" LIMIT 1\n",
|
||||
" \"\"\"), {\"t\": table}\n",
|
||||
" ).scalar()\n",
|
||||
" if geom_col is None:\n",
|
||||
" # Fallback: inspect columns for 'geom' or 'geometry'\n",
|
||||
" cols = [r[0] for r in conn.execute(text(\n",
|
||||
" \"SELECT column_name FROM information_schema.columns WHERE table_schema='public' AND table_name=:t\"\n",
|
||||
" ), {\"t\": table})]\n",
|
||||
" if \"geometry\" in cols:\n",
|
||||
" geom_col = \"geometry\"\n",
|
||||
" elif \"geom\" in cols:\n",
|
||||
" geom_col = \"geom\"\n",
|
||||
" if geom_col and geom_col != \"geometry\":\n",
|
||||
" conn.execute(text(f\"ALTER TABLE {table} RENAME COLUMN {geom_col} TO geometry\"))\n",
|
||||
" # Create GIST index on normalized column\n",
|
||||
" conn.execute(text(f\"CREATE INDEX IF NOT EXISTS idx_{table}_geom ON {table} USING GIST (geometry)\"))\n",
|
||||
" # Basic stats\n",
|
||||
" count = conn.execute(text(f\"SELECT COUNT(*) FROM {table}\")).scalar()\n",
|
||||
" print({\"table\": table, \"rows\": int(count)})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0a3c2df9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'well_geo_features_rows': 1413157}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Cell 5: Materialize well_geo_features by spatially joining wells to basins, plays, and texmex\n",
|
||||
"# Contract:\n",
|
||||
"# - Input: well_table with geom in EPSG:4326\n",
|
||||
"# - Outputs: well_geo_features table with columns:\n",
|
||||
"# id, canonical_api10 (if present), api10_number/api_number (if present), geom,\n",
|
||||
"# basin_name, play_name, texmex_name (nullable)\n",
|
||||
"# - Indexes: btree on canonical_api10 (if exists) and GIST on geom\n",
|
||||
"\n",
|
||||
"# Probe well_table columns to pick identifiers and geometry column\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" cols = conn.execute(text(\"\"\"\n",
|
||||
" SELECT column_name\n",
|
||||
" FROM information_schema.columns\n",
|
||||
" WHERE table_schema='public' AND table_name=:t\n",
|
||||
" \"\"\"), {\"t\": well_table}).fetchall()\n",
|
||||
" cols = {r[0] for r in cols}\n",
|
||||
"\n",
|
||||
"id_cols = [c for c in [\"canonical_api10\", \"api10_number\", \"api_number\"] if c in cols]\n",
|
||||
"geom_col = \"geom\" if \"geom\" in cols else (\"geometry\" if \"geometry\" in cols else None)\n",
|
||||
"if geom_col is None:\n",
|
||||
" raise RuntimeError(f\"No geometry column found in {well_table} (expected geom or geometry)\")\n",
|
||||
"\n",
|
||||
"# Build SQL for left-joins using ST_Intersects; if none, try nearest within 1km\n",
|
||||
"select_core = f\"\"\"\n",
|
||||
"SELECT\n",
|
||||
" w.id,\n",
|
||||
" {', '.join([f'w.{c}' for c in id_cols]) if id_cols else 'NULL::text AS canonical_api10'} ,\n",
|
||||
" w.{geom_col} AS geom,\n",
|
||||
" b.basin_name,\n",
|
||||
" p.play_name,\n",
|
||||
" t.texmex_name\n",
|
||||
"FROM {well_table} w\n",
|
||||
"LEFT JOIN oil_gas_basins b\n",
|
||||
" ON ST_Intersects(w.{geom_col}, b.geometry)\n",
|
||||
"LEFT JOIN shale_plays p\n",
|
||||
" ON ST_Intersects(w.{geom_col}, p.geometry)\n",
|
||||
"LEFT JOIN texmex_regions t\n",
|
||||
" ON ST_Intersects(w.{geom_col}, t.geometry)\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"materialize_sql = f\"\"\"\n",
|
||||
"DROP TABLE IF EXISTS _well_geo_features_stage;\n",
|
||||
"CREATE TABLE _well_geo_features_stage AS\n",
|
||||
"{select_core};\n",
|
||||
"\n",
|
||||
"-- Replace target\n",
|
||||
"DROP TABLE IF EXISTS well_geo_features;\n",
|
||||
"ALTER TABLE _well_geo_features_stage RENAME TO well_geo_features;\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" conn.execute(text(\"CREATE EXTENSION IF NOT EXISTS postgis\"))\n",
|
||||
" conn.execute(text(materialize_sql))\n",
|
||||
" # Indexes\n",
|
||||
" if \"canonical_api10\" in id_cols:\n",
|
||||
" conn.execute(text(\"CREATE INDEX IF NOT EXISTS idx_wgf_api10 ON well_geo_features (canonical_api10)\"))\n",
|
||||
" conn.execute(text(\"CREATE INDEX IF NOT EXISTS idx_wgf_geom ON well_geo_features USING GIST (geom)\"))\n",
|
||||
" conn.execute(text(\"ANALYZE well_geo_features\"))\n",
|
||||
"\n",
|
||||
"# Row count and sample\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" cnt = conn.execute(text(\"SELECT COUNT(*) FROM well_geo_features\")).scalar()\n",
|
||||
"print({\"well_geo_features_rows\": int(cnt)})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "83c322ea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tables present (subset):\n",
|
||||
" - oil_gas_basins\n",
|
||||
" - shale_plays\n",
|
||||
" - texmex_regions\n",
|
||||
" - well_geo_features\n",
|
||||
"Sample well_geo_features:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||||
"columns": [
|
||||
{
|
||||
"name": "index",
|
||||
"rawType": "int64",
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"name": "id",
|
||||
"rawType": "int64",
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"name": "canonical_api10",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "api10_number",
|
||||
"rawType": "object",
|
||||
"type": "unknown"
|
||||
},
|
||||
{
|
||||
"name": "api_number",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "geom",
|
||||
"rawType": "object",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "basin_name",
|
||||
"rawType": "object",
|
||||
"type": "unknown"
|
||||
},
|
||||
{
|
||||
"name": "play_name",
|
||||
"rawType": "object",
|
||||
"type": "unknown"
|
||||
},
|
||||
{
|
||||
"name": "texmex_name",
|
||||
"rawType": "object",
|
||||
"type": "unknown"
|
||||
}
|
||||
],
|
||||
"ref": "3301df45-2297-4eaa-8fd0-58c1152387f4",
|
||||
"rows": [
|
||||
[
|
||||
"0",
|
||||
"892679",
|
||||
"4236101293",
|
||||
null,
|
||||
"36101293",
|
||||
"0101000020E6100000F9E7D515206E57C052658EAEA3123E40",
|
||||
null,
|
||||
null,
|
||||
null
|
||||
],
|
||||
[
|
||||
"1",
|
||||
"892696",
|
||||
"4236130846",
|
||||
null,
|
||||
"36130846",
|
||||
"0101000020E610000031106C9FA26E57C01AA62FF3010D3E40",
|
||||
"8",
|
||||
null,
|
||||
null
|
||||
],
|
||||
[
|
||||
"2",
|
||||
"892697",
|
||||
"4236130889",
|
||||
null,
|
||||
"36130889",
|
||||
"0101000020E61000006EAF7E272C6F57C027656F143A0B3E40",
|
||||
"8",
|
||||
null,
|
||||
null
|
||||
],
|
||||
[
|
||||
"3",
|
||||
"892698",
|
||||
"4236130612",
|
||||
null,
|
||||
"36130612",
|
||||
"0101000020E6100000FE4F591F686F57C0EF5986F5270B3E40",
|
||||
"8",
|
||||
null,
|
||||
null
|
||||
],
|
||||
[
|
||||
"4",
|
||||
"892699",
|
||||
"4236130951",
|
||||
null,
|
||||
"36130951",
|
||||
"0101000020E6100000A9056F842A6F57C0EB553C5CF4093E40",
|
||||
"8",
|
||||
null,
|
||||
null
|
||||
]
|
||||
],
|
||||
"shape": {
|
||||
"columns": 8,
|
||||
"rows": 5
|
||||
}
|
||||
},
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>canonical_api10</th>\n",
|
||||
" <th>api10_number</th>\n",
|
||||
" <th>api_number</th>\n",
|
||||
" <th>geom</th>\n",
|
||||
" <th>basin_name</th>\n",
|
||||
" <th>play_name</th>\n",
|
||||
" <th>texmex_name</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>892679</td>\n",
|
||||
" <td>4236101293</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>36101293</td>\n",
|
||||
" <td>0101000020E6100000F9E7D515206E57C052658EAEA312...</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>892696</td>\n",
|
||||
" <td>4236130846</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>36130846</td>\n",
|
||||
" <td>0101000020E610000031106C9FA26E57C01AA62FF3010D...</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>892697</td>\n",
|
||||
" <td>4236130889</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>36130889</td>\n",
|
||||
" <td>0101000020E61000006EAF7E272C6F57C027656F143A0B...</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>892698</td>\n",
|
||||
" <td>4236130612</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>36130612</td>\n",
|
||||
" <td>0101000020E6100000FE4F591F686F57C0EF5986F5270B...</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>892699</td>\n",
|
||||
" <td>4236130951</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>36130951</td>\n",
|
||||
" <td>0101000020E6100000A9056F842A6F57C0EB553C5CF409...</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" id canonical_api10 api10_number api_number \\\n",
|
||||
"0 892679 4236101293 None 36101293 \n",
|
||||
"1 892696 4236130846 None 36130846 \n",
|
||||
"2 892697 4236130889 None 36130889 \n",
|
||||
"3 892698 4236130612 None 36130612 \n",
|
||||
"4 892699 4236130951 None 36130951 \n",
|
||||
"\n",
|
||||
" geom basin_name play_name \\\n",
|
||||
"0 0101000020E6100000F9E7D515206E57C052658EAEA312... None None \n",
|
||||
"1 0101000020E610000031106C9FA26E57C01AA62FF3010D... 8 None \n",
|
||||
"2 0101000020E61000006EAF7E272C6F57C027656F143A0B... 8 None \n",
|
||||
"3 0101000020E6100000FE4F591F686F57C0EF5986F5270B... 8 None \n",
|
||||
"4 0101000020E6100000A9056F842A6F57C0EB553C5CF409... 8 None \n",
|
||||
"\n",
|
||||
" texmex_name \n",
|
||||
"0 None \n",
|
||||
"1 None \n",
|
||||
"2 None \n",
|
||||
"3 None \n",
|
||||
"4 None "
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Cell 6: Diagnostics & sample output\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" tables = conn.execute(text(\"\"\"\n",
|
||||
" SELECT table_name\n",
|
||||
" FROM information_schema.tables\n",
|
||||
" WHERE table_schema='public' AND table_type='BASE TABLE'\n",
|
||||
" \"\"\")).fetchall()\n",
|
||||
" table_list = sorted([t[0] for t in tables])\n",
|
||||
"print(\"Tables present (subset):\")\n",
|
||||
"for t in table_list:\n",
|
||||
" if t in [\"oil_gas_basins\", \"shale_plays\", \"texmex_regions\", \"well_geo_features\"]:\n",
|
||||
" print(f\" - {t}\")\n",
|
||||
"\n",
|
||||
"with engine.begin() as conn:\n",
|
||||
" sample = pd.read_sql(text(\"SELECT * FROM well_geo_features LIMIT 5\"), conn)\n",
|
||||
"print(\"Sample well_geo_features:\")\n",
|
||||
"sample"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user