2036 lines
67 KiB
Plaintext
2036 lines
67 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b6671d51",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Census Demographics Enrichment for Wells\n",
|
||
"\n",
|
||
"This notebook:\n",
|
||
"1. Connects to Postgres and reads tract GEOIDs from `well_shape_tract`.\n",
|
||
"2. Fetches ACS 2021 5-year demographic variables relevant to environmental justice and performance analysis via the Census API (dataset: `acs/acs5`).\n",
|
||
"3. Computes derived metrics (percent minority, poverty rate, unemployment, educational attainment, linguistic isolation proxy, etc.).\n",
|
||
"4. Writes a new table `census_tract_demographics` and (optionally) demonstrates joining it back to `well_shape_tract`.\n",
|
||
"\n",
|
||
"Prerequisites:\n",
|
||
"- Environment variables for Postgres (PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE).\n",
|
||
"- Optional: `CENSUS_API_KEY` for higher request volume (without a key, small pulls usually still work but may be throttled).\n",
|
||
"\n",
|
||
"Adjust, add, or remove variables as needed before running the fetch cell."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "5d5672c2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Distinct tracts from wells: 3551\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||
"columns": [
|
||
{
|
||
"name": "index",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "geoid",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
}
|
||
],
|
||
"ref": "1697201f-d939-4f51-a9af-8ccdf2da3849",
|
||
"rows": [
|
||
[
|
||
"0",
|
||
"48001950100"
|
||
],
|
||
[
|
||
"1",
|
||
"48001950401"
|
||
],
|
||
[
|
||
"2",
|
||
"48001950402"
|
||
],
|
||
[
|
||
"3",
|
||
"48001950500"
|
||
],
|
||
[
|
||
"4",
|
||
"48001950600"
|
||
]
|
||
],
|
||
"shape": {
|
||
"columns": 1,
|
||
"rows": 5
|
||
}
|
||
},
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>geoid</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>48001950100</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>48001950401</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>48001950402</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>48001950500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>48001950600</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" geoid\n",
|
||
"0 48001950100\n",
|
||
"1 48001950401\n",
|
||
"2 48001950402\n",
|
||
"3 48001950500\n",
|
||
"4 48001950600"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Cell 2: Pull tract list from well_shape_tract\n",
|
||
"from sqlalchemy import text\n",
|
||
"\n",
|
||
"with engine.begin() as conn:\n",
|
||
" tracts = pd.read_sql(text(\"\"\"\n",
|
||
" SELECT DISTINCT census_tract_geoid AS geoid\n",
|
||
" FROM well_shape_tract\n",
|
||
" WHERE census_tract_geoid IS NOT NULL\n",
|
||
" \"\"\"), conn)\n",
|
||
"\n",
|
||
"print(f\"Distinct tracts from wells: {len(tracts)}\")\n",
|
||
"tracts.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "25eff3bb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Total vars: 54; chunk count: 2\n",
|
||
"Raw merged ACS rows: 3551\n",
|
||
"Raw merged ACS rows: 3551\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||
"columns": [
|
||
{
|
||
"name": "index",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "geoid",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "name",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "total_population",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "white_alone",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "black_alone",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "american_indian_alone",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "asian_alone",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "native_hawaiian_pacific_alone",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "other_race_alone",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "two_or_more_races",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "hispanic_any_race",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "hispanic_base",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "poverty_universe",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "below_poverty",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "education_universe",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_002",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_003",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_004",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_005",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_006",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_007",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_008",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_009",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_010",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_011",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_012",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_013",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_014",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_015",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b15003_016",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "civilian_labor_force",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "civilian_unemployed",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "language_universe",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "linguistic_isolation_est",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "median_household_income",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "renters_universe",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b25070_008",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b25070_009",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b25070_010",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "b25070_011",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "disability_universe",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "with_disability",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_under_5",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_under_5",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_65_66",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_67_69",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_70_74",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_75_79",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_80_84",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "male_85_plus",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_65_66",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_67_69",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_70_74",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_75_79",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_80_84",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "female_85_plus",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
}
|
||
],
|
||
"ref": "bde8ae8b-2bba-4fb2-9b8d-0891f465bd5c",
|
||
"rows": [
|
||
[
|
||
"0",
|
||
"48001950100",
|
||
"Census Tract 9501, Anderson County, Texas",
|
||
"5447",
|
||
"4540",
|
||
"212",
|
||
"4",
|
||
"21",
|
||
"0",
|
||
"66",
|
||
"604",
|
||
"644",
|
||
"5447",
|
||
"5404",
|
||
"913",
|
||
"3648",
|
||
"22",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"9",
|
||
"21",
|
||
"0",
|
||
"92",
|
||
"54",
|
||
"76",
|
||
"83",
|
||
"3",
|
||
"2092",
|
||
"157",
|
||
"1823",
|
||
"0",
|
||
"61325",
|
||
"219",
|
||
"0",
|
||
"13",
|
||
"25",
|
||
"42",
|
||
"5409",
|
||
"1433",
|
||
"325",
|
||
"214",
|
||
"57",
|
||
"77",
|
||
"138",
|
||
"119",
|
||
"49",
|
||
"13",
|
||
"36",
|
||
"101",
|
||
"264",
|
||
"114",
|
||
"19",
|
||
"83"
|
||
],
|
||
[
|
||
"1",
|
||
"48001950401",
|
||
"Census Tract 9504.01, Anderson County, Texas",
|
||
"4544",
|
||
"2145",
|
||
"1682",
|
||
"10",
|
||
"8",
|
||
"0",
|
||
"58",
|
||
"641",
|
||
"1435",
|
||
"4544",
|
||
"114",
|
||
"0",
|
||
"3854",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"10",
|
||
"13",
|
||
"0",
|
||
"7",
|
||
"34",
|
||
"92",
|
||
"71",
|
||
"57",
|
||
"264",
|
||
"264",
|
||
"318",
|
||
"70",
|
||
"55",
|
||
"0",
|
||
"27",
|
||
"0",
|
||
"92813",
|
||
"15",
|
||
"6",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"114",
|
||
"49",
|
||
"0",
|
||
"15",
|
||
"28",
|
||
"15",
|
||
"14",
|
||
"8",
|
||
"10",
|
||
"10",
|
||
"0",
|
||
"0",
|
||
"4",
|
||
"0",
|
||
"7",
|
||
"0"
|
||
],
|
||
[
|
||
"2",
|
||
"48001950402",
|
||
"Census Tract 9504.02, Anderson County, Texas",
|
||
"6997",
|
||
"3476",
|
||
"2587",
|
||
"19",
|
||
"0",
|
||
"10",
|
||
"133",
|
||
"772",
|
||
"2045",
|
||
"6997",
|
||
"20",
|
||
"20",
|
||
"6624",
|
||
"35",
|
||
"0",
|
||
"0",
|
||
"20",
|
||
"10",
|
||
"24",
|
||
"31",
|
||
"35",
|
||
"113",
|
||
"88",
|
||
"247",
|
||
"405",
|
||
"386",
|
||
"469",
|
||
"46",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"-666666666",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"20",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"53",
|
||
"96",
|
||
"56",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"0"
|
||
],
|
||
[
|
||
"3",
|
||
"48001950500",
|
||
"Census Tract 9505, Anderson County, Texas",
|
||
"4236",
|
||
"2948",
|
||
"673",
|
||
"33",
|
||
"0",
|
||
"34",
|
||
"194",
|
||
"354",
|
||
"1307",
|
||
"4236",
|
||
"4182",
|
||
"802",
|
||
"2850",
|
||
"64",
|
||
"0",
|
||
"0",
|
||
"20",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"10",
|
||
"67",
|
||
"0",
|
||
"17",
|
||
"28",
|
||
"42",
|
||
"43",
|
||
"32",
|
||
"1682",
|
||
"48",
|
||
"1379",
|
||
"0",
|
||
"41713",
|
||
"477",
|
||
"49",
|
||
"62",
|
||
"77",
|
||
"28",
|
||
"4190",
|
||
"1217",
|
||
"360",
|
||
"137",
|
||
"23",
|
||
"14",
|
||
"115",
|
||
"56",
|
||
"55",
|
||
"33",
|
||
"15",
|
||
"61",
|
||
"189",
|
||
"106",
|
||
"81",
|
||
"43"
|
||
],
|
||
[
|
||
"4",
|
||
"48001950600",
|
||
"Census Tract 9506, Anderson County, Texas",
|
||
"5843",
|
||
"3408",
|
||
"1901",
|
||
"0",
|
||
"0",
|
||
"0",
|
||
"126",
|
||
"408",
|
||
"1018",
|
||
"5843",
|
||
"5766",
|
||
"1913",
|
||
"3770",
|
||
"174",
|
||
"0",
|
||
"0",
|
||
"20",
|
||
"0",
|
||
"41",
|
||
"0",
|
||
"0",
|
||
"9",
|
||
"0",
|
||
"0",
|
||
"93",
|
||
"61",
|
||
"218",
|
||
"556",
|
||
"2128",
|
||
"71",
|
||
"1919",
|
||
"0",
|
||
"32552",
|
||
"833",
|
||
"60",
|
||
"0",
|
||
"199",
|
||
"287",
|
||
"5779",
|
||
"1402",
|
||
"60",
|
||
"199",
|
||
"16",
|
||
"28",
|
||
"33",
|
||
"58",
|
||
"12",
|
||
"49",
|
||
"142",
|
||
"117",
|
||
"170",
|
||
"77",
|
||
"45",
|
||
"96"
|
||
]
|
||
],
|
||
"shape": {
|
||
"columns": 56,
|
||
"rows": 5
|
||
}
|
||
},
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>geoid</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>total_population</th>\n",
|
||
" <th>white_alone</th>\n",
|
||
" <th>black_alone</th>\n",
|
||
" <th>american_indian_alone</th>\n",
|
||
" <th>asian_alone</th>\n",
|
||
" <th>native_hawaiian_pacific_alone</th>\n",
|
||
" <th>other_race_alone</th>\n",
|
||
" <th>two_or_more_races</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>male_70_74</th>\n",
|
||
" <th>male_75_79</th>\n",
|
||
" <th>male_80_84</th>\n",
|
||
" <th>male_85_plus</th>\n",
|
||
" <th>female_65_66</th>\n",
|
||
" <th>female_67_69</th>\n",
|
||
" <th>female_70_74</th>\n",
|
||
" <th>female_75_79</th>\n",
|
||
" <th>female_80_84</th>\n",
|
||
" <th>female_85_plus</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>48001950100</td>\n",
|
||
" <td>Census Tract 9501, Anderson County, Texas</td>\n",
|
||
" <td>5447</td>\n",
|
||
" <td>4540</td>\n",
|
||
" <td>212</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>604</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>138</td>\n",
|
||
" <td>119</td>\n",
|
||
" <td>49</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>264</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>19</td>\n",
|
||
" <td>83</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>48001950401</td>\n",
|
||
" <td>Census Tract 9504.01, Anderson County, Texas</td>\n",
|
||
" <td>4544</td>\n",
|
||
" <td>2145</td>\n",
|
||
" <td>1682</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>58</td>\n",
|
||
" <td>641</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>48001950402</td>\n",
|
||
" <td>Census Tract 9504.02, Anderson County, Texas</td>\n",
|
||
" <td>6997</td>\n",
|
||
" <td>3476</td>\n",
|
||
" <td>2587</td>\n",
|
||
" <td>19</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>133</td>\n",
|
||
" <td>772</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>56</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>48001950500</td>\n",
|
||
" <td>Census Tract 9505, Anderson County, Texas</td>\n",
|
||
" <td>4236</td>\n",
|
||
" <td>2948</td>\n",
|
||
" <td>673</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>34</td>\n",
|
||
" <td>194</td>\n",
|
||
" <td>354</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>115</td>\n",
|
||
" <td>56</td>\n",
|
||
" <td>55</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>61</td>\n",
|
||
" <td>189</td>\n",
|
||
" <td>106</td>\n",
|
||
" <td>81</td>\n",
|
||
" <td>43</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>48001950600</td>\n",
|
||
" <td>Census Tract 9506, Anderson County, Texas</td>\n",
|
||
" <td>5843</td>\n",
|
||
" <td>3408</td>\n",
|
||
" <td>1901</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>126</td>\n",
|
||
" <td>408</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>58</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>49</td>\n",
|
||
" <td>142</td>\n",
|
||
" <td>117</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>77</td>\n",
|
||
" <td>45</td>\n",
|
||
" <td>96</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 56 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" geoid name \\\n",
|
||
"0 48001950100 Census Tract 9501, Anderson County, Texas \n",
|
||
"1 48001950401 Census Tract 9504.01, Anderson County, Texas \n",
|
||
"2 48001950402 Census Tract 9504.02, Anderson County, Texas \n",
|
||
"3 48001950500 Census Tract 9505, Anderson County, Texas \n",
|
||
"4 48001950600 Census Tract 9506, Anderson County, Texas \n",
|
||
"\n",
|
||
" total_population white_alone black_alone american_indian_alone \\\n",
|
||
"0 5447 4540 212 4 \n",
|
||
"1 4544 2145 1682 10 \n",
|
||
"2 6997 3476 2587 19 \n",
|
||
"3 4236 2948 673 33 \n",
|
||
"4 5843 3408 1901 0 \n",
|
||
"\n",
|
||
" asian_alone native_hawaiian_pacific_alone other_race_alone \\\n",
|
||
"0 21 0 66 \n",
|
||
"1 8 0 58 \n",
|
||
"2 0 10 133 \n",
|
||
"3 0 34 194 \n",
|
||
"4 0 0 126 \n",
|
||
"\n",
|
||
" two_or_more_races ... male_70_74 male_75_79 male_80_84 male_85_plus \\\n",
|
||
"0 604 ... 138 119 49 13 \n",
|
||
"1 641 ... 14 8 10 10 \n",
|
||
"2 772 ... 56 0 0 0 \n",
|
||
"3 354 ... 115 56 55 33 \n",
|
||
"4 408 ... 33 58 12 49 \n",
|
||
"\n",
|
||
" female_65_66 female_67_69 female_70_74 female_75_79 female_80_84 \\\n",
|
||
"0 36 101 264 114 19 \n",
|
||
"1 0 0 4 0 7 \n",
|
||
"2 0 0 0 0 0 \n",
|
||
"3 15 61 189 106 81 \n",
|
||
"4 142 117 170 77 45 \n",
|
||
"\n",
|
||
" female_85_plus \n",
|
||
"0 83 \n",
|
||
"1 0 \n",
|
||
"2 0 \n",
|
||
"3 43 \n",
|
||
"4 96 \n",
|
||
"\n",
|
||
"[5 rows x 56 columns]"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Cell 4: Fetch ACS data for all Texas tracts intersecting our wells (chunked to avoid 400 errors)\n",
|
||
"import math\n",
|
||
"\n",
|
||
"# Build list of tract components (state=48 always for Texas GEOIDs)\n",
|
||
"tracts['state'] = '48'\n",
|
||
"tracts['county'] = tracts['geoid'].str.slice(2, 5)\n",
|
||
"tracts['tract'] = tracts['geoid'].str.slice(5)\n",
|
||
"\n",
|
||
"BASE_URL = \"https://api.census.gov/data/2021/acs/acs5\"\n",
|
||
"all_vars = list(acs_vars.keys()) # variable codes\n",
|
||
"NAME_VAR = 'NAME'\n",
|
||
"\n",
|
||
"# The Census API rejects requests with more than 50 variables (or with malformed variable names); chunk the variable list (e.g., 40 per request, plus NAME)\n",
|
||
"CHUNK_SIZE = 40\n",
|
||
"var_chunks = [all_vars[i:i+CHUNK_SIZE] for i in range(0, len(all_vars), CHUNK_SIZE)]\n",
|
||
"print(f\"Total vars: {len(all_vars)}; chunk count: {len(var_chunks)}\")\n",
|
||
"\n",
|
||
"import requests\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"def fetch_for_county(county, tract_subset):\n",
|
||
" frames = []\n",
|
||
" for chunk in var_chunks:\n",
|
||
" params = {\n",
|
||
" 'get': ','.join(chunk + [NAME_VAR]),\n",
|
||
" 'for': 'tract:*',\n",
|
||
" 'in': f'state:48 county:{county}'\n",
|
||
" }\n",
|
||
" if API_KEY:\n",
|
||
" params['key'] = API_KEY\n",
|
||
" resp = requests.get(BASE_URL, params=params, timeout=30)\n",
|
||
" if resp.status_code != 200:\n",
|
||
" print(f\"[warn] status {resp.status_code} for county {county} chunk starting {chunk[0]}\")\n",
|
||
" try:\n",
|
||
" print(resp.text[:500])\n",
|
||
" except Exception:\n",
|
||
" pass\n",
|
||
" continue\n",
|
||
" data = resp.json()\n",
|
||
" header, *records = data\n",
|
||
" df = pd.DataFrame(records, columns=header)\n",
|
||
" keep = set(tract_subset['tract'])\n",
|
||
" df = df[df['tract'].isin(keep)].copy()\n",
|
||
" df['geoid'] = '48' + df['county'] + df['tract']\n",
|
||
" frames.append(df)\n",
|
||
" if not frames:\n",
|
||
" return pd.DataFrame()\n",
|
||
" # Merge on geoid only to accumulate variables; keep NAME/state/county/tract from the first frame\n",
|
||
" base = frames[0]\n",
|
||
" for f in frames[1:]:\n",
|
||
" # Drop duplicate columns present in base, but never drop the join key\n",
|
||
" join_keys = {'geoid'}\n",
|
||
" dup_cols = set(c for c in f.columns if c in base.columns and c not in join_keys)\n",
|
||
" if dup_cols:\n",
|
||
" f = f.drop(columns=list(dup_cols))\n",
|
||
" base = base.merge(f, on='geoid', how='left')\n",
|
||
" return base\n",
|
||
"\n",
|
||
"county_frames = []\n",
|
||
"for county, part in tracts.groupby('county'):\n",
|
||
" cf = fetch_for_county(county, part)\n",
|
||
" if not cf.empty:\n",
|
||
" county_frames.append(cf)\n",
|
||
"\n",
|
||
"acs_raw = pd.concat(county_frames, ignore_index=True) if county_frames else pd.DataFrame()\n",
|
||
"print(f\"Raw merged ACS rows: {len(acs_raw)}\")\n",
|
||
"\n",
|
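||
"# Rename variables to friendly names and coerce numerics. NOTE(review): Census sentinel estimates (e.g. -666666666 in median_household_income) are not converted to NA here and pass through as ordinary numbers\n",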
|
||
"rename_map = {k: v for k, v in acs_vars.items()}\n",
|
||
"acs_df = acs_raw.rename(columns=rename_map)\n",
|
||
"\n",
|
||
"num_cols = list(rename_map.values())\n",
|
||
"for c in num_cols:\n",
|
||
" if c in acs_df.columns:\n",
|
||
" acs_df[c] = pd.to_numeric(acs_df[c], errors='coerce')\n",
|
||
"\n",
|
||
"# Guarantee columns exist (fill missing with NA)\n",
|
||
"for c in num_cols:\n",
|
||
" if c not in acs_df.columns:\n",
|
||
" acs_df[c] = None\n",
|
||
"\n",
|
||
"# Standardize the tract name column to lowercase 'name' to avoid quoted identifier issues\n",
|
||
"if NAME_VAR in acs_df.columns:\n",
|
||
" acs_df = acs_df.rename(columns={NAME_VAR: 'name'})\n",
|
||
"\n",
|
||
"acs_df = acs_df[['geoid', 'name'] + num_cols]\n",
|
||
"acs_df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "3b8739b1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" geoid pct_minority pct_hispanic poverty_rate unemployment_rate \\\n",
|
||
"0 48001950100 0.166514 0.118230 0.168949 0.075048 \n",
|
||
"1 48001950401 0.527949 0.315801 0.000000 0.000000 \n",
|
||
"2 48001950402 0.503216 0.292268 1.000000 NaN \n",
|
||
"3 48001950500 0.304060 0.308546 0.191774 0.028537 \n",
|
||
"4 48001950600 0.416738 0.174226 0.331772 0.033365 \n",
|
||
"\n",
|
||
" less_than_hs_pct linguistic_isolation_rate renter_cost_burden_rate \\\n",
|
||
"0 0.098684 0.0 0.365297 \n",
|
||
"1 0.311365 0.0 0.400000 \n",
|
||
"2 0.288194 NaN NaN \n",
|
||
"3 0.113333 0.0 0.452830 \n",
|
||
"4 0.310875 0.0 0.655462 \n",
|
||
"\n",
|
||
" disability_rate pct_under5 pct_65plus ej_composite_score \\\n",
|
||
"0 0.264929 0.098954 0.196438 0.474332 \n",
|
||
"1 0.429825 0.003301 0.021127 0.501327 \n",
|
||
"2 0.000000 0.000000 0.029298 0.668572 \n",
|
||
"3 0.290453 0.117328 0.186733 0.511775 \n",
|
||
"4 0.242603 0.044327 0.144275 0.646844 \n",
|
||
"\n",
|
||
" median_household_income \n",
|
||
"0 61325 \n",
|
||
"1 92813 \n",
|
||
"2 -666666666 \n",
|
||
"3 41713 \n",
|
||
"4 32552 \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Cell 5: Derived EJ metrics\n",
|
||
"# Minority and Hispanic\n",
|
||
"acs_df['minority_population'] = (acs_df['total_population'] - acs_df['white_alone'].fillna(0))\n",
|
||
"acs_df['pct_minority'] = (acs_df['minority_population'] / acs_df['total_population']).where(acs_df['total_population']>0)\n",
|
||
"acs_df['pct_hispanic'] = (acs_df['hispanic_any_race'] / acs_df['hispanic_base']).where(acs_df['hispanic_base']>0)\n",
|
||
"\n",
|
||
"# Poverty\n",
|
||
"acs_df['poverty_rate'] = (acs_df['below_poverty'] / acs_df['poverty_universe']).where(acs_df['poverty_universe']>0)\n",
|
||
"\n",
|
||
"# Unemployment: unemployed / civilian labor force\n",
|
||
"acs_df['unemployment_rate'] = (acs_df['civilian_unemployed'] / acs_df['civilian_labor_force']).where(acs_df['civilian_labor_force']>0)\n",
|
||
"\n",
|
||
"# Linguistic isolation proxy\n",
|
||
"acs_df['linguistic_isolation_rate'] = (acs_df['linguistic_isolation_est'] / acs_df['language_universe']).where(acs_df['language_universe']>0)\n",
|
||
"\n",
|
||
"# Educational attainment (< HS): sum B15003_002.._016 over education_universe\n",
|
||
"b15003_bins = [f'b15003_{i:03d}' for i in range(2, 17)]\n",
|
||
"acs_df['less_than_hs'] = acs_df[b15003_bins].sum(axis=1, skipna=True)\n",
|
||
"acs_df['less_than_hs_pct'] = (acs_df['less_than_hs'] / acs_df['education_universe']).where(acs_df['education_universe']>0)\n",
|
||
"\n",
|
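||
"# Renter cost burden = sum of bins 8-11 over renters_universe. NOTE(review): if these columns map to ACS B25070_008E-_011E they cover 35%+ plus 'Not computed'; the 30.0-34.9% bin is _007, so this is not the usual >30% measure - confirm\n",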
|
||
"rent_burden_bins = ['b25070_008','b25070_009','b25070_010','b25070_011']\n",
|
||
"acs_df['renter_cost_burden_over_30'] = acs_df[rent_burden_bins].sum(axis=1, skipna=True)\n",
|
||
"acs_df['renter_cost_burden_rate'] = (acs_df['renter_cost_burden_over_30'] / acs_df['renters_universe']).where(acs_df['renters_universe']>0)\n",
|
||
"\n",
|
||
"# Disability rate\n",
|
||
"acs_df['disability_rate'] = (acs_df['with_disability'] / acs_df['disability_universe']).where(acs_df['disability_universe']>0)\n",
|
||
"\n",
|
||
"# Age structure: pct under 5, pct 65+\n",
|
||
"acs_df['under5_total'] = acs_df['male_under_5'].fillna(0) + acs_df['female_under_5'].fillna(0)\n",
|
||
"acs_df['pct_under5'] = (acs_df['under5_total'] / acs_df['total_population']).where(acs_df['total_population']>0)\n",
|
||
"\n",
|
||
"elder_bins = ['male_65_66','male_67_69','male_70_74','male_75_79','male_80_84','male_85_plus',\n",
|
||
" 'female_65_66','female_67_69','female_70_74','female_75_79','female_80_84','female_85_plus']\n",
|
||
"acs_df['elder_total'] = acs_df[elder_bins].sum(axis=1, skipna=True)\n",
|
||
"acs_df['pct_65plus'] = (acs_df['elder_total'] / acs_df['total_population']).where(acs_df['total_population']>0)\n",
|
||
"\n",
|
||
"# Composite EJ score: percentile rank across selected metrics and average\n",
|
||
"# Higher is worse (more vulnerable): poverty_rate, pct_minority, renter_cost_burden_rate, linguistic_isolation_rate,\n",
|
||
"# less_than_hs_pct, unemployment_rate, disability_rate\n",
|
||
"comp_cols = ['poverty_rate','pct_minority','renter_cost_burden_rate','linguistic_isolation_rate',\n",
|
||
" 'less_than_hs_pct','unemployment_rate','disability_rate']\n",
|
||
"\n",
|
||
"# Compute percentiles per column (0..1)\n",
|
||
"for c in comp_cols:\n",
|
||
" # rank(pct=True) uses the default method='average' and yields a percentile in (0, 1]; NA inputs are kept as NA (not filled with 0), so the row-wise mean below skips them\n",
|
||
" acs_df[c + '_pctile'] = acs_df[c].rank(pct=True)\n",
|
||
" acs_df.loc[acs_df[c].isna(), c + '_pctile'] = None\n",
|
||
"\n",
|
||
"acs_df['ej_composite_score'] = acs_df[[c + '_pctile' for c in comp_cols]].mean(axis=1, skipna=True)\n",
|
||
"\n",
|
||
"# Keep a tidy preview\n",
|
||
"print(acs_df[['geoid','pct_minority','pct_hispanic','poverty_rate','unemployment_rate','less_than_hs_pct',\n",
|
||
" 'linguistic_isolation_rate','renter_cost_burden_rate','disability_rate','pct_under5','pct_65plus',\n",
|
||
" 'ej_composite_score','median_household_income']].head())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "92ac800b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Updated census_tract_demographics (schema reconciled, data refreshed, indexes ensured).\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
|
||
"columns": [
|
||
{
|
||
"name": "index",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "canonical_api10",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "census_tract_geoid",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "ruca_category",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "is_rural",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "ej_composite_score",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "pct_minority",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "poverty_rate",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "unemployment_rate",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "less_than_hs_pct",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "linguistic_isolation_rate",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "renter_cost_burden_rate",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "disability_rate",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "pct_under5",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "pct_65plus",
|
||
"rawType": "float64",
|
||
"type": "float"
|
||
},
|
||
{
|
||
"name": "median_household_income",
|
||
"rawType": "int64",
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"name": "ruca_primary_description",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "ruca_secondary_description",
|
||
"rawType": "object",
|
||
"type": "string"
|
||
}
|
||
],
|
||
"ref": "b720feb6-5b69-4250-bf7f-5b130bf14b85",
|
||
"rows": [
|
||
[
|
||
"0",
|
||
"4236101293",
|
||
"48361020300",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.762412654987491",
|
||
"0.34279038718291055",
|
||
"0.3413793103448276",
|
||
"0.08861859252823631",
|
||
"0.18162839248434237",
|
||
"0.021718602455146365",
|
||
"0.47280334728033474",
|
||
"0.32164634146341464",
|
||
"0.0931241655540721",
|
||
"0.15453938584779706",
|
||
"48218",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
],
|
||
[
|
||
"1",
|
||
"4236130846",
|
||
"48361020300",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.762412654987491",
|
||
"0.34279038718291055",
|
||
"0.3413793103448276",
|
||
"0.08861859252823631",
|
||
"0.18162839248434237",
|
||
"0.021718602455146365",
|
||
"0.47280334728033474",
|
||
"0.32164634146341464",
|
||
"0.0931241655540721",
|
||
"0.15453938584779706",
|
||
"48218",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
],
|
||
[
|
||
"2",
|
||
"4236130889",
|
||
"48361020300",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.762412654987491",
|
||
"0.34279038718291055",
|
||
"0.3413793103448276",
|
||
"0.08861859252823631",
|
||
"0.18162839248434237",
|
||
"0.021718602455146365",
|
||
"0.47280334728033474",
|
||
"0.32164634146341464",
|
||
"0.0931241655540721",
|
||
"0.15453938584779706",
|
||
"48218",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
],
|
||
[
|
||
"3",
|
||
"4236130612",
|
||
"48361020300",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.762412654987491",
|
||
"0.34279038718291055",
|
||
"0.3413793103448276",
|
||
"0.08861859252823631",
|
||
"0.18162839248434237",
|
||
"0.021718602455146365",
|
||
"0.47280334728033474",
|
||
"0.32164634146341464",
|
||
"0.0931241655540721",
|
||
"0.15453938584779706",
|
||
"48218",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
],
|
||
[
|
||
"4",
|
||
"4236130951",
|
||
"48361020300",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.762412654987491",
|
||
"0.34279038718291055",
|
||
"0.3413793103448276",
|
||
"0.08861859252823631",
|
||
"0.18162839248434237",
|
||
"0.021718602455146365",
|
||
"0.47280334728033474",
|
||
"0.32164634146341464",
|
||
"0.0931241655540721",
|
||
"0.15453938584779706",
|
||
"48218",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
],
|
||
[
|
||
"5",
|
||
"4236130691",
|
||
"48361021200",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.4149936725524403",
|
||
"0.07561597281223449",
|
||
"0.11989741397734559",
|
||
"0.04049586776859504",
|
||
"0.07249129471351694",
|
||
"0.0",
|
||
"0.7454545454545455",
|
||
"0.225785896346644",
|
||
"0.04927782497875956",
|
||
"0.17587085811384875",
|
||
"75030",
|
||
"Micropolitan high commuting",
|
||
"Micropolitan high commuting, no additional code"
|
||
],
|
||
[
|
||
"6",
|
||
"4236130768",
|
||
"48361021200",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.4149936725524403",
|
||
"0.07561597281223449",
|
||
"0.11989741397734559",
|
||
"0.04049586776859504",
|
||
"0.07249129471351694",
|
||
"0.0",
|
||
"0.7454545454545455",
|
||
"0.225785896346644",
|
||
"0.04927782497875956",
|
||
"0.17587085811384875",
|
||
"75030",
|
||
"Micropolitan high commuting",
|
||
"Micropolitan high commuting, no additional code"
|
||
],
|
||
[
|
||
"7",
|
||
"4236100686",
|
||
"48361022200",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.3776728029920977",
|
||
"0.05415499533146592",
|
||
"0.07862004224360479",
|
||
"0.04386374241717219",
|
||
"0.03543613707165109",
|
||
"0.0",
|
||
"0.458128078817734",
|
||
"0.3131289492160075",
|
||
"0.08029878618113913",
|
||
"0.1092436974789916",
|
||
"110550",
|
||
"Micropolitan low commuting",
|
||
"Micropolitan low commuting, no additional code"
|
||
],
|
||
[
|
||
"8",
|
||
"4236130968",
|
||
"48361022301",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.28235883683560237",
|
||
"0.12156951689725562",
|
||
"0.0381038784304831",
|
||
"0.027311744049941473",
|
||
"0.06414091060152875",
|
||
"0.015587529976019185",
|
||
"0.20195439739413681",
|
||
"0.19505556815604447",
|
||
"0.05057836244046269",
|
||
"0.13608528010886822",
|
||
"94161",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
],
|
||
[
|
||
"9",
|
||
"4236130847",
|
||
"48361021300",
|
||
"Micropolitan",
|
||
"false",
|
||
"0.3443398903976459",
|
||
"0.19737715803452854",
|
||
"0.05079681274900399",
|
||
"0.029069767441860465",
|
||
"0.06048387096774194",
|
||
"0.0",
|
||
"0.31693989071038253",
|
||
"0.2974904437427289",
|
||
"0.08598937583001329",
|
||
"0.13645418326693226",
|
||
"86287",
|
||
"Micropolitan core",
|
||
"Micropolitan core, no additional code"
|
||
]
|
||
],
|
||
"shape": {
|
||
"columns": 17,
|
||
"rows": 10
|
||
}
|
||
},
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>canonical_api10</th>\n",
|
||
" <th>census_tract_geoid</th>\n",
|
||
" <th>ruca_category</th>\n",
|
||
" <th>is_rural</th>\n",
|
||
" <th>ej_composite_score</th>\n",
|
||
" <th>pct_minority</th>\n",
|
||
" <th>poverty_rate</th>\n",
|
||
" <th>unemployment_rate</th>\n",
|
||
" <th>less_than_hs_pct</th>\n",
|
||
" <th>linguistic_isolation_rate</th>\n",
|
||
" <th>renter_cost_burden_rate</th>\n",
|
||
" <th>disability_rate</th>\n",
|
||
" <th>pct_under5</th>\n",
|
||
" <th>pct_65plus</th>\n",
|
||
" <th>median_household_income</th>\n",
|
||
" <th>ruca_primary_description</th>\n",
|
||
" <th>ruca_secondary_description</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>4236101293</td>\n",
|
||
" <td>48361020300</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.762413</td>\n",
|
||
" <td>0.342790</td>\n",
|
||
" <td>0.341379</td>\n",
|
||
" <td>0.088619</td>\n",
|
||
" <td>0.181628</td>\n",
|
||
" <td>0.021719</td>\n",
|
||
" <td>0.472803</td>\n",
|
||
" <td>0.321646</td>\n",
|
||
" <td>0.093124</td>\n",
|
||
" <td>0.154539</td>\n",
|
||
" <td>48218</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>4236130846</td>\n",
|
||
" <td>48361020300</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.762413</td>\n",
|
||
" <td>0.342790</td>\n",
|
||
" <td>0.341379</td>\n",
|
||
" <td>0.088619</td>\n",
|
||
" <td>0.181628</td>\n",
|
||
" <td>0.021719</td>\n",
|
||
" <td>0.472803</td>\n",
|
||
" <td>0.321646</td>\n",
|
||
" <td>0.093124</td>\n",
|
||
" <td>0.154539</td>\n",
|
||
" <td>48218</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>4236130889</td>\n",
|
||
" <td>48361020300</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.762413</td>\n",
|
||
" <td>0.342790</td>\n",
|
||
" <td>0.341379</td>\n",
|
||
" <td>0.088619</td>\n",
|
||
" <td>0.181628</td>\n",
|
||
" <td>0.021719</td>\n",
|
||
" <td>0.472803</td>\n",
|
||
" <td>0.321646</td>\n",
|
||
" <td>0.093124</td>\n",
|
||
" <td>0.154539</td>\n",
|
||
" <td>48218</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4236130612</td>\n",
|
||
" <td>48361020300</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.762413</td>\n",
|
||
" <td>0.342790</td>\n",
|
||
" <td>0.341379</td>\n",
|
||
" <td>0.088619</td>\n",
|
||
" <td>0.181628</td>\n",
|
||
" <td>0.021719</td>\n",
|
||
" <td>0.472803</td>\n",
|
||
" <td>0.321646</td>\n",
|
||
" <td>0.093124</td>\n",
|
||
" <td>0.154539</td>\n",
|
||
" <td>48218</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4236130951</td>\n",
|
||
" <td>48361020300</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.762413</td>\n",
|
||
" <td>0.342790</td>\n",
|
||
" <td>0.341379</td>\n",
|
||
" <td>0.088619</td>\n",
|
||
" <td>0.181628</td>\n",
|
||
" <td>0.021719</td>\n",
|
||
" <td>0.472803</td>\n",
|
||
" <td>0.321646</td>\n",
|
||
" <td>0.093124</td>\n",
|
||
" <td>0.154539</td>\n",
|
||
" <td>48218</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>4236130691</td>\n",
|
||
" <td>48361021200</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.414994</td>\n",
|
||
" <td>0.075616</td>\n",
|
||
" <td>0.119897</td>\n",
|
||
" <td>0.040496</td>\n",
|
||
" <td>0.072491</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.745455</td>\n",
|
||
" <td>0.225786</td>\n",
|
||
" <td>0.049278</td>\n",
|
||
" <td>0.175871</td>\n",
|
||
" <td>75030</td>\n",
|
||
" <td>Micropolitan high commuting</td>\n",
|
||
" <td>Micropolitan high commuting, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>4236130768</td>\n",
|
||
" <td>48361021200</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.414994</td>\n",
|
||
" <td>0.075616</td>\n",
|
||
" <td>0.119897</td>\n",
|
||
" <td>0.040496</td>\n",
|
||
" <td>0.072491</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.745455</td>\n",
|
||
" <td>0.225786</td>\n",
|
||
" <td>0.049278</td>\n",
|
||
" <td>0.175871</td>\n",
|
||
" <td>75030</td>\n",
|
||
" <td>Micropolitan high commuting</td>\n",
|
||
" <td>Micropolitan high commuting, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>4236100686</td>\n",
|
||
" <td>48361022200</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.377673</td>\n",
|
||
" <td>0.054155</td>\n",
|
||
" <td>0.078620</td>\n",
|
||
" <td>0.043864</td>\n",
|
||
" <td>0.035436</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.458128</td>\n",
|
||
" <td>0.313129</td>\n",
|
||
" <td>0.080299</td>\n",
|
||
" <td>0.109244</td>\n",
|
||
" <td>110550</td>\n",
|
||
" <td>Micropolitan low commuting</td>\n",
|
||
" <td>Micropolitan low commuting, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>4236130968</td>\n",
|
||
" <td>48361022301</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.282359</td>\n",
|
||
" <td>0.121570</td>\n",
|
||
" <td>0.038104</td>\n",
|
||
" <td>0.027312</td>\n",
|
||
" <td>0.064141</td>\n",
|
||
" <td>0.015588</td>\n",
|
||
" <td>0.201954</td>\n",
|
||
" <td>0.195056</td>\n",
|
||
" <td>0.050578</td>\n",
|
||
" <td>0.136085</td>\n",
|
||
" <td>94161</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>4236130847</td>\n",
|
||
" <td>48361021300</td>\n",
|
||
" <td>Micropolitan</td>\n",
|
||
" <td>false</td>\n",
|
||
" <td>0.344340</td>\n",
|
||
" <td>0.197377</td>\n",
|
||
" <td>0.050797</td>\n",
|
||
" <td>0.029070</td>\n",
|
||
" <td>0.060484</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.316940</td>\n",
|
||
" <td>0.297490</td>\n",
|
||
" <td>0.085989</td>\n",
|
||
" <td>0.136454</td>\n",
|
||
" <td>86287</td>\n",
|
||
" <td>Micropolitan core</td>\n",
|
||
" <td>Micropolitan core, no additional code</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" canonical_api10 census_tract_geoid ruca_category is_rural \\\n",
|
||
"0 4236101293 48361020300 Micropolitan false \n",
|
||
"1 4236130846 48361020300 Micropolitan false \n",
|
||
"2 4236130889 48361020300 Micropolitan false \n",
|
||
"3 4236130612 48361020300 Micropolitan false \n",
|
||
"4 4236130951 48361020300 Micropolitan false \n",
|
||
"5 4236130691 48361021200 Micropolitan false \n",
|
||
"6 4236130768 48361021200 Micropolitan false \n",
|
||
"7 4236100686 48361022200 Micropolitan false \n",
|
||
"8 4236130968 48361022301 Micropolitan false \n",
|
||
"9 4236130847 48361021300 Micropolitan false \n",
|
||
"\n",
|
||
" ej_composite_score pct_minority poverty_rate unemployment_rate \\\n",
|
||
"0 0.762413 0.342790 0.341379 0.088619 \n",
|
||
"1 0.762413 0.342790 0.341379 0.088619 \n",
|
||
"2 0.762413 0.342790 0.341379 0.088619 \n",
|
||
"3 0.762413 0.342790 0.341379 0.088619 \n",
|
||
"4 0.762413 0.342790 0.341379 0.088619 \n",
|
||
"5 0.414994 0.075616 0.119897 0.040496 \n",
|
||
"6 0.414994 0.075616 0.119897 0.040496 \n",
|
||
"7 0.377673 0.054155 0.078620 0.043864 \n",
|
||
"8 0.282359 0.121570 0.038104 0.027312 \n",
|
||
"9 0.344340 0.197377 0.050797 0.029070 \n",
|
||
"\n",
|
||
" less_than_hs_pct linguistic_isolation_rate renter_cost_burden_rate \\\n",
|
||
"0 0.181628 0.021719 0.472803 \n",
|
||
"1 0.181628 0.021719 0.472803 \n",
|
||
"2 0.181628 0.021719 0.472803 \n",
|
||
"3 0.181628 0.021719 0.472803 \n",
|
||
"4 0.181628 0.021719 0.472803 \n",
|
||
"5 0.072491 0.000000 0.745455 \n",
|
||
"6 0.072491 0.000000 0.745455 \n",
|
||
"7 0.035436 0.000000 0.458128 \n",
|
||
"8 0.064141 0.015588 0.201954 \n",
|
||
"9 0.060484 0.000000 0.316940 \n",
|
||
"\n",
|
||
" disability_rate pct_under5 pct_65plus median_household_income \\\n",
|
||
"0 0.321646 0.093124 0.154539 48218 \n",
|
||
"1 0.321646 0.093124 0.154539 48218 \n",
|
||
"2 0.321646 0.093124 0.154539 48218 \n",
|
||
"3 0.321646 0.093124 0.154539 48218 \n",
|
||
"4 0.321646 0.093124 0.154539 48218 \n",
|
||
"5 0.225786 0.049278 0.175871 75030 \n",
|
||
"6 0.225786 0.049278 0.175871 75030 \n",
|
||
"7 0.313129 0.080299 0.109244 110550 \n",
|
||
"8 0.195056 0.050578 0.136085 94161 \n",
|
||
"9 0.297490 0.085989 0.136454 86287 \n",
|
||
"\n",
|
||
" ruca_primary_description \\\n",
|
||
"0 Micropolitan core \n",
|
||
"1 Micropolitan core \n",
|
||
"2 Micropolitan core \n",
|
||
"3 Micropolitan core \n",
|
||
"4 Micropolitan core \n",
|
||
"5 Micropolitan high commuting \n",
|
||
"6 Micropolitan high commuting \n",
|
||
"7 Micropolitan low commuting \n",
|
||
"8 Micropolitan core \n",
|
||
"9 Micropolitan core \n",
|
||
"\n",
|
||
" ruca_secondary_description \n",
|
||
"0 Micropolitan core, no additional code \n",
|
||
"1 Micropolitan core, no additional code \n",
|
||
"2 Micropolitan core, no additional code \n",
|
||
"3 Micropolitan core, no additional code \n",
|
||
"4 Micropolitan core, no additional code \n",
|
||
"5 Micropolitan high commuting, no additional code \n",
|
||
"6 Micropolitan high commuting, no additional code \n",
|
||
"7 Micropolitan low commuting, no additional code \n",
|
||
"8 Micropolitan core, no additional code \n",
|
||
"9 Micropolitan core, no additional code "
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
"source": [
"# Cell 6: Persist tract demographics to Postgres and join preview (non-destructive schema-safe update)\n",
"#\n",
"# Inputs from earlier cells: `acs_df` (one row per tract with every column in `persist_cols`),\n",
"# `engine` (SQLAlchemy engine) and `pd` (pandas).\n",
"# Effect: creates or refreshes the table census_tract_demographics, then shows a 10-row join preview.\n",
"from sqlalchemy import text\n",
"\n",
"persist_cols = [\n",
"    'geoid','name','total_population','minority_population','pct_minority','pct_hispanic','poverty_rate',\n",
"    'unemployment_rate','less_than_hs_pct','linguistic_isolation_rate','renter_cost_burden_rate','disability_rate',\n",
"    'pct_under5','pct_65plus','ej_composite_score','median_household_income',\n",
"    'ruca_code_2020','ruca_primary','ruca_primary_description','ruca_secondary','ruca_secondary_description','ruca_category','is_nonmetro','is_rural'\n",
"]\n",
"\n",
"write_df = acs_df[persist_cols].copy()\n",
"\n",
"\n",
"def _ctd_columns(conn):\n",
"    \"\"\"Return the column names of census_tract_demographics visible on the current search_path.\n",
"\n",
"    information_schema.columns lists tables from every schema, so the lookup is restricted to the\n",
"    schemas that an unqualified table name can resolve to.\n",
"    \"\"\"\n",
"    rows = conn.execute(text(\n",
"        \"SELECT column_name FROM information_schema.columns \"\n",
"        \"WHERE table_name = 'census_tract_demographics' \"\n",
"        \"AND table_schema::text = ANY (current_schemas(false)::text[])\"\n",
"    ))\n",
"    return [r[0] for r in rows]\n",
"\n",
"\n",
"def _sql_type_for(series):\n",
"    \"\"\"Pick the Postgres type used when a missing column is added for `series`.\n",
"\n",
"    The pandas dtype is checked first. Object-dtype columns (for example booleans mixed with None)\n",
"    are then classified from their non-null values instead of falling straight through to TEXT.\n",
"    \"\"\"\n",
"    values = series.dropna()\n",
"    if pd.api.types.is_integer_dtype(values):\n",
"        return 'BIGINT'\n",
"    if pd.api.types.is_float_dtype(values):\n",
"        return 'DOUBLE PRECISION'\n",
"    if pd.api.types.is_bool_dtype(values):\n",
"        return 'BOOLEAN'\n",
"    # NOTE(review): the saved preview renders is_rural as 'false', which suggests that column was\n",
"    # created as TEXT at some point - confirm against the live table.\n",
"    inferred = pd.api.types.infer_dtype(values, skipna=True)\n",
"    by_inferred_kind = {\n",
"        'boolean': 'BOOLEAN',\n",
"        'integer': 'BIGINT',\n",
"        'floating': 'DOUBLE PRECISION',\n",
"        'mixed-integer-float': 'DOUBLE PRECISION',\n",
"    }\n",
"    return by_inferred_kind.get(inferred, 'TEXT')\n",
"\n",
"\n",
"with engine.begin() as conn:\n",
"    # Ensure target table exists; if not, create fresh via to_sql\n",
"    table_exists = conn.execute(\n",
"        text(\"SELECT to_regclass('census_tract_demographics') IS NOT NULL AS exists\")\n",
"    ).scalar()\n",
"    if not table_exists:\n",
"        write_df.to_sql('census_tract_demographics', con=conn, if_exists='replace', index=False, method='multi')\n",
"    else:\n",
"        # Add any missing columns before load, and normalize legacy NAME -> name\n",
"        existing_cols = _ctd_columns(conn)\n",
"        if 'NAME' in existing_cols and 'name' not in existing_cols:\n",
"            # If a quoted \"NAME\" exists, rename to lowercase name to avoid identifier issues\n",
"            conn.execute(text('ALTER TABLE census_tract_demographics RENAME COLUMN \"NAME\" TO name'))\n",
"            existing_cols = _ctd_columns(conn)\n",
"        for col in persist_cols:\n",
"            if col not in existing_cols:\n",
"                sql_type = _sql_type_for(write_df[col])\n",
"                conn.execute(text(f\"ALTER TABLE census_tract_demographics ADD COLUMN IF NOT EXISTS {col} {sql_type}\"))\n",
"        # Stage data in a regular (non-TEMP) staging table; it is dropped again once the load is done\n",
"        write_df.to_sql('_census_tract_demographics_stage', con=conn, if_exists='replace', index=False, method='multi')\n",
"        # Refresh strategy: delete all then insert (tract-level snapshot); both run in this transaction\n",
"        conn.execute(text(\"DELETE FROM census_tract_demographics\"))\n",
"        insert_cols = ','.join(persist_cols)\n",
"        conn.execute(text(f\"INSERT INTO census_tract_demographics ({insert_cols}) SELECT {insert_cols} FROM _census_tract_demographics_stage\"))\n",
"        conn.execute(text(\"DROP TABLE IF EXISTS _census_tract_demographics_stage\"))\n",
"    # Indexes (create if absent)\n",
"    conn.execute(text(\"CREATE INDEX IF NOT EXISTS idx_ctd_geoid ON census_tract_demographics (geoid)\"))\n",
"    conn.execute(text(\"CREATE INDEX IF NOT EXISTS idx_ctd_ej_score ON census_tract_demographics (ej_composite_score)\"))\n",
"    conn.execute(text(\"CREATE INDEX IF NOT EXISTS idx_ctd_poverty_rate ON census_tract_demographics (poverty_rate)\"))\n",
"    conn.execute(text(\"CREATE INDEX IF NOT EXISTS idx_ctd_pct_minority ON census_tract_demographics (pct_minority)\"))\n",
"    conn.execute(text(\"ANALYZE census_tract_demographics\"))\n",
"\n",
"print(\"Updated census_tract_demographics (schema reconciled, data refreshed, indexes ensured).\")\n",
"\n",
"# Preview join back to wells\n",
"with engine.begin() as conn:\n",
"    preview = pd.read_sql(text(\"\"\"\n",
"        SELECT w.canonical_api10, w.census_tract_geoid,\n",
"               d.ruca_category, d.is_rural, d.ej_composite_score, d.pct_minority, d.poverty_rate, d.unemployment_rate,\n",
"               d.less_than_hs_pct, d.linguistic_isolation_rate, d.renter_cost_burden_rate,\n",
"               d.disability_rate, d.pct_under5, d.pct_65plus, d.median_household_income,\n",
"               d.ruca_primary_description, d.ruca_secondary_description\n",
"        FROM well_shape_tract w\n",
"        LEFT JOIN census_tract_demographics d\n",
"          ON w.census_tract_geoid = d.geoid\n",
"        LIMIT 10\n",
"    \"\"\"), conn)\n",
"\n",
"preview"
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "d38db1df",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'well_shape_tract_columns_count': 11}\n",
|
||
"{'column': 'canonical_api10', 'present': True, 'nonnull_count': 1010430}\n",
|
||
"{'column': 'api10_number', 'present': True, 'nonnull_count': 852539}\n",
|
||
"{'column': 'api_number', 'present': True, 'nonnull_count': 1010430}\n",
|
||
"{'materialized_table': 'well_with_demographics_table', 'rows': 1373579}\n",
|
||
"Sample (first 10) from materialized table:\n",
|
||
" canonical_api10 api10_number api_number census_tract_geoid\n",
|
||
"0 4236101293 None 36101293 48361020300\n",
|
||
"1 4236130846 None 36130846 48361020300\n",
|
||
"2 4236130889 None 36130889 48361020300\n",
|
||
"3 4236130612 None 36130612 48361020300\n",
|
||
"4 4236130951 None 36130951 48361020300\n",
|
||
"5 4236130691 None 36130691 48361021200\n",
|
||
"6 4236130768 None 36130768 48361021200\n",
|
||
"7 4236100686 None 36100686 48361022200\n",
|
||
"8 4236130968 None 36130968 48361022301\n",
|
||
"9 4236130847 None 36130847 48361021300\n",
|
||
"{'materialized_table': 'well_with_demographics_table', 'rows': 1373579}\n",
|
||
"Sample (first 10) from materialized table:\n",
|
||
" canonical_api10 api10_number api_number census_tract_geoid\n",
|
||
"0 4236101293 None 36101293 48361020300\n",
|
||
"1 4236130846 None 36130846 48361020300\n",
|
||
"2 4236130889 None 36130889 48361020300\n",
|
||
"3 4236130612 None 36130612 48361020300\n",
|
||
"4 4236130951 None 36130951 48361020300\n",
|
||
"5 4236130691 None 36130691 48361021200\n",
|
||
"6 4236130768 None 36130768 48361021200\n",
|
||
"7 4236100686 None 36100686 48361022200\n",
|
||
"8 4236130968 None 36130968 48361022301\n",
|
||
"9 4236130847 None 36130847 48361021300\n"
|
||
]
|
||
}
|
||
],
"source": [
"# Cell 8: Materialize wells + demographics into a PostGIS table (defensive: preserve API ids)\n",
"#\n",
"# Inputs from earlier cells: `engine` (SQLAlchemy engine) and `pd` (pandas).\n",
"# Effect: rebuilds `target_table` from well_shape_tract LEFT JOIN census_tract_demographics,\n",
"# indexes it, then prints the row count and a 10-row sample.\n",
"from sqlalchemy import text\n",
"\n",
"target_table = 'well_with_demographics_table'\n",
"\n",
"\n",
"def _visible_columns(conn, table_name):\n",
"    \"\"\"Return the column names of `table_name` as visible on the current search_path.\n",
"\n",
"    information_schema.columns lists tables from every schema, so the lookup is restricted to the\n",
"    schemas that an unqualified table name can resolve to.\n",
"    \"\"\"\n",
"    rows = conn.execute(\n",
"        text(\n",
"            \"SELECT column_name FROM information_schema.columns \"\n",
"            \"WHERE table_name = :t \"\n",
"            \"AND table_schema::text = ANY (current_schemas(false)::text[])\"\n",
"        ),\n",
"        {'t': table_name},\n",
"    )\n",
"    return [r[0] for r in rows]\n",
"\n",
"\n",
"with engine.begin() as conn:\n",
"    # Inspect available identifier columns on well_shape_tract\n",
"    existing_cols = _visible_columns(conn, 'well_shape_tract')\n",
"    print({'well_shape_tract_columns_count': len(existing_cols)})\n",
"    for col in ['canonical_api10','api10_number','api_number']:\n",
"        present = col in existing_cols\n",
"        nonnull = 0\n",
"        if present:\n",
"            nonnull = conn.execute(text(f\"SELECT COUNT(*) FROM well_shape_tract WHERE {col} IS NOT NULL\")).scalar()\n",
"        print({ 'column': col, 'present': present, 'nonnull_count': int(nonnull) if present else None })\n",
"\n",
"    # canonical_api10 comes from the first identifier column that EXISTS on the table\n",
"    # (canonical_api10, then api10_number, then api_number). This is a per-table choice,\n",
"    # not a per-row fallback: NULLs in the chosen column are not filled from the others.\n",
"    canonical_expr = next(\n",
"        (f'w.{c}::text' for c in ('canonical_api10', 'api10_number', 'api_number') if c in existing_cols),\n",
"        'NULL::text',\n",
"    )\n",
"\n",
"    # Keep raw api columns too if present\n",
"    raw_api_selects = [f'w.{c}' for c in ('api10_number', 'api_number') if c in existing_cols]\n",
"\n",
"    # Use whichever geometry column the source table has, so the GIST index below matches it\n",
"    geom_col = next((c for c in ('geom', 'geometry') if c in existing_cols), None)\n",
"    if geom_col is None:\n",
"        raise ValueError(\"well_shape_tract has no 'geom' or 'geometry' column to materialize\")\n",
"\n",
"    # Define select list with canonical_api10 first, then raw APIs, then other fields\n",
"    select_list = [f\"{canonical_expr} AS canonical_api10\"] + raw_api_selects + [\n",
"        'w.census_tract_geoid',\n",
"        'w.latitude',\n",
"        'w.longitude',\n",
"        f'w.{geom_col}'\n",
"    ]\n",
"\n",
"    # Add demographic columns to the select\n",
"    dem_cols = [\n",
"        'd.name AS tract_name', 'd.ruca_code_2020', 'd.ruca_category', 'd.ruca_primary_description', 'd.ruca_secondary_description',\n",
"        'd.ej_composite_score', 'd.pct_minority','d.pct_hispanic','d.poverty_rate','d.unemployment_rate','d.less_than_hs_pct',\n",
"        'd.linguistic_isolation_rate','d.renter_cost_burden_rate','d.disability_rate','d.pct_under5','d.pct_65plus','d.median_household_income'\n",
"    ]\n",
"    select_list.extend(dem_cols)\n",
"\n",
"    select_sql = (\n",
"        \"SELECT\\n  \" + \",\\n  \".join(select_list)\n",
"        + \"\\nFROM well_shape_tract w\\nLEFT JOIN census_tract_demographics d ON w.census_tract_geoid = d.geoid\"\n",
"    )\n",
"\n",
"    # Ensure PostGIS\n",
"    conn.execute(text(\"CREATE EXTENSION IF NOT EXISTS postgis\"))\n",
"\n",
"    # Stage and replace inside this single transaction\n",
"    conn.execute(text(\"DROP TABLE IF EXISTS _well_with_demographics_stage\"))\n",
"    conn.execute(text(f\"CREATE TABLE _well_with_demographics_stage AS {select_sql}\"))\n",
"\n",
"    conn.execute(text(f\"DROP TABLE IF EXISTS {target_table}\"))\n",
"    conn.execute(text(f\"ALTER TABLE _well_with_demographics_stage RENAME TO {target_table}\"))\n",
"\n",
"    # Indexes\n",
"    conn.execute(text(f\"CREATE INDEX IF NOT EXISTS idx_wd_api10 ON {target_table} (canonical_api10)\"))\n",
"    conn.execute(text(f\"CREATE INDEX IF NOT EXISTS idx_wd_geoid ON {target_table} (census_tract_geoid)\"))\n",
"    # GIST index on the geometry column carried over from well_shape_tract\n",
"    conn.execute(text(f\"CREATE INDEX IF NOT EXISTS idx_wd_{geom_col} ON {target_table} USING GIST ({geom_col})\"))\n",
"\n",
"    conn.execute(text(f\"ANALYZE {target_table}\"))\n",
"\n",
"# Report row count and a quick sample\n",
"with engine.begin() as conn:\n",
"    cnt = conn.execute(text(f\"SELECT COUNT(*) FROM {target_table}\")).scalar()\n",
"    print({\"materialized_table\": target_table, \"rows\": int(cnt)})\n",
"    # Build a safe sample query using only existing columns\n",
"    cols_now = _visible_columns(conn, target_table)\n",
"    sample_cols = ['canonical_api10'] + [c for c in ['api10_number','api_number','census_tract_geoid'] if c in cols_now]\n",
"    sample_sql = f\"SELECT {', '.join(sample_cols)} FROM {target_table} LIMIT 10\"\n",
"    sample = pd.read_sql(text(sample_sql), conn)\n",
"\n",
"print(\"Sample (first 10) from materialized table:\")\n",
"print(sample)\n"
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.13.7"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|