This commit is contained in:
2024-09-28 23:23:40 -07:00
parent d637131886
commit 55b630e6c8
2 changed files with 318 additions and 232 deletions

View File

@@ -623,7 +623,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 44, "execution_count": 54,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@@ -666,6 +666,7 @@
"4 None None None \n", "4 None None None \n",
"\n", "\n",
"[5 rows x 194 columns]\n", "[5 rows x 194 columns]\n",
"Spatial index created successfully.\n",
"Data uploaded and spatial index created successfully.\n" "Data uploaded and spatial index created successfully.\n"
] ]
} }
@@ -677,37 +678,141 @@
"import os\n",
"\n",
"# Target projection. EPSG:3310 is NAD83 / California Albers; the previous value, EPSG:3110, is\n",
"# AGD66 / Vicgrid66 (an Australian grid) and contradicted the stated California Albers intent.\n",
"# NOTE(review): update any other cell or SQL that still hard-codes SRID 3110.\n",
"TARGET_EPSG = 3310\n",
"# CRS assumed for frames that carry no CRS at all (plain lon/lat) -- TODO confirm against the source data.\n",
"ASSUMED_SOURCE_EPSG = 4326\n",
"TABLE_NAME = 'california_climate_investment'\n",
"\n",
"# Step 1: Load your merged GeoDataFrame (assuming it already exists as 'merged_data')\n",
"# If it's not a GeoDataFrame, convert it to one\n",
"if not isinstance(merged_data, gpd.GeoDataFrame):\n",
"    if 'geometry' not in merged_data.columns:\n",
"        raise ValueError(\"The DataFrame must have a 'geometry' column to be converted into a GeoDataFrame.\")\n",
"    merged_data = gpd.GeoDataFrame(merged_data, geometry='geometry')\n",
"\n",
"# Step 2: Reproject to California Albers if the frame is not already in it\n",
"if merged_data.crs is None:\n",
"    # set_crs only labels the coordinates and raises when a different CRS is already present,\n",
"    # so it is applied solely to frames that have no CRS.\n",
"    merged_data = merged_data.set_crs(epsg=ASSUMED_SOURCE_EPSG)\n",
"if merged_data.crs.to_string() != f'EPSG:{TARGET_EPSG}':\n",
"    merged_data = merged_data.to_crs(epsg=TARGET_EPSG)\n",
"\n",
"# Step 3: Confirm geometry column is named 'geometry'\n",
"geometry_col = 'geometry'\n",
"if geometry_col not in merged_data.columns:\n",
"    raise ValueError(\"GeoDataFrame does not contain a geometry column named 'geometry'.\")\n",
"\n",
"# Step 4: Create a connection to your PostGIS database\n",
"# The connection URL (including the password) is read from the environment, never from the notebook.\n",
"# NOTE(review): the password that was previously committed in this cell should be rotated.\n",
"db_url = os.environ.get('CALIF_EQUITY_DB_URL')\n",
"if not db_url:\n",
"    raise RuntimeError(\n",
"        \"Set the CALIF_EQUITY_DB_URL environment variable to the PostGIS connection URL, \"\n",
"        \"e.g. postgresql://user:password@host:5432/calif_equity\"\n",
"    )\n",
"engine = create_engine(db_url)\n",
"\n",
"# Step 5: Push the data to PostGIS, replacing the existing table if it exists\n",
"# The `to_postgis()` function automatically recognizes the geometry column\n",
"# Failures are reported and then re-raised so later steps never run against a stale table.\n",
"try:\n",
"    merged_data.to_postgis(TABLE_NAME, engine, if_exists='replace')\n",
"except Exception as e:\n",
"    print(f\"Error uploading data to PostGIS: {e}\")\n",
"    raise\n",
"\n",
"# Step 6: Verify data upload by querying the table (select the first 5 rows)\n",
"try:\n",
"    gdf = gpd.read_postgis(f\"SELECT * FROM {TABLE_NAME} LIMIT 5\", engine, geom_col=geometry_col)\n",
"    print(gdf.head())\n",
"except Exception as e:\n",
"    print(f\"Error reading data from PostGIS: {e}\")\n",
"    raise\n",
"\n",
"# Step 7: Create a spatial index on the geometry column (using SQLAlchemy's text() function)\n",
"# engine.begin() commits on success; with engine.connect() and no explicit commit,\n",
"# SQLAlchemy 2.x rolls the DDL back when the connection is closed.\n",
"try:\n",
"    with engine.begin() as conn:\n",
"        conn.execute(text(f\"\"\"\n",
"            CREATE INDEX IF NOT EXISTS {TABLE_NAME}_geom_idx\n",
"            ON {TABLE_NAME}\n",
"            USING GIST ({geometry_col});\n",
"        \"\"\"))\n",
"    print(\"Spatial index created successfully.\")\n",
"except Exception as e:\n",
"    print(f\"Error creating spatial index: {e}\")\n",
"    raise\n",
"\n",
"# Only reached when every step above succeeded.\n",
"print(\"Data uploaded and spatial index created successfully.\")\n"
] ]
}, },
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EPSG:3110\n",
"EPSG:3110\n"
]
}
],
"source": [
"# Show the CRS of the in-memory frame, then of the sample read back from PostGIS.\n",
"print(merged_data.crs, gdf.crs, sep=\"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Row count of merged_data: 119356\n",
"Row count of gdf: 5\n",
"Columns of merged_data: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
" 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
" ...\n",
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
" dtype='object', length=194)\n",
"Columns of gdf: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
" 'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
" ...\n",
" 'Net Density DUA', 'Applicants Assisted', 'Invasive Cover 12 Months',\n",
" 'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
" 'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
" 'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
" dtype='object', length=194)\n"
]
}
],
"source": [
"# Sanity check: compare the in-memory frame with the sample read back from PostGIS.\n",
"# `gdf` comes from a LIMIT 5 query, so its row count is expected to be far smaller.\n",
"print(\"Row count of merged_data:\", merged_data.shape[0])\n",
"print(\"Row count of gdf:\", gdf.shape[0])\n",
"print(\"Columns of merged_data:\", merged_data.columns)\n",
"print(\"Columns of gdf:\", gdf.columns)\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'merged_data' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgeopandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mmerged_data\u001b[49m\u001b[38;5;241m.\u001b[39mplot(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m10\u001b[39m), edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblack\u001b[39m\u001b[38;5;124m'\u001b[39m, linewidth\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Add title and labels\u001b[39;00m\n\u001b[1;32m 8\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCensus Tracts\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[0;31mNameError\u001b[0m: name 'merged_data' is not defined"
]
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import geopandas as gpd\n",
"\n",
"# This cell needs `merged_data` from the earlier cells; the NameError saved with this notebook\n",
"# came from running it on a fresh kernel. Run the notebook top to bottom (Restart Kernel, Run All).\n",
"fig, ax = plt.subplots(figsize=(10, 10))\n",
"merged_data.plot(ax=ax, edgecolor='black', linewidth=0.5)\n",
"\n",
"# Add title and labels\n",
"# merged_data has been reprojected to a projected CRS in metres, so the axes are\n",
"# easting/northing rather than longitude/latitude.\n",
"ax.set_title('Census Tracts')\n",
"ax.set_xlabel('Easting (m)')\n",
"ax.set_ylabel('Northing (m)')\n",
"\n",
"# Show the plot\n",
"plt.show()\n"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 31,

File diff suppressed because one or more lines are too long