one eday

2024-09-28 23:23:40 -07:00
parent d637131886
commit 55b630e6c8
2 changed files with 318 additions and 232 deletions
--- a/initial_view/merge_cci_califequity_postgis.ipynb
+++ b/initial_view/merge_cci_califequity_postgis.ipynb
@@ -623,7 +623,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
@@ -666,6 +666,7 @@
      "4                            None                None              None  \n",
      "\n",
      "[5 rows x 194 columns]\n",
      "Spatial index created successfully.\n",
      "Data uploaded and spatial index created successfully.\n"
     ]
    }
@@ -677,37 +678,141 @@
    "# Step 1: Load your merged GeoDataFrame (assuming it already exists as 'merged_data')\n",
    "# If it's not a GeoDataFrame, convert it to one\n",
    "if not isinstance(merged_data, gpd.GeoDataFrame):\n",
-    "    merged_data = gpd.GeoDataFrame(merged_data)\n",
+    "    if 'geometry' not in merged_data.columns:\n",
    "        raise ValueError(\"The DataFrame must have a 'geometry' column to be converted into a GeoDataFrame.\")\n",
    "    merged_data = gpd.GeoDataFrame(merged_data, geometry='geometry')\n",
    "\n",
    "# Step 2: Make sure the data ends up in NAD83 / California Albers (EPSG:3310).\n",
    "# NOTE: the code previously targeted EPSG:3110, which is AGD66 / Vicgrid66 (Victoria,\n",
    "# Australia); the California Albers code is EPSG:3310.\n",
    "if merged_data.crs is None:\n",
    "    # No CRS recorded: assume the coordinates are WGS84 lon/lat (EPSG:4326) and label them.\n",
    "    # set_crs() only labels the data, so it must not run when a CRS is already present:\n",
    "    # it raises on a mismatch and would otherwise mislabel the existing coordinates.\n",
    "    merged_data = merged_data.set_crs(epsg=4326)\n",
    "if merged_data.crs.to_string() != 'EPSG:3310':\n",
    "    # to_crs() actually transforms the coordinates into the target projection.\n",
    "    merged_data = merged_data.to_crs(epsg=3310)\n",
    "\n",
    "# Step 3: Confirm geometry column is named 'geometry'\n",
    "# geometry_col is reused below when reading the table back and when building the index.\n",
    "geometry_col = 'geometry'\n",
    "if geometry_col not in merged_data.columns:\n",
    "    raise ValueError(\"GeoDataFrame does not contain a geometry column named 'geometry'.\")\n",
    "\n",
    "# Step 4: Create a connection to your PostGIS database\n",
    "# The connection URL (which embeds the password) is read from the environment so that\n",
    "# credentials are never stored in the notebook or its git history, e.g.\n",
    "#   export CALIF_EQUITY_DB_URL='postgresql://USER:PASSWORD@HOST:5432/calif_equity'\n",
    "# The password that used to be hard-coded here is in version control and should be rotated.\n",
    "import os\n",
    "\n",
    "db_url = os.environ.get('CALIF_EQUITY_DB_URL')\n",
    "if not db_url:\n",
    "    raise RuntimeError(\"Set the CALIF_EQUITY_DB_URL environment variable to the PostGIS connection URL.\")\n",
    "engine = create_engine(db_url)\n",
    "\n",
    "# Step 5: Push the data to PostGIS, replacing the existing table if it exists\n",
    "# The `to_postgis()` function automatically recognizes the geometry column\n",
    "# upload_ok / index_ok record what actually happened so the final message is truthful.\n",
    "upload_ok = False\n",
    "try:\n",
    "    merged_data.to_postgis('california_climate_investment', engine, if_exists='replace')\n",
    "    upload_ok = True\n",
    "except Exception as e:\n",
    "    print(f\"Error uploading data to PostGIS: {e}\")\n",
    "\n",
    "# Step 6: Verify data upload by querying the table (select the first 5 rows)\n",
    "# This is only a sample of the table, not a full comparison with merged_data.\n",
    "try:\n",
    "    gdf = gpd.read_postgis(\"SELECT * FROM california_climate_investment LIMIT 5\", engine, geom_col=geometry_col)\n",
    "    print(gdf.head())\n",
    "except Exception as e:\n",
    "    print(f\"Error reading data from PostGIS: {e}\")\n",
    "\n",
    "# Step 7: Create a spatial index on the geometry column (using SQLAlchemy's text() function)\n",
    "# engine.begin() opens a transaction that is committed when the block exits without error;\n",
    "# with a plain engine.connect() the DDL is rolled back on close under SQLAlchemy 2.x.\n",
    "index_ok = False\n",
    "try:\n",
    "    with engine.begin() as conn:\n",
    "        conn.execute(text(f\"\"\"\n",
    "            CREATE INDEX IF NOT EXISTS california_climate_investment_geom_idx\n",
    "            ON california_climate_investment\n",
    "            USING GIST ({geometry_col});\n",
    "        \"\"\"))\n",
    "    index_ok = True\n",
    "    print(\"Spatial index created successfully.\")\n",
    "except Exception as e:\n",
    "    print(f\"Error creating spatial index: {e}\")\n",
    "\n",
    "# Only report overall success when both the upload and the index creation succeeded.\n",
    "if upload_ok and index_ok:\n",
    "    print(\"Data uploaded and spatial index created successfully.\")\n",
    "else:\n",
    "    print(\"Upload finished with errors; see the messages above.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EPSG:3110\n",
      "EPSG:3110\n"
     ]
    }
   ],
   "source": [
    "# Show the CRS of the in-memory frame, then of the sample read back from PostGIS.\n",
    "print(merged_data.crs, gdf.crs, sep='\\n')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Row count of merged_data: 119356\n",
      "Row count of gdf: 5\n",
      "Columns of merged_data: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
      "       'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
      "       ...\n",
      "       'Net Density  DUA', 'Applicants  Assisted', 'Invasive Cover 12 Months',\n",
      "       'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
      "       'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
      "       'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
      "      dtype='object', length=194)\n",
      "Columns of gdf: Index(['Tract', 'ZIP', 'County_x', 'ApproxLoc', 'TotPop19', 'CIscore',\n",
      "       'CIscoreP', 'Ozone', 'OzoneP', 'PM2_5',\n",
      "       ...\n",
      "       'Net Density  DUA', 'Applicants  Assisted', 'Invasive Cover 12 Months',\n",
      "       'Invasive Cover 36 Months', 'Project Acreage', 'IS IAE',\n",
      "       'Intermediary Admin Expenses Calc', 'PRIMARY_FUNDING_RECIPIENT_TYPE',\n",
      "       'TRIBAL AFFILIATION', 'PROJECT PARTNERS'],\n",
      "      dtype='object', length=194)\n"
     ]
    }
   ],
   "source": [
    "print(\"Row count of merged_data:\", len(merged_data))\n",
    "# gdf was read with LIMIT 5, so its length cannot confirm the upload; the table's own\n",
    "# COUNT(*) is the number to compare against len(merged_data).\n",
    "print(\"Row count of gdf:\", len(gdf))\n",
    "with engine.connect() as conn:\n",
    "    uploaded_rows = conn.execute(text(\"SELECT COUNT(*) FROM california_climate_investment\")).scalar()\n",
    "print(\"Row count of california_climate_investment table:\", uploaded_rows)\n",
    "print(\"Columns of merged_data:\", merged_data.columns)\n",
    "print(\"Columns of gdf:\", gdf.columns)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'merged_data' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgeopandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgpd\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mmerged_data\u001b[49m\u001b[38;5;241m.\u001b[39mplot(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m, \u001b[38;5;241m10\u001b[39m), edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblack\u001b[39m\u001b[38;5;124m'\u001b[39m, linewidth\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m)\n\u001b[1;32m      7\u001b[0m \u001b[38;5;66;03m# Add title and labels\u001b[39;00m\n\u001b[1;32m      8\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCensus Tracts\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'merged_data' is not defined"
     ]
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import geopandas as gpd\n",
    "\n",
    "# Draw the geometries on an explicit Axes so the title and labels attach to this figure.\n",
    "fig, ax = plt.subplots(figsize=(10, 10))\n",
    "merged_data.plot(ax=ax, edgecolor='black', linewidth=0.5)\n",
    "\n",
    "# Add title and labels\n",
    "# A projected CRS has easting/northing axes in linear units, not degrees, so the\n",
    "# longitude/latitude labels are only used when the CRS is geographic or unknown.\n",
    "tract_crs = merged_data.crs\n",
    "if tract_crs is not None and tract_crs.is_projected:\n",
    "    ax.set_xlabel('Easting')\n",
    "    ax.set_ylabel('Northing')\n",
    "else:\n",
    "    ax.set_xlabel('Longitude')\n",
    "    ax.set_ylabel('Latitude')\n",
    "ax.set_title('Census Tracts')\n",
    "\n",
    "# Show the plot\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
--- a/initial_view/some_descriptives.ipynb
+++ b/initial_view/some_descriptives.ipynb