update csvs

This commit is contained in:
2024-08-12 15:22:54 -07:00
parent 776f6f796b
commit c1fd9e8bef
5 changed files with 36669 additions and 16961 deletions

View File

@@ -275,7 +275,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -361,7 +361,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -473,7 +473,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -482,7 +482,7 @@
"(158, 8)"
]
},
"execution_count": 13,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,181 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sqlalchemy import create_engine\n",
"import geopandas as gpd\n",
"\n",
"import os\n",
"\n",
"# Database connection details from zshrc environment variables\n",
"db_name = 'colorado_spills'\n",
"user = os.getenv('DB_USER')\n",
"password = os.getenv('DB_PASSWORD')\n",
"host = os.getenv('DB_HOST')\n",
"port = os.getenv('DB_PORT')\n",
"\n",
"\n",
"# Create an engine to connect to the PostgreSQL database\n",
"engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}')\n",
"\n",
"# Read in the spills_with_demographics as spills\n",
"spills_df = pd.read_sql_table('spills_with_demographics', engine)\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<bound method NDFrame.head of Document # Report Operator \\\n",
"0 400628833 I w/S GREAT WESTERN OPERATING COMPANY LLC \n",
"1 400629910 I COLORADO OIL & GAS CONSERVATION COMMISSION \n",
"2 400629998 S COLORADO OIL & GAS CONSERVATION COMMISSION \n",
"3 400630821 I KERR MCGEE GATHERING LLC \n",
"4 400631216 I NOBLE ENERGY INC \n",
"... ... ... ... \n",
"16885 403825450 I w/S NOBLE ENERGY INC \n",
"16886 403825485 I w/S NOBLE ENERGY INC \n",
"16887 403825575 I w/S NOBLE ENERGY INC \n",
"16888 403826200 I NOBLE ENERGY INC \n",
"16889 403826294 I CAERUS PICEANCE LLC \n",
"\n",
" Operator # Tracking # Initial Report Date Date of Discovery \\\n",
"0 10110 400628833 06/18/2014 06/11/2014 \n",
"1 5 400629910 06/19/2014 06/14/2014 \n",
"2 5 400629910 06/19/2014 06/14/2014 \n",
"3 47121 400630821 06/20/2014 06/19/2014 \n",
"4 100322 400631216 06/23/2014 06/20/2014 \n",
"... ... ... ... ... \n",
"16885 100322 403825450 06/14/2024 06/13/2024 \n",
"16886 100322 403825485 06/14/2024 06/13/2024 \n",
"16887 100322 403825575 06/14/2024 06/13/2024 \n",
"16888 100322 403826200 06/14/2024 06/14/2024 \n",
"16889 10456 403826294 06/15/2024 06/15/2024 \n",
"\n",
" Spill Type Qtr Qtr Section ... total_population white_population \\\n",
"0 Historical SESE 2 ... 5491.0 4767.0 \n",
"1 Historical NWNE 1 ... 7335.0 6302.0 \n",
"2 Historical NWNE 1 ... 7335.0 6302.0 \n",
"3 Recent NWNW 15 ... 427.0 222.0 \n",
"4 Recent NENW 36 ... 3470.0 3105.0 \n",
"... ... ... ... ... ... ... \n",
"16885 Historical SWSE 7 ... 7335.0 6302.0 \n",
"16886 Historical SWSE 7 ... 7335.0 6302.0 \n",
"16887 Historical SWSE 7 ... 7335.0 6302.0 \n",
"16888 Recent SWSE 5 ... 4023.0 3581.0 \n",
"16889 Recent NESW 22 ... 7240.0 5646.0 \n",
"\n",
" hispanic_population median_household_income poverty_population \\\n",
"0 772.0 88286.0 325.0 \n",
"1 2011.0 71440.0 831.0 \n",
"2 2011.0 71440.0 831.0 \n",
"3 183.0 -666666666.0 0.0 \n",
"4 167.0 122604.0 129.0 \n",
"... ... ... ... \n",
"16885 2011.0 71440.0 831.0 \n",
"16886 2011.0 71440.0 831.0 \n",
"16887 2011.0 71440.0 831.0 \n",
"16888 612.0 82865.0 268.0 \n",
"16889 1659.0 64573.0 693.0 \n",
"\n",
" unemployed_population percent_white percent_hispanic percent_poverty \\\n",
"0 80.0 86.814788 14.059370 5.918776 \n",
"1 166.0 85.916837 27.416496 11.329243 \n",
"2 166.0 85.916837 27.416496 11.329243 \n",
"3 0.0 51.990632 42.857143 0.000000 \n",
"4 13.0 89.481268 4.812680 3.717579 \n",
"... ... ... ... ... \n",
"16885 166.0 85.916837 27.416496 11.329243 \n",
"16886 166.0 85.916837 27.416496 11.329243 \n",
"16887 166.0 85.916837 27.416496 11.329243 \n",
"16888 62.0 89.013174 15.212528 6.661695 \n",
"16889 240.0 77.983425 22.914365 9.571823 \n",
"\n",
" unemployment_rate \n",
"0 1.456930 \n",
"1 2.263122 \n",
"2 2.263122 \n",
"3 0.000000 \n",
"4 0.374640 \n",
"... ... \n",
"16885 2.263122 \n",
"16886 2.263122 \n",
"16887 2.263122 \n",
"16888 1.541138 \n",
"16889 3.314917 \n",
"\n",
"[16890 rows x 118 columns]>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spills_df.head"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'spills_gdf' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Ensure the date columns are in datetime format\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m spills_gdf[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDate of Discovery\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mto_datetime(\u001b[43mspills_gdf\u001b[49m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDate of Discovery\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 3\u001b[0m spills_gdf[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mInitial Report Date\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mto_datetime(spills_gdf[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mInitial Report Date\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Calculate the time difference in days\u001b[39;00m\n",
"\u001b[0;31mNameError\u001b[0m: name 'spills_gdf' is not defined"
]
}
],
"source": [
"# Ensure the date columns are in datetime format\n",
"spills_gdf['Date of Discovery'] = pd.to_datetime(spills_gdf['Date of Discovery'])\n",
"spills_gdf['Initial Report Date'] = pd.to_datetime(spills_gdf['Initial Report Date'])\n",
"\n",
"# Calculate the time difference in days\n",
"spills_gdf['Report Delay (Days)'] = (spills_gdf['Initial Report Date'] - spills_gdf['Date of Discovery']).dt.days\n",
"\n",
"# Display the first few rows to check the new column\n",
"spills_gdf[['Date of Discovery', 'Initial Report Date', 'Report Delay (Days)']].head()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "funkyfunk",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}