updates with Matt

This commit is contained in:
2024-10-22 09:18:11 -07:00
parent e3c7d05cec
commit 863640efbe
4 changed files with 603 additions and 27 deletions

View File

@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
@@ -26,7 +26,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -78,7 +78,7 @@
" dtype='object', length=194)"
]
},
"execution_count": 11,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -91,7 +91,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -119,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -531,7 +531,7 @@
"[409 rows x 194 columns]"
]
},
"execution_count": 15,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -543,7 +543,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@@ -553,7 +553,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -576,7 +576,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -589,14 +589,14 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_3460201/3570709754.py:7: FutureWarning: The provided callable <function sum at 0x7cfd6910e980> is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
"/tmp/ipykernel_2135561/3570709754.py:7: FutureWarning: The provided callable <function sum at 0x7f549753e8e0> is currently using DataFrameGroupBy.sum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"sum\" instead.\n",
" pivot_table = pd.pivot_table(climate_investment, values='Total Project Cost', index='County_y', aggfunc=np.sum)\n"
]
},
@@ -629,7 +629,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -645,7 +645,7 @@
" dtype='object', length=194)"
]
},
"execution_count": 22,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -657,7 +657,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
@@ -668,28 +668,380 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# write the first 5 rows to first_5_rows.csv (nothing is displayed in the notebook)\n",
"climate_investment.head().to_csv('first_5_rows.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# frequency table for Project Description\n",
"climate_investment['Project Description'].value_counts().to_csv('project_description_freq.csv')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 None\n",
"1 None\n",
"2 None\n",
"3 None\n",
"4 None\n",
"Name: Lat Long, dtype: object"
"count 119356\n",
"unique 2\n",
"top False\n",
"freq 90315\n",
"Name: Is Benefit DAC1550Communities, dtype: object"
]
},
"execution_count": 29,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# head of column 'Lat Long' \n",
"climate_investment['Lat Long'].head()\n"
"climate_investment[\"Is Benefit DAC1550Communities\"].describe()\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ZIP</th>\n",
" <th>TotPop19</th>\n",
" <th>CIscore</th>\n",
" <th>CIscoreP</th>\n",
" <th>Ozone</th>\n",
" <th>OzoneP</th>\n",
" <th>PM2_5</th>\n",
" <th>PM2_5_P</th>\n",
" <th>DieselPM</th>\n",
" <th>DieselPM_P</th>\n",
" <th>...</th>\n",
" <th>Indirect Jobs Fte</th>\n",
" <th>Induced Jobs Fte</th>\n",
" <th>Compost Produced Tons</th>\n",
" <th>Compost Produced Tons Yr</th>\n",
" <th>Net Density DUA</th>\n",
" <th>Applicants Assisted</th>\n",
" <th>Invasive Cover 12 Months</th>\n",
" <th>Invasive Cover 36 Months</th>\n",
" <th>Project Acreage</th>\n",
" <th>Intermediary Admin Expenses Calc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.000000</td>\n",
" <td>...</td>\n",
" <td>119356.0</td>\n",
" <td>119356.0</td>\n",
" <td>119356.000000</td>\n",
" <td>119356.0</td>\n",
" <td>119356.0</td>\n",
" <td>119356.0</td>\n",
" <td>119356.0</td>\n",
" <td>119356.0</td>\n",
" <td>119356.0</td>\n",
" <td>1.193560e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>92876.728577</td>\n",
" <td>5309.716411</td>\n",
" <td>23.935192</td>\n",
" <td>45.734280</td>\n",
" <td>0.048625</td>\n",
" <td>50.911545</td>\n",
" <td>10.405602</td>\n",
" <td>53.148956</td>\n",
" <td>0.240707</td>\n",
" <td>52.309922</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.022881</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.059589e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2343.339535</td>\n",
" <td>2527.349328</td>\n",
" <td>71.431858</td>\n",
" <td>76.628615</td>\n",
" <td>0.010285</td>\n",
" <td>28.485539</td>\n",
" <td>2.077885</td>\n",
" <td>28.427845</td>\n",
" <td>0.338975</td>\n",
" <td>28.401653</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.627501</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>8.701853e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>32.000000</td>\n",
" <td>0.000000</td>\n",
" <td>-999.000000</td>\n",
" <td>-999.000000</td>\n",
" <td>0.026554</td>\n",
" <td>0.186683</td>\n",
" <td>1.875092</td>\n",
" <td>0.012446</td>\n",
" <td>0.000214</td>\n",
" <td>0.037337</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>91506.000000</td>\n",
" <td>3758.000000</td>\n",
" <td>14.757710</td>\n",
" <td>24.911750</td>\n",
" <td>0.041252</td>\n",
" <td>24.878656</td>\n",
" <td>8.659819</td>\n",
" <td>28.357187</td>\n",
" <td>0.075040</td>\n",
" <td>27.940261</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>92691.000000</td>\n",
" <td>4943.000000</td>\n",
" <td>26.049788</td>\n",
" <td>51.033787</td>\n",
" <td>0.047536</td>\n",
" <td>50.989421</td>\n",
" <td>11.021278</td>\n",
" <td>55.084007</td>\n",
" <td>0.157610</td>\n",
" <td>53.005600</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>94558.000000</td>\n",
" <td>6345.000000</td>\n",
" <td>40.367234</td>\n",
" <td>75.617751</td>\n",
" <td>0.056800</td>\n",
" <td>75.121344</td>\n",
" <td>11.983522</td>\n",
" <td>77.523335</td>\n",
" <td>0.308042</td>\n",
" <td>77.349098</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>96161.000000</td>\n",
" <td>38754.000000</td>\n",
" <td>93.183570</td>\n",
" <td>100.000000</td>\n",
" <td>0.073132</td>\n",
" <td>100.000000</td>\n",
" <td>16.394748</td>\n",
" <td>100.000000</td>\n",
" <td>14.611221</td>\n",
" <td>100.000000</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>306.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.923000e+07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 141 columns</p>\n",
"</div>"
],
"text/plain": [
" ZIP TotPop19 CIscore CIscoreP \\\n",
"count 119356.000000 119356.000000 119356.000000 119356.000000 \n",
"mean 92876.728577 5309.716411 23.935192 45.734280 \n",
"std 2343.339535 2527.349328 71.431858 76.628615 \n",
"min 32.000000 0.000000 -999.000000 -999.000000 \n",
"25% 91506.000000 3758.000000 14.757710 24.911750 \n",
"50% 92691.000000 4943.000000 26.049788 51.033787 \n",
"75% 94558.000000 6345.000000 40.367234 75.617751 \n",
"max 96161.000000 38754.000000 93.183570 100.000000 \n",
"\n",
" Ozone OzoneP PM2_5 PM2_5_P \\\n",
"count 119356.000000 119356.000000 119356.000000 119356.000000 \n",
"mean 0.048625 50.911545 10.405602 53.148956 \n",
"std 0.010285 28.485539 2.077885 28.427845 \n",
"min 0.026554 0.186683 1.875092 0.012446 \n",
"25% 0.041252 24.878656 8.659819 28.357187 \n",
"50% 0.047536 50.989421 11.021278 55.084007 \n",
"75% 0.056800 75.121344 11.983522 77.523335 \n",
"max 0.073132 100.000000 16.394748 100.000000 \n",
"\n",
" DieselPM DieselPM_P ... Indirect Jobs Fte Induced Jobs Fte \\\n",
"count 119356.000000 119356.000000 ... 119356.0 119356.0 \n",
"mean 0.240707 52.309922 ... 0.0 0.0 \n",
"std 0.338975 28.401653 ... 0.0 0.0 \n",
"min 0.000214 0.037337 ... 0.0 0.0 \n",
"25% 0.075040 27.940261 ... 0.0 0.0 \n",
"50% 0.157610 53.005600 ... 0.0 0.0 \n",
"75% 0.308042 77.349098 ... 0.0 0.0 \n",
"max 14.611221 100.000000 ... 0.0 0.0 \n",
"\n",
" Compost Produced Tons Compost Produced Tons Yr Net Density DUA \\\n",
"count 119356.000000 119356.0 119356.0 \n",
"mean 0.022881 0.0 0.0 \n",
"std 1.627501 0.0 0.0 \n",
"min 0.000000 0.0 0.0 \n",
"25% 0.000000 0.0 0.0 \n",
"50% 0.000000 0.0 0.0 \n",
"75% 0.000000 0.0 0.0 \n",
"max 306.000000 0.0 0.0 \n",
"\n",
" Applicants Assisted Invasive Cover 12 Months \\\n",
"count 119356.0 119356.0 \n",
"mean 0.0 0.0 \n",
"std 0.0 0.0 \n",
"min 0.0 0.0 \n",
"25% 0.0 0.0 \n",
"50% 0.0 0.0 \n",
"75% 0.0 0.0 \n",
"max 0.0 0.0 \n",
"\n",
" Invasive Cover 36 Months Project Acreage \\\n",
"count 119356.0 119356.0 \n",
"mean 0.0 0.0 \n",
"std 0.0 0.0 \n",
"min 0.0 0.0 \n",
"25% 0.0 0.0 \n",
"50% 0.0 0.0 \n",
"75% 0.0 0.0 \n",
"max 0.0 0.0 \n",
"\n",
" Intermediary Admin Expenses Calc \n",
"count 1.193560e+05 \n",
"mean 1.059589e+03 \n",
"std 8.701853e+04 \n",
"min 0.000000e+00 \n",
"25% 0.000000e+00 \n",
"50% 0.000000e+00 \n",
"75% 0.000000e+00 \n",
"max 1.923000e+07 \n",
"\n",
"[8 rows x 141 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# summarize the dataframe\n",
"climate_investment.describe()\n"
]
},
{