DATA201_projects/capstone_project.ipynb

1891 lines
53 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "58dfeab5",
"metadata": {},
"source": [
"# Working title: working subtitle\n",
"\n",
"## initial remarks"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "fe05b4a4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "39a4ce3f",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('~/python_class/DOB_Sustainability_Compliance_Map__Local_Law_33.csv')"
]
},
{
"cell_type": "markdown",
"id": "e0e97c85",
"metadata": {},
"source": [
"## Part 1: Data Exploration"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6b430c20",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(21681, 11)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "917a6779",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>BoroughName</th>\n",
" <th>BBL</th>\n",
" <th>ENERGY STAR Score</th>\n",
" <th>LetterScore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>Y4</td>\n",
" <td>0</td>\n",
" <td>124</td>\n",
" <td>2598091</td>\n",
" <td>920 GRESHAM ROAD</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000010010</td>\n",
" <td>1</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>23</td>\n",
" <td>T2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>24346</td>\n",
" <td>20 SOUTH STREET</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000020023</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>7501</td>\n",
" <td>R0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2542563</td>\n",
" <td>1 WATER STREET</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000047501</td>\n",
" <td>61</td>\n",
" <td>C</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"0 1 10 Y4 0 124 \n",
"1 2 23 T2 0 1 \n",
"2 4 7501 R0 2 1 \n",
"\n",
" DOF_Gross_Square_Footage Address BoroughName BBL \\\n",
"0 2598091 920 GRESHAM ROAD MANHATTAN 1000010010 \n",
"1 24346 20 SOUTH STREET MANHATTAN 1000020023 \n",
"2 2542563 1 WATER STREET MANHATTAN 1000047501 \n",
"\n",
" ENERGY STAR Score LetterScore \n",
"0 1 D \n",
"1 0 F \n",
"2 61 C "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "38d0ac47",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Block', 'Lot', 'Building_Class', 'Tax_Class', 'Building_Count',\n",
" 'DOF_Gross_Square_Footage', 'Address', 'BoroughName', 'BBL',\n",
" 'ENERGY STAR Score', 'LetterScore'],\n",
" dtype='object')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "adf4092b",
"metadata": {},
"outputs": [],
"source": [
"# Columns seem to be self-explanatory, except BBL. According to NYC OpenData:\n",
"# \"Borough Block and Lot identifier as assigned by NYC Department of Finance\"."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "276d9619",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MANHATTAN 7858\n",
"BROOKLYN 5469\n",
"BRONX 4349\n",
"QUEENS 3659\n",
"STATEN ISLAND 346\n",
"Name: BoroughName, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Is this dataset citywide? or just Manhattan?\n",
"\n",
"df['BoroughName'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d3c8c305",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block 0\n",
"Lot 0\n",
"Building_Class 2\n",
"Tax_Class 0\n",
"Building_Count 0\n",
"DOF_Gross_Square_Footage 0\n",
"Address 7\n",
"BoroughName 0\n",
"BBL 0\n",
"ENERGY STAR Score 0\n",
"LetterScore 0\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Missing data?\n",
"\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "64eb852e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>BoroughName</th>\n",
" <th>BBL</th>\n",
" <th>ENERGY STAR Score</th>\n",
" <th>LetterScore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4254</th>\n",
" <td>1595</td>\n",
" <td>7501</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1330 5 AVENUE</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1015950031</td>\n",
" <td>64</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8124</th>\n",
" <td>3016</td>\n",
" <td>7502</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1926 LONGFELLOW AVENUE</td>\n",
" <td>BRONX</td>\n",
" <td>2030160038</td>\n",
" <td>100</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"4254 1595 7501 NaN 0 0 \n",
"8124 3016 7502 NaN 0 0 \n",
"\n",
" DOF_Gross_Square_Footage Address BoroughName \\\n",
"4254 0 1330 5 AVENUE MANHATTAN \n",
"8124 0 1926 LONGFELLOW AVENUE BRONX \n",
"\n",
" BBL ENERGY STAR Score LetterScore \n",
"4254 1015950031 64 C \n",
"8124 2030160038 100 A "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['Building_Class'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cdf678d2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>BoroughName</th>\n",
" <th>BBL</th>\n",
" <th>ENERGY STAR Score</th>\n",
" <th>LetterScore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1228</th>\n",
" <td>506</td>\n",
" <td>12</td>\n",
" <td>W3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>49475</td>\n",
" <td>NaN</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1005060012</td>\n",
" <td>10</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7145</th>\n",
" <td>1734</td>\n",
" <td>1</td>\n",
" <td>I1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>1017118</td>\n",
" <td>NaN</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1017340001</td>\n",
" <td>7</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9225</th>\n",
" <td>2758</td>\n",
" <td>6</td>\n",
" <td>N9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>17200</td>\n",
" <td>NaN</td>\n",
" <td>BRONX</td>\n",
" <td>2027580006</td>\n",
" <td>89</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9226</th>\n",
" <td>2758</td>\n",
" <td>36</td>\n",
" <td>N9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>37060</td>\n",
" <td>NaN</td>\n",
" <td>BRONX</td>\n",
" <td>2027580036</td>\n",
" <td>66</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13711</th>\n",
" <td>1769</td>\n",
" <td>72</td>\n",
" <td>C1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>30720</td>\n",
" <td>NaN</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15056</th>\n",
" <td>1602</td>\n",
" <td>13</td>\n",
" <td>C1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>14720</td>\n",
" <td>NaN</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16381</th>\n",
" <td>3755</td>\n",
" <td>22</td>\n",
" <td>C1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>25564</td>\n",
" <td>NaN</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"1228 506 12 W3 0 1 \n",
"7145 1734 1 I1 0 5 \n",
"9225 2758 6 N9 0 1 \n",
"9226 2758 36 N9 0 1 \n",
"13711 1769 72 C1 0 1 \n",
"15056 1602 13 C1 0 1 \n",
"16381 3755 22 C1 0 1 \n",
"\n",
" DOF_Gross_Square_Footage Address BoroughName BBL \\\n",
"1228 49475 NaN MANHATTAN 1005060012 \n",
"7145 1017118 NaN MANHATTAN 1017340001 \n",
"9225 17200 NaN BRONX 2027580006 \n",
"9226 37060 NaN BRONX 2027580036 \n",
"13711 30720 NaN BROOKLYN -2147483648 \n",
"15056 14720 NaN BROOKLYN -2147483648 \n",
"16381 25564 NaN BROOKLYN -2147483648 \n",
"\n",
" ENERGY STAR Score LetterScore \n",
"1228 10 D \n",
"7145 7 D \n",
"9225 89 A \n",
"9226 66 C \n",
"13711 0 F \n",
"15056 0 F \n",
"16381 0 F "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['Address'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e205df03",
"metadata": {},
"outputs": [],
"source": [
"# Missing Address is not a big deal because the rest of the values are complete.\n",
"# But missing Building Class could be significant.\n",
"\n",
"# The two offending rows also have Building Count = 0.\n",
"# How is that possible, since they have Energy Star scores?\n",
"\n",
"# In the next section we may decide to drop those two rows."
]
},
{
"cell_type": "markdown",
"id": "4d539a8c",
"metadata": {},
"source": [
"## Part 2: Data Cleaning"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "614dbd9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block int64\n",
"Lot int64\n",
"Building_Class object\n",
"Tax_Class int64\n",
"Building_Count int64\n",
"DOF_Gross_Square_Footage int64\n",
"Address object\n",
"BoroughName object\n",
"BBL int64\n",
"ENERGY STAR Score int64\n",
"LetterScore object\n",
"dtype: object"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Which columns are informative?\n",
"\n",
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6c58a084",
"metadata": {},
"outputs": [],
"source": [
"# Interesting for analysis:\n",
"\n",
"# DOF_Gross_Square_Footage\n",
"# ENERGY STAR Score\n",
"# LetterScore\n",
"\n",
"# Other columns are less interesting:\n",
"\n",
"# Building_Count is the number of buildings in one Block.\n",
"# A Block can have more than one Lot, but a Lot only has one Block.\n",
"# Block, Lot and BBL are identifiers assigned by the city.\n",
"\n",
"# A good visual reference is the Digital Tax Map put out by the NYC Department of Finance:\n",
"# http://gis.nyc.gov/taxmap/map.htm"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "14213bd2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Can any identifiers be used as an index?\n",
"\n",
"df['Block'].is_unique"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "1e1a5e9b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Lot'].is_unique"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "67b7f633",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['BBL'].is_unique"
]
},
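{
"cell_type": "code",
"execution_count": 17,
"id": "c4469ca8",
"metadata": {},
"outputs": [],
"source": [
"# Since their values are not unique, they cannot be used as an index.\n",
"# (For BBL, part of the reason is visible above: several rows share the value -2147483648,\n",
"# which looks like a placeholder rather than a real identifier.)"
]
},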
{
"cell_type": "code",
"execution_count": 18,
"id": "782190b5",
"metadata": {},
"outputs": [],
"source": [
"# Shall we rename or discard any columns from this dataset?\n",
"\n",
"# BBL could be eliminated. However, there are only 11 columns total, and since df.head() is easily readable on my monitor without scrolling horizontally (as you're doing now), I see no harm in keeping it."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "e085ba33",
"metadata": {},
"outputs": [],
"source": [
"# Rename columns containing whitespace or camelcase\n",
"\n",
"df.rename(columns = {\"BoroughName\": \"Borough_Name\",\n",
" \"ENERGY STAR Score\": \"Energy_Star_Score\",\n",
" \"LetterScore\": \"Letter_Score\"\n",
" }, inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "c4a8ebb7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough_Name</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Star_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>Y4</td>\n",
" <td>0</td>\n",
" <td>124</td>\n",
" <td>2598091</td>\n",
" <td>920 GRESHAM ROAD</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000010010</td>\n",
" <td>1</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"0 1 10 Y4 0 124 \n",
"\n",
" DOF_Gross_Square_Footage Address Borough_Name BBL \\\n",
"0 2598091 920 GRESHAM ROAD MANHATTAN 1000010010 \n",
"\n",
" Energy_Star_Score Letter_Score \n",
"0 1 D "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "38de98e9",
"metadata": {},
"outputs": [],
"source": [
"# Unforeseen consequence of renaming: now I have to scroll horizontally."
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "c0b5504f",
"metadata": {},
"outputs": [],
"source": [
"# Rename columns once more\n",
"\n",
"df.rename(columns = {\"DOF_Gross_Square_Footage\": \"Sq_Footage\",\n",
" \"Energy_Star_Score\": \"Energy_Score\",\n",
" \"Borough_Name\": \"Borough\",\n",
" \"Building_Class\": \"Bldg_Class\",\n",
" \"Building_Count\": \"Bldg_Count\"\n",
" }, inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "0d3cf300",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>Y4</td>\n",
" <td>0</td>\n",
" <td>124</td>\n",
" <td>2598091</td>\n",
" <td>920 GRESHAM ROAD</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000010010</td>\n",
" <td>1</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage Address \\\n",
"0 1 10 Y4 0 124 2598091 920 GRESHAM ROAD \n",
"\n",
" Borough BBL Energy_Score Letter_Score \n",
"0 MANHATTAN 1000010010 1 D "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c1c2e027",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block 0\n",
"Lot 0\n",
"Bldg_Class 2\n",
"Tax_Class 0\n",
"Bldg_Count 0\n",
"Sq_Footage 0\n",
"Address 7\n",
"Borough 0\n",
"BBL 0\n",
"Energy_Score 0\n",
"Letter_Score 0\n",
"dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Repeat the search for missing data\n",
"\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "5debf1d6",
"metadata": {},
"outputs": [],
"source": [
"# Ignore the 7 missing addresses, but drop the 2 rows with missing Building Class.\n",
"# Building Class is a feature that will be used in the df.groupby() function.\n",
"\n",
"df.dropna(subset = ['Bldg_Class'], inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "5d2eb339",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block 0\n",
"Lot 0\n",
"Bldg_Class 0\n",
"Tax_Class 0\n",
"Bldg_Count 0\n",
"Sq_Footage 0\n",
"Address 7\n",
"Borough 0\n",
"BBL 0\n",
"Energy_Score 0\n",
"Letter_Score 0\n",
"dtype: int64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "3b5525b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Search for unexpected data\n",
"\n",
"# df['Energy_Score'].min() # looks good\n",
"# df['Energy_Score'].max() # looks good\n",
"# df['Sq_Footage'].max() # looks good\n",
"df['Sq_Footage'].min()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "c1f3edc4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"Letter_Score \n",
"A 3 3 3 3 3 3 \n",
"B 1 1 1 1 1 1 \n",
"C 5 5 5 5 5 5 \n",
"D 6 6 6 6 6 6 \n",
"F 14 14 14 14 14 14 \n",
"\n",
" Address Borough BBL Energy_Score \n",
"Letter_Score \n",
"A 3 3 3 3 \n",
"B 1 1 1 1 \n",
"C 5 5 5 5 \n",
"D 6 6 6 6 \n",
"F 14 14 14 14 "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A building cannot have zero square feet of floorspace.\n",
"# What's going on?\n",
"\n",
"df[df['Sq_Footage'] == 0].groupby(['Letter_Score']).count()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e27467ce",
"metadata": {},
"outputs": [],
"source": [
"# The ones with F can be explained:\n",
"# An F grade means that the building owner \"didnt submit required benchmarking information\",\n",
"# according to Local Law 95 of 2019. So it's not that the building has no square footage,\n",
"# but that the data was not submitted. Thus the failing grade.\n",
"\n",
"# We'll leave 0 square feet with F grade untouched.\n",
"\n",
"# For more information, see https://www1.nyc.gov/site/buildings/codes/benchmarking.page"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "b73e15d9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"Letter_Score \n",
"A 3 3 3 3 3 3 \n",
"B 1 1 1 1 1 1 \n",
"C 5 5 5 5 5 5 \n",
"D 6 6 6 6 6 6 \n",
"\n",
" Address Borough BBL Energy_Score \n",
"Letter_Score \n",
"A 3 3 3 3 \n",
"B 1 1 1 1 \n",
"C 5 5 5 5 \n",
"D 6 6 6 6 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# What to do with the others?\n",
"\n",
"df[(df['Sq_Footage'] == 0) & (df['Letter_Score'] != 'F')].groupby(['Letter_Score']).count()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "2d9bea8f",
"metadata": {},
"outputs": [],
"source": [
"# 15 rows remain with 0 square feet of floorspace.\n",
"# Can we impute values from the mean square footage for each grade?\n",
"\n",
"# (There must be an elegant way to do this. What you see below is not.)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "6eb73792",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sq_Footage</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>111197.291071</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>133270.963702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>128833.575964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>108170.778312</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sq_Footage\n",
"Letter_Score \n",
"A 111197.291071\n",
"B 133270.963702\n",
"C 128833.575964\n",
"D 108170.778312"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First, get averages\n",
"\n",
"subset0 = df[['Letter_Score', 'Sq_Footage']]\n",
"subset1 = subset0[(subset0['Letter_Score'] != 'F') & (subset0['Sq_Footage'] != 0)]\n",
"subset1.groupby(['Letter_Score']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "8666f836",
"metadata": {},
"outputs": [],
"source": [
"# Assign variables, rounding to whole numbers\n",
"\n",
"mean_A = 111197\n",
"mean_B = 133271\n",
"mean_C = 128834\n",
"mean_D = 108171"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "73900853",
"metadata": {},
"outputs": [],
"source": [
"# Replace 0 values with the rounded mean square footage of the same letter grade.\n",
"# The means are computed from the nonzero, non-F rows instead of being typed in by hand,\n",
"# so the imputed values stay correct if the source data changes.\n",
"# F rows are left at 0 on purpose (no benchmarking data was submitted).\n",
"\n",
"is_zero = df['Sq_Footage'] == 0\n",
"is_f = df['Letter_Score'] == 'F'\n",
"grade_means = (\n",
"    df.loc[~is_zero & ~is_f]\n",
"    .groupby('Letter_Score')['Sq_Footage']\n",
"    .mean()\n",
"    .round()\n",
"    .astype('int64')  # keep Sq_Footage an integer column\n",
")\n",
"\n",
"# Only fill grades that have at least one known footage to average.\n",
"to_fill = is_zero & ~is_f & df['Letter_Score'].isin(grade_means.index)\n",
"df.loc[to_fill, 'Sq_Footage'] = df.loc[to_fill, 'Letter_Score'].map(grade_means).astype('int64')"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "37eb13ea",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"Letter_Score \n",
"F 14 14 14 14 14 14 \n",
"\n",
" Address Borough BBL Energy_Score \n",
"Letter_Score \n",
"F 14 14 14 14 "
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now the only 0 values should be for F grades\n",
"\n",
"df[df['Sq_Footage'] == 0].groupby(['Letter_Score']).count()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "913b2ae6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block int64\n",
"Lot int64\n",
"Bldg_Class object\n",
"Tax_Class int64\n",
"Bldg_Count int64\n",
"Sq_Footage int64\n",
"Address object\n",
"Borough object\n",
"BBL int64\n",
"Energy_Score int64\n",
"Letter_Score object\n",
"dtype: object"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TODO: delete this cell before submitting (scratch check of the dtypes after the imputation above)\n",
"\n",
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "007f1189",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11319</th>\n",
" <td>149</td>\n",
" <td>7502</td>\n",
" <td>U7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>138 WILLOUGHBY STREET</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11611</th>\n",
" <td>165</td>\n",
" <td>7504</td>\n",
" <td>U7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>133271</td>\n",
" <td>35 HOYT STREET</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>75</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13351</th>\n",
" <td>5804</td>\n",
" <td>2</td>\n",
" <td>U6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>COLONIAL ROAD</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14570</th>\n",
" <td>5322</td>\n",
" <td>4</td>\n",
" <td>V1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>111197</td>\n",
" <td>23 OCEAN PARKWAY</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>100</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14668</th>\n",
" <td>5799</td>\n",
" <td>59</td>\n",
" <td>D9</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>38315</td>\n",
" <td>3641 JOHNSON AVENUE</td>\n",
" <td>BRONX</td>\n",
" <td>2057990059</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15726</th>\n",
" <td>4282</td>\n",
" <td>100</td>\n",
" <td>V1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25-70 REAR WHITESTONE EXPRESSWAY SR WEST</td>\n",
" <td>QUEENS</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"11319 149 7502 U7 0 0 0 \n",
"11611 165 7504 U7 0 0 133271 \n",
"13351 5804 2 U6 0 0 0 \n",
"14570 5322 4 V1 0 0 111197 \n",
"14668 5799 59 D9 0 0 38315 \n",
"15726 4282 100 V1 0 0 0 \n",
"\n",
" Address Borough BBL \\\n",
"11319 138 WILLOUGHBY STREET BROOKLYN -2147483648 \n",
"11611 35 HOYT STREET BROOKLYN -2147483648 \n",
"13351 COLONIAL ROAD BROOKLYN -2147483648 \n",
"14570 23 OCEAN PARKWAY BROOKLYN -2147483648 \n",
"14668 3641 JOHNSON AVENUE BRONX 2057990059 \n",
"15726 25-70 REAR WHITESTONE EXPRESSWAY SR WEST QUEENS -2147483648 \n",
"\n",
" Energy_Score Letter_Score \n",
"11319 0 F \n",
"11611 75 B \n",
"13351 0 F \n",
"14570 100 A \n",
"14668 0 F \n",
"15726 0 F "
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Unexpected values, continued\n",
"\n",
"df[df['Bldg_Count'] == 0]"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "3479152f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11611</th>\n",
" <td>165</td>\n",
" <td>7504</td>\n",
" <td>U7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>133271</td>\n",
" <td>35 HOYT STREET</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>75</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14570</th>\n",
" <td>5322</td>\n",
" <td>4</td>\n",
" <td>V1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>111197</td>\n",
" <td>23 OCEAN PARKWAY</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>100</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"11611 165 7504 U7 0 0 133271 \n",
"14570 5322 4 V1 0 0 111197 \n",
"\n",
" Address Borough BBL Energy_Score Letter_Score \n",
"11611 35 HOYT STREET BROOKLYN -2147483648 75 B \n",
"14570 23 OCEAN PARKWAY BROOKLYN -2147483648 100 A "
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How can a block have zero buildings?\n",
"# Again, we'll leave the F grades as is.\n",
"\n",
"df[(df['Bldg_Count'] == 0) & (df['Letter_Score'] != 'F')]"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "6adfd92a",
"metadata": {},
"outputs": [],
"source": [
"# Have a peek at the Department of Finance Tax Map: http://gis.nyc.gov/taxmap/map.htm\n",
"\n",
"# Looks like Bldg_Count = 1 for both. However, I'm not comfortable with imputing data\n",
"# by eyeballing it."
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "7503be1f",
"metadata": {},
"outputs": [],
"source": [
"# Let's just drop them.\n",
"#\n",
"# Select the rows by condition rather than by hard-coded index labels\n",
"# (11611 and 14570 on the first run). Dropping by label raises KeyError when\n",
"# this cell is re-run and relies on the index never changing upstream; the\n",
"# condition-based drop is a no-op once the rows are gone.\n",
"\n",
"zero_bldg_not_f = (df['Bldg_Count'] == 0) & (df['Letter_Score'] != 'F')\n",
"df.drop(index=df.index[zero_bldg_not_f], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab6ecc85",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}