DATA201_projects/capstone_project.ipynb

2566 lines
264 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "7530e3b7",
"metadata": {},
"source": [
"# NYC Building Energy Ratings"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "fe05b4a4",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "39a4ce3f",
"metadata": {},
"outputs": [],
"source": [
"# Location of the Local Law 33 CSV export that this notebook analyses.\n",
"DATA_PATH = '~/Downloads/DOB_Sustainability_Compliance_Map__Local_Law_33.csv'\n",
"\n",
"df = pd.read_csv(DATA_PATH)"
]
},
{
"cell_type": "markdown",
"id": "e0e97c85",
"metadata": {},
"source": [
"## Part 1: Data Exploration"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6b430c20",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(21681, 11)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "917a6779",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>BoroughName</th>\n",
" <th>BBL</th>\n",
" <th>ENERGY STAR Score</th>\n",
" <th>LetterScore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>Y4</td>\n",
" <td>0</td>\n",
" <td>124</td>\n",
" <td>2598091</td>\n",
" <td>920 GRESHAM ROAD</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000010010</td>\n",
" <td>1</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>23</td>\n",
" <td>T2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>24346</td>\n",
" <td>20 SOUTH STREET</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000020023</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>7501</td>\n",
" <td>R0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2542563</td>\n",
" <td>1 WATER STREET</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000047501</td>\n",
" <td>61</td>\n",
" <td>C</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"0 1 10 Y4 0 124 \n",
"1 2 23 T2 0 1 \n",
"2 4 7501 R0 2 1 \n",
"\n",
" DOF_Gross_Square_Footage Address BoroughName BBL \\\n",
"0 2598091 920 GRESHAM ROAD MANHATTAN 1000010010 \n",
"1 24346 20 SOUTH STREET MANHATTAN 1000020023 \n",
"2 2542563 1 WATER STREET MANHATTAN 1000047501 \n",
"\n",
" ENERGY STAR Score LetterScore \n",
"0 1 D \n",
"1 0 F \n",
"2 61 C "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "38d0ac47",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Block', 'Lot', 'Building_Class', 'Tax_Class', 'Building_Count',\n",
" 'DOF_Gross_Square_Footage', 'Address', 'BoroughName', 'BBL',\n",
" 'ENERGY STAR Score', 'LetterScore'],\n",
" dtype='object')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "adf4092b",
"metadata": {},
"outputs": [],
"source": [
"# Columns seem to be self-explanatory, except BBL. According to NYC OpenData:\n",
"# \"Borough Block and Lot identifier as assigned by NYC Department of Finance\"."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "276d9619",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MANHATTAN 7858\n",
"BROOKLYN 5469\n",
"BRONX 4349\n",
"QUEENS 3659\n",
"STATEN ISLAND 346\n",
"Name: BoroughName, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Is this citywide or just Manhattan?\n",
"\n",
"df['BoroughName'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d3c8c305",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block 0\n",
"Lot 0\n",
"Building_Class 2\n",
"Tax_Class 0\n",
"Building_Count 0\n",
"DOF_Gross_Square_Footage 0\n",
"Address 7\n",
"BoroughName 0\n",
"BBL 0\n",
"ENERGY STAR Score 0\n",
"LetterScore 0\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Missing data?\n",
"\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "64eb852e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>BoroughName</th>\n",
" <th>BBL</th>\n",
" <th>ENERGY STAR Score</th>\n",
" <th>LetterScore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4254</th>\n",
" <td>1595</td>\n",
" <td>7501</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1330 5 AVENUE</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1015950031</td>\n",
" <td>64</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8124</th>\n",
" <td>3016</td>\n",
" <td>7502</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1926 LONGFELLOW AVENUE</td>\n",
" <td>BRONX</td>\n",
" <td>2030160038</td>\n",
" <td>100</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"4254 1595 7501 NaN 0 0 \n",
"8124 3016 7502 NaN 0 0 \n",
"\n",
" DOF_Gross_Square_Footage Address BoroughName \\\n",
"4254 0 1330 5 AVENUE MANHATTAN \n",
"8124 0 1926 LONGFELLOW AVENUE BRONX \n",
"\n",
" BBL ENERGY STAR Score LetterScore \n",
"4254 1015950031 64 C \n",
"8124 2030160038 100 A "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['Building_Class'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cdf678d2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>BoroughName</th>\n",
" <th>BBL</th>\n",
" <th>ENERGY STAR Score</th>\n",
" <th>LetterScore</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1228</th>\n",
" <td>506</td>\n",
" <td>12</td>\n",
" <td>W3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>49475</td>\n",
" <td>NaN</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1005060012</td>\n",
" <td>10</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7145</th>\n",
" <td>1734</td>\n",
" <td>1</td>\n",
" <td>I1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>1017118</td>\n",
" <td>NaN</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1017340001</td>\n",
" <td>7</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9225</th>\n",
" <td>2758</td>\n",
" <td>6</td>\n",
" <td>N9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>17200</td>\n",
" <td>NaN</td>\n",
" <td>BRONX</td>\n",
" <td>2027580006</td>\n",
" <td>89</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9226</th>\n",
" <td>2758</td>\n",
" <td>36</td>\n",
" <td>N9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>37060</td>\n",
" <td>NaN</td>\n",
" <td>BRONX</td>\n",
" <td>2027580036</td>\n",
" <td>66</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13711</th>\n",
" <td>1769</td>\n",
" <td>72</td>\n",
" <td>C1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>30720</td>\n",
" <td>NaN</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15056</th>\n",
" <td>1602</td>\n",
" <td>13</td>\n",
" <td>C1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>14720</td>\n",
" <td>NaN</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16381</th>\n",
" <td>3755</td>\n",
" <td>22</td>\n",
" <td>C1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>25564</td>\n",
" <td>NaN</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"1228 506 12 W3 0 1 \n",
"7145 1734 1 I1 0 5 \n",
"9225 2758 6 N9 0 1 \n",
"9226 2758 36 N9 0 1 \n",
"13711 1769 72 C1 0 1 \n",
"15056 1602 13 C1 0 1 \n",
"16381 3755 22 C1 0 1 \n",
"\n",
" DOF_Gross_Square_Footage Address BoroughName BBL \\\n",
"1228 49475 NaN MANHATTAN 1005060012 \n",
"7145 1017118 NaN MANHATTAN 1017340001 \n",
"9225 17200 NaN BRONX 2027580006 \n",
"9226 37060 NaN BRONX 2027580036 \n",
"13711 30720 NaN BROOKLYN -2147483648 \n",
"15056 14720 NaN BROOKLYN -2147483648 \n",
"16381 25564 NaN BROOKLYN -2147483648 \n",
"\n",
" ENERGY STAR Score LetterScore \n",
"1228 10 D \n",
"7145 7 D \n",
"9225 89 A \n",
"9226 66 C \n",
"13711 0 F \n",
"15056 0 F \n",
"16381 0 F "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['Address'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e205df03",
"metadata": {},
"outputs": [],
"source": [
"# Missing Address is not a big deal because the rest of the values are complete.\n",
"# But missing Building Class could be a problem.\n",
"\n",
"# The two offending rows also have Building Count = 0.\n",
"# How is that possible, since they have Energy Star scores?\n",
"\n",
"# In the next section we may decide to drop those two rows."
]
},
{
"cell_type": "markdown",
"id": "4d539a8c",
"metadata": {},
"source": [
"## Part 2: Data Cleaning"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "614dbd9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block int64\n",
"Lot int64\n",
"Building_Class object\n",
"Tax_Class int64\n",
"Building_Count int64\n",
"DOF_Gross_Square_Footage int64\n",
"Address object\n",
"BoroughName object\n",
"BBL int64\n",
"ENERGY STAR Score int64\n",
"LetterScore object\n",
"dtype: object"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Which columns are informative?\n",
"\n",
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6c58a084",
"metadata": {},
"outputs": [],
"source": [
"# Interesting for analysis:\n",
"\n",
"# DOF_Gross_Square_Footage\n",
"# ENERGY STAR Score\n",
"# LetterScore\n",
"\n",
"# Other columns are less interesting:\n",
"\n",
"# Building_Count is the number of buildings in one Block.\n",
"# Block can have more than one Lot, but Lot only has one Block.\n",
"# Block, Lot and BBL are identifiers assigned by the city.\n",
"\n",
"# A good visual reference is the Digital Tax Map put out by the NYC Department of Finance:\n",
"# http://gis.nyc.gov/taxmap/map.htm"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "14213bd2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Can any identifiers be used as an index?\n",
"\n",
"df['Block'].is_unique"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "1e1a5e9b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Lot'].is_unique"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "67b7f633",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['BBL'].is_unique"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "c4469ca8",
"metadata": {},
"outputs": [],
"source": [
"# Since their values are not unique, they cannot be used as an index."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "782190b5",
"metadata": {},
"outputs": [],
"source": [
"# Shall we rename or discard any columns from this dataset?\n",
"\n",
"# BBL and Tax Class could be eliminated. However, there are only 11 columns total, and since df.head() is easily readable on my monitor without scrolling horizontally (as you're doing now), I see no harm in keeping them."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "e085ba33",
"metadata": {},
"outputs": [],
"source": [
"# Rename columns containing whitespace or camelcase\n",
"\n",
"# Map each original header to a snake_case name; headers not listed are left as they are.\n",
"# Rebind df to the renamed frame rather than mutating it with inplace=True.\n",
"snake_case_names = {\n",
"    \"BoroughName\": \"Borough_Name\",\n",
"    \"ENERGY STAR Score\": \"Energy_Star_Score\",\n",
"    \"LetterScore\": \"Letter_Score\",\n",
"}\n",
"df = df.rename(columns=snake_case_names)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "c4a8ebb7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Building_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Building_Count</th>\n",
" <th>DOF_Gross_Square_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough_Name</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Star_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>Y4</td>\n",
" <td>0</td>\n",
" <td>124</td>\n",
" <td>2598091</td>\n",
" <td>920 GRESHAM ROAD</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000010010</td>\n",
" <td>1</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Building_Class Tax_Class Building_Count \\\n",
"0 1 10 Y4 0 124 \n",
"\n",
" DOF_Gross_Square_Footage Address Borough_Name BBL \\\n",
"0 2598091 920 GRESHAM ROAD MANHATTAN 1000010010 \n",
"\n",
" Energy_Star_Score Letter_Score \n",
"0 1 D "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "38de98e9",
"metadata": {},
"outputs": [],
"source": [
"# Unforeseen consequence of renaming: now I have to scroll horizontally."
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "c0b5504f",
"metadata": {},
"outputs": [],
"source": [
"# Rename columns once more\n",
"\n",
"# Shorten the longest headers so df.head() fits on screen without horizontal scrolling.\n",
"# Rebind df to the renamed frame rather than mutating it with inplace=True.\n",
"short_names = {\n",
"    \"DOF_Gross_Square_Footage\": \"Sq_Footage\",\n",
"    \"Energy_Star_Score\": \"Energy_Score\",\n",
"    \"Borough_Name\": \"Borough\",\n",
"    \"Building_Class\": \"Bldg_Class\",\n",
"    \"Building_Count\": \"Bldg_Count\",\n",
"}\n",
"df = df.rename(columns=short_names)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "0d3cf300",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>Y4</td>\n",
" <td>0</td>\n",
" <td>124</td>\n",
" <td>2598091</td>\n",
" <td>920 GRESHAM ROAD</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1000010010</td>\n",
" <td>1</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage Address \\\n",
"0 1 10 Y4 0 124 2598091 920 GRESHAM ROAD \n",
"\n",
" Borough BBL Energy_Score Letter_Score \n",
"0 MANHATTAN 1000010010 1 D "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c1c2e027",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block 0\n",
"Lot 0\n",
"Bldg_Class 2\n",
"Tax_Class 0\n",
"Bldg_Count 0\n",
"Sq_Footage 0\n",
"Address 7\n",
"Borough 0\n",
"BBL 0\n",
"Energy_Score 0\n",
"Letter_Score 0\n",
"dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Repeat the search for missing data\n",
"\n",
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "5debf1d6",
"metadata": {},
"outputs": [],
"source": [
"# Ignore the 7 missing addresses, but drop the 2 rows with missing Building Class.\n",
"# Building Class is a feature that will be used in the df.groupby() function.\n",
"\n",
"df.dropna(subset = ['Bldg_Class'], inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "5d2eb339",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Block 0\n",
"Lot 0\n",
"Bldg_Class 0\n",
"Tax_Class 0\n",
"Bldg_Count 0\n",
"Sq_Footage 0\n",
"Address 7\n",
"Borough 0\n",
"BBL 0\n",
"Energy_Score 0\n",
"Letter_Score 0\n",
"dtype: int64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "632701c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Search for unexpected data\n",
"\n",
"# df['Energy_Score'].min() # looks good\n",
"# df['Energy_Score'].max() # looks good\n",
"# df['Sq_Footage'].max() # looks good\n",
"df['Sq_Footage'].min()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "c1f3edc4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"Letter_Score \n",
"A 3 3 3 3 3 3 \n",
"B 1 1 1 1 1 1 \n",
"C 5 5 5 5 5 5 \n",
"D 6 6 6 6 6 6 \n",
"F 14 14 14 14 14 14 \n",
"\n",
" Address Borough BBL Energy_Score \n",
"Letter_Score \n",
"A 3 3 3 3 \n",
"B 1 1 1 1 \n",
"C 5 5 5 5 \n",
"D 6 6 6 6 \n",
"F 14 14 14 14 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A building cannot have zero square feet of floorspace.\n",
"# What's going on?\n",
"\n",
"df[df['Sq_Footage'] == 0].groupby(['Letter_Score']).count()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "e27467ce",
"metadata": {},
"outputs": [],
"source": [
"# The ones with F can be explained:\n",
"# An F grade means that the building owner \"didn't submit required benchmarking information\",\n",
"# according to Local Law 95 of 2019. So it's not that the building has no square footage,\n",
"# but that the data was not submitted. Thus the failing grade.\n",
"\n",
"# We'll leave 0 square feet with F grade untouched.\n",
"\n",
"# For more information, see https://www1.nyc.gov/site/buildings/codes/benchmarking.page"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "b73e15d9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"Letter_Score \n",
"A 3 3 3 3 3 3 \n",
"B 1 1 1 1 1 1 \n",
"C 5 5 5 5 5 5 \n",
"D 6 6 6 6 6 6 \n",
"\n",
" Address Borough BBL Energy_Score \n",
"Letter_Score \n",
"A 3 3 3 3 \n",
"B 1 1 1 1 \n",
"C 5 5 5 5 \n",
"D 6 6 6 6 "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# What to do with the others?\n",
"\n",
"df[(df['Sq_Footage'] == 0) & (df['Letter_Score'] != 'F')].groupby(['Letter_Score']).count()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "47145374",
"metadata": {},
"outputs": [],
"source": [
"# 15 rows remain with 0 square feet of floorspace.\n",
"# Can we impute values from the mean square footage for each grade?\n",
"\n",
"# (There must be an elegant way to do this. What you see below is not.)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "2d643fd6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sq_Footage</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>111197.291071</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>133270.963702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>128833.575964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>108170.778312</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sq_Footage\n",
"Letter_Score \n",
"A 111197.291071\n",
"B 133270.963702\n",
"C 128833.575964\n",
"D 108170.778312"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First, get averages\n",
"\n",
"subset0 = df[['Letter_Score', 'Sq_Footage']]\n",
"subset1 = subset0[(subset0['Letter_Score'] != 'F') & (subset0['Sq_Footage'] != 0)]\n",
"subset1.groupby(['Letter_Score']).mean()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "87c2ba5e",
"metadata": {},
"outputs": [],
"source": [
"# Assign variables, rounding to whole numbers.\n",
"# The means are derived from subset1 (non-F rows with non-zero square footage, built in\n",
"# the previous cell) instead of being retyped from the displayed table, so they cannot\n",
"# drift out of sync with the data.\n",
"\n",
"mean_sq_footage_by_grade = subset1.groupby('Letter_Score')['Sq_Footage'].mean().round()\n",
"\n",
"# int() keeps these plain Python integers, matching the integer Sq_Footage column.\n",
"mean_A = int(mean_sq_footage_by_grade['A'])\n",
"mean_B = int(mean_sq_footage_by_grade['B'])\n",
"mean_C = int(mean_sq_footage_by_grade['C'])\n",
"mean_D = int(mean_sq_footage_by_grade['D'])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "27912675",
"metadata": {},
"outputs": [],
"source": [
"# Replace 0 values with mean_A, mean_B, etc.\n",
"\n",
"# One pass per letter grade: rows of that grade whose square footage is 0 receive\n",
"# the grade's mean. F rows are not listed, so their zeros are left untouched.\n",
"replacement_by_grade = {'A': mean_A, 'B': mean_B, 'C': mean_C, 'D': mean_D}\n",
"\n",
"for grade, replacement in replacement_by_grade.items():\n",
"    zero_rows_for_grade = (df['Letter_Score'] == grade) & (df['Sq_Footage'] == 0)\n",
"    df.loc[zero_rows_for_grade, 'Sq_Footage'] = replacement"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "8124743f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Letter_Score</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"Letter_Score \n",
"F 14 14 14 14 14 14 \n",
"\n",
" Address Borough BBL Energy_Score \n",
"Letter_Score \n",
"F 14 14 14 14 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now the only 0 values should be for F grades\n",
"\n",
"df[df['Sq_Footage'] == 0].groupby(['Letter_Score']).count()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "b83622b1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11319</th>\n",
" <td>149</td>\n",
" <td>7502</td>\n",
" <td>U7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>138 WILLOUGHBY STREET</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11611</th>\n",
" <td>165</td>\n",
" <td>7504</td>\n",
" <td>U7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>133271</td>\n",
" <td>35 HOYT STREET</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>75</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13351</th>\n",
" <td>5804</td>\n",
" <td>2</td>\n",
" <td>U6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>COLONIAL ROAD</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14570</th>\n",
" <td>5322</td>\n",
" <td>4</td>\n",
" <td>V1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>111197</td>\n",
" <td>23 OCEAN PARKWAY</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>100</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14668</th>\n",
" <td>5799</td>\n",
" <td>59</td>\n",
" <td>D9</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>38315</td>\n",
" <td>3641 JOHNSON AVENUE</td>\n",
" <td>BRONX</td>\n",
" <td>2057990059</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15726</th>\n",
" <td>4282</td>\n",
" <td>100</td>\n",
" <td>V1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25-70 REAR WHITESTONE EXPRESSWAY SR WEST</td>\n",
" <td>QUEENS</td>\n",
" <td>-2147483648</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"11319 149 7502 U7 0 0 0 \n",
"11611 165 7504 U7 0 0 133271 \n",
"13351 5804 2 U6 0 0 0 \n",
"14570 5322 4 V1 0 0 111197 \n",
"14668 5799 59 D9 0 0 38315 \n",
"15726 4282 100 V1 0 0 0 \n",
"\n",
" Address Borough BBL \\\n",
"11319 138 WILLOUGHBY STREET BROOKLYN -2147483648 \n",
"11611 35 HOYT STREET BROOKLYN -2147483648 \n",
"13351 COLONIAL ROAD BROOKLYN -2147483648 \n",
"14570 23 OCEAN PARKWAY BROOKLYN -2147483648 \n",
"14668 3641 JOHNSON AVENUE BRONX 2057990059 \n",
"15726 25-70 REAR WHITESTONE EXPRESSWAY SR WEST QUEENS -2147483648 \n",
"\n",
" Energy_Score Letter_Score \n",
"11319 0 F \n",
"11611 75 B \n",
"13351 0 F \n",
"14570 100 A \n",
"14668 0 F \n",
"15726 0 F "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Unexpected values, continued\n",
"\n",
"df[df['Bldg_Count'] == 0]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "01c231f3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11611</th>\n",
" <td>165</td>\n",
" <td>7504</td>\n",
" <td>U7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>133271</td>\n",
" <td>35 HOYT STREET</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>75</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14570</th>\n",
" <td>5322</td>\n",
" <td>4</td>\n",
" <td>V1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>111197</td>\n",
" <td>23 OCEAN PARKWAY</td>\n",
" <td>BROOKLYN</td>\n",
" <td>-2147483648</td>\n",
" <td>100</td>\n",
" <td>A</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"11611 165 7504 U7 0 0 133271 \n",
"14570 5322 4 V1 0 0 111197 \n",
"\n",
" Address Borough BBL Energy_Score Letter_Score \n",
"11611 35 HOYT STREET BROOKLYN -2147483648 75 B \n",
"14570 23 OCEAN PARKWAY BROOKLYN -2147483648 100 A "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How can a block have zero buildings?\n",
"# Again, we'll leave the F grades as is.\n",
"\n",
"df[(df['Bldg_Count'] == 0) & (df['Letter_Score'] != 'F')]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "2bc61602",
"metadata": {},
"outputs": [],
"source": [
"# Have a peek at the Department of Finance Tax Map: http://gis.nyc.gov/taxmap/map.htm\n",
"\n",
"# Looks like Bldg_Count = 1 for both. However, rather than eyeballing it, let's just drop them."
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "396a0fdd",
"metadata": {},
"outputs": [],
"source": [
"# Drop two rows\n",
"\n",
"df.drop([11611, 14570], inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "4e874379",
"metadata": {},
"outputs": [],
"source": [
"# Finish cleaning\n",
"\n",
"# df['Tax_Class'].value_counts() # looks good\n",
"# df['Bldg_Class'].value_counts() # looks good"
]
},
{
"cell_type": "markdown",
"id": "d22ba85a",
"metadata": {},
"source": [
"## Part 3: Analysis"
]
},
{
"cell_type": "markdown",
"id": "1e5fdc73",
"metadata": {},
"source": [
"### What is the relationship between a building's size and its energy rating?"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "73f50d5c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Y4', 'T2', 'R0', 'O4', 'W1', 'O6', 'D5', 'D9', 'D0', 'O3', 'H3',\n",
" 'H2', 'D7', 'V1', 'D6', 'K6', 'D8', 'W8', 'O2', 'H1', 'D3', 'K4',\n",
" 'H9', 'K3', 'HB', 'RM', 'H5', 'O5', 'D4', 'HS', 'E9', 'D2', 'O7',\n",
" 'W5', 'U7', 'M1', 'I1', 'K2', 'Z1', 'W6', 'K9', 'Z3', 'H8', 'S4',\n",
" 'E1', 'C7', 'W2', 'C9', 'D1', 'C1', 'HR', 'O9', 'I9', 'K1', 'I6',\n",
" 'G1', 'N2', 'Y2', 'I7', 'M9', 'G2', 'I5', 'C4', 'E7', 'P9', 'W9',\n",
" 'P5', 'N9', 'S3', 'W3', 'J4', 'C6', 'M2', 'P7', 'W7', 'J3', 'H6',\n",
" 'P8', 'F9', 'G9', 'Y8', 'J8', 'F5', 'C5', 'N4', 'I3', 'P3', 'J6',\n",
" 'P2', 'W4', 'RC', 'I2', 'K5', 'J5', 'I4', 'M4', 'G8', 'J7', 'HH',\n",
" 'O8', 'M3', 'U0', 'O1', 'F1', 'F2', 'F4', 'H4', 'E2', 'Y1', 'Y6',\n",
" 'Z9', 'R2', 'Q6', 'K7', 'U6', 'RD', 'Y9', 'Q1', 'T9', 'V9', 'U9',\n",
" 'K8', 'U5', 'R4', 'G7', 'F8', 'J9', 'N3', 'P6', 'J2', 'GW', 'T1',\n",
" 'R3', 'C8', 'RS', 'Q2', 'V7', 'Q4', 'Y7'], dtype=object)"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# There are many building types\n",
"\n",
"df['Bldg_Class'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "5449a9dd",
"metadata": {},
"outputs": [],
"source": [
"# It wouldn't make sense to compare, say, residential with commercial buildings.\n",
    "# For an apples-to-apples comparison, let's look at office buildings.\n",
"\n",
"# O1\tOFFICE ONLY - 1 STORY\n",
"# O2\tOFFICE ONLY 2 - 6 STORIES\n",
"# O3\tOFFICE ONLY 7 - 19 STORIES\n",
"# O4\tOFFICE ONLY WITH OR WITHOUT COMM - 20 STORIES OR MORE\n",
"# O5\tOFFICE WITH COMM - 1 TO 6 STORIES\n",
"# O6\tOFFICE WITH COMM 7 - 19 STORIES\n",
"# O7\tPROFESSIONAL BUILDINGS/STAND ALONE FUNERAL HOMES\n",
"# O8\tOFFICE WITH APARTMENTS ONLY (NO COMM)\n",
"# O9\tMISCELLANEOUS AND OLD STYLE BANK BLDGS\n",
"\n",
"# Building glossary: https://www1.nyc.gov/assets/finance/jump/hlpbldgcode.html"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "bc229011",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Block</th>\n",
" <th>Lot</th>\n",
" <th>Bldg_Class</th>\n",
" <th>Tax_Class</th>\n",
" <th>Bldg_Count</th>\n",
" <th>Sq_Footage</th>\n",
" <th>Address</th>\n",
" <th>Borough</th>\n",
" <th>BBL</th>\n",
" <th>Energy_Score</th>\n",
" <th>Letter_Score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1473</th>\n",
" <td>702</td>\n",
" <td>10</td>\n",
" <td>O4</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1835464</td>\n",
" <td>501 WEST 30 STREET</td>\n",
" <td>MANHATTAN</td>\n",
" <td>1007020010</td>\n",
" <td>58</td>\n",
" <td>C</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Block Lot Bldg_Class Tax_Class Bldg_Count Sq_Footage \\\n",
"1473 702 10 O4 4 1 1835464 \n",
"\n",
" Address Borough BBL Energy_Score Letter_Score \n",
"1473 501 WEST 30 STREET MANHATTAN 1007020010 58 C "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Context: 10 Hudson Yards. A new building with a poor energy rating.\n",
"\n",
"# It's one of the large glass and steel buildings that have recently cropped up in Manhattan (2016).\n",
"# Unfortunately, the dataset does not contain the newest behemoths to arise since then,\n",
"# like 30 Hudson Yards.\n",
"\n",
"df[(df['Block'] == 702) & (df['Lot'] == 10)]"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "a344e1a3",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQQUlEQVR4nO3dbbBdVX3H8e9PgvJgGaFcaAzQC50MiIwUjJaKtdboqKAE26GNUzqppaYdaUXrjAZ1in3hDJ2xPnRarREfIloUkUIqrRXj0/RFwfAwFQg0VDBEIrlqK0odAvrvi7NZvaYJOdzcc3Zyz/czkzl7r7PP2f81CefHWnufdVJVSJIE8KS+C5Ak7TsMBUlSYyhIkhpDQZLUGAqSpGZR3wXsjSOPPLKmp6f7LkOS9is33XTTd6tqalfP7dehMD09zcaNG/suQ5L2K0m+tbvnnD6SJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNfv1N5ol7Vum11zXy3nvvfTsXs67EDlSkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSc3IQiHJR5JsT3LbrLYjklyfZHP3ePis5y5OcneSu5K8dFR1SZJ2b5QjhY8BL9upbQ2woaqWAhu6fZKcDKwEntm95v1JDhhhbZKkXRhZKFTV14Dv79S8AljXba8Dzp3V/qmqeriq7gHuBp47qtokSbs27msKR1fVNoDu8aiufQlw36zjtnZt/0+S1Uk2Jtk4MzMz0mIladLsKxeas4u22tWBVbW2qpZV1bKpqakRlyVJk2XcofBAksUA3eP2rn0rcOys444B7h9zbZI08cYdCuuBVd32KuDaWe0rkzwlyfHAUuDGMdcmSRNvZD/HmeQK4IXAkUm2ApcAlwJXJrkA2AKcB1BVtye5ErgDeBS4sKp+MqraJEm7NrJQqKpX7+ap5bs5/p3AO0dVjyRpz/aVC82SpH2AoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktT0EgpJ3pjk9iS3JbkiyUFJjkhyfZLN3ePhfdQmSZNs7KGQZAnwemBZVZ0CHACsBNYAG6pqKbCh25ckjVFf00eLgIOTLAIOAe4HVgDruufXAef2U5okTa6xh0JVfRt4F7AF2Ab8oKq+ABxdVdu6Y7YBR+3q9UlWJ9mYZOPMzMy4ypakidDH9NHhDEYFxwNPBw5Ncv6wr6+qtVW1rKqWTU1NjapMSZpIfUwfvRi4p6pmquoR4GrgecADSRYDdI/be6hNkiZaH6GwBTgjySFJAiwHNgHrgVXdMauAa3uoTZIm2qJxn7CqbkhyFXAz8ChwC7AWeCpwZZILGATHeeOuTZIm3dhDAaCqLgEu2an5YQajBklST/xGsySpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1PRyS6qk0Zlec13fJWg/5khBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1AwVCklOGXUhkqT+DTtS+LskNyZ5XZKnjbIgSVJ/hgqFqno+8LvAscDGJH+f5CUjrUySNHZDX1Ooqs3A24G3AL8O/HWSO5P85qiKkySN17DXFJ6V5D3AJuBFwCur6hnd9ntGWJ8kaYyGXTr7b4APAW+tqh8/1lhV9yd5+0gqkySN3bChcBbw46r6CUCSJwEHVdX/VNXlI6tOkjRWw15T+CJw8Kz9Q7o2SdICMmwoHFRVP3psp9s+ZDQlSZL6MmwoPJTk9Md2kjwb+PHjHC9J2g8Ne03hDcBnktzf7S8GfmckFUmSejNUKFTV15OcBJwIBLizqh4ZaWWSpLEbdqQA8Bxgun
vNaUmoqo+PpCpJUi+GCoUklwO/BNwK/KRrLsBQkKQFZNiRwjLg5Kqq+Thpt6jeZcApDMLlD4C7gE8zGI3cC/x2Vf3XfJxPkjScYe8+ug34hXk87/uAz1fVScCpDJbPWANsqKqlwIZuX5I0RsOOFI4E7khyI/DwY41Vdc4TPWGSw4AXAL/fvccOYEeSFcALu8PWAV9hsPieJGlMhg2Fd8zjOU8AZoCPJjkVuAm4CDi6qrYBVNW2JEft6sVJVgOrAY477rh5LEvS/mp6zXW9nPfeS8/u5byjNOzvKXyVwTz/gd3214Gb53jORcDpwAeq6jTgIZ7AVFFVra2qZVW1bGpqao4lSJJ2Zdils18LXAV8sGtaAlwzx3NuBbZW1Q3d/lUMQuKBJIu78y0Gts/x/SVJczTsheYLgTOBB6H94M4up3f2pKq+A9yX5MSuaTlwB7AeWNW1rQKuncv7S5LmbthrCg9X1Y4kACRZxOBW0rn6U+CTSZ4MfBN4DYOAujLJBcAW4Ly9eH9J0hwMGwpfTfJW4ODut5lfB/zjXE9aVbcy+O7DzpbP9T0lSXtv2OmjNQzuGPoG8EfAPzH4vWZJ0gIy7IJ4P2Xwc5wfGm05kqQ+Dbv20T3s4hpCVZ0w7xVJknrzRNY+esxBDC4CHzH/5UiS+jTsl9e+N+vPt6vqvcCLRluaJGnchp0+On3W7pMYjBx+biQVSZJ6M+z00V/N2n6Ubmnrea9GktSrYe8++o1RFyJJ6t+w00d/9njPV9W756ccSVKfnsjdR89hsD4RwCuBrwH3jaIoSVI/nsiP7JxeVT8ESPIO4DNV9YejKkySNH7DLnNxHLBj1v4OBr+lLElaQIYdKVwO3JjkHxh8s/lVwMdHVpUkqRfD3n30ziT/DPxa1/SaqrpldGVJkvow7PQRwCHAg1X1PmBrkuNHVJMkqSfD/hznJcBbgIu7pgOBT4yqKElSP4YdKbwKOAd4CKCq7sdlLiRpwRn2QvOOqqokBZDk0BHWJC0I02uu67sE6QkbdqRwZZIPAk9L8lrgi/iDO5K04OxxpJAkwKeBk4AHgROBP6+q60dcmyRpzPYYCt200TVV9WzAIJCkBWzY6aN/S/KckVYiSerdsBeafwP44yT3MrgDKQwGEc8aVWGSpPF73FBIclxVbQFePqZ6JEk92tNI4RoGq6N+K8lnq+q3xlCTJKkne7qmkFnbJ4yyEElS//YUCrWbbUnSArSn6aNTkzzIYMRwcLcN/3eh+bCRVidJGqvHDYWqOmBchUiS+vdEls6eV0kOSHJLks91+0ckuT7J5u7x8L5qk6RJ1VsoABcBm2btrwE2VNVSYEO3L0kao15CIckxwNnAZbOaVwDruu11wLljLkuSJl5fI4X3Am8Gfjqr7eiq2gbQPR61qxcmWZ1kY5KNMzMzIy9UkibJ2EMhySuA7VV101xeX1Vrq2pZVS2bmpqa5+okabINu/bRfDoTOCfJWcBBwGFJPgE8kGRxVW1LshjY3kNtkjTRxj5SqKqLq+qYqpoGVgJfqqrzgfXAqu6wVcC1465NkiZdn3cf7exS4CVJNgMv6fYlSWPUx/RRU1VfAb7SbX8PWN5nPZI06falkYIkqWeGgiSpMRQkSY2hIElqDAVJUmMoSJKaXm9JlUZtes11fZcg7VccKUiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEnN2EMhybFJvpxkU5Lbk1zUtR+R5Pokm7vHw8ddmyRNuj5GCo8Cb6qqZwBnABcmORlYA2yoqqXAhm5fkjRGYw+FqtpWVTd32z8ENgFLgBXAuu6wdcC5465NkiZdr9cUkkwDpwE3AEdX1TYYBAdw1G5eszrJxiQbZ2ZmxlarJE2C3kIhyVOBzwJvqKoHh31dVa2tqmVVtWxqamp0BU
rSBOolFJIcyCAQPllVV3fNDyRZ3D2/GNjeR22SNMn6uPsowIeBTVX17llPrQdWddurgGvHXZskTbpFPZzzTOD3gG8kubVreytwKXBlkguALcB5PdQmSRNt7KFQVf8KZDdPLx9nLZKkn+U3miVJjaEgSWoMBUlSYyhIkpo+7j7SBJpec13fJUgagiMFSVJjKEiSGkNBktQYCpKkxlCQJDXefSRJc9TnXXX3Xnr2SN7XkYIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSc1EL3PR11fUR/X1dEnaW44UJEmNoSBJagwFSVJjKEiSGkNBktRM9N1Hk6bPHwSRtH9wpCBJahwp9MD/Y5e0r9rnRgpJXpbkriR3J1nTdz2SNEn2qVBIcgDwt8DLgZOBVyc5ud+qJGly7FOhADwXuLuqvllVO4BPASt6rkmSJsa+dk1hCXDfrP2twK/MPiDJamB1t/ujJHftxfmOBL67F6/f30xaf8E+T4qJ63P+cq/6/Iu7e2JfC4Xsoq1+ZqdqLbB2Xk6WbKyqZfPxXvuDSesv2OdJYZ/nz742fbQVOHbW/jHA/T3VIkkTZ18Lha8DS5Mcn+TJwEpgfc81SdLE2Kemj6rq0SR/AvwLcADwkaq6fYSnnJdpqP3IpPUX7POksM/zJFW156MkSRNhX5s+kiT1yFCQJDUTGQqTsJRGkmOTfDnJpiS3J7moaz8iyfVJNnePh/dd63xKckCSW5J8rttf0P0FSPK0JFclubP7+/7VhdzvJG/s/k3fluSKJActtP4m+UiS7Ulum9W22z4mubj7PLsryUv35twTFwoTtJTGo8CbquoZwBnAhV0/1wAbqmopsKHbX0guAjbN2l/o/QV4H/D5qjoJOJVB/xdkv5MsAV4PLKuqUxjckLKShdffjwEv26ltl33s/rteCTyze837u8+5OZm4UGBCltKoqm1VdXO3/UMGHxRLGPR1XXfYOuDcXgocgSTHAGcDl81qXrD9BUhyGPAC4MMAVbWjqv6bhd3vRcDBSRYBhzD4LtOC6m9VfQ34/k7Nu+vjCuBTVfVwVd0D3M3gc25OJjEUdrWUxpKeahmLJNPAacANwNFVtQ0GwQEc1WNp8+29wJuBn85qW8j9BTgBmAE+2k2bXZbkUBZov6vq28C7gC3ANuAHVfUFFmh/d7K7Ps7rZ9okhsIel9JYSJI8Ffgs8IaqerDvekYlySuA7VV1U9+1jNki4HTgA1V1GvAQ+//UyW518+grgOOBpwOHJjm/36p6N6+faZMYChOzlEaSAxkEwier6uqu+YEki7vnFwPb+6pvnp0JnJPkXgZTgi9K8gkWbn8fsxXYWlU3dPtXMQiJhdrvFwP3VNVMVT0CXA08j4Xb39l218d5/UybxFCYiKU0koTBPPOmqnr3rKfWA6u67VXAteOubRSq6uKqOqaqphn8nX6pqs5ngfb3MVX1HeC+JCd2TcuBO1i4/d4CnJHkkO7f+HIG18sWan9n210f1wMrkzwlyfHAUuDGOZ+lqibuD3AW8B/AfwJv67ueEfXx+QyGkP8O3Nr9OQv4eQZ3LmzuHo/ou9YR9P2FwOe67Uno7y8DG7u/62uAwxdyv4G/AO4EbgMuB56y0PoLXMHgmskjDEYCFzxeH4G3dZ9ndwEv35tzu8yFJKmZxOkjSdJuGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVLzv/FkPTLimwUdAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# That building belongs to class O4.\n",
"# What's the distribution of scores?\n",
"\n",
"office = df[df['Bldg_Class'] == 'O4']\n",
"office['Energy_Score'].plot(kind = 'hist')\n",
"plt.savefig('office.png')"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "1dabd257",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Letter_Score\n",
"A 33\n",
"B 147\n",
"C 102\n",
"D 75\n",
"F 5\n",
"Name: Block, dtype: int64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of grades\n",
"\n",
"office.groupby(['Letter_Score'])['Block'].count()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "9a9bcc23",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x648 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Square Footage versus Energy Score\n",
"\n",
"office.plot(kind = 'scatter', x = 'Sq_Footage', y = 'Energy_Score', figsize = (12, 9))\n",
"plt.savefig('scatter.png')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "a5c7986c",
"metadata": {},
"outputs": [],
"source": [
"# I can't tell from the plot whether there's any relationship between those two variables."
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "3b33f87e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:ylabel='Frequency'>"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD4CAYAAADrRI2NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQCUlEQVR4nO3df6xkZX3H8feHH8oPNUK50C2wXrQEpUZ+eKW0WKugLT+qQBtbSLWblro2wRRakrqiqRrThCYKamqti1C2qFgUBArWilsrMbHgRakuLhSrKwJb9qq1oDUg+O0fc7a97u7dnV3umeHO834lkznnmTlzvk/u7uee+8wzz6SqkCS1Y7dxFyBJGi2DX5IaY/BLUmMMfklqjMEvSY3ZY9wFDOOAAw6o6enpcZchSUvK7bff/p2qmtqyfUkE//T0NLOzs+MuQ5KWlCTf2la7Qz2S1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMb0Ff5K9ktyW5N+S3Jnk7V37/kluTnJPd79fXzVIkrbW5xX/I8CJVXUUcDRwcpLjgVXA2qo6HFjb7UuSRqS34K+BH3S7e3a3Ak4H1nTta4Az+qpBkrS1Xj+5m2R34Hbg54H3VdWtSQ6qqo0AVbUxyYELHLsSWAmwfPnyXa5hetVNu3zsE7XhotPGdm5JWkivb+5W1eNVdTRwCHBckufvxLGrq2qmqmamprZaakKStItGMqunqr4P/AtwMvBgkmUA3f2mUdQgSRroc1bPVJJndtt7Ay8H7gJuAFZ0T1sBXN9XDZKkrfU5xr8MWNON8+8GXF1VNyb5AnB1knOAe4FX91iDJGkLvQV/VX0FOGYb7d8FTurrvJKk7fOTu5LUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmN6CP8mhST6bZH2SO5Oc17W/Lcn9Se7obqf2VYMkaWt79PjajwEXVNWXkjwduD3Jzd1jl1TVO3s8tyRpAb0Ff1VtBDZ22w8nWQ8c3Nf5JEnDGckYf5Jp4Bjg1q7pDUm+kuTyJPstcMzKJLNJZufm5kZRpiQ1offgT/I04Brg/Kp6CHg/8BzgaAZ/EbxrW8dV1eqqmqmqmampqb7LlKRm9Br8SfZkEPofrqprAarqwap6vKp+AlwKHNdnDZKkn9bnrJ4AlwHrq+riee3L5j3tTGBdXzVIkrbW56yeE4DXAl9NckfXdiFwdpKjgQI2AK/vsQZJ0hb6nNXzeSDbeOiTfZ1TkrRjfnJXkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSY3oL/iSHJvlskvVJ7kxyXte+f5Kbk9zT3e/XVw2SpK31ecX/GHBBVT0POB44N8mRwCpgbVUdDqzt9iVJI9Jb8FfVxqr6Urf9MLAeOBg4HVjTPW0NcEZfNUiStjaSMf4k08AxwK3AQVW1EQa/HIADFzhmZZLZJLNzc3OjKFOSmtB78Cd5GnANcH5VPTTscVW1uqpmqmpmamqqvwIlqTG9Bn+SPRmE/oer6tqu+cEky7rHlwGb+qxBkvTT+pzVE+AyYH1VXTzvoRuAFd32CuD6vmqQJG1tjx5f+wTgtcBXk9zRtV0IXARcneQc4F7g1T3WIEnaQm/BX1WfB7LAwyf1dV5J0vYNNdST5Pl9FyJJGo1hr/j/JslTgCuAj1TV93urSNKT2vSqm8ZdwshtuOi0cZewqIa64q+qFwO/CxwKzCb5SJJX9FqZJKkXQ8/qqap7gLcAbwR+FXhvkruS/G
ZfxUmSFt+wY/wvSHIJg2UXTgRe2a3BcyJwSY/1SZIW2bBj/H8FXApcWFU/2txYVQ8keUsvlUmSejFs8J8K/KiqHgdIshuwV1X9T1Vd2Vt1kqRFN+wY/2eAveft79O1SZKWmGGDf6+q+sHmnW57n35KkiT1adjg/2GSYzfvJHkh8KPtPF+S9CQ17Bj/+cDHkjzQ7S8DfqeXiiRJvRoq+Kvqi0meCxzBYP2du6rqx71WJknqxc4s0vYiYLo75pgkVNXf9VKVJKk3QwV/kiuB5wB3AI93zQUY/JK0xAx7xT8DHFlV1WcxkqT+DTurZx3ws30WIkkajWGv+A8AvpbkNuCRzY1V9apeqpIk9WbY4H9bn0VIkkZn2Omcn0vyLODwqvpMkn2A3fstTZLUh2GXZX4d8HHgA13TwcB1PdUkSerRsG/ungucADwE//elLAf2VZQkqT/DBv8jVfXo5p0kezCYxy9JWmKGDf7PJbkQ2Lv7rt2PAf/QX1mSpL4MG/yrgDngq8DrgU8y+P5dSdISM+ysnp8w+OrFS/stR5LUt2Fn9XwzyTe2vO3gmMuTbEqybl7b25Lcn+SO7nbqE+2AJGnn7MxaPZvtBbwa2H8Hx1zB4Evat1zI7ZKqeueQ55UkLbKhrvir6rvzbvdX1buBE3dwzC3A9xahRknSIhp2WeZj5+3uxuAvgKfv4jnfkOT3gFnggqr6rwXOuRJYCbB8+fJdPJUkaUvDDvW8a972Y8AG4Ld34XzvB97B4DMA7+he9w+29cSqWg2sBpiZmfEzA5K0SIad1fOyxThZVT24eTvJpcCNi/G6kqThDTvU86fbe7yqLh7ydZZV1cZu90wG6/xLkkZoZ2b1vAi4odt/JXAL8O2FDkhyFfBS4IAk9wFvBV6a5GgGQz0bGHwYTJI0QjvzRSzHVtXDMJiPD3ysqv5woQOq6uxtNF+20xVKkhbVsEs2LAcenbf/KDC96NVIkno37BX/lcBtST7BYJjmTLb+YJYkTaTpVTeN7dwbLjpt0V9z2Fk9f5HkH4Ff6Zp+v6q+vOjVSJJ6N+xQD8A+wENV9R7gviSH9VSTJKlHwy7S9lbgjcCbuqY9gQ/1VZQkqT/DXvGfCbwK+CFAVT3Ari/ZIEkao2GD/9GqKrqvW0yyb38lSZL6NGzwX53kA8Azk7wO+Ax+KYskLUk7nNWTJMDfA88FHgKOAP68qm7uuTZJ2zHOKYZa2nYY/FVVSa6rqhcChr0kLXHDDvX8a5IX9VqJJGkkhv3k7suAP0qygcHMnjD4Y+AFfRUmSerHdoM/yfKquhc4ZUT1SJJ6tqMr/usYrMr5rSTXVNVvjaAmSVKPdjTGn3nbz+6zEEnSaOzoir8W2JaEUyq1NO0o+I9K8hCDK/+9u234/zd3n9FrdZKkRbfd4K+q3UdViCRpNHZmWWZJ0gQw+CWpMQa/JDXG4Jekxgy7ZIP0pOa0Sml4XvFLUmMMfklqjMEvSY3pLfiTXJ5kU5J189r2T3Jzknu6+/36Or8kadv6vOK/Ajh5i7ZVwNqqOhxY2+1Lkkaot+CvqluA723RfDqwptteA5zR1/klSds26jH+g6pqI0B3f+BCT0yyMslsktm5ubmRFShJk+5J++ZuVa2uqpmqmpmamhp3OZI0MUYd/A8mWQbQ3W8a8fklqXmjDv4bgBXd9grg+hGfX5Ka1+d0zquALwBHJLkvyTnARcArktwDvKLblySNUG9r9VTV2Qs8dFJf55Qk7diT9s1dSVI/DH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TG9LYss9ozveqmcZcgaQhe8UtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqzFiWbEiyAXgYeBx4rKpmxlGHJLVonGv1vKyqvjPG80tSkxzqkaTGjCv4C/h0ktuTrNzWE5KsTDKbZHZubm
7E5UnS5BpX8J9QVccCpwDnJnnJlk+oqtVVNVNVM1NTU6OvUJIm1FiCv6oe6O43AZ8AjhtHHZLUopEHf5J9kzx98zbwa8C6UdchSa0ax6yeg4BPJNl8/o9U1afGUIckNWnkwV9V3wCOGvV5JUkDTueUpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNWac6/GrJ9Orbhp3CZKexLzil6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY1xOmePnFYp6cnIK35JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjRlL8Cc5OcndSb6eZNU4apCkVo08+JPsDrwPOAU4Ejg7yZGjrkOSWjWOK/7jgK9X1Teq6lHgo8DpY6hDkpo0jtU5Dwa+PW//PuAXt3xSkpXAym73B0nu3sXzHQB8ZxePXarscxvscwPyl0+oz8/aVuM4gj/baKutGqpWA6uf8MmS2aqaeaKvs5TY5zbY5zb00edxDPXcBxw6b/8Q4IEx1CFJTRpH8H8RODzJYUmeApwF3DCGOiSpSSMf6qmqx5K8AfgnYHfg8qq6s8dTPuHhoiXIPrfBPrdh0fucqq2G1yVJE8xP7kpSYwx+SWrMRAf/pC8NkeTQJJ9Nsj7JnUnO69r3T3Jzknu6+/3GXetiS7J7ki8nubHbn+g+J3lmko8nuav7ef9SA33+k+7f9bokVyXZa9L6nOTyJJuSrJvXtmAfk7ypy7O7k/z6rp53YoO/kaUhHgMuqKrnAccD53Z9XAWsrarDgbXd/qQ5D1g/b3/S+/we4FNV9VzgKAZ9n9g+JzkY+GNgpqqez2AiyFlMXp+vAE7eom2bfez+b58F/EJ3zF93ObfTJjb4aWBpiKraWFVf6rYfZhAGBzPo55ruaWuAM8ZSYE+SHAKcBnxwXvPE9jnJM4CXAJcBVNWjVfV9JrjPnT2AvZPsAezD4PM+E9XnqroF+N4WzQv18XTgo1X1SFV9E/g6g5zbaZMc/NtaGuLgMdXSuyTTwDHArcBBVbURBr8cgAPHWFof3g38GfCTeW2T3OdnA3PA33bDWx9Msi8T3Oequh94J3AvsBH476r6NBPc53kW6uOiZdokB/9QS0NMgiRPA64Bzq+qh8ZdT5+S/AawqapuH3ctI7QHcCzw/qo6BvghS3+IY7u6ce3TgcOAnwP2TfKa8VY1douWaZMc/E0sDZFkTwah/+GqurZrfjDJsu7xZcCmcdXXgxOAVyXZwGD47sQkH2Ky+3wfcF9V3drtf5zBL4JJ7vPLgW9W1VxV/Ri4FvhlJrvPmy3Ux0XLtEkO/olfGiJJGIz7rq+qi+c9dAOwotteAVw/6tr6UlVvqqpDqmqawc/0n6vqNUx2n/8T+HaSI7qmk4CvMcF9ZjDEc3ySfbp/5ycxeA9rkvu82UJ9vAE4K8lTkxwGHA7ctktnqKqJvQGnAv8O/Afw5nHX00P/XszgT72vAHd0t1OBn2EwG+Ce7n7/cdfaU/9fCtzYbU90n4GjgdnuZ30dsF8DfX47cBewDrgSeOqk9Rm4isF7GD9mcEV/zvb6CLy5y7O7gVN29bwu2SBJjZnkoR5J0jYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4Jakx/wsl+yTqJaJfugAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# How about smaller office buildings?\n",
"\n",
"office_small = df[df['Bldg_Class'] == 'O2']\n",
"office_small['Energy_Score'].plot(kind = 'hist')"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "41db8282",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Letter_Score\n",
"A 21\n",
"B 31\n",
"C 17\n",
"D 38\n",
"F 23\n",
"Name: Block, dtype: int64"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of grades\n",
"\n",
"office_small.groupby(['Letter_Score'])['Block'].count()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "490d052a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Sq_Footage', ylabel='Energy_Score'>"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Square Footage versus Energy Score\n",
"\n",
"office_small.plot(kind = 'scatter', x = 'Sq_Footage', y = 'Energy_Score', figsize = (20, 10))"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "a75608c7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD4CAYAAADrRI2NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQEUlEQVR4nO3df7BcZX3H8feHH5Uf6gDlQlMgvWgzKHXkh1dKi7UK2vKjCrRjC1NtpqXGTnEKLTM1olN1nM7QGQXt1FqDUFNULApCitaKqZVxxoKJUg0GitWIgZRErQ1aBwS//WNP2muSm2zCPbvcfd6vmZ0959k99/k+k+STc5999pxUFZKkduwz7gIkSaNl8EtSYwx+SWqMwS9JjTH4Jakx+427gGEcfvjhNT09Pe4yJGlBWbt27beqamr79gUR/NPT06xZs2bcZUjSgpLkGztrd6pHkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNaa34E9yQJI7k/xbkruTvKVrPyzJbUnu654P7asGSdKO+jzjfwQ4vapOAE4EzkxyKrAcWF1VS4DV3b4kaUR6C/4a+F63u3/3KOBcYGXXvhI4r68aJEk76vWbu0n2BdYCPwu8q6ruSHJkVW0CqKpNSY6Y49hlwDKAxYsX73UN08s/ttfHPlEbrjhnbH1L0lx6/XC3qh6vqhOBo4FTkjxnD45dUVUzVTUzNbXDpSYkSXtpJKt6quq7wL8AZwIPJVkE0D1vHkUNkqSBPlf1TCU5pNs+EHgJcA+wCljavW0pcEtfNUiSdtTnHP8iYGU3z78PcENV3Zrkc8ANSS4C7gde0WMNkqTt9Bb8VfUl4KSdtH8bOKOvfiVJu+Y3dyWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMb0Ff5Jjknw6yfokdye5pGt/c5IHktzVPc7uqwZJ0o726/FnPwZcVlVfSPI0YG2S27rXrqqqt/XYtyRpDr0Ff1VtAjZ12w8nWQ8c1Vd/kqThjGSOP8k0cBJwR9f02iRfSnJtkkPnOGZZkjVJ1mzZsmUUZUpSE3oP/iRPBW4ELq2qrcC7gWcCJzL4jeDtOzuuqlZU1UxVzUxNTfVdpiQ1o9fgT7I/g9D/QFXdBFBVD1XV41X1I+Bq4JQ+a5Ak/bg+V/UEuAZYX1VXzmpfNOtt5wPr+qpBkrSjPlf1nAa8Cvhykru6tsuBC5OcCBSwAXhNjzVIkrbT56qezwLZyUsf76tPSdLu+c1dSWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9Jjekt+JMck+TTSdYnuTvJJV37YUluS3Jf93xoXzVIknbU5xn/Y8BlVfVs4FTg4iTHA8uB1VW1BFjd7UuSRqS34K+qTVX1hW77YWA9cBRwLrCye9tK4Ly+apAk7Wgkc/xJpoGTgDuAI6tqEwz+cwCOmOOYZUnWJFmzZcuWUZQpSU3oPfiTPBW4Ebi0qrYOe1xVraiqmaqamZqa6q9ASWpMr8GfZH8Gof+Bqrqpa34oyaLu9UXA5j5rkCT9uD5X9QS4BlhfVVfOemkVsLTbXgrc0lcNkqQd7dfjzz4NeBXw5SR3dW2XA1cANyS5CLgfeEWPNUiSttNb8FfVZ4HM8fIZffUrSdq1oaZ6kjyn70IkSaMx7Bz/3yS5M8kfJjmkz4IkSf0aKvir6gXAbwPHAGuSfDDJS3utTJLUi6FX9VTVfcAbgdcBvwz8ZZJ7kvx6X8VJkubfsHP8z01yFYPLLp
wOvKy7Bs/pwFU91idJmmfDrur5K+Bq4PKq+sG2xqp6MMkbe6lMktSLYYP/bOAHVfU4QJJ9gAOq6n+q6rreqpMkzbth5/g/BRw4a/+grk2StMAMG/wHVNX3tu102wf1U5IkqU/DBv/3k5y8bSfJ84Af7OL9kqQnqWHn+C8FPpzkwW5/EfBbvVQkSerVUMFfVZ9P8izgOAbX37mnqn7Ya2WSpF7syUXang9Md8eclISq+rteqpIk9Wao4E9yHfBM4C7g8a65AINfkhaYYc/4Z4Djq6r6LEaS1L9hV/WsA36qz0IkSaMx7Bn/4cBXktwJPLKtsape3ktVkqTeDBv8b+6zCEnS6Ay7nPMzSX4GWFJVn0pyELBvv6VJkvow7GWZXw18BHhP13QUcHNPNUmSejTsh7sXA6cBW+H/bspyRF9FSZL6M2zwP1JVj27bSbIfg3X8kqQFZtjg/0ySy4EDu3vtfhj4h/7KkiT1ZdjgXw5sAb4MvAb4OIP770qSFphhV/X8iMGtF6/utxxJUt+GXdXz9SRf2/6xm2OuTbI5ybpZbW9O8kCSu7rH2U90AJKkPbMn1+rZ5gDgFcBhuznmfQxu0r79hdyuqqq3DdmvJGmeDXXGX1XfnvV4oKreAZy+m2NuB74zDzVKkubRsJdlPnnW7j4MfgN42l72+dokvwOsAS6rqv+ao89lwDKAxYsX72VX4zW9/GNj6XfDFeeMpV9JC8OwUz1vn7X9GLAB+M296O/dwFsZfAfgrd3P/b2dvbGqVgArAGZmZvzOgCTNk2FX9bx4Pjqrqoe2bSe5Grh1Pn6uJGl4w071/MmuXq+qK4f8OYuqalO3ez6D6/xLkkZoT1b1PB9Y1e2/DLgd+OZcByS5HngRcHiSjcCbgBclOZHBVM8GBl8GkySN0J7ciOXkqnoYBuvxgQ9X1e/PdUBVXbiT5mv2uEJJ0rwa9pINi4FHZ+0/CkzPezWSpN4Ne8Z/HXBnko8ymKY5nx2/mKUnCZeRStqVYVf1/HmSfwR+qWv63ar6Yn9lSZL6MuxUD8BBwNaqeiewMcmxPdUkSerRsBdpexPwOuD1XdP+wPv7KkqS1J9hz/jPB14OfB+gqh5k7y/ZIEkao2GD/9GqKrrbLSY5uL+SJEl9Gjb4b0jyHuCQJK8GPoU3ZZGkBWm3q3qSBPh74FnAVuA44M+q6raea5Mk9WC3wV9VleTmqnoeYNhL0gI37FTPvyZ5fq+VSJJGYthv7r4Y+IMkGxis7AmDXwae21dhkqR+7DL4kyyuqvuBs0ZUjySpZ7s747+ZwVU5v5Hkxqr6jRHUJEnq0e7m+DNr+xl9FiJJGo3dBX/NsS1JWqB2N9VzQpKtDM78D+y24f8/3H16r9VJkubdLoO/qvYdVSGSpNHYk8syS5ImgMEvSY0x+CWpMQa/JDVm2Es2SLs1rpu8gzd6l/aEZ/yS1BiDX5IaY/BLUmN6C/4k1ybZnGTdrLbDktyW5L7u+dC++pck7VyfZ/zvA87crm05sLqqlgCru31J0gj1FvxVdTvwne2azwVWdtsrgfP66l+StHOjXs55ZFVtAqiqTUmOmOuNSZYBywAWL148ovK0UI1rKanLSLUQPWk/3K2qFVU1U1UzU1NT4y5HkibGqIP/oSSLALrnzSPuX5KaN+rgXwUs7baXAreMuH9Jal6fyzmvBz4HHJdkY5KLgCuAlya5D3hpty9JGqHePtytqgvneOmMvvqUJO3ek/bDXUlSPwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5Ia483WpSfAG8xrIfKMX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmLHciCXJBuBh4HHgsaqaGUcdktSicd6B68VV9a0x9i9JTXKqR5IaM67gL+CTSdYmWbazNyRZlmRNkjVbtmwZcXmSNLnGFfynVd
XJwFnAxUleuP0bqmpFVc1U1czU1NToK5SkCTWW4K+qB7vnzcBHgVPGUYcktWjkwZ/k4CRP27YN/AqwbtR1SFKrxrGq50jgo0m29f/BqvrEGOqQpCaNPPir6mvACaPuV5I04HJOSWrMOL/AJUkLwvTyj42t7w1XnDPvP9MzfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYl3NKC9S4lhj2sbxwWONcVjlJPOOXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjXE5p6Q94pLKhc8zfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNGUvwJzkzyb1Jvppk+ThqkKRWjTz4k+wLvAs4CzgeuDDJ8aOuQ5JaNY4z/lOAr1bV16rqUeBDwLljqEOSmjSOq3MeBXxz1v5G4Oe3f1OSZcCybvd7Se7dy/4OB761l8cuVI65DY65AfmLJzTmn9lZ4ziCPztpqx0aqlYAK55wZ8maqpp5oj9nIXHMbXDMbehjzOOY6tkIHDNr/2jgwTHUIUlNGkfwfx5YkuTYJD8BXACsGkMdktSkkU/1VNVjSV4L/BOwL3BtVd3dY5dPeLpoAXLMbXDMbZj3Madqh+l1SdIE85u7ktQYg1+SGjPRwT/pl4ZIckySTydZn+TuJJd07YcluS3Jfd3zoeOudb4l2TfJF5Pc2u1P9JiTHJLkI0nu6f68f6GBMf9x9/d6XZLrkxwwaWNOcm2SzUnWzWqbc4xJXt/l2b1JfnVv+53Y4G/k0hCPAZdV1bOBU4GLuzEuB1ZX1RJgdbc/aS4B1s/an/QxvxP4RFU9CziBwdgndsxJjgL+CJipqucwWAhyAZM35vcBZ27XttMxdv+2LwB+rjvmr7uc22MTG/w0cGmIqtpUVV/oth9mEAZHMRjnyu5tK4HzxlJgT5IcDZwDvHdW88SOOcnTgRcC1wBU1aNV9V0meMyd/YADk+wHHMTg+z4TNeaquh34znbNc43xXOBDVfVIVX0d+CqDnNtjkxz8O7s0xFFjqqV3SaaBk4A7gCOrahMM/nMAjhhjaX14B/CnwI9mtU3ymJ8BbAH+tpveem+Sg5ngMVfVA8DbgPuBTcB/V9UnmeAxzzLXGOct0yY5+Ie6NMQkSPJU4Ebg0qraOu56+pTk14DNVbV23LWM0H7AycC7q+ok4Pss/CmOXermtc8FjgV+Gjg4ySvHW9XYzVumTXLwN3FpiCT7Mwj9D1TVTV3zQ0kWda8vAjaPq74enAa8PMkGBtN3pyd5P5M95o3Axqq6o9v/CIP/CCZ5zC8Bvl5VW6rqh8BNwC8y2WPeZq4xzlumTXLwT/ylIZKEwbzv+qq6ctZLq4Cl3fZS4JZR19aXqnp9VR1dVdMM/kz/uapeyWSP+T+BbyY5rms6A/gKEzxmBlM8pyY5qPt7fgaDz7AmeczbzDXGVcAFSZ6S5FhgCXDnXvVQVRP7AM4G/h34D+AN466nh/G9gMGvel8C7uoeZwM/yWA1wH3d82HjrrWn8b8IuLXbnugxAycCa7o/65uBQxsY81uAe4B1wHXAUyZtzMD1DD7D+CGDM/qLdjVG4A1dnt0LnLW3/XrJBklqzCRP9UiSdsLgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY35X+VnJyvU+3dKAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Something different: hotels\n",
"\n",
"hotels = df[df['Bldg_Class'] == 'H2']\n",
"hotels['Energy_Score'].plot(kind = 'hist')\n",
"plt.savefig('hotels.png')"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "a6c9fbd4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Letter_Score\n",
"A 6\n",
"B 3\n",
"C 9\n",
"D 64\n",
"F 18\n",
"Name: Block, dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of grades\n",
"\n",
"hotels.groupby(['Letter_Score'])['Block'].count()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "5d0ea74d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Sq_Footage', ylabel='Energy_Score'>"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Square Footage versus Energy Score\n",
"\n",
"hotels.plot(kind = 'scatter', x = 'Sq_Footage', y = 'Energy_Score', figsize = (20, 10))"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "e12fed9d",
"metadata": {},
"outputs": [],
"source": [
"# Again, no apparent connection between score and building size.\n",
"# But it's obvious that hotels score worse than office buildings."
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "9719cc6e",
"metadata": {},
"outputs": [],
"source": [
"# Lastly, let's take a look at residential buildings.\n",
"# This time we'll zoom out and consider two building classes:\n",
    "# walk-up apartments (class codes containing C) and elevator apartments (class codes containing D)."
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "ca8d4f3e",
"metadata": {},
"outputs": [],
"source": [
"walk_up = df[df['Bldg_Class'].str.contains('C')]\n",
"elevator = df[df['Bldg_Class'].str.contains('D')]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "182f44b0",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:ylabel='Frequency'>"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUBElEQVR4nO3dfbAV933f8ffHICNLiUciuqgUUMEdRjbyWA/GqlOlaWySCtuJUDpVez11h0lpSKe0sdvMpOBk6vQPZtSZ1rEzrdLITyG2YxUrtkWd1gklcTydaYSvbE0skCjEyOgGAjdKXeSHQUb+9o+zbI/gXjjA3Xvgnvdr5s7u/s5vd7+/QT4f78PZTVUhSRLAK4ZdgCTpymEoSJJahoIkqWUoSJJahoIkqbVw2AVcjptuuqlWrlw57DIk6aryxBNP/EVVjU332VUdCitXrmRiYmLYZUjSVSXJN2b6zNNHkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTWVf2L5su1cuvvDmW/zz74jqHsV5IuxCMFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVKrs1BIcmuSJ/v+TiZ5T5LFSXYnOdhMb+xbZ1uSQ0kOJLm3q9okSdPrLBSq6kBV3VFVdwBvBL4DfBbYCuypqtXAnmaZJGuAceA2YD3wUJIFXdUnSTrXXJ0+Wgf8aVV9A9gA7GjadwD3N/MbgEeq6lRVHQYOAXfPUX2SJOYuFMaBTzXzN1fVMYBmuqRpXwY817fOZNP2Mkk2J5lIMjE1NdVhyZI0ejoPhSSvBO4DPn2hrtO01TkNVQ9X1dqqWjs2NjYbJUqSGnNxpPA24CtVdbxZPp5kKUAzPdG0TwIr+tZbDhydg/okSY25CIV38v9PHQHsAjY28xuBx/rax5MsSrIKWA3snYP6JEmNTl+yk+Q64CeAn+trfhDYmWQTcAR4AKCq9iXZCewHTgNbquqlLuuTJL1cp6FQVd8Bfuistufp3Y00Xf/twPYua5IkzcxfNEuSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWp2GQpIbkjya5JkkTyf54SSLk+xOcrCZ3tjXf1uSQ0kOJLm3y9okSefq+kjhg8AXquq1wO3A08BWYE9VrQb2NMskWQOMA7cB64GHkizouD5JUp/OQiHJq4EfBT4CUFUvVtU3gQ3AjqbbDuD+Zn4D8EhVnaqqw8Ah4O6u6pMknavLI4XXAFPAx5J8NcmHk1wP3FxVxwCa6ZKm/zLgub71J5u2l0myOclEkompqakOy5ek0dNlKCwE7gJ+varuBL5Nc6poBpmmrc5pqHq4qtZW1dqxsbHZqVSSBHQbCpPAZFU93iw/Si8kjidZCtBMT/T1X9G3/nLgaIf1SZLO0lkoVNWfA88lubVpWgfsB3YBG5u2jcBjzfwuYDzJoiSrgNXA3q7qkySda2HH2/8XwCeTvBL4OvAz9IJoZ5JNwBHgAYCq2pdkJ73gOA1sqaqXOq5PktSn01CoqieBtdN8tG6G/tuB7V3WJEmamb9oliS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUqvTUEjybJKvJXkyyUTTtjjJ7iQHm+mNff23JTmU5ECSe7usTZJ0rrk4UnhLVd1RVWfe1bwV2FNVq4E9zTJJ1gDjwG3AeuChJAvmoD5JUmMYp482ADua+R3A/X3tj1TVqao6DBwC7p778iRpdHUdCgX8fpInkmxu2m6uqmMAzXRJ074MeK5v3cmmTZI0RxZ2vP17qupokiXA7iTPnKdvpmmrczr1wmUzwC233DI7VUqSgI
6PFKrqaDM9AXyW3umg40mWAjTTE033SWBF3+rLgaPTbPPhqlpbVWvHxsa6LF+SRk5noZDk+iQ/eGYe+DvAU8AuYGPTbSPwWDO/CxhPsijJKmA1sLer+iRJ5+ry9NHNwGeTnNnPb1fVF5J8GdiZZBNwBHgAoKr2JdkJ7AdOA1uq6qUO65MknaWzUKiqrwO3T9P+PLBuhnW2A9u7qkmSdH7+olmS1BooFJK8vutCJEnDN+iRwn9OsjfJP0tyQ5cFSZKGZ6BQqKofAf4hvVtGJ5L8dpKf6LQySdKcG/iaQlUdBH4Z+NfA3wZ+LckzSf5uV8VJkubWoNcU3pDkV4GngbcCP1VVr2vmf7XD+iRJc2jQW1L/I/Ah4L1V9d0zjc0jLH65k8okSXNu0FB4O/DdMz8mS/IK4Nqq+k5Vfbyz6iRJc2rQawr/A3hV3/J1TZskaR4ZNBSurapvnVlo5q/rpiRJ0rAMGgrfTnLXmYUkbwS+e57+kqSr0KDXFN4DfDrJmUdZLwX+QScVSZKGZqBQqKovJ3ktcCu9l+E8U1Xf67QySdKcu5inpL4JWNmsc2cSquq3OqlKkjQUA4VCko8Dfx14EjjzjoMCDAVJmkcGPVJYC6ypqnPemSxJmj8GvfvoKeCvdFmIJGn4Bj1SuAnYn2QvcOpMY1Xd10lVkqShGDQUfqXLIiRJV4ZB36fwR8CzwDXN/JeBrwyybpIFSb6a5PPN8uIku5McbKY39vXdluRQkgNJ7r3o0UiSLsugj87+WeBR4DeapmXA5wbcx7vpPXL7jK3AnqpaDexplkmyBhgHbgPWAw8lWTDgPiRJs2DQC81bgHuAk9C+cGfJhVZKshx4B/DhvuYNwI5mfgdwf1/7I1V1qqoOA4eAuwesT5I0CwYNhVNV9eKZhSQL6f1O4UI+APwi8P2+tpur6hhAMz0TLsuA5/r6TTZtL5Nkc5KJJBNTU1MDli9JGsSgofBHSd4LvKp5N/Ongf96vhWS/CRwoqqeGHAfmabtnOCpqoeram1VrR0bGxtw05KkQQx699FWYBPwNeDngP/Gy08JTece4L4kbweuBV6d5BPA8SRLq+pYkqXAiab/JLCib/3lwFEkSXNm0LuPvl9VH6qqB6rq7zXz5z19VFXbqmp5Va2kdwH5D6rqXcAuYGPTbSPwWDO/CxhPsijJKmA1sPcSxiRJukSDPvvoMNOfynnNJezzQWBnkk3AEeCBZlv7kuwE9gOngS1nXv8pSZobF/PsozOupfdFvnjQnVTVF4EvNvPPA+tm6Lcd2D7odiVJs2vQ00fP9/39WVV9AHhrt6VJkubaoKeP7upbfAW9I4cf7KQiSdLQDHr66D/0zZ+m98iLvz/r1UiShmrQ13G+petCJEnDN+jpo391vs+r6v2zU44kaZgu5u6jN9H7LQHATwFf4uWPpZAkXeUu5iU7d1XVCwBJfgX4dFX9k64KkyTNvUGffXQL8GLf8ovAylmvRpI0VIMeKXwc2Jvks/R+2fzTwG91VpUkaSgGvftoe5L/Dvytpulnquqr3ZUlSRqGQU8fAVwHnKyqDwKTzUPrJEnzyKC3pL6P3h1ItwIfA64BPkHv8di6Sqzc+rtD2/ezD75jaPuWNLhBjxR+GrgP+DZAVR3Fx1xI0rwzaCi82Lw/oQCSXN9dSZKkYRn07qOdSX4DuCHJzwL/GPhQd2VJs8NTZtLFuWAoJAnwX4DXAifpXVf4N1W1u+Pa5q1hflFJmj3z8f90XDAUqqqSfK6q3ggYBJI0jw16TeGPk7yp00okSUM36DWFtwD/NMmz9O5ACr2DiDd0VZjmF0+ZSVeH84ZCkluq6gjwtovdcJJr6T1JdVGzn0er6n1JFtO7RrGS5mU9VfV/mnW2AZuAl4Cfr6rfu9j9SpIu3YVOH30OoKq+Aby/qr7R/3eBdU8Bb62q24E7gPVJ3gxsBfZU1WpgT7NMkjXAOHAbsB54KMmCSxuWJOlSXCgU0jf/movZcPV8q1
m8pvkrYAOwo2nfAdzfzG8AHqmqU1V1GDgE3H0x+5QkXZ4LhULNMD+QJAuSPAmcAHZX1ePAzVV1DKCZLmm6L+PlL+2ZbNrO3ubmJBNJJqampi62JEnSeVwoFG5PcjLJC8AbmvmTSV5IcvJCG6+ql6rqDmA5cHeS15+ne6ZpOyeIqurhqlpbVWvHxsYuVIIk6SKc90JzVc3KOf2q+maSL9K7VnA8ydKqOpZkKb2jCOgdGazoW205cHQ29i9JGszFPDr7oiQZS3JDM/8q4MeBZ+i953lj020j8FgzvwsYT7KoeSz3amBvV/VJks416O8ULsVSYEdzB9ErgJ1V9fkk/4ves5Q2AUeABwCqal+SncB+4DSwpape6rA+SdJZOguFqvoT4M5p2p8H1s2wznZge1c1SZLOr8sjBWmkDetX3D6dVZejs2sKkqSrj6EgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklr9TkOaZ+fgyec0djxQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSa3OQiHJiiR/mOTpJPuSvLtpX5xkd5KDzfTGvnW2JTmU5ECSe7uqTZI0vS6PFE4Dv1BVrwPeDGxJsgbYCuypqtXAnmaZ5rNx4DZgPfBQkgUd1idJOktnoVBVx6rqK838C8DTwDJgA7Cj6bYDuL+Z3wA8UlWnquowcAi4u6v6JEnnmpMH4iVZCdwJPA7cXFXHoBccSZY03ZYBf9y32mTTJukqMayH8fkgvtnT+YXmJD8A/A7wnqo6eb6u07TVNNvbnGQiycTU1NRslSlJouNQSHINvUD4ZFV9pmk+nmRp8/lS4ETTPgms6Ft9OXD07G1W1cNVtbaq1o6NjXVXvCSNoC7vPgrwEeDpqnp/30e7gI3N/Ebgsb728SSLkqwCVgN7u6pPknSuLq8p3AP8I+BrSZ5s2t4LPAjsTLIJOAI8AFBV+5LsBPbTu3NpS1W91GF9kqSzdBYKVfU/mf46AcC6GdbZDmzvqiZJ0vn5i2ZJUst3NEu66g3zvdTzjUcKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqRWZ6GQ5KNJTiR5qq9tcZLdSQ420xv7PtuW5FCSA0nu7aouSdLMujxS+E1g/VltW4E9VbUa2NMsk2QNMA7c1qzzUJIFHdYmSZpGZ6FQVV8C/vKs5g3AjmZ+B3B/X/sjVXWqqg4Dh4C7u6pNkjS9ub6mcHNVHQNopkua9mXAc339Jpu2cyTZnGQiycTU1FSnxUrSqLlSLjRnmraarmNVPVxVa6tq7djYWMdlSdJometQOJ5kKUAzPdG0TwIr+votB47OcW2SNPLmOhR2ARub+Y3AY33t40kWJVkFrAb2znFtkjTyFna14SSfAn4MuCnJJPA+4EFgZ5JNwBHgAYCq2pdkJ7AfOA1sqaqXuqpNkjS9zkKhqt45w0frZui/HdjeVT2SpAu7Ui40S5KuAIaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWldcKCRZn+RAkkNJtg67HkkaJVdUKCRZAPwn4G3AGuCdSdYMtypJGh1XVCgAdwOHqurrVfUi8AiwYcg1SdLIWDjsAs6yDHiub3kS+Bv9HZJsBjY3i99KcuAy9ncT8BeXsf7VZtTGC455VIzcmPPvLmvMf22mD660UMg0bfWyhaqHgYdnZWfJRFWtnY1tXQ1GbbzgmEeFY549V9rpo0lgRd/ycuDokGqRpJFzpYXCl4HVSVYleSUwDuwack2SNDKuqNNHVXU6yT8Hfg9YAHy0qvZ1uMtZOQ11FRm18YJjHhWOeZakqi7cS5I0Eq6000eSpCEyFCRJrZEMhVF4lEaSFUn+MM
nTSfYleXfTvjjJ7iQHm+mNw651NiVZkOSrST7fLM/r8QIkuSHJo0meaf69f3g+jzvJv2z+m34qyaeSXDvfxpvko0lOJHmqr23GMSbZ1nyfHUhy7+Xse+RCYYQepXEa+IWqeh3wZmBLM86twJ6qWg3saZbnk3cDT/ctz/fxAnwQ+EJVvRa4nd745+W4kywDfh5YW1Wvp3dDyjjzb7y/Caw/q23aMTb/ux4HbmvWeaj5nrskIxcKjMijNKrqWFV9pZl/gd4XxTJ6Y93RdNsB3D+UAjuQZDnwDuDDfc3zdrwASV4N/CjwEYCqerGqvsn8HvdC4FVJFgLX0fst07wab1V9CfjLs5pnGuMG4JGqOlVVh4FD9L7nLskohsJ0j9JYNqRa5kSSlcCdwOPAzVV1DHrBASwZYmmz7QPALwLf72ubz+MFeA0wBXysOW324STXM0/HXVV/Bvx74AhwDPi/VfX7zNPxnmWmMc7qd9oohsIFH6UxnyT5AeB3gPdU1clh19OVJD8JnKiqJ4ZdyxxbCNwF/HpV3Ql8m6v/1MmMmvPoG4BVwF8Frk/yruFWNXSz+p02iqEwMo/SSHINvUD4ZFV9pmk+nmRp8/lS4MSw6ptl9wD3JXmW3inBtyb5BPN3vGdMApNV9Xiz/Ci9kJiv4/5x4HBVTVXV94DPAH+T+TvefjONcVa/00YxFEbiURpJQu8889NV9f6+j3YBG5v5jcBjc11bF6pqW1Utr6qV9P5N/6Cq3sU8He8ZVfXnwHNJbm2a1gH7mb/jPgK8Ocl1zX/j6+hdL5uv4+030xh3AeNJFiVZBawG9l7yXqpq5P6AtwP/G/hT4JeGXU9HY/wReoeQfwI82fy9HfghencuHGymi4ddawdj/zHg8838KIz3DmCi+bf+HHDjfB438G+BZ4CngI8Di+bbeIFP0btm8j16RwKbzjdG4Jea77MDwNsuZ98+5kKS1BrF00eSpBkYCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWr9P5dH57sU3dB8AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"walk_up['Energy_Score'].plot(kind = 'hist')"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "81203621",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Letter_Score\n",
"A 403\n",
"B 302\n",
"C 388\n",
"D 1909\n",
"F 285\n",
"Name: Block, dtype: int64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"walk_up.groupby(['Letter_Score'])['Block'].count()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "5d84db66",
"metadata": {},
"outputs": [],
"source": [
"# D is by far the most common grade (1,909 of 3,287 walk-ups); A, B, C, and F each account for only 285-403 buildings."
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "727698e7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Sq_Footage', ylabel='Energy_Score'>"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"walk_up.plot(kind = 'scatter', x = 'Sq_Footage', y = 'Energy_Score', figsize = (20, 10))"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "31162192",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:ylabel='Frequency'>"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD6CAYAAABQ6WtbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAU4UlEQVR4nO3df/BddX3n8edLovzQZYAl0JCEfmMniwZGF/jK0NptrdQlVUtod9nGqWvG0mbrZrfabUcT7SzdPzLDzrb4o13YpkgNKtCIVrJFXDGtZXdGiUHsQghZsoaSr4nk2+5YqeuEgu/9454sl3iTc/PN9977zfc+HzOZe87n/Hp/INwX53zOPSdVhSRJx/KSURcgSZr7DAtJUivDQpLUyrCQJLUyLCRJrQwLSVKrgYVFktuSHEzyaI9lv5mkkpzb1bYhyZ4ku5Nc3dV+eZJHmmUfSZJB1SxJ6m3BAPf9MeD3gdu7G5MsBd4EPNXVtgJYDVwMXAB8Mck/qqrngVuAtcBXgM8BK4H72g5+7rnn1sTExGz0Q5LGxkMPPfTXVbXwyPaBhUVVPZBkoseiDwLvBe7palsF3FVVh4C9SfYAVyR5Ejizqr4MkOR24Fr6CIuJiQl27NhxQn2QpHGT5K96tQ91zCLJNcA3q+ovj1i0GNjXNT/VtC1upo9slyQN0SAvQ71IkjOADwD/tNfiHm11jPajHWMtnUtWXHjhhTOoUpLUyzDPLH4EWAb8ZXN5aQnwtSQ/ROeMYWnXukuA/U37kh7tPVXVpqqarKrJhQt/4JKbJGmGhhYWVfVIVZ1XVRNVNUEnCC6rqm8BW4HVSU5NsgxYDmyvqgPAM0mubO6CegcvHuuQJA3BIG+dvRP4MnBRkqkk1x9t3araCWwBHgM+D6xr7oQCeBdwK7AH+N/0MbgtSZpdma+PKJ+cnCzvhpKk45PkoaqaPLLdX3BLkloZFpKkVoaFJKnV0H5ncTKZWH/vSI775I1vGclxJamNZxaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqdXAwiLJbUkOJnm0q+0/JXk8yf9M8idJzupatiHJniS7k1zd1X55kkeaZR9JkkHVLEnqbZBnFh8DVh7Rdj9wSVW9BvhfwAaAJCuA1cDFzTY3Jzml2eYWYC2wvPlz5D4lSQO2YFA7rqoHkkwc0faFrtmvAP+8mV4F3FVVh4C9SfYAVyR5Ejizqr4MkOR24FrgvkHVLWn2Tay/dyTHffLGt4zkuPPRKMcsfokXvvQXA/u6lk01bYub6SPbJUlDNJKwSPIB4Dngk4ebeqxWx2g/2n7XJtmRZMf09PSJFypJAkYQFknWAG8FfrGqDn/xTwFLu1ZbAuxv2pf0aO+pqjZV1WRVTS5cuHB2C5ekMTawMYtekqwE3gf8ZFX9365FW4E7ktwEXEBnIHt7VT2f5JkkVwIPAu8Afm+YNUvzyajGDnTyG1hYJLkTeANwbpIp4AY6dz+dCtzf3AH7lar61aramWQL8Bidy1Prqur5ZlfvonNn1el0xjgc3JbUl1GG43wbXB/k3VBv69H80WOsvxHY2KN9B3DJLJYmSTpO/oJbktRqqGMWkhw30MnJsJCkAZhvP0T0MpQkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaDSwsktyW5GCSR7vazklyf5Inms+zu5ZtSLInye4kV3e1X57kkWbZR5JkUDVLknob5JnFx4CVR7StB7ZV1XJgWzNPkhXAauDiZpubk5zSbHMLsBZY3vw5cp+SpAFbMKgdV9UDSSaOaF4FvKGZ3gx8CXhf035XVR0C9ibZA1yR5EngzK
r6MkCS24FrgfsGVbfGx8T6e0ddgnTSGPaYxflVdQCg+TyvaV8M7Otab6ppW9xMH9neU5K1SXYk2TE9PT2rhUvSOJsrA9y9xiHqGO09VdWmqpqsqsmFCxfOWnGSNO6GHRZPJ1kE0HwebNqngKVd6y0B9jftS3q0S5KGaNhhsRVY00yvAe7pal+d5NQky+gMZG9vLlU9k+TK5i6od3RtI0kakoENcCe5k85g9rlJpoAbgBuBLUmuB54CrgOoqp1JtgCPAc8B66rq+WZX76JzZ9XpdAa2HdyWpCEb5N1QbzvKoquOsv5GYGOP9h3AJbNYmiTpOM2VAW5J0hxmWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWfYVFEt8nIUljrN8zi/+SZHuSf53krEEWJEmae/oKi6r6ceAXgaXAjiR3JHnTQCuTJM0ZfY9ZVNUTwG8B7wN+EvhIkseT/PygipMkzQ39jlm8JskHgV3AG4GfrapXN9MfHGB9kqQ5YEGf6/0+8IfA+6vqe4cbq2p/kt8aSGWSpDmj38tQbwbuOBwUSV6S5AyAqvr48R40ya8n2Znk0SR3JjktyTlJ7k/yRPN5dtf6G5LsSbI7ydXHezxJ0onpNyy+CJzeNX9G03bckiwGfg2YrKpLgFOA1cB6YFtVLQe2NfMkWdEsvxhYCdyc5JSZHFuSNDP9hsVpVfV3h2ea6TNO4LgLgNOTLGj2sx9YBWxulm8Grm2mVwF3VdWhqtoL7AGuOIFjS5KOU79h8d0klx2eSXI58L1jrH9UVfVN4HeAp4ADwN9W1ReA86vqQLPOAeC8ZpPFwL6uXUw1bZKkIel3gPs9wKeS7G/mFwG/MJMDNmMRq4BlwLeb/b79WJv0aKuj7HstsBbgwgsvnEl5kqQe+gqLqvpqklcBF9H58n68qv5+hsf8aWBvVU0DJPkM8GPA00kWVdWBJIuAg836U3R+DHjYEjqXrXrVuQnYBDA5OdkzUCRJx6/fMwuA1wETzTaXJqGqbp/BMZ8CrmzupvoecBWwA/gusAa4sfm8p1l/K3BHkpuAC4DlwPYZHFdz0MT6e0ddgqQ+9BUWST4O/AjwdeD5prmA4w6Lqnowyd3A14DngIfpnA28AtiS5Ho6gXJds/7OJFuAx5r111XV8z13LkkaiH7PLCaBFVU1K5d2quoG4IYjmg/ROcvotf5GYONsHFuSdPz6vRvqUeCHBlmIJGnu6vfM4lzgsSTb6ZwBAFBV1wykKknSnNJvWPz2IIuQJM1t/d46+xdJfhhYXlVfbO5k8pEbkjQm+n1E+a8AdwN/0DQtBj47oJokSXNMvwPc64DXA9+B//8ipPOOuYUkad7oNywOVdWzh2eaBwD6C2lJGhP9hsVfJHk/nSfFvgn4FPBfB1eWJGku6Tcs1gPTwCPAvwI+R+d93JKkMdDv3VDfp/Na1T8cbDmSpLmo32dD7aXHGEVVvXLWK5IkzTnH82yow06j85C/c2a/HEnSXNTXmEVV/U3Xn29W1YeANw62NEnSXNHvZajLumZfQudM4x8MpCJJ0pzT72Wo3+2afg54EvgXs16NJGlO6vduqJ8adCEaLd9YJ+lY+r0M9e+OtbyqbpqdciRJc9Hx3A31Ojrvwwb4WeABYN8gipIkzS3H8/Kjy6rqGYAkvw18qqp+eVCFSZLmjn4f93Eh8GzX/LPAxKxXI0mak/o9s/g4sD3Jn9D5JffPAbcPrCpJ0pzS791QG5PcB/yTpumdVfXw4MqSJM0l/V6GAjgD+E5VfRiYSrJspgdNclaSu5M8nmRXkh9Nck6S+5M80Xye3bX+hiR7kuxOcvVMjytJmpl+X6t6A/A+YEPT9FLgEydw3A8Dn6+qVwGvBXbReQz6tqpaDmxr5kmyAlgNXAysBG5O4vu/JWmI+j2z+DngGuC7AFW1nxk+7iPJmcBPAB9t9vVsVX
0bWAVsblbbDFzbTK8C7qqqQ1W1F9gDXDGTY0uSZqbfsHi2qormMeVJXn4Cx3wlnRcp/VGSh5Pc2uzv/Ko6ANB8Hn7H92Je/HuOqaZNkjQk/YbFliR/AJyV5FeALzLzFyEtAC4DbqmqS+mcraw/xvrp0dbz/d9J1ibZkWTH9PT0DMuTJB2pNSySBPhj4G7g08BFwL+vqt+b4TGngKmqerCZv5tOeDydZFFzzEXAwa71l3ZtvwTY32vHVbWpqiaranLhwoUzLE+SdKTWW2erqpJ8tqouB+4/0QNW1beS7EtyUVXtBq4CHmv+rAFubD7vaTbZCtyR5CbgAmA5sP1E65Ak9a/fH+V9Jcnrquqrs3Tcfwt8MsnLgG8A76RzlrMlyfXAU3TexkdV7UyyhU6YPAesq6rnZ6kOSVIf+g2LnwJ+NcmTdMYYQuek4zUzOWhVfZ0Xv6r1sKuOsv5GYONMjiVJOnHHDIskF1bVU8DPDKkeSdIc1HZm8Vk6T5v9qySfrqp/NoSaJElzTNvdUN23rb5ykIVIkuautrCoo0xLksZI22Wo1yb5Dp0zjNObaXhhgPvMgVYnSZoTjhkWVeUD+yRJfd86qyGYWH/vqEuQpJ6O530WkqQxZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkViMLiySnJHk4yZ828+ckuT/JE83n2V3rbkiyJ8nuJFePqmZJGlejPLN4N7Cra349sK2qlgPbmnmSrABWAxcDK4Gbk/gGP0kaopGERZIlwFuAW7uaVwGbm+nNwLVd7XdV1aGq2gvsAa4YUqmSJEZ3ZvEh4L3A97vazq+qAwDN53lN+2JgX9d6U03bD0iyNsmOJDump6dnvWhJGldDD4skbwUOVtVD/W7So616rVhVm6pqsqomFy5cOOMaJUkvtmAEx3w9cE2SNwOnAWcm+QTwdJJFVXUgySLgYLP+FLC0a/slwP6hVixJY27oZxZVtaGqllTVBJ2B6z+rqrcDW4E1zWprgHua6a3A6iSnJlkGLAe2D7lsSRprozizOJobgS1JrgeeAq4DqKqdSbYAjwHPAeuq6vnRlSlJ42ekYVFVXwK+1Ez/DXDVUdbbCGwcWmGSpBfxF9ySpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVkMPiyRLk/x5kl1JdiZ5d9N+TpL7kzzRfJ7dtc2GJHuS7E5y9bBrlqRxN4ozi+eA36iqVwNXAuuSrADWA9uqajmwrZmnWbYauBhYCdyc5JQR1C1JY2voYVFVB6rqa830M8AuYDGwCtjcrLYZuLaZXgXcVVWHqmovsAe4YqhFS9KYG+mYRZIJ4FLgQeD8qjoAnUABzmtWWwzs69psqmmTJA3JyMIiySuATwPvqarvHGvVHm11lH2uTbIjyY7p6enZKFOSxIjCIslL6QTFJ6vqM03z00kWNcsXAQeb9ilgadfmS4D9vfZbVZuqarKqJhcuXDiY4iVpDI3ibqgAHwV2VdVNXYu2Amua6TXAPV3tq5OcmmQZsBzYPqx6JUmwYATHfD3wL4FHkny9aXs/cCOwJcn1wFPAdQBVtTPJFuAxOndSrauq54detSSNsaGHRVX9D3qPQwBcdZRtNgIbB1aUJOmY/AW3JKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqddKERZKVSXYn2ZNk/ajrkaRxclKERZJTgP8M/AywAnhbkhWjrUqSxsdJERbAFcCeqvpGVT0L3AWsGnFNkjQ2TpawWA
zs65qfatokSUOwYNQF9Ck92uoHVkrWAmub2b9LsnuGxzsX+OsZbnuyss/jYdz6PG79Jf/xhPv8w70aT5awmAKWds0vAfYfuVJVbQI2nejBkuyoqskT3c/JxD6Ph3Hr87j1FwbX55PlMtRXgeVJliV5GbAa2DrimiRpbJwUZxZV9VySfwP8N+AU4Laq2jnisiRpbJwUYQFQVZ8DPjekw53wpayTkH0eD+PW53HrLwyoz6n6gXFiSZJe5GQZs5AkjZBh0WUcHimSZGmSP0+yK8nOJO9u2s9Jcn+SJ5rPs0dd62xLckqSh5P8aTM/r/uc5Kwkdyd5vPn3/aNj0Odfb/5eP5rkziSnzbc+J7ktycEkj3a1HbWPSTY032m7k1w90+MaFo0xeqTIc8BvVNWrgSuBdU0/1wPbqmo5sK2Zn2/eDezqmp/vff4w8PmqehXwWjp9n7d9TrIY+DVgsqouoXMzzGrmX58/Bqw8oq1nH5v/tlcDFzfb3Nx81x03w+IFY/FIkao6UFVfa6afofMFsphOXzc3q20Grh1JgQOSZAnwFuDWruZ52+ckZwI/AXwUoKqerapvM4/73FgAnJ5kAXAGnd9jzas+V9UDwP85ovlofVwF3FVVh6pqL7CHznfdcTMsXjB2jxRJMgFcCjwInF9VB6ATKMB5IyxtED4EvBf4flfbfO7zK4Fp4I+aS2+3Jnk587jPVfVN4HeAp4ADwN9W1ReYx33ucrQ+ztr3mmHxgr4eKTJfJHkF8GngPVX1nVHXM0hJ3gocrKqHRl3LEC0ALgNuqapLge9y8l9+OabmOv0qYBlwAfDyJG8fbVUjN2vfa4bFC/p6pMh8kOSldILik1X1mab56SSLmuWLgIOjqm8AXg9ck+RJOpcX35jkE8zvPk8BU1X1YDN/N53wmM99/mlgb1VNV9XfA58Bfoz53efDjtbHWfteMyxeMBaPFEkSOtexd1XVTV2LtgJrmuk1wD3Drm1QqmpDVS2pqgk6/17/rKrezvzu87eAfUkuapquAh5jHveZzuWnK5Oc0fw9v4rOmNx87vNhR+vjVmB1klOTLAOWA9tncgB/lNclyZvpXNs+/EiRjaOtaPYl+XHgvwOP8ML1+/fTGbfYAlxI5z+666rqyEG0k16SNwC/WVVvTfIPmcd9TvKP6Qzovwz4BvBOOv+DOJ/7/B+AX6Bz19/DwC8Dr2Ae9TnJncAb6DxR92ngBuCzHKWPST4A/BKdfybvqar7ZnRcw0KS1MbLUJKkVoaFJKmVYSFJamVYSJJaGRaSpFaGhSSplWEhSWplWEiSWv0/J0Ib3VanmfkAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"elevator['Energy_Score'].plot(kind = 'hist')"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "c95f189b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Letter_Score\n",
"A 1683\n",
"B 1863\n",
"C 1803\n",
"D 4136\n",
"F 756\n",
"Name: Block, dtype: int64"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"elevator.groupby(['Letter_Score'])['Block'].count()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "bc9de94c",
"metadata": {},
"outputs": [],
"source": [
"# Elevator apartments fared better overall than walk-ups: about 52% earned an A, B, or C, versus about 33% of walk-ups."
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "4bc3c3d4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='Sq_Footage', ylabel='Energy_Score'>"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x720 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"elevator.plot(kind = 'scatter', x = 'Sq_Footage', y = 'Energy_Score', figsize = (20, 10))"
]
},
{
"cell_type": "markdown",
"id": "29a22a46",
"metadata": {},
"source": [
"## Findings"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "87a96d0f",
"metadata": {},
"outputs": [],
"source": [
"# The scatter plots suggest that the relationship between a building's size and its energy rating\n",
"# is not simple. I had hoped the plots would show either a positive or a negative relationship between\n",
"# square footage and energy rating.\n",
"\n",
"# The analysis presented here cannot answer the research question. However, some trends were uncovered in the\n",
"# process: hotels scored worse overall than office buildings, and elevator buildings performed better than\n",
"# walk-up apartments."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}