2022-07-26 02:20:35 +00:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1,
|
|
|
|
"id": "a8d466b1",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"pd.set_option('display.max_columns', None)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 2,
|
|
|
|
"id": "1feb2733",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stderr",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
2022-07-26 21:22:25 +00:00
|
|
|
"/tmp/ipykernel_6745/2268714868.py:1: DtypeWarning: Columns (18,20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
|
2022-07-26 02:20:35 +00:00
|
|
|
" df = pd.read_csv('~/Downloads/NYPD_Complaint_Data_Historic.csv')\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df = pd.read_csv('~/Downloads/NYPD_Complaint_Data_Historic.csv')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 3,
|
|
|
|
"id": "5b1cdbba",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Index(['CMPLNT_NUM', 'CMPLNT_FR_DT', 'CMPLNT_FR_TM', 'CMPLNT_TO_DT',\n",
|
|
|
|
" 'CMPLNT_TO_TM', 'ADDR_PCT_CD', 'RPT_DT', 'KY_CD', 'OFNS_DESC', 'PD_CD',\n",
|
|
|
|
" 'PD_DESC', 'CRM_ATPT_CPTD_CD', 'LAW_CAT_CD', 'BORO_NM',\n",
|
|
|
|
" 'LOC_OF_OCCUR_DESC', 'PREM_TYP_DESC', 'JURIS_DESC', 'JURISDICTION_CODE',\n",
|
|
|
|
" 'PARKS_NM', 'HADEVELOPT', 'HOUSING_PSA', 'X_COORD_CD', 'Y_COORD_CD',\n",
|
|
|
|
" 'SUSP_AGE_GROUP', 'SUSP_RACE', 'SUSP_SEX', 'TRANSIT_DISTRICT',\n",
|
|
|
|
" 'Latitude', 'Longitude', 'Lat_Lon', 'PATROL_BORO', 'STATION_NAME',\n",
|
|
|
|
" 'VIC_AGE_GROUP', 'VIC_RACE', 'VIC_SEX'],\n",
|
|
|
|
" dtype='object')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df.columns\n",
|
|
|
|
"# df.dtypes\n",
|
|
|
|
"# df.shape"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"id": "1ac30b35",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"False"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# According to the data dictionary, CMPLNT_NUM (Complaint Number) is randomly generated and persistent.\n",
|
|
|
|
"# Is it unique?\n",
|
|
|
|
"\n",
|
|
|
|
"df['CMPLNT_NUM'].is_unique"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
|
|
|
"id": "f0c76e18",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"False 7821537\n",
|
|
|
|
"True 3962\n",
|
|
|
|
"dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# It's not unique. That's unexpected.\n",
|
|
|
|
"\n",
|
|
|
|
"df.duplicated(subset = 'CMPLNT_NUM').value_counts()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 6,
|
|
|
|
"id": "253ab2f0",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Since CMPLNT_NUM is not unique, we can't use it as an index.\n",
|
|
|
|
"# Let's drop it.\n",
|
|
|
|
"\n",
|
|
|
|
"df.drop('CMPLNT_NUM', axis = 1, inplace = True)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 7,
|
|
|
|
"id": "7859f04c",
|
|
|
|
"metadata": {
|
|
|
|
"scrolled": true
|
|
|
|
},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>CMPLNT_FR_DT</th>\n",
|
|
|
|
" <th>CMPLNT_FR_TM</th>\n",
|
|
|
|
" <th>CMPLNT_TO_DT</th>\n",
|
|
|
|
" <th>CMPLNT_TO_TM</th>\n",
|
|
|
|
" <th>ADDR_PCT_CD</th>\n",
|
|
|
|
" <th>RPT_DT</th>\n",
|
|
|
|
" <th>KY_CD</th>\n",
|
|
|
|
" <th>OFNS_DESC</th>\n",
|
|
|
|
" <th>PD_CD</th>\n",
|
|
|
|
" <th>PD_DESC</th>\n",
|
|
|
|
" <th>CRM_ATPT_CPTD_CD</th>\n",
|
|
|
|
" <th>LAW_CAT_CD</th>\n",
|
|
|
|
" <th>BORO_NM</th>\n",
|
|
|
|
" <th>LOC_OF_OCCUR_DESC</th>\n",
|
|
|
|
" <th>PREM_TYP_DESC</th>\n",
|
|
|
|
" <th>JURIS_DESC</th>\n",
|
|
|
|
" <th>JURISDICTION_CODE</th>\n",
|
|
|
|
" <th>PARKS_NM</th>\n",
|
|
|
|
" <th>HADEVELOPT</th>\n",
|
|
|
|
" <th>HOUSING_PSA</th>\n",
|
|
|
|
" <th>X_COORD_CD</th>\n",
|
|
|
|
" <th>Y_COORD_CD</th>\n",
|
|
|
|
" <th>SUSP_AGE_GROUP</th>\n",
|
|
|
|
" <th>SUSP_RACE</th>\n",
|
|
|
|
" <th>SUSP_SEX</th>\n",
|
|
|
|
" <th>TRANSIT_DISTRICT</th>\n",
|
|
|
|
" <th>Latitude</th>\n",
|
|
|
|
" <th>Longitude</th>\n",
|
|
|
|
" <th>Lat_Lon</th>\n",
|
|
|
|
" <th>PATROL_BORO</th>\n",
|
|
|
|
" <th>STATION_NAME</th>\n",
|
|
|
|
" <th>VIC_AGE_GROUP</th>\n",
|
|
|
|
" <th>VIC_RACE</th>\n",
|
|
|
|
" <th>VIC_SEX</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
|
|
|
" <td>12/31/2019</td>\n",
|
|
|
|
" <td>17:30:00</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>32.0</td>\n",
|
|
|
|
" <td>12/31/2019</td>\n",
|
|
|
|
" <td>118</td>\n",
|
|
|
|
" <td>DANGEROUS WEAPONS</td>\n",
|
|
|
|
" <td>793.0</td>\n",
|
|
|
|
" <td>WEAPONS POSSESSION 3</td>\n",
|
|
|
|
" <td>COMPLETED</td>\n",
|
|
|
|
" <td>FELONY</td>\n",
|
|
|
|
" <td>MANHATTAN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>STREET</td>\n",
|
|
|
|
" <td>N.Y. POLICE DEPT</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>999937.0</td>\n",
|
|
|
|
" <td>238365.0</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>40.820927</td>\n",
|
|
|
|
" <td>-73.943324</td>\n",
|
|
|
|
" <td>(40.82092679700002, -73.94332421899996)</td>\n",
|
|
|
|
" <td>PATROL BORO MAN NORTH</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>UNKNOWN</td>\n",
|
|
|
|
" <td>UNKNOWN</td>\n",
|
|
|
|
" <td>E</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
|
|
|
" <td>12/29/2019</td>\n",
|
|
|
|
" <td>16:31:00</td>\n",
|
|
|
|
" <td>12/29/2019</td>\n",
|
|
|
|
" <td>16:54:00</td>\n",
|
|
|
|
" <td>47.0</td>\n",
|
|
|
|
" <td>12/29/2019</td>\n",
|
|
|
|
" <td>113</td>\n",
|
|
|
|
" <td>FORGERY</td>\n",
|
|
|
|
" <td>729.0</td>\n",
|
|
|
|
" <td>FORGERY,ETC.,UNCLASSIFIED-FELO</td>\n",
|
|
|
|
" <td>COMPLETED</td>\n",
|
|
|
|
" <td>FELONY</td>\n",
|
|
|
|
" <td>BRONX</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>STREET</td>\n",
|
|
|
|
" <td>N.Y. POLICE DEPT</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1022508.0</td>\n",
|
|
|
|
" <td>261990.0</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>40.885701</td>\n",
|
|
|
|
" <td>-73.861640</td>\n",
|
|
|
|
" <td>(40.885701406000074, -73.86164032499995)</td>\n",
|
|
|
|
" <td>PATROL BORO BRONX</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>UNKNOWN</td>\n",
|
|
|
|
" <td>UNKNOWN</td>\n",
|
|
|
|
" <td>E</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
|
|
|
" <td>12/15/2019</td>\n",
|
|
|
|
" <td>18:45:00</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>109.0</td>\n",
|
|
|
|
" <td>12/29/2019</td>\n",
|
|
|
|
" <td>578</td>\n",
|
|
|
|
" <td>HARRASSMENT 2</td>\n",
|
|
|
|
" <td>638.0</td>\n",
|
|
|
|
" <td>HARASSMENT,SUBD 3,4,5</td>\n",
|
|
|
|
" <td>COMPLETED</td>\n",
|
|
|
|
" <td>VIOLATION</td>\n",
|
|
|
|
" <td>QUEENS</td>\n",
|
|
|
|
" <td>FRONT OF</td>\n",
|
|
|
|
" <td>STREET</td>\n",
|
|
|
|
" <td>N.Y. POLICE DEPT</td>\n",
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1034178.0</td>\n",
|
|
|
|
" <td>209758.0</td>\n",
|
|
|
|
" <td>25-44</td>\n",
|
|
|
|
" <td>UNKNOWN</td>\n",
|
|
|
|
" <td>M</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>40.742281</td>\n",
|
|
|
|
" <td>-73.819824</td>\n",
|
|
|
|
" <td>(40.74228115600005, -73.81982408)</td>\n",
|
|
|
|
" <td>PATROL BORO QUEENS NORTH</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>25-44</td>\n",
|
|
|
|
" <td>WHITE HISPANIC</td>\n",
|
|
|
|
" <td>F</td>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" CMPLNT_FR_DT CMPLNT_FR_TM CMPLNT_TO_DT CMPLNT_TO_TM ADDR_PCT_CD \\\n",
|
|
|
|
"0 12/31/2019 17:30:00 NaN NaN 32.0 \n",
|
|
|
|
"1 12/29/2019 16:31:00 12/29/2019 16:54:00 47.0 \n",
|
|
|
|
"2 12/15/2019 18:45:00 NaN NaN 109.0 \n",
|
|
|
|
"\n",
|
|
|
|
" RPT_DT KY_CD OFNS_DESC PD_CD \\\n",
|
|
|
|
"0 12/31/2019 118 DANGEROUS WEAPONS 793.0 \n",
|
|
|
|
"1 12/29/2019 113 FORGERY 729.0 \n",
|
|
|
|
"2 12/29/2019 578 HARRASSMENT 2 638.0 \n",
|
|
|
|
"\n",
|
|
|
|
" PD_DESC CRM_ATPT_CPTD_CD LAW_CAT_CD BORO_NM \\\n",
|
|
|
|
"0 WEAPONS POSSESSION 3 COMPLETED FELONY MANHATTAN \n",
|
|
|
|
"1 FORGERY,ETC.,UNCLASSIFIED-FELO COMPLETED FELONY BRONX \n",
|
|
|
|
"2 HARASSMENT,SUBD 3,4,5 COMPLETED VIOLATION QUEENS \n",
|
|
|
|
"\n",
|
|
|
|
" LOC_OF_OCCUR_DESC PREM_TYP_DESC JURIS_DESC JURISDICTION_CODE \\\n",
|
|
|
|
"0 NaN STREET N.Y. POLICE DEPT 0.0 \n",
|
|
|
|
"1 NaN STREET N.Y. POLICE DEPT 0.0 \n",
|
|
|
|
"2 FRONT OF STREET N.Y. POLICE DEPT 0.0 \n",
|
|
|
|
"\n",
|
|
|
|
" PARKS_NM HADEVELOPT HOUSING_PSA X_COORD_CD Y_COORD_CD SUSP_AGE_GROUP \\\n",
|
|
|
|
"0 NaN NaN NaN 999937.0 238365.0 NaN \n",
|
|
|
|
"1 NaN NaN NaN 1022508.0 261990.0 NaN \n",
|
|
|
|
"2 NaN NaN NaN 1034178.0 209758.0 25-44 \n",
|
|
|
|
"\n",
|
|
|
|
" SUSP_RACE SUSP_SEX TRANSIT_DISTRICT Latitude Longitude \\\n",
|
|
|
|
"0 NaN NaN NaN 40.820927 -73.943324 \n",
|
|
|
|
"1 NaN NaN NaN 40.885701 -73.861640 \n",
|
|
|
|
"2 UNKNOWN M NaN 40.742281 -73.819824 \n",
|
|
|
|
"\n",
|
|
|
|
" Lat_Lon PATROL_BORO \\\n",
|
|
|
|
"0 (40.82092679700002, -73.94332421899996) PATROL BORO MAN NORTH \n",
|
|
|
|
"1 (40.885701406000074, -73.86164032499995) PATROL BORO BRONX \n",
|
|
|
|
"2 (40.74228115600005, -73.81982408) PATROL BORO QUEENS NORTH \n",
|
|
|
|
"\n",
|
|
|
|
" STATION_NAME VIC_AGE_GROUP VIC_RACE VIC_SEX \n",
|
|
|
|
"0 NaN UNKNOWN UNKNOWN E \n",
|
|
|
|
"1 NaN UNKNOWN UNKNOWN E \n",
|
|
|
|
"2 NaN 25-44 WHITE HISPANIC F "
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"df.head(3)\n",
|
|
|
|
"# df.columns\n",
|
|
|
|
"# df.dtypes"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 8,
|
|
|
|
"id": "5fd666ad",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"BORO CRIME_CLASS\n",
|
|
|
|
"BRONX MISDEMEANOR 1000078\n",
|
|
|
|
" FELONY 466248\n",
|
|
|
|
" VIOLATION 227655\n",
|
|
|
|
"BROOKLYN MISDEMEANOR 1249836\n",
|
|
|
|
" FELONY 754414\n",
|
|
|
|
" VIOLATION 308893\n",
|
|
|
|
"MANHATTAN MISDEMEANOR 1075687\n",
|
|
|
|
" FELONY 597184\n",
|
|
|
|
" VIOLATION 209421\n",
|
|
|
|
"QUEENS MISDEMEANOR 826883\n",
|
|
|
|
" FELONY 516528\n",
|
|
|
|
" VIOLATION 218301\n",
|
|
|
|
"STATEN ISLAND MISDEMEANOR 210270\n",
|
|
|
|
" FELONY 81032\n",
|
|
|
|
" VIOLATION 70589\n",
|
|
|
|
"Name: CRIME_CLASS, dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 8,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Question 1\n",
|
|
|
|
"\n",
|
|
|
|
"# How does each borough compare according to the class of crime committed?\n",
|
|
|
|
"# But first, rename some columns to make the table more readable.\n",
|
|
|
|
"\n",
|
|
|
|
"df.rename(columns = {'LAW_CAT_CD': 'CRIME_CLASS', 'BORO_NM': 'BORO'}, inplace = True)\n",
|
|
|
|
"df.groupby(['BORO'])['CRIME_CLASS'].value_counts()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-07-26 21:22:25 +00:00
|
|
|
"execution_count": 12,
|
2022-07-26 02:20:35 +00:00
|
|
|
"id": "3cbcbd7c",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"<AxesSubplot:xlabel='BORO,CRIME_CLASS'>"
|
|
|
|
]
|
|
|
|
},
|
2022-07-26 21:22:25 +00:00
|
|
|
"execution_count": 12,
|
2022-07-26 02:20:35 +00:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"data": {
|
2022-07-26 21:22:25 +00:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlMAAAHmCAYAAABXrguzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABijUlEQVR4nO3dedytc73/8dfbUBSiUI5po0ESmarTjOpwVJrE1pzSQCNFpbmk0ogGldQ5RRxlSJ0GkTRTZI4QUj+SdCplev/++F5r77XXXvfa97budX2vxfv5eOyHta5r3ff1cV/rWutzfYfPV7aJiIiIiDtmmdoBREREREyzJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY6iaTEk6QtK1ks6b5eufI+kCSedL+sqk44uIiIhYEtWsMyXpccDfgC/Z3nQJr30AcAywne0bJK1p+9o24oyIiIiYSdWWKdunA3/u3yZpI0n/K+ksST+UtHGz62XAYbZvaH42iVRERERU18UxU4cDr7a9FbAv8Mlm+wOBB0r6kaSfStqhWoQRERERjeVqB9BP0krAo4BjJfU2373573LAA4AnAOsAP5S0qe2/tBxmRERExAKdSqYoLWV/sf2wIfuuBn5q+xbgckkXU5KrX7QYX0RERMQiOtXNZ/uvlERpFwAVmze7jwe2bbavTun2u6xGnBERERE9tUsjHAX8BHiQpKsl7QE8F9hD0jnA+cDOzcu/DVwv6QLgVOCNtq+vEXdERERET9XSCBERERHTrlPdfBERERHTJslURERExBiqzeZbffXVPW/evFqHj4iIiJi1s84660+21xi2r1oyNW/ePM4888xah4+IiIiYNUm/m2lfuvkiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixlBtbb6485u3/8lz9ruuOGinOftdERERcyktUxERERFjSDIVERERMYYkUxERERFjSDIVERERMYYkUxERERFjWGIyJekISddKOm+G/c+V9Ovm348lbT73YUZERER002xapo4Edhix/3Lg8bY3A94DHD4HcUVERERMhSXWmbJ9uqR5I/b/uO/pT4F15iCuiIiIiKkw12Om9gC+Nce/MyIiIqKz5qwCuqRtKcnUY0a8Zk9gT4D11ltvrg4dERERUc2ctExJ2gz4HLCz7etnep3tw21vbXvrNdZYYy4OHREREVHV2MmUpPWArwHPt/2b8UOKiIiImB5L7OaTdBTwBGB1SVcD7wCWB7D9aeDtwH2AT0oCuNX21pMKOCIiIqJLZjObb/4S9r8UeOmcRRQRERExRVIBPSIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMy9UOICK6ad7+J8/J77nioJ3m5PdERHRVWqYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixpBkKiIiImIMSaYiIiIixrDEZErSEZKulXTeDPsl6ROSLpX0a0lbzn2YEREREd00m5apI4EdRuzfEXhA829P4FPjhxURERExHZaYTNk+HfjziJfsDHzJxU+BVSWtNVcBRkRERHTZXIyZWhu4qu/51c22iIiIiDu95ebgd2jINg99obQnpSuQ9dZbb1a/fN7+J9/hwPpdcdBOc/J7IiIiIvrNRcvU1cC6fc/XAa4Z9kLbh9ve2vbWa6yxxhwcOiIiIqKuuUimTg
Re0MzqeyRwo+0/zMHvjYiIiOi8JXbzSToKeAKwuqSrgXcAywPY/jTwTeA/gUuBfwAvnlSwEREREV2zxGTK9vwl7Dew15xFFBERETFFUgE9IiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgxJpiIiIiLGkGQqIiIiYgzL1Q4gIiLm1rz9T56z33XFQTvN2e+KuLOaVcuUpB0kXSzpUkn7D9l/L0knSTpH0vmSXjz3oUZERER0zxKTKUnLAocBOwKbAPMlbTLwsr2AC2xvDjwB+LCku81xrBERERGdM5uWqYcDl9q+zPbNwNHAzgOvMbCyJAErAX8Gbp3TSCMiIiI6aDbJ1NrAVX3Pr2629TsUeDBwDXAu8Frbt89JhBEREREdNptkSkO2eeD5fwBnA/8GPAw4VNIqi/0iaU9JZ0o687rrrlvKUCMiIiK6ZzbJ1NXAun3P16G0QPV7MfA1F5cClwMbD/4i24fb3tr21musscYdjTkiIiKiM2aTTP0CeICkDZpB5bsBJw685kpgewBJ9wUeBFw2l4FGREREdNES60zZvlXS3sC3gWWBI2yfL+kVzf5PA+8BjpR0LqVbcD/bf5pg3BERERGdMKuinba/CXxzYNun+x5fAzx5bkOLiIiI6L4sJxMRERExhiRTEREREWNIMhURERExhiRTEREREWNIMhURERExhlnN5ou4s5i3/8lz9ruuOGinOftdERExvdIyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGJFMRERERY0gyFRERETGGWSVTknaQdLGkSyXtP8NrniDpbEnnS/rB3IYZERER0U3LLekFkpYFDgOeBFwN/ELSibYv6HvNqsAngR1sXylpzQnFGxEREdEps2mZejhwqe3LbN8MHA3sPPCa3YGv2b4SwPa1cxtmRERERDfNJplaG7iq7/nVzbZ+DwRWk3SapLMkvWCuAoyIiIjosiV28wEass1Dfs9WwPbAisBPJP3U9m8W+UXSnsCeAOutt97SRxsRd2nz9j95zn7XFQftNGe/KyLu2mbTMnU1sG7f83WAa4a85n9t/932n4DTgc0Hf5Htw21vbXvrNdZY447GHBEREdEZs0mmfgE8QNIGku4G7AacOPCaE4DHSlpO0j2ARwAXzm2oEREREd2zxG4+27dK2hv4NrAscITt8yW9otn/adsXSvpf4NfA7cDnbJ83ycAjIiIiumA2Y6aw/U3gmwPbPj3w/EPAh+YutIiIiIjuSwX0iIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYw6wWOo6IiBjXvP1PnpPfc8VBO83J74mYK2mZioiIiBhDkqmIiIiIMSSZioiIiBhDkqmIiIiIMSSZioiIiBhDkqmIiIiIMSSZioiIiBhDkqmIiIiIMSSZioiIiBhDKqDfAaniGxERET1pmYqIiIgYQ5KpiIiIiDEkmYqIiI
gYQ5KpiIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYQ5KpiIiIiDEkmYqIiIgYw6ySKUk7SLpY0qWS9h/xum0k3Sb
|
2022-07-26 02:20:35 +00:00
|
|
|
"text/plain": [
|
2022-07-26 21:22:25 +00:00
|
|
|
"<Figure size 720x360 with 1 Axes>"
|
2022-07-26 02:20:35 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"metadata": {
|
|
|
|
"needs_background": "light"
|
|
|
|
},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# A quick visualization of the above\n",
|
|
|
|
"\n",
|
2022-07-26 21:22:25 +00:00
|
|
|
"df.groupby(['BORO'])['CRIME_CLASS'].value_counts().plot(kind = 'bar', figsize = (10, 5))"
|
2022-07-26 02:20:35 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2022-07-26 21:22:25 +00:00
|
|
|
"execution_count": 15,
|
|
|
|
"id": "b07d2228",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"6761"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 15,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# df.head(25)\n",
|
|
|
|
"# df['OFNS_DESC'].isna().sum()\n",
|
|
|
|
"df['PD_DESC'].isna().sum()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 18,
|
2022-07-26 02:20:35 +00:00
|
|
|
"id": "1ef4d375",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Question 2\n",
|
|
|
|
"\n",
|
|
|
|
"# Some incidents occurred in NYC parks, playgrounds or greenspaces.\n",
|
|
|
|
"# What crimes were reported most often and where?\n",
|
|
|
|
"\n",
|
|
|
|
"# Again, let's begin by renaming columns.\n",
|
|
|
|
"\n",
|
2022-07-26 21:22:25 +00:00
|
|
|
"df.rename(columns = {'PARKS_NM': 'PUBLIC_SPACE',\n",
|
|
|
|
" 'PD_DESC': 'DESCRIPTION',\n",
|
|
|
|
" 'ADDR_PCT_CD': 'PRECINCT',\n",
|
|
|
|
" 'Lat_Lon': 'LOCATION',\n",
|
|
|
|
" 'CMPLNT_FR_DT': 'DATE',\n",
|
|
|
|
" 'CMPLNT_FR_TM': 'TIME'\n",
|
|
|
|
" }, inplace = True)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 19,
|
|
|
|
"id": "b838819a",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# PD_DESC (renamed to DESCRIPTION in the previous cell) and OFNS_DESC are both descriptions of the incident.\n",
|
|
|
|
"# The former is more granular, according to the data dictionary.\n",
|
|
|
|
"# PD_DESC also has fewer NaNs than OFNS_DESC."
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 35,
|
|
|
|
"id": "e9c16848",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"CENTRAL PARK 1856\n",
|
|
|
|
"FLUSHING MEADOWS CORONA PARK 1532\n",
|
|
|
|
"CONEY ISLAND BEACH & BOARDWALK 1161\n",
|
|
|
|
"WASHINGTON SQUARE PARK 1063\n",
|
|
|
|
"RIVERSIDE PARK 680\n",
|
|
|
|
"PROSPECT PARK 616\n",
|
|
|
|
"UNION SQUARE PARK 599\n",
|
|
|
|
"MARCUS GARVEY PARK 469\n",
|
|
|
|
"RANDALL'S ISLAND PARK 454\n",
|
|
|
|
"SARA D. ROOSEVELT PARK 395\n",
|
|
|
|
"BRYANT PARK 354\n",
|
|
|
|
"ST. MARY'S PARK BRONX 354\n",
|
|
|
|
"CLAREMONT PARK 348\n",
|
|
|
|
"MACOMBS DAM PARK 341\n",
|
|
|
|
"CROTONA PARK 319\n",
|
|
|
|
"Name: PUBLIC_SPACE, dtype: int64"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 35,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Let's choose public spaces to compare.\n",
|
|
|
|
"\n",
|
|
|
|
"# df.head()\n",
|
|
|
|
"df['PUBLIC_SPACE'].sort_values(ascending = False).value_counts().head(15)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"id": "91ae572a",
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# Each of the top four has more than 1000 incidents.\n",
|
|
|
|
"# We'll pick Central Park and Coney Island."
|
2022-07-26 02:20:35 +00:00
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.10.4"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 5
|
|
|
|
}
|