From b796479467d934750cfbefaab01d31a979197a44 Mon Sep 17 00:00:00 2001 From: gyoza1 Date: Mon, 25 Jul 2022 22:20:35 -0400 Subject: [PATCH] initial commit --- python_project_2.ipynb | 477 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 477 insertions(+) create mode 100644 python_project_2.ipynb diff --git a/python_project_2.ipynb b/python_project_2.ipynb new file mode 100644 index 0000000..2e58175 --- /dev/null +++ b/python_project_2.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "a8d466b1", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1feb2733", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_18634/2268714868.py:1: DtypeWarning: Columns (18,20) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv('~/Downloads/NYPD_Complaint_Data_Historic.csv')\n" + ] + } + ], + "source": [ + "df = pd.read_csv('~/Downloads/NYPD_Complaint_Data_Historic.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5b1cdbba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['CMPLNT_NUM', 'CMPLNT_FR_DT', 'CMPLNT_FR_TM', 'CMPLNT_TO_DT',\n", + " 'CMPLNT_TO_TM', 'ADDR_PCT_CD', 'RPT_DT', 'KY_CD', 'OFNS_DESC', 'PD_CD',\n", + " 'PD_DESC', 'CRM_ATPT_CPTD_CD', 'LAW_CAT_CD', 'BORO_NM',\n", + " 'LOC_OF_OCCUR_DESC', 'PREM_TYP_DESC', 'JURIS_DESC', 'JURISDICTION_CODE',\n", + " 'PARKS_NM', 'HADEVELOPT', 'HOUSING_PSA', 'X_COORD_CD', 'Y_COORD_CD',\n", + " 'SUSP_AGE_GROUP', 'SUSP_RACE', 'SUSP_SEX', 'TRANSIT_DISTRICT',\n", + " 'Latitude', 'Longitude', 'Lat_Lon', 'PATROL_BORO', 'STATION_NAME',\n", + " 'VIC_AGE_GROUP', 'VIC_RACE', 'VIC_SEX'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" 
+ } + ], + "source": [ + "df.columns\n", + "# df.dtypes\n", + "# df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1ac30b35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# According to the data dictionary, CMPLNT_NUM (Complaint Number) is randomly generated and persistent.\n", + "# Is it unique?\n", + "\n", + "df['CMPLNT_NUM'].is_unique" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f0c76e18", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 7821537\n", + "True 3962\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# It's not unique. That's unexpected.\n", + "\n", + "df.duplicated(subset = 'CMPLNT_NUM').value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "253ab2f0", + "metadata": {}, + "outputs": [], + "source": [ + "# Since CMPLNT_NUM is not unique, we can't use it as an index.\n", + "# Let's drop it.\n", + "\n", + "df.drop('CMPLNT_NUM', axis = 1, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7859f04c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CMPLNT_FR_DTCMPLNT_FR_TMCMPLNT_TO_DTCMPLNT_TO_TMADDR_PCT_CDRPT_DTKY_CDOFNS_DESCPD_CDPD_DESCCRM_ATPT_CPTD_CDLAW_CAT_CDBORO_NMLOC_OF_OCCUR_DESCPREM_TYP_DESCJURIS_DESCJURISDICTION_CODEPARKS_NMHADEVELOPTHOUSING_PSAX_COORD_CDY_COORD_CDSUSP_AGE_GROUPSUSP_RACESUSP_SEXTRANSIT_DISTRICTLatitudeLongitudeLat_LonPATROL_BOROSTATION_NAMEVIC_AGE_GROUPVIC_RACEVIC_SEX
012/31/201917:30:00NaNNaN32.012/31/2019118DANGEROUS WEAPONS793.0WEAPONS POSSESSION 3COMPLETEDFELONYMANHATTANNaNSTREETN.Y. POLICE DEPT0.0NaNNaNNaN999937.0238365.0NaNNaNNaNNaN40.820927-73.943324(40.82092679700002, -73.94332421899996)PATROL BORO MAN NORTHNaNUNKNOWNUNKNOWNE
112/29/201916:31:0012/29/201916:54:0047.012/29/2019113FORGERY729.0FORGERY,ETC.,UNCLASSIFIED-FELOCOMPLETEDFELONYBRONXNaNSTREETN.Y. POLICE DEPT0.0NaNNaNNaN1022508.0261990.0NaNNaNNaNNaN40.885701-73.861640(40.885701406000074, -73.86164032499995)PATROL BORO BRONXNaNUNKNOWNUNKNOWNE
212/15/201918:45:00NaNNaN109.012/29/2019578HARRASSMENT 2638.0HARASSMENT,SUBD 3,4,5COMPLETEDVIOLATIONQUEENSFRONT OFSTREETN.Y. POLICE DEPT0.0NaNNaNNaN1034178.0209758.025-44UNKNOWNMNaN40.742281-73.819824(40.74228115600005, -73.81982408)PATROL BORO QUEENS NORTHNaN25-44WHITE HISPANICF
\n", + "
" + ], + "text/plain": [ + " CMPLNT_FR_DT CMPLNT_FR_TM CMPLNT_TO_DT CMPLNT_TO_TM ADDR_PCT_CD \\\n", + "0 12/31/2019 17:30:00 NaN NaN 32.0 \n", + "1 12/29/2019 16:31:00 12/29/2019 16:54:00 47.0 \n", + "2 12/15/2019 18:45:00 NaN NaN 109.0 \n", + "\n", + " RPT_DT KY_CD OFNS_DESC PD_CD \\\n", + "0 12/31/2019 118 DANGEROUS WEAPONS 793.0 \n", + "1 12/29/2019 113 FORGERY 729.0 \n", + "2 12/29/2019 578 HARRASSMENT 2 638.0 \n", + "\n", + " PD_DESC CRM_ATPT_CPTD_CD LAW_CAT_CD BORO_NM \\\n", + "0 WEAPONS POSSESSION 3 COMPLETED FELONY MANHATTAN \n", + "1 FORGERY,ETC.,UNCLASSIFIED-FELO COMPLETED FELONY BRONX \n", + "2 HARASSMENT,SUBD 3,4,5 COMPLETED VIOLATION QUEENS \n", + "\n", + " LOC_OF_OCCUR_DESC PREM_TYP_DESC JURIS_DESC JURISDICTION_CODE \\\n", + "0 NaN STREET N.Y. POLICE DEPT 0.0 \n", + "1 NaN STREET N.Y. POLICE DEPT 0.0 \n", + "2 FRONT OF STREET N.Y. POLICE DEPT 0.0 \n", + "\n", + " PARKS_NM HADEVELOPT HOUSING_PSA X_COORD_CD Y_COORD_CD SUSP_AGE_GROUP \\\n", + "0 NaN NaN NaN 999937.0 238365.0 NaN \n", + "1 NaN NaN NaN 1022508.0 261990.0 NaN \n", + "2 NaN NaN NaN 1034178.0 209758.0 25-44 \n", + "\n", + " SUSP_RACE SUSP_SEX TRANSIT_DISTRICT Latitude Longitude \\\n", + "0 NaN NaN NaN 40.820927 -73.943324 \n", + "1 NaN NaN NaN 40.885701 -73.861640 \n", + "2 UNKNOWN M NaN 40.742281 -73.819824 \n", + "\n", + " Lat_Lon PATROL_BORO \\\n", + "0 (40.82092679700002, -73.94332421899996) PATROL BORO MAN NORTH \n", + "1 (40.885701406000074, -73.86164032499995) PATROL BORO BRONX \n", + "2 (40.74228115600005, -73.81982408) PATROL BORO QUEENS NORTH \n", + "\n", + " STATION_NAME VIC_AGE_GROUP VIC_RACE VIC_SEX \n", + "0 NaN UNKNOWN UNKNOWN E \n", + "1 NaN UNKNOWN UNKNOWN E \n", + "2 NaN 25-44 WHITE HISPANIC F " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(3)\n", + "# df.columns\n", + "# df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5fd666ad", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "BORO CRIME_CLASS\n", + "BRONX MISDEMEANOR 1000078\n", + " FELONY 466248\n", + " VIOLATION 227655\n", + "BROOKLYN MISDEMEANOR 1249836\n", + " FELONY 754414\n", + " VIOLATION 308893\n", + "MANHATTAN MISDEMEANOR 1075687\n", + " FELONY 597184\n", + " VIOLATION 209421\n", + "QUEENS MISDEMEANOR 826883\n", + " FELONY 516528\n", + " VIOLATION 218301\n", + "STATEN ISLAND MISDEMEANOR 210270\n", + " FELONY 81032\n", + " VIOLATION 70589\n", + "Name: CRIME_CLASS, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Question 1\n", + "\n", + "# How does each borough compare according to the class of crime committed?\n", + "# But first, rename some columns to make the table more readable.\n", + "\n", + "df.rename(columns = {'LAW_CAT_CD': 'CRIME_CLASS', 'BORO_NM': 'BORO'}, inplace = True)\n", + "df.groupby(['BORO'])['CRIME_CLASS'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3cbcbd7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# A quick visualization of the above\n", + "\n", + "df.groupby(['BORO'])['CRIME_CLASS'].value_counts().plot(kind = 'bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1ef4d375", + "metadata": {}, + "outputs": [], + "source": [ + "# Question 2\n", + "\n", + "# Some incidents occurred in NYC parks, playgrounds or greenspaces.\n", + "# What crimes were reported most often and where?\n", + "\n", + "# Again, let's begin by renaming columns.\n", + "\n", + "# df.rename(columns = {'PARKS_NM': 'PUBLIC_SPACE'}, inplace = True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}