{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a8d466b1", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "code", "execution_count": 2, "id": "1feb2733", "metadata": {}, "outputs": [], "source": [ "# Load the raw complaint records.\n", "# With default type inference pandas warns that columns 18 and 20 (PARKS_NM, HOUSING_PSA)\n", "# have mixed types; read both as strings so each column has one consistent dtype.\n", "df = pd.read_csv(\n", "    '~/Downloads/NYPD_Complaint_Data_Historic.csv',\n", "    dtype = {'PARKS_NM': str, 'HOUSING_PSA': str},\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "5b1cdbba", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['CMPLNT_NUM', 'CMPLNT_FR_DT', 'CMPLNT_FR_TM', 'CMPLNT_TO_DT',\n", " 'CMPLNT_TO_TM', 'ADDR_PCT_CD', 'RPT_DT', 'KY_CD', 'OFNS_DESC', 'PD_CD',\n", " 'PD_DESC', 'CRM_ATPT_CPTD_CD', 'LAW_CAT_CD', 'BORO_NM',\n", " 'LOC_OF_OCCUR_DESC', 'PREM_TYP_DESC', 'JURIS_DESC', 'JURISDICTION_CODE',\n", " 'PARKS_NM', 'HADEVELOPT', 'HOUSING_PSA', 'X_COORD_CD', 'Y_COORD_CD',\n", " 'SUSP_AGE_GROUP', 'SUSP_RACE', 'SUSP_SEX', 'TRANSIT_DISTRICT',\n", " 'Latitude', 'Longitude', 'Lat_Lon', 'PATROL_BORO', 'STATION_NAME',\n", " 'VIC_AGE_GROUP', 'VIC_RACE', 'VIC_SEX'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns\n", "# df.dtypes\n", "# df.shape" ] }, { "cell_type": "code", "execution_count": 4, "id": "1ac30b35", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# According to the data dictionary, CMPLNT_NUM (Complaint Number) is randomly generated and persistent.\n", "# Is it unique?\n", "\n", "df['CMPLNT_NUM'].is_unique" ] }, { "cell_type": "code", "execution_count": 5, "id": "f0c76e18", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"False 7821537\n", "True 3962\n", "dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# It's not unique. That's unexpected.\n", "\n", "df.duplicated(subset = 'CMPLNT_NUM').value_counts()" ] }, { "cell_type": "code", "execution_count": 6, "id": "253ab2f0", "metadata": {}, "outputs": [], "source": [ "# Since CMPLNT_NUM is not unique, we can't use it as an index.\n", "# Let's drop it.\n", "# Reassign rather than mutate in place, and tolerate an already-missing column,\n", "# so re-running this cell is a no-op instead of a KeyError.\n", "\n", "df = df.drop(columns = 'CMPLNT_NUM', errors = 'ignore')" ] }, { "cell_type": "code", "execution_count": 7, "id": "7859f04c", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | CMPLNT_FR_DT | \n", "CMPLNT_FR_TM | \n", "CMPLNT_TO_DT | \n", "CMPLNT_TO_TM | \n", "ADDR_PCT_CD | \n", "RPT_DT | \n", "KY_CD | \n", "OFNS_DESC | \n", "PD_CD | \n", "PD_DESC | \n", "CRM_ATPT_CPTD_CD | \n", "LAW_CAT_CD | \n", "BORO_NM | \n", "LOC_OF_OCCUR_DESC | \n", "PREM_TYP_DESC | \n", "JURIS_DESC | \n", "JURISDICTION_CODE | \n", "PARKS_NM | \n", "HADEVELOPT | \n", "HOUSING_PSA | \n", "X_COORD_CD | \n", "Y_COORD_CD | \n", "SUSP_AGE_GROUP | \n", "SUSP_RACE | \n", "SUSP_SEX | \n", "TRANSIT_DISTRICT | \n", "Latitude | \n", "Longitude | \n", "Lat_Lon | \n", "PATROL_BORO | \n", "STATION_NAME | \n", "VIC_AGE_GROUP | \n", "VIC_RACE | \n", "VIC_SEX | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "12/31/2019 | \n", "17:30:00 | \n", "NaN | \n", "NaN | \n", "32.0 | \n", "12/31/2019 | \n", "118 | \n", "DANGEROUS WEAPONS | \n", "793.0 | \n", "WEAPONS POSSESSION 3 | \n", "COMPLETED | \n", "FELONY | \n", "MANHATTAN | \n", "NaN | \n", "STREET | \n", "N.Y. POLICE DEPT | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "999937.0 | \n", "238365.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "40.820927 | \n", "-73.943324 | \n", "(40.82092679700002, -73.94332421899996) | \n", "PATROL BORO MAN NORTH | \n", "NaN | \n", "UNKNOWN | \n", "UNKNOWN | \n", "E | \n", "
1 | \n", "12/29/2019 | \n", "16:31:00 | \n", "12/29/2019 | \n", "16:54:00 | \n", "47.0 | \n", "12/29/2019 | \n", "113 | \n", "FORGERY | \n", "729.0 | \n", "FORGERY,ETC.,UNCLASSIFIED-FELO | \n", "COMPLETED | \n", "FELONY | \n", "BRONX | \n", "NaN | \n", "STREET | \n", "N.Y. POLICE DEPT | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1022508.0 | \n", "261990.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "40.885701 | \n", "-73.861640 | \n", "(40.885701406000074, -73.86164032499995) | \n", "PATROL BORO BRONX | \n", "NaN | \n", "UNKNOWN | \n", "UNKNOWN | \n", "E | \n", "
2 | \n", "12/15/2019 | \n", "18:45:00 | \n", "NaN | \n", "NaN | \n", "109.0 | \n", "12/29/2019 | \n", "578 | \n", "HARRASSMENT 2 | \n", "638.0 | \n", "HARASSMENT,SUBD 3,4,5 | \n", "COMPLETED | \n", "VIOLATION | \n", "QUEENS | \n", "FRONT OF | \n", "STREET | \n", "N.Y. POLICE DEPT | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1034178.0 | \n", "209758.0 | \n", "25-44 | \n", "UNKNOWN | \n", "M | \n", "NaN | \n", "40.742281 | \n", "-73.819824 | \n", "(40.74228115600005, -73.81982408) | \n", "PATROL BORO QUEENS NORTH | \n", "NaN | \n", "25-44 | \n", "WHITE HISPANIC | \n", "F | \n", "