diff --git a/python_project_1.ipynb b/python_project_1.ipynb new file mode 100644 index 0000000..1e1f4ec --- /dev/null +++ b/python_project_1.ipynb @@ -0,0 +1,923 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "7acc26cb", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c821dd0a", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('https://raw.githubusercontent.com/CunyLaguardiaDataAnalytics/datasets/master/2014-15_To_2016-17_School-_Level_NYC_Regents_Report_For_All_Variables.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "e080ce64", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(212331, 15)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "57651a37", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "School DBN object\n", + "School Name object\n", + "School Level object\n", + "Regents Exam object\n", + "Year int64\n", + "Total Tested int64\n", + "Mean Score object\n", + "Number Scoring Below 65 object\n", + "Percent Scoring Below 65 object\n", + "Number Scoring 65 or Above object\n", + "Percent Scoring 65 or Above object\n", + "Number Scoring 80 or Above object\n", + "Percent Scoring 80 or Above object\n", + "Number Scoring CR object\n", + "Percent Scoring CR object\n", + "dtype: object" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "7147a7d1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2015 2017\n" + ] + } + ], + "source": [ + "# Time range should be 2014-2015 to 2016-2017 school year. 
Does the 'Year' column reflect this range?\n", + "\n", + "print(df['Year'].min(), df['Year'].max())" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "897e335b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "School DBN 0\n", + "School Name 0\n", + "School Level 0\n", + "Regents Exam 10\n", + "Year 0\n", + "Total Tested 0\n", + "Mean Score 0\n", + "Number Scoring Below 65 0\n", + "Percent Scoring Below 65 0\n", + "Number Scoring 65 or Above 0\n", + "Percent Scoring 65 or Above 0\n", + "Number Scoring 80 or Above 0\n", + "Percent Scoring 80 or Above 0\n", + "Number Scoring CR 0\n", + "Percent Scoring CR 0\n", + "dtype: int64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Is there any missing data?\n", + "\n", + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "e0ab351b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
School DBNSchool NameSchool LevelRegents ExamYearTotal TestedMean ScoreNumber Scoring Below 65Percent Scoring Below 65Number Scoring 65 or AbovePercent Scoring 65 or AboveNumber Scoring 80 or AbovePercent Scoring 80 or AboveNumber Scoring CRPercent Scoring CR
1865484M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20159055.66167.82932.266.700
5275684M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20158155.95567.92632.156.200
5275784M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN2015953.4666.7333.3111.100
10086584M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20155sssssssnana
10086684M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20155457.434632037611.100
10086784M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN201531sssssssnana
13830084M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20155457.33463203759.300
13830184M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20153653.2277592512.800
20978584M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN2015125997532518.300
20978684M478Inwood Academy for Leadership Charter SchoolSecondary SchoolNaN20157855.15266.72633.356.400
\n", + "
" + ], + "text/plain": [ + " School DBN School Name \\\n", + "18654 84M478 Inwood Academy for Leadership Charter School \n", + "52756 84M478 Inwood Academy for Leadership Charter School \n", + "52757 84M478 Inwood Academy for Leadership Charter School \n", + "100865 84M478 Inwood Academy for Leadership Charter School \n", + "100866 84M478 Inwood Academy for Leadership Charter School \n", + "100867 84M478 Inwood Academy for Leadership Charter School \n", + "138300 84M478 Inwood Academy for Leadership Charter School \n", + "138301 84M478 Inwood Academy for Leadership Charter School \n", + "209785 84M478 Inwood Academy for Leadership Charter School \n", + "209786 84M478 Inwood Academy for Leadership Charter School \n", + "\n", + " School Level Regents Exam Year Total Tested Mean Score \\\n", + "18654 Secondary School NaN 2015 90 55.6 \n", + "52756 Secondary School NaN 2015 81 55.9 \n", + "52757 Secondary School NaN 2015 9 53.4 \n", + "100865 Secondary School NaN 2015 5 s \n", + "100866 Secondary School NaN 2015 54 57.4 \n", + "100867 Secondary School NaN 2015 31 s \n", + "138300 Secondary School NaN 2015 54 57.3 \n", + "138301 Secondary School NaN 2015 36 53.2 \n", + "209785 Secondary School NaN 2015 12 59 \n", + "209786 Secondary School NaN 2015 78 55.1 \n", + "\n", + " Number Scoring Below 65 Percent Scoring Below 65 \\\n", + "18654 61 67.8 \n", + "52756 55 67.9 \n", + "52757 6 66.7 \n", + "100865 s s \n", + "100866 34 63 \n", + "100867 s s \n", + "138300 34 63 \n", + "138301 27 75 \n", + "209785 9 75 \n", + "209786 52 66.7 \n", + "\n", + " Number Scoring 65 or Above Percent Scoring 65 or Above \\\n", + "18654 29 32.2 \n", + "52756 26 32.1 \n", + "52757 3 33.3 \n", + "100865 s s \n", + "100866 20 37 \n", + "100867 s s \n", + "138300 20 37 \n", + "138301 9 25 \n", + "209785 3 25 \n", + "209786 26 33.3 \n", + "\n", + " Number Scoring 80 or Above Percent Scoring 80 or Above \\\n", + "18654 6 6.7 \n", + "52756 5 6.2 \n", + "52757 1 11.1 \n", + "100865 s s \n", + "100866 
6 11.1 \n", + "100867 s s \n", + "138300 5 9.3 \n", + "138301 1 2.8 \n", + "209785 1 8.3 \n", + "209786 5 6.4 \n", + "\n", + " Number Scoring CR Percent Scoring CR \n", + "18654 0 0 \n", + "52756 0 0 \n", + "52757 0 0 \n", + "100865 na na \n", + "100866 0 0 \n", + "100867 na na \n", + "138300 0 0 \n", + "138301 0 0 \n", + "209785 0 0 \n", + "209786 0 0 " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['Regents Exam'].isna()]" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "c9a49215", + "metadata": {}, + "outputs": [], + "source": [ + "# Cleaning the data\n", + "\n", + "# There are ten missing values, all in 'Regents Exam' and all from one school.\n", + "# dropna() removes only those ten rows; any other rows for that school are kept.\n", + "\n", + "df.dropna(inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "1f05cd97", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 137101 entries, 1 to 212325\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 School DBN 137101 non-null object \n", + " 1 School Name 137101 non-null object \n", + " 2 School Level 137101 non-null object \n", + " 3 Regents Exam 137101 non-null object \n", + " 4 Year 137101 non-null int64 \n", + " 5 Total Tested 137101 non-null int64 \n", + " 6 Mean Score 137101 non-null float64\n", + " 7 Number Scoring Below 65 137101 non-null object \n", + " 8 Percent Scoring Below 65 137101 non-null object \n", + " 9 Number Scoring 65 or Above 137101 non-null object \n", + " 10 Percent Scoring 65 or Above 137101 non-null object \n", + " 11 Number Scoring 80 or Above 137101 non-null object \n", + " 12 Percent Scoring 80 or Above 137101 non-null object \n", + " 13 Number Scoring CR 137101 non-null object \n", + " 14 Percent Scoring CR 137101 non-null object \n", + "dtypes: float64(1), int64(2), object(12)\n", + 
"memory usage: 16.7+ MB\n" + ] + } + ], + "source": [ + "# 'Mean Score'\n", + "\n", + "# We expect integers or floats. Instead we have objects.\n", + "# Can we convert them to floats?\n", + "\n", + "# Yes, but first we have to deal with the placeholder value 's'.\n", + "# NOTE(review): 's' most likely marks suppressed results - confirm against\n", + "# the dataset's documentation. It cannot be parsed as a number, so we keep\n", + "# only the rows with a real mean score and convert that column to floats.\n", + "\n", + "# .copy() makes the filtered subset an independent DataFrame, so the column\n", + "# assignment below does not raise pandas' SettingWithCopyWarning.\n", + "df = df[df['Mean Score'] != 's'].copy()\n", + "df['Mean Score'] = pd.to_numeric(df['Mean Score'])\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "f62fbc11", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('O')" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 'Percent Scoring 80 or Above': an interesting column.\n", + "# This column's data type also needs to be recast.\n", + "\n", + "df['Percent Scoring 80 or Above'].dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "e3550ecb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('float64')" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Percent Scoring 80 or Above'] = pd.to_numeric(df['Percent Scoring 80 or Above'])\n", + "df['Percent Scoring 80 or Above'].dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "138ebba4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
School DBNSchool NameSchool LevelRegents ExamYearTotal TestedMean ScoreNumber Scoring Below 65Percent Scoring Below 65Number Scoring 65 or AbovePercent Scoring 65 or AboveNumber Scoring 80 or AbovePercent Scoring 80 or AboveNumber Scoring CRPercent Scoring CR
101M034P.S. 034 Franklin D. RooseveltK-8Living Environment20151677.916.31593.8743.8nana
201M034P.S. 034 Franklin D. RooseveltK-8Living Environment2016974.0111.1888.9222.2nana
501M140P.S. 140 Nathan StrausK-8Living Environment2015967.4333.3666.700.0nana
601M140P.S. 140 Nathan StrausK-8Living Environment20161572.6213.31386.7533.3nana
701M140P.S. 140 Nathan StrausK-8Living Environment2017964.4555.6444.4111.1nana
................................................
21232184X717Icahn Charter SchoolK-8Common Core Algebra2016687.20061006100.06100
21232284X717Icahn Charter SchoolK-8Common Core Algebra2017687.00061006100.06100
21232384X717Icahn Charter SchoolK-8Common Core Algebra2017688.3006100583.36100
21232484X717Icahn Charter SchoolK-8Living Environment2015876.5112.5787.5225.0nana
21232584X717Icahn Charter SchoolK-8Living Environment2015679.8006100233.3nana
\n", + "

137101 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " School DBN School Name School Level \\\n", + "1 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", + "2 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", + "5 01M140 P.S. 140 Nathan Straus K-8 \n", + "6 01M140 P.S. 140 Nathan Straus K-8 \n", + "7 01M140 P.S. 140 Nathan Straus K-8 \n", + "... ... ... ... \n", + "212321 84X717 Icahn Charter School K-8 \n", + "212322 84X717 Icahn Charter School K-8 \n", + "212323 84X717 Icahn Charter School K-8 \n", + "212324 84X717 Icahn Charter School K-8 \n", + "212325 84X717 Icahn Charter School K-8 \n", + "\n", + " Regents Exam Year Total Tested Mean Score \\\n", + "1 Living Environment 2015 16 77.9 \n", + "2 Living Environment 2016 9 74.0 \n", + "5 Living Environment 2015 9 67.4 \n", + "6 Living Environment 2016 15 72.6 \n", + "7 Living Environment 2017 9 64.4 \n", + "... ... ... ... ... \n", + "212321 Common Core Algebra 2016 6 87.2 \n", + "212322 Common Core Algebra 2017 6 87.0 \n", + "212323 Common Core Algebra 2017 6 88.3 \n", + "212324 Living Environment 2015 8 76.5 \n", + "212325 Living Environment 2015 6 79.8 \n", + "\n", + " Number Scoring Below 65 Percent Scoring Below 65 \\\n", + "1 1 6.3 \n", + "2 1 11.1 \n", + "5 3 33.3 \n", + "6 2 13.3 \n", + "7 5 55.6 \n", + "... ... ... \n", + "212321 0 0 \n", + "212322 0 0 \n", + "212323 0 0 \n", + "212324 1 12.5 \n", + "212325 0 0 \n", + "\n", + " Number Scoring 65 or Above Percent Scoring 65 or Above \\\n", + "1 15 93.8 \n", + "2 8 88.9 \n", + "5 6 66.7 \n", + "6 13 86.7 \n", + "7 4 44.4 \n", + "... ... ... \n", + "212321 6 100 \n", + "212322 6 100 \n", + "212323 6 100 \n", + "212324 7 87.5 \n", + "212325 6 100 \n", + "\n", + " Number Scoring 80 or Above Percent Scoring 80 or Above \\\n", + "1 7 43.8 \n", + "2 2 22.2 \n", + "5 0 0.0 \n", + "6 5 33.3 \n", + "7 1 11.1 \n", + "... ... ... 
\n", + "212321 6 100.0 \n", + "212322 6 100.0 \n", + "212323 5 83.3 \n", + "212324 2 25.0 \n", + "212325 2 33.3 \n", + "\n", + " Number Scoring CR Percent Scoring CR \n", + "1 na na \n", + "2 na na \n", + "5 na na \n", + "6 na na \n", + "7 na na \n", + "... ... ... \n", + "212321 6 100 \n", + "212322 6 100 \n", + "212323 6 100 \n", + "212324 na na \n", + "212325 na na \n", + "\n", + "[137101 rows x 15 columns]" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}