diff --git a/python_project_1.ipynb b/python_project_1.ipynb index 1e1f4ec..ff539b3 100644 --- a/python_project_1.ipynb +++ b/python_project_1.ipynb @@ -2,17 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 77, "id": "7acc26cb", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd" + "import pandas as pd\n", + "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 30, "id": "c821dd0a", "metadata": {}, "outputs": [], @@ -22,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 31, "id": "e080ce64", "metadata": {}, "outputs": [ @@ -32,7 +33,7 @@ "(212331, 15)" ] }, - "execution_count": 71, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -43,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 32, "id": "57651a37", "metadata": { "scrolled": true @@ -70,7 +71,7 @@ "dtype: object" ] }, - "execution_count": 45, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -81,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 33, "id": "7147a7d1", "metadata": {}, "outputs": [ @@ -101,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 34, "id": "897e335b", "metadata": {}, "outputs": [ @@ -126,7 +127,7 @@ "dtype: int64" ] }, - "execution_count": 66, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -139,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 35, "id": "e0ab351b", "metadata": {}, "outputs": [ @@ -440,7 +441,7 @@ "209786 0 0 " ] }, - "execution_count": 79, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -451,118 +452,37 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 36, "id": "c9a49215", "metadata": {}, "outputs": [], "source": [ "# Cleaning the data\n", "\n", - "# There are ten missing values. One school is responsible. We'll drop that school from the dataset\n", + "# There are ten missing values in the 'Regents Exam' column. One school is responsible.\n", + "# We'll drop that school from the dataset\n", "\n", "df.dropna(inplace = True)" ] }, { "cell_type": "code", - "execution_count": 137, - "id": "1f05cd97", + "execution_count": 42, + "id": "738ee993", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 137101 entries, 1 to 212325\n", - "Data columns (total 15 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 School DBN 137101 non-null object \n", - " 1 School Name 137101 non-null object \n", - " 2 School Level 137101 non-null object \n", - " 3 Regents Exam 137101 non-null object \n", - " 4 Year 137101 non-null int64 \n", - " 5 Total Tested 137101 non-null int64 \n", - " 6 Mean Score 137101 non-null float64\n", - " 7 Number Scoring Below 65 137101 non-null object \n", - " 8 Percent Scoring Below 65 137101 non-null object \n", - " 9 Number Scoring 65 or Above 137101 non-null object \n", - " 10 Percent Scoring 65 or Above 137101 non-null object \n", - " 11 Number Scoring 80 or Above 137101 non-null object \n", - " 12 Percent Scoring 80 or Above 137101 non-null object \n", - " 13 Number Scoring CR 137101 non-null object \n", - " 14 Percent Scoring CR 137101 non-null object \n", - "dtypes: float64(1), int64(2), object(12)\n", - "memory usage: 16.7+ MB\n" - ] - } - ], + "outputs": [], "source": [ - "# 'Mean Score'\n", + "# We're interested in two score columns. We'll drop the other columns.\n", "\n", - "# We expect integers or floats. Instead we have objects.\n", - "# Can we convert them to floats?\n", + "drop_cols = ['Number Scoring Below 65', 'Percent Scoring Below 65', 'Number Scoring 65 or Above', 'Percent Scoring 65 or Above', 'Number Scoring 80 or Above', 'Number Scoring CR', 'Percent Scoring CR']\n", "\n", - "# Yes, but first we have to deal with the nonsense value 's'.\n", - "# We don't know what 's' means so let's make a subset then\n", - "# convert the mean scores to floats.\n", - "\n", - "df = df[df['Mean Score'] != 's']\n", - "df['Mean Score'] = pd.to_numeric(df['Mean Score'])\n", - "df.info()" + "df.drop(drop_cols, axis = 1, inplace = True)" ] }, { "cell_type": "code", - "execution_count": 142, - "id": "f62fbc11", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dtype('O')" - ] - }, - "execution_count": 142, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# 'Percent Scoring 80 or Above': an interesting column.\n", - "# This column's data type also needs to be recast.\n", - "\n", - "df['Percent Scoring 80 or Above'].dtype" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "id": "e3550ecb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dtype('float64')" - ] - }, - "execution_count": 146, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['Percent Scoring 80 or Above'] = pd.to_numeric(df['Percent Scoring 80 or Above'])\n", - "df['Percent Scoring 80 or Above'].dtype" - ] - }, - { - "cell_type": "code", - "execution_count": 148, - "id": "138ebba4", + "execution_count": 44, + "id": "258073d0", "metadata": {}, "outputs": [ { @@ -593,18 +513,22 @@ " Year\n", " Total Tested\n", " Mean Score\n", - " Number Scoring Below 65\n", - " Percent Scoring Below 65\n", - " Number Scoring 65 or Above\n", - " Percent Scoring 65 or Above\n", - " Number Scoring 80 or Above\n", " Percent Scoring 80 or Above\n", - " Number Scoring CR\n", - " Percent Scoring CR\n", " \n", " \n", " \n", " \n", + " 0\n", + " 01M034\n", + " P.S. 034 Franklin D. Roosevelt\n", + " K-8\n", + " Common Core Algebra\n", + " 2017\n", + " 4\n", + " s\n", + " s\n", + " \n", + " \n", " 1\n", " 01M034\n", " P.S. 034 Franklin D. Roosevelt\n", @@ -613,14 +537,7 @@ " 2015\n", " 16\n", " 77.9\n", - " 1\n", - " 6.3\n", - " 15\n", - " 93.8\n", - " 7\n", " 43.8\n", - " na\n", - " na\n", " \n", " \n", " 2\n", @@ -630,273 +547,612 @@ " Living Environment\n", " 2016\n", " 9\n", - " 74.0\n", - " 1\n", - " 11.1\n", - " 8\n", - " 88.9\n", - " 2\n", + " 74\n", " 22.2\n", - " na\n", - " na\n", " \n", " \n", - " 5\n", + " 3\n", " 01M140\n", " P.S. 140 Nathan Straus\n", " K-8\n", - " Living Environment\n", - " 2015\n", - " 9\n", - " 67.4\n", + " Common Core Algebra\n", + " 2016\n", " 3\n", - " 33.3\n", - " 6\n", - " 66.7\n", - " 0\n", - " 0.0\n", - " na\n", - " na\n", + " s\n", + " s\n", " \n", " \n", - " 6\n", + " 4\n", " 01M140\n", " P.S. 140 Nathan Straus\n", " K-8\n", - " Living Environment\n", - " 2016\n", - " 15\n", - " 72.6\n", - " 2\n", - " 13.3\n", - " 13\n", - " 86.7\n", - " 5\n", - " 33.3\n", - " na\n", - " na\n", - " \n", - " \n", - " 7\n", - " 01M140\n", - " P.S. 140 Nathan Straus\n", - " K-8\n", - " Living Environment\n", - " 2017\n", - " 9\n", - " 64.4\n", - " 5\n", - " 55.6\n", - " 4\n", - " 44.4\n", - " 1\n", - " 11.1\n", - " na\n", - " na\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 212321\n", - " 84X717\n", - " Icahn Charter School\n", - " K-8\n", - " Common Core Algebra\n", - " 2016\n", - " 6\n", - " 87.2\n", - " 0\n", - " 0\n", - " 6\n", - " 100\n", - " 6\n", - " 100.0\n", - " 6\n", - " 100\n", - " \n", - " \n", - " 212322\n", - " 84X717\n", - " Icahn Charter School\n", - " K-8\n", " Common Core Algebra\n", " 2017\n", - " 6\n", - " 87.0\n", - " 0\n", - " 0\n", - " 6\n", - " 100\n", - " 6\n", - " 100.0\n", - " 6\n", - " 100\n", - " \n", - " \n", - " 212323\n", - " 84X717\n", - " Icahn Charter School\n", - " K-8\n", - " Common Core Algebra\n", - " 2017\n", - " 6\n", - " 88.3\n", - " 0\n", - " 0\n", - " 6\n", - " 100\n", - " 5\n", - " 83.3\n", - " 6\n", - " 100\n", - " \n", - " \n", - " 212324\n", - " 84X717\n", - " Icahn Charter School\n", - " K-8\n", - " Living Environment\n", - " 2015\n", - " 8\n", - " 76.5\n", - " 1\n", - " 12.5\n", - " 7\n", - " 87.5\n", " 2\n", - " 25.0\n", - " na\n", - " na\n", - " \n", - " \n", - " 212325\n", - " 84X717\n", - " Icahn Charter School\n", - " K-8\n", - " Living Environment\n", - " 2015\n", - " 6\n", - " 79.8\n", - " 0\n", - " 0\n", - " 6\n", - " 100\n", - " 2\n", - " 33.3\n", - " na\n", - " na\n", + " s\n", + " s\n", " \n", " \n", "\n", - "

137101 rows × 15 columns

\n", "" ], "text/plain": [ - " School DBN School Name School Level \\\n", - "1 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", - "2 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", - "5 01M140 P.S. 140 Nathan Straus K-8 \n", - "6 01M140 P.S. 140 Nathan Straus K-8 \n", - "7 01M140 P.S. 140 Nathan Straus K-8 \n", - "... ... ... ... \n", - "212321 84X717 Icahn Charter School K-8 \n", - "212322 84X717 Icahn Charter School K-8 \n", - "212323 84X717 Icahn Charter School K-8 \n", - "212324 84X717 Icahn Charter School K-8 \n", - "212325 84X717 Icahn Charter School K-8 \n", + " School DBN School Name School Level \\\n", + "0 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", + "1 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", + "2 01M034 P.S. 034 Franklin D. Roosevelt K-8 \n", + "3 01M140 P.S. 140 Nathan Straus K-8 \n", + "4 01M140 P.S. 140 Nathan Straus K-8 \n", "\n", - " Regents Exam Year Total Tested Mean Score \\\n", - "1 Living Environment 2015 16 77.9 \n", - "2 Living Environment 2016 9 74.0 \n", - "5 Living Environment 2015 9 67.4 \n", - "6 Living Environment 2016 15 72.6 \n", - "7 Living Environment 2017 9 64.4 \n", - "... ... ... ... ... \n", - "212321 Common Core Algebra 2016 6 87.2 \n", - "212322 Common Core Algebra 2017 6 87.0 \n", - "212323 Common Core Algebra 2017 6 88.3 \n", - "212324 Living Environment 2015 8 76.5 \n", - "212325 Living Environment 2015 6 79.8 \n", + " Regents Exam Year Total Tested Mean Score \\\n", + "0 Common Core Algebra 2017 4 s \n", + "1 Living Environment 2015 16 77.9 \n", + "2 Living Environment 2016 9 74 \n", + "3 Common Core Algebra 2016 3 s \n", + "4 Common Core Algebra 2017 2 s \n", "\n", - " Number Scoring Below 65 Percent Scoring Below 65 \\\n", - "1 1 6.3 \n", - "2 1 11.1 \n", - "5 3 33.3 \n", - "6 2 13.3 \n", - "7 5 55.6 \n", - "... ... ... \n", - "212321 0 0 \n", - "212322 0 0 \n", - "212323 0 0 \n", - "212324 1 12.5 \n", - "212325 0 0 \n", - "\n", - " Number Scoring 65 or Above Percent Scoring 65 or Above \\\n", - "1 15 93.8 \n", - "2 8 88.9 \n", - "5 6 66.7 \n", - "6 13 86.7 \n", - "7 4 44.4 \n", - "... ... ... \n", - "212321 6 100 \n", - "212322 6 100 \n", - "212323 6 100 \n", - "212324 7 87.5 \n", - "212325 6 100 \n", - "\n", - " Number Scoring 80 or Above Percent Scoring 80 or Above \\\n", - "1 7 43.8 \n", - "2 2 22.2 \n", - "5 0 0.0 \n", - "6 5 33.3 \n", - "7 1 11.1 \n", - "... ... ... \n", - "212321 6 100.0 \n", - "212322 6 100.0 \n", - "212323 5 83.3 \n", - "212324 2 25.0 \n", - "212325 2 33.3 \n", - "\n", - " Number Scoring CR Percent Scoring CR \n", - "1 na na \n", - "2 na na \n", - "5 na na \n", - "6 na na \n", - "7 na na \n", - "... ... ... \n", - "212321 6 100 \n", - "212322 6 100 \n", - "212323 6 100 \n", - "212324 na na \n", - "212325 na na \n", - "\n", - "[137101 rows x 15 columns]" + " Percent Scoring 80 or Above \n", + "0 s \n", + "1 43.8 \n", + "2 22.2 \n", + "3 s \n", + "4 s " ] }, - "execution_count": 148, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df" + "df.head()" ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "1f05cd97", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 137101 entries, 1 to 212325\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 School DBN 137101 non-null object \n", + " 1 School Name 137101 non-null object \n", + " 2 School Level 137101 non-null object \n", + " 3 Regents Exam 137101 non-null object \n", + " 4 Year 137101 non-null int64 \n", + " 5 Total Tested 137101 non-null int64 \n", + " 6 Mean Score 137101 non-null float64\n", + " 7 Percent Scoring 80 or Above 137101 non-null float64\n", + "dtypes: float64(2), int64(2), object(4)\n", + "memory usage: 9.4+ MB\n" + ] + } + ], + "source": [ + "# 'Mean Score' and 'Percent Scoring 80 or Above'\n", + "\n", + "# We expect integers or floats. Instead we have objects.\n", + "# Can we convert them to floats?\n", + "\n", + "# Yes, but first we have to deal with the non-numeric value 's'.\n", + "# We don't know what 's' means so let's make a subset\n", + "# then convert the scores to floats.\n", + "\n", + "df = df[df['Mean Score'] != 's']\n", + "df['Mean Score'] = pd.to_numeric(df['Mean Score'])\n", + "df = df[df['Percent Scoring 80 or Above'] != 's']\n", + "df['Percent Scoring 80 or Above'] = pd.to_numeric(df['Percent Scoring 80 or Above'])\n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "f86ea927", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
School DBNSchool NameSchool LevelRegents ExamYearTotal TestedMean ScorePercent Scoring 80 or Above
222202M605Humanities Preparatory AcademyHigh schoolCommon Core English20163874.236.8
222302M605Humanities Preparatory AcademyHigh schoolCommon Core English20176377.854.0
222402M605Humanities Preparatory AcademyHigh schoolEnglish20156574.130.8
2347902M605Humanities Preparatory AcademyHigh schoolCommon Core English20163074.940.0
2348002M605Humanities Preparatory AcademyHigh schoolCommon Core English2016871.925.0
5985402M605Humanities Preparatory AcademyHigh schoolCommon Core English20162276.845.5
5985702M605Humanities Preparatory AcademyHigh schoolCommon Core English20174577.555.6
5986002M605Humanities Preparatory AcademyHigh schoolEnglish20154973.326.5
10711302M605Humanities Preparatory AcademyHigh schoolCommon Core English20162377.852.2
10711402M605Humanities Preparatory AcademyHigh schoolCommon Core English20161568.813.3
10711502M605Humanities Preparatory AcademyHigh schoolCommon Core English20173677.044.4
10711602M605Humanities Preparatory AcademyHigh schoolCommon Core English20172778.966.7
10711702M605Humanities Preparatory AcademyHigh schoolEnglish20153476.335.3
10711802M605Humanities Preparatory AcademyHigh schoolEnglish20153171.825.8
14886002M605Humanities Preparatory AcademyHigh schoolCommon Core English20161070.030.0
14886102M605Humanities Preparatory AcademyHigh schoolCommon Core English20162275.436.4
14886402M605Humanities Preparatory AcademyHigh schoolCommon Core English2017676.750.0
14886502M605Humanities Preparatory AcademyHigh schoolCommon Core English20171678.650.0
14886602M605Humanities Preparatory AcademyHigh schoolCommon Core English20173476.952.9
14886702M605Humanities Preparatory AcademyHigh schoolCommon Core English2017781.171.4
14886902M605Humanities Preparatory AcademyHigh schoolEnglish20152575.340.0
14887002M605Humanities Preparatory AcademyHigh schoolEnglish20153374.824.2
\n", + "
" + ], + "text/plain": [ + " School DBN School Name School Level \\\n", + "2222 02M605 Humanities Preparatory Academy High school \n", + "2223 02M605 Humanities Preparatory Academy High school \n", + "2224 02M605 Humanities Preparatory Academy High school \n", + "23479 02M605 Humanities Preparatory Academy High school \n", + "23480 02M605 Humanities Preparatory Academy High school \n", + "59854 02M605 Humanities Preparatory Academy High school \n", + "59857 02M605 Humanities Preparatory Academy High school \n", + "59860 02M605 Humanities Preparatory Academy High school \n", + "107113 02M605 Humanities Preparatory Academy High school \n", + "107114 02M605 Humanities Preparatory Academy High school \n", + "107115 02M605 Humanities Preparatory Academy High school \n", + "107116 02M605 Humanities Preparatory Academy High school \n", + "107117 02M605 Humanities Preparatory Academy High school \n", + "107118 02M605 Humanities Preparatory Academy High school \n", + "148860 02M605 Humanities Preparatory Academy High school \n", + "148861 02M605 Humanities Preparatory Academy High school \n", + "148864 02M605 Humanities Preparatory Academy High school \n", + "148865 02M605 Humanities Preparatory Academy High school \n", + "148866 02M605 Humanities Preparatory Academy High school \n", + "148867 02M605 Humanities Preparatory Academy High school \n", + "148869 02M605 Humanities Preparatory Academy High school \n", + "148870 02M605 Humanities Preparatory Academy High school \n", + "\n", + " Regents Exam Year Total Tested Mean Score \\\n", + "2222 Common Core English 2016 38 74.2 \n", + "2223 Common Core English 2017 63 77.8 \n", + "2224 English 2015 65 74.1 \n", + "23479 Common Core English 2016 30 74.9 \n", + "23480 Common Core English 2016 8 71.9 \n", + "59854 Common Core English 2016 22 76.8 \n", + "59857 Common Core English 2017 45 77.5 \n", + "59860 English 2015 49 73.3 \n", + "107113 Common Core English 2016 23 77.8 \n", + "107114 Common Core English 2016 15 68.8 \n", + "107115 Common Core English 2017 36 77.0 \n", + "107116 Common Core English 2017 27 78.9 \n", + "107117 English 2015 34 76.3 \n", + "107118 English 2015 31 71.8 \n", + "148860 Common Core English 2016 10 70.0 \n", + "148861 Common Core English 2016 22 75.4 \n", + "148864 Common Core English 2017 6 76.7 \n", + "148865 Common Core English 2017 16 78.6 \n", + "148866 Common Core English 2017 34 76.9 \n", + "148867 Common Core English 2017 7 81.1 \n", + "148869 English 2015 25 75.3 \n", + "148870 English 2015 33 74.8 \n", + "\n", + " Percent Scoring 80 or Above \n", + "2222 36.8 \n", + "2223 54.0 \n", + "2224 30.8 \n", + "23479 40.0 \n", + "23480 25.0 \n", + "59854 45.5 \n", + "59857 55.6 \n", + "59860 26.5 \n", + "107113 52.2 \n", + "107114 13.3 \n", + "107115 44.4 \n", + "107116 66.7 \n", + "107117 35.3 \n", + "107118 25.8 \n", + "148860 30.0 \n", + "148861 36.4 \n", + "148864 50.0 \n", + "148865 50.0 \n", + "148866 52.9 \n", + "148867 71.4 \n", + "148869 40.0 \n", + "148870 24.2 " + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Filter the dataset. Let's have a look at the Humanities Preparatory Academy High School in Chelsea\n", + "\n", + "df[df['School DBN'] == '02M605']" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "1cc0ab0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2015 74.266667\n", + "2016 73.725000\n", + "2017 78.062500\n", + "Name: Mean Score, dtype: float64" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# How did this school perform over three years?\n", + "\n", + "df[df['School DBN'] == '02M605'].groupby(['Year'])['Mean Score'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "8ad44a32", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2015 70.562738\n", + "2016 70.612695\n", + "2017 72.814383\n", + "Name: Mean Score, dtype: float64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Other high schools in Manhattan\n", + "\n", + "df[(df['School DBN'] != '02M605')\n", + " & (df['School Level'] == 'High school')\n", + " & (df['School DBN'].str.contains('M'))\n", + " & (df['Regents Exam']).str.contains('English')\n", + " ].groupby(['Year'])['Mean Score'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "3b420509", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2015 67.399191\n", + "2016 65.028724\n", + "2017 67.557191\n", + "Name: Mean Score, dtype: float64" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# High schools in Bronx, Queens, Brooklyn and Staten Island\n", + "\n", + "df[(df['School DBN'] != '02M605')\n", + " & (df['School Level'] == 'High school')\n", + " & (~df['School DBN'].str.contains('M'))\n", + " & (df['Regents Exam']).str.contains('English')\n", + " ].groupby(['Year'])['Mean Score'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "7b5fd34a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEWCAYAAAB/tMx4AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQqElEQVR4nO3de4xnZ13H8ffH3WJLEdptZzcrS1mMa7HWtIUJog0GXNYUQXbVtBSFTLC6/8g1GFwvCSiJqUaJEC/JBpBRoLZUyK5ggM1INQgUphehZVuXS7vULrtDKZeWcGn79Y85y06nsztnfnP57bPzfiWTc85zzm/ON3kmnzzz/M4lVYUkqT0/MuwCJEmDMcAlqVEGuCQ1ygCXpEYZ4JLUKANckhq1diVPdu6559bmzZtX8pSS1Lybbrrpa1U1Mru9V4AneR3wO0ABnwNeATweuBbYDNwFXFFV95/o92zevJnJyckFFS5Jq12Su+dqn3cKJcmTgVcDo1V1IbAGuBLYBUxU1RZgotuWJK2QvnPga4EzkqxleuR9L7AdGO/2jwM7lrw6SdJxzRvgVfV/wF8BB4FDwDer6qPAhqo61B1zCFi/nIVKkh6tzxTK2UyPtp8G/DhwZpKX9T1Bkp1JJpNMTk1NDV6pJOlR+kyhPB/4clVNVdUPgPcDvwAcTrIRoFsemevDVbW7qkaranRk5DFfokqSBtQnwA8Cz07y+CQBtgL7gb3AWHfMGLBneUqUJM1l3ssIq+rGJNcDNwMPAbcAu4EnANcluYrpkL98OQuVJD1ar+vAq+qNwBtnNX+P6dG4JGkIVvROTEmrw+ZdHxp2CcvqrqtfOOwSAJ+FIknNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTql78T0bjBJpzJH4JLUqFN6BK52+d+TND9H4JLUKANckhplgEtSowxwSWqUAS5JjZo3wJOcn+TWGT/fSvLaJOuS7EtyoFuevRIFS5KmzRvgVXVnVV1cVRcDzwS+A3wA2AVMVNUWYKLbliStkIVOoWwFvlhVdwPbgfGufRzYsYR1SZLmsdAAvxK4plvfUFWHALrl+qUsTJJ0Yr0DPMnjgBcD71vICZLsTDKZZHJqamqh9UmSjmMhI/AXADdX1eFu+3CSjQDd8shcH6qq3VU1WlWjIyMji6tWkvRDCwnwl3Js+gRgLzDWrY8Be5aqKEnS/HoFeJLHA9uA989ovhrYluRAt+/qpS9PknQ8vZ5GWFXfAc6Z1XYf01elSJKGwDsxJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVF9X6l2VpLrk9yRZH+Sn0+yLsm+JAe65dnLXawk6Zi+I/C3Ah+uqqcDFwH7gV3ARFVtASa6bUnSCpk3wJM8EfhF4B0AVfX9qvoGsB0Y7w4bB3YsT4mSpLn0GYH/BDAF/GOSW5K8PcmZwIaqOgTQLdcvY52SpFn6BPha4BnAP1TVJcCDLGC6JMnOJJNJJqempgYsU5I0W58Avwe4p6pu7LavZzrQDyfZCNAtj8z14araXVWjVTU6MjKyFDVLkugR4FX1VeArSc7vmrYCnwf2AmNd2xiwZ1kqlCTNaW3P414FvCfJ44AvAa9gOvyvS3IVcBC4fHlKlCTNpVeAV9WtwOgcu7YuaTWSpN68E1OSGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqN6vVItyV3At4GHgYeqajTJOuBaYDNwF3BFVd2/PGVKkmZbyAj8eVV1cVUdfTfmLmCiqrYAE922JGmFLGYKZTsw3q2PAzsWXY0kqbe+AV7AR5PclGRn17ahqg4BdMv1y1GgJGluvebAgUur6t4k64F9Se7oe4Iu8HcCnHfeeQOUKEmaS68ReFXd2y2PAB8AngUcTrIRoFseOc5nd1fVaFWNjoyMLE3VkqT5AzzJmUl+7Og68MvAbcBeYKw7bAzYs1xFSpIeq88UygbgA0mOHv/eqvpwks8A1yW5CjgIXL58ZUqSZps3wKvqS8BFc7TfB2xdjqIkSfPzTkxJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY3qHeBJ1iS5JckHu+11SfYlOdAtz16+MiVJsy1kBP4aYP+M7V3ARFVtASa6bUnSCukV4Ek2AS8E3j6jeTsw3q2PAzuWtDJJ0gn1HYH/DfAG4JEZbRuq6hBAt1w/1weT7EwymWRyampqMbVKkmaYN8CTvAg4UlU3DXKCqtpdVaNVNToyMjLIr5AkzWFtj2MuBV6c5FeA04EnJnk3cDjJxqo6lGQjcGQ5C5UkPdq8I/Cq+sOq2lRVm4Ergf+oqpcBe4Gx7rAxYM+yVSlJeozFXAd+NbAtyQFgW7ctSVohfaZQfqiqbgBu6NbvA7YufUmSpD68E1OSGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIa1eet9Kcn+XSS/0lye5I/7drXJdmX5EC3PHv5y5UkHdVnBP494Jeq6iLgYuCyJM8GdgETVbUFmOi2JUkrpM9b6auqHug2T+t+CtgOjHft48CO5ShQkjS3XnPgSdYkuRU4AuyrqhuBDVV1CKBbrl+2KiVJj9ErwKvq4aq6GNgEPCvJhX1PkGRnkskkk1NTUwOWKUmabUFXoVTVN4AbgMuAw0k2AnTLI8f5zO6qGq2q0ZGRkcVVK0n6oT5XoYwkOatbPwN4PnAHsBcY6w4bA/YsU42SpDms7XHMRmA8yRqmA/+6qvpgkk8C1yW5CjgIXL6MdUqSZpk3wKvqs8Alc7TfB2xdjqIkSfPzTkxJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY3q81LjpyT5WJL9SW5P8pqufV2SfUkOdMuzl79cSdJRfUbgDwGvr6qfBp4N/F6SC4BdwERVbQEmum1J0gqZN8Cr6lBV3dytfxvYDzwZ2A6Md4eNAzuWqUZJ0hwWNAeeZDPTb6i/EdhQVYdgOuSB9cf5zM4kk0kmp6amFlmuJOmo3gGe5AnAvwKvrapv9f1cVe2uqtGqGh0ZGRmkRknSHHoFeJLTmA7v91TV+7vmw0k2dvs3AkeWp0RJ0lz6XIUS4B3A/qp6y4xde4Gxbn0M2LP05UmSjmdtj2MuBV4OfC7JrV3bHwFXA9cluQo4CFy+LBVKkuY0b4BX1ceBHGf31qUtR5LUl3diSlKjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqP6vBPznUmOJLltRtu6JPuSHOiWZy9vmZKk2fqMwN8FXDarbRcwUVVbgIluW5K0guYN8Kr6L+Drs5q3A+Pd+jiwY2nLkiTNZ9A58A1VdQigW65fupIkSX0s+5eYSXYmmUwyOTU1tdynk6RVY9AAP5xkI0C3PHK8A6tqd1WNVtXoyMjIgKeTJM02aIDvBca69TFgz9KUI0nqq89lhNcAnwTOT3JPkquAq4FtSQ4A27ptSdIKWjvfAVX10uPs2rrEtUiSFsA7MSWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNWpRAZ7ksiR3JvlCkl1LVZQkaX4DB3iSNcDfAS8ALgBemuSCpSpMknRiixmBPwv4QlV9qaq+D/wLsH1pypIkzWfet9KfwJOBr8zYvgf4udkHJdkJ7Ow2H0hy5yLOebI7F/jaSp0sf7FSZ1oV7Lu2ner999S5GhcT4JmjrR7TULUb2L2I8zQjyWRVjQ67Di2cfde21dp/i5lCuQd4yoztTcC9iytHktTXYgL8M8CWJE9L8jjgSmDv0pQlSZrPwFMoVfVQklcCHwHWAO+sqtuXrLI2rYqpolOUfde2Vdl/qXrMtLUkqQHeiSlJjTLAJalRBrgkNcoAl6RGGeADSHLurO2XJXlbkp1J5rrBSSeRJL+WZF23PpLkn5J8Lsm1STYNuz6dWJK3JLl02HWcDLwKZQBJbq6qZ3TrfwI8B3gv8CLgnqp63TDr04kl+XxVXdCtXwt8Cngf8Hzgt6pq2zDr04klmQLuBkaAa4FrquqW4VY1HIu5lX41mznK/nXgOVX1YJL3AjcPqSb1t2bG+k9W1Uu69Xclee0Q6tHC3FNVo0m2MH0D4bu7p6New3SY/+9wy1s5TqEM5owklyR5JrCmqh4EqKofAA8PtzT1cEOSP0tyRre+AyDJ84BvDrUy9VEAVXWgqt5cVT8DXAGcDvz7UCtbYU6hDCDJx2Y1/WZVHUpyDvCR1fhQnZYkOQ34Y+C3u6ZNwIPAvwG7qurgsGrT/JLcUlWXDLuOk4EBvoS6f+N+tKq+M+xa1E+SJwFrq+q+YdeifpI8oaoeGHYdJwOnUJZQVT0MnDfsOtRfVX1zZngnefow69H8ThTeq63/HIEvsSQHq8oQb5T917bV1n9ehTKAJG873i7grBUsRQOw/9pm/x3jCHwASb4NvB743hy7/7qqzp2jXScJ+69t9t8xjsAH8xngtqr6xOwdSd608uVogey/ttl/HUfgA+huw/6uV5u0yf5rm/13jAEuSY3yMsIBJHlSkquT3JHkvu5nf9d21rDr04nZf22z/44xwAdzHXA/8NyqOqeqzgGe17W9b6iVqQ/7r232X8cplAEkubOqzl/oPp0c7L+22X/HOAIfzN1J3pBkw9GGJBuS/AHwlSHWpX7sv7bZfx0DfDAvAc4B/jPJ/Um+DtwArGP6qWg6udl/bbP/Ok6hDKh75sIm4FMzn82Q5LKq+vDwKlMf9l/b7L9pjsAHkOTVwB7glcBtSbbP2P3nw6lKfdl/bbP/jvFOzMH8LvDMqnogyWbg+iSbq+qtPPptPTo52X9ts/86Bvhg1hz9t62q7kryXKb/iJ7KKvsDapT91zb7r+MUymC+muTioxvdH9OLgHOBnx1WUerN/mub/dfxS8wBJNkEPFRVX51j36VV9d9DKEs92X9ts/+OMcAlqVFOoUhSowxwSWqUAa5TWqZ9PMkLZrRdkWTV3OyhU5dz4DrlJbmQ6afUXQKsAW4FLquqLw7wu9ZU1cNLW6E0GANcq0KSvwQeBM7slk9l+pKztcCbqmpPd1PIP3fHALyyqj7RXWf8RuAQcHFVXbCy1UtzM8C1KiQ5E7gZ+D7wQeD2qnp39wKATzM9Oi/gkar6bpItwDVVNdoF+IeAC6vqy8OoX5qLd2JqVaiqB5NcCzzA9BPrfjXJ73e7TwfOA+4F/ra7SeRh4Kdm/IpPG9462RjgWk0e6X4C/EZV3TlzZ/dG88PARUx/wf/dGbsfXKEapd68CkWr0UeAVyUJQJJLuvYnAYeq6hHg5Ux/4SmdtAxwrUZvBk4DPpvktm4b4O+BsSSfYnr6xFG3Tmp+iSlJjXIELkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWrU/wNCLPxJAgxBxQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# test plot\n", + "\n", + "df[df['School DBN'] == '02M605'].groupby(['Year'])['Mean Score'].mean().plot(kind = 'bar')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2ab50ef", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {