{
 "cells": [
  { "cell_type": "markdown", "id": "intro-summary", "metadata": {}, "source": [ "# Diamond price predictions: comparing three saved models\n", "\n", "Loads three previously trained regressors (simple linear regression, multiple linear regression, decision tree) and compares their predictions for a single test diamond (0.70 carat, actual price 2751).\n", "\n", "**Note:** with only one test sample, the RMSE reported in each section is simply the absolute error of that one prediction." ] },
  { "cell_type": "code", "execution_count": 1, "id": "11e53f32-8614-4020-9fce-b838ad409098", "metadata": { "id": "HPsfw4s7kiKk", "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn import metrics\n", "from joblib import dump, load  # joblib: pickle-based persistence commonly used for scikit-learn models" ] },
  { "cell_type": "code", "execution_count": 2, "id": "73cb34f0-0771-4df5-bb29-afee3d2f6350", "metadata": {}, "outputs": [], "source": [ "# Load the saved models.\n", "# NOTE: joblib.load unpickles the file and can execute arbitrary code, so only load artifacts you trust.\n", "model_1 = load('simple_regression.joblib')\n", "model_2 = load('multiple_regression.joblib')\n", "model_3 = load('decision_tree.joblib')" ] },
  { "cell_type": "markdown", "id": "3023a4a8-7deb-4370-8ad6-8ab61bba514f", "metadata": {}, "source": [ "## Predictions: model 1" ] },
  { "cell_type": "code", "execution_count": 3, "id": "b424b756-c488-426b-95d1-5757ab2dd413", "metadata": {}, "outputs": [], "source": [ "test_x = np.zeros((1, 1))  # 1x1 zero-filled array that will hold the 'carat' feature" ] },
  { "cell_type": "code", "execution_count": 4, "id": "851ccc74-7cdf-43af-aed1-d3f3777768a9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0.7]]\n" ] } ], "source": [ "test_x[0, 0] = 0.70  # weight of the diamond\n", "print(test_x)" ] },
  { "cell_type": "code", "execution_count": 5, "id": "04fc4367-9022-49b5-a5e6-5721c0671f92", "metadata": {}, "outputs": [], "source": [ "test_y = np.zeros((1, 1))  # 1x1 zero-filled array that will hold the 'price' target" ] },
  { "cell_type": "code", "execution_count": 6, "id": "17d43990-28a3-4799-b10e-68373208dc66", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[2751.]]\n" ] } ], "source": [ "test_y[0, 0] = 2751.00  # price of the diamond\n", "print(test_y)" ] },
  { "cell_type": "code", "execution_count": 7, "id": "5eedcc23-376a-4fc4-b4a5-9a20c4674b79", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2996.33847049]\n" ] } ], "source": [ "# Wrap the array in a DataFrame carrying the column names the model was fitted with;\n", "# predicting from a bare ndarray triggers sklearn's 'X does not have valid feature names' warning.\n", "features_1 = pd.DataFrame(test_x, columns=model_1.feature_names_in_)\n", "prediction = model_1.predict(features_1)\n", "print(prediction)" ] },
  { "cell_type": "code", "execution_count": 8, "id": "29a7b8ac-31cf-4193-93e6-cd81417a635e", "metadata": { "id": "cQEsaET7Oj8o" }, "outputs": [], "source": [ "mse = metrics.mean_squared_error(test_y, prediction)" ] },
  { "cell_type": "code", "execution_count": 9, "id": "86a0249c-7987-4510-994a-e4add582c0c5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "245.338470494803" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sqrt(mse)  # root mean squared error; with a single sample this equals |actual - predicted|" ] },
  { "cell_type": "markdown", "id": "f7821a20-26bd-4703-ad09-0224aa331c07", "metadata": {}, "source": [ "## Predictions: model 2" ] },
  { "cell_type": "code", "execution_count": 10, "id": "802e6f37-b0e1-403d-865e-489d5b85f53f", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YVeMEFuEn4wi", "outputId": "bbeb9369-e01a-4358-f517-53d57f238b64" }, "outputs": [], "source": [ "test_x = np.zeros((1, 6))  # 1x6 zero-filled array for the six features" ] },
  { "cell_type": "code", "execution_count": 11, "id": "900cfbc7-ea3e-4f55-97e3-4b6a9f2b0cbb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0.7  4.   4.   4.  62.  55. ]]\n" ] } ], "source": [ "# The column order must match the order model_2 was trained with (see model_2.feature_names_in_).\n", "test_x[0, 0] = 0.70\n", "test_x[0, 1] = 4.00\n", "test_x[0, 2] = 4.00\n", "test_x[0, 3] = 4.00\n", "test_x[0, 4] = 62.00\n", "test_x[0, 5] = 55.00\n", "print(test_x)" ] },
  { "cell_type": "code", "execution_count": 12, "id": "3965a843-809f-492b-9f79-c6577b062c76", "metadata": {}, "outputs": [], "source": [ "scaler = StandardScaler()  # instantiate the scaler" ] },
  { "cell_type": "code", "execution_count": 13, "id": "d70c4f75-e644-4a4a-b546-d0eed460f76a", "metadata": {}, "outputs": [], "source": [ "# FIXME: fitting the scaler on this single row makes each feature's mean equal to its own value,\n", "# so every scaled feature becomes 0 and the models below receive an all-zero input.\n", "# The scaler fitted on the training data should be persisted alongside the models and loaded here,\n", "# then applied with scaler.transform(test_x) instead of fit_transform.\n", "scaled_X = scaler.fit_transform(test_x)  # scale the array" ] },
  { "cell_type": "code", "execution_count": 14, "id": "7ac2f206-e400-4ac7-be27-1250f48b2d9c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[2751.]]\n" ] } ], "source": [ "# y remains the same\n", "print(test_y)" ] },
  { "cell_type": "code", "execution_count": 15, "id": "53b6f21f-caa9-4d6f-b90b-2b49c00879cd", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2572.08756099]\n" ] } ], "source": [ "# Attach the fitted feature names so sklearn does not warn about a bare ndarray.\n", "features_2 = pd.DataFrame(scaled_X, columns=model_2.feature_names_in_)\n", "prediction = model_2.predict(features_2)\n", "print(prediction)" ] },
  { "cell_type": "code", "execution_count": 16, "id": "855b89da-82e0-4b67-8ad7-0bcffd9bf2d5", "metadata": { "id": "cQEsaET7Oj8o" }, "outputs": [], "source": [ "mse = metrics.mean_squared_error(test_y, prediction)" ] },
  { "cell_type": "code", "execution_count": 17, "id": "ef1098b5-3941-4df0-b427-d0089736a65c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "178.91243900831523" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sqrt(mse)  # root mean squared error; with a single sample this equals |actual - predicted|" ] },
  { "cell_type": "markdown", "id": "26996ee9-3b6e-4e37-9f90-7a2fb8c0979d", "metadata": {}, "source": [ "## Predictions: model 3" ] },
  { "cell_type": "markdown", "id": "2cf9462e-d339-4f0b-bcc5-bacc31890374", "metadata": {}, "source": [ "The inputs are unchanged from the model 2 section: `test_x`, `scaled_X` and `test_y` are reused as-is." ] },
  { "cell_type": "code", "execution_count": 18, "id": "be587251-63a5-4e1e-8b95-7da381414699", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1667.]\n" ] } ], "source": [ "# Attach the fitted feature names so sklearn does not warn about a bare ndarray.\n", "features_3 = pd.DataFrame(scaled_X, columns=model_3.feature_names_in_)\n", "prediction = model_3.predict(features_3)\n", "print(prediction)" ] },
  { "cell_type": "code", "execution_count": 19, "id": "753d5621-78ff-4be8-9e11-f74473ba48b6", "metadata": { "id": "cQEsaET7Oj8o" }, "outputs": [], "source": [ "mse = metrics.mean_squared_error(test_y, prediction)" ] },
  { "cell_type": "code", "execution_count": 20, "id": "e50e4073-8da9-4ca2-9551-eedbe9dbf631", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "1084.0" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.sqrt(mse)  # root mean squared error; with a single sample this equals |actual - predicted|" ] },
  { "cell_type": "markdown", "id": "e76d5e9b-1ac9-477a-9dbc-c30a5df50c4a", "metadata": {}, "source": [ "## Conclusion\n", "\n", "On this single test diamond, model 2 returned the smallest RMSE (about 178.9), followed by model 1 (about 245.3), then model 3 (1084.0).\n", "\n", "**Caveat:** models 2 and 3 were fed features scaled by a `StandardScaler` fitted on the test row itself, which turns every feature into 0, so their errors do not reflect how they handle this diamond's actual features. Re-run the comparison with the scaler that was fitted on the training data before drawing conclusions." ] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } },
 "nbformat": 4,
 "nbformat_minor": 5
}