{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "11e53f32-8614-4020-9fce-b838ad409098",
   "metadata": {
    "id": "HPsfw4s7kiKk",
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn import metrics\n",
    "from joblib import dump, load # sklearn's replacement for pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "73cb34f0-0771-4df5-bb29-afee3d2f6350",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load saved models\n",
    "model_1 = load('simple_regression.joblib')\n",
    "model_2 = load('multiple_regression.joblib')\n",
    "model_3 = load('decision_tree.joblib')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3023a4a8-7deb-4370-8ad6-8ab61bba514f",
   "metadata": {},
   "source": [
    "## Predictions: model 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b424b756-c488-426b-95d1-5757ab2dd413",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_x = np.zeros((1, 1)) # create an empty vector for 'carat'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "851ccc74-7cdf-43af-aed1-d3f3777768a9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.7]]\n"
     ]
    }
   ],
   "source": [
    "test_x[0, 0] = 0.70 # weight of the diamond\n",
    "print(test_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "04fc4367-9022-49b5-a5e6-5721c0671f92",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_y = np.zeros((1, 1)) # create an empty vector for 'price'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "17d43990-28a3-4799-b10e-68373208dc66",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[2751.]]\n"
     ]
    }
   ],
   "source": [
    "test_y[0, 0] = 2751.00 # price of the diamond\n",
    "print(test_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5eedcc23-376a-4fc4-b4a5-9a20c4674b79",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2996.33847049]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "prediction = model_1.predict(test_x)\n",
    "print(prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "29a7b8ac-31cf-4193-93e6-cd81417a635e",
   "metadata": {
    "id": "cQEsaET7Oj8o"
   },
   "outputs": [],
   "source": [
    "mse = metrics.mean_squared_error(test_y, prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "86a0249c-7987-4510-994a-e4add582c0c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "245.338470494803"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sqrt(mse) # root mean squared error"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7821a20-26bd-4703-ad09-0224aa331c07",
   "metadata": {},
   "source": [
    "## Predictions: model 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "802e6f37-b0e1-403d-865e-489d5b85f53f",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "YVeMEFuEn4wi",
    "outputId": "bbeb9369-e01a-4358-f517-53d57f238b64"
   },
   "outputs": [],
   "source": [
    "test_x = np.zeros((1, 6)) # create an empty array for the features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "900cfbc7-ea3e-4f55-97e3-4b6a9f2b0cbb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.7  4.   4.   4.  62.  55. ]]\n"
     ]
    }
   ],
   "source": [
    "# Populate the single sample row in one assignment (six feature values, in column order)\n",
    "test_x[0, :] = [0.70, 4.00, 4.00, 4.00, 62.00, 55.00]\n",
    "print(test_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "3965a843-809f-492b-9f79-c6577b062c76",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the scaler that was fitted on the training features.\n",
    "# Fitting a new StandardScaler on this single row would set the mean to the row\n",
    "# itself, so every scaled feature would be 0 and the models would ignore the input.\n",
    "# FIXME(review): 'scaler.joblib' is an assumed file name; the training notebook\n",
    "# must dump its fitted scaler (e.g. dump(scaler, 'scaler.joblib')) for this to load.\n",
    "scaler = load('scaler.joblib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d70c4f75-e644-4a4a-b546-d0eed460f76a",
   "metadata": {},
   "outputs": [],
   "source": [
    "scaled_X = scaler.transform(test_x) # apply the training mean/std; do not refit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "7ac2f206-e400-4ac7-be27-1250f48b2d9c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[2751.]]\n"
     ]
    }
   ],
   "source": [
    "# y remains the same\n",
    "print(test_y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "53b6f21f-caa9-4d6f-b90b-2b49c00879cd",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2572.08756099]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "prediction = model_2.predict(scaled_X)\n",
    "print(prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "855b89da-82e0-4b67-8ad7-0bcffd9bf2d5",
   "metadata": {
    "id": "cQEsaET7Oj8o"
   },
   "outputs": [],
   "source": [
    "mse = metrics.mean_squared_error(test_y, prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "ef1098b5-3941-4df0-b427-d0089736a65c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "178.91243900831523"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sqrt(mse) # root mean squared error"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26996ee9-3b6e-4e37-9f90-7a2fb8c0979d",
   "metadata": {},
   "source": [
    "## Predictions: model 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2cf9462e-d339-4f0b-bcc5-bacc31890374",
   "metadata": {},
   "outputs": [],
   "source": [
    "# test_x remains the same\n",
    "# scaled_X remains the same\n",
    "# test_y remains the same"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "be587251-63a5-4e1e-8b95-7da381414699",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1667.]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but DecisionTreeRegressor was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "prediction = model_3.predict(scaled_X)\n",
    "print(prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "753d5621-78ff-4be8-9e11-f74473ba48b6",
   "metadata": {
    "id": "cQEsaET7Oj8o"
   },
   "outputs": [],
   "source": [
    "mse = metrics.mean_squared_error(test_y, prediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e50e4073-8da9-4ca2-9551-eedbe9dbf631",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1084.0"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sqrt(mse) # root mean squared error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "e76d5e9b-1ac9-477a-9dbc-c30a5df50c4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Based on the recorded outputs, model 2 returned the smallest RMSE (178.91), followed by model 1 (245.34), then model 3 (1084.0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}