added project1 files
parent
8af9c2e8a0
commit
bb8c7a0491
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because one or more lines are too long
@ -0,0 +1,428 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "11e53f32-8614-4020-9fce-b838ad409098",
|
||||
"metadata": {
|
||||
"id": "HPsfw4s7kiKk",
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"from sklearn import metrics\n",
|
||||
"from joblib import dump, load # sklearn's replacement for pickle"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "73cb34f0-0771-4df5-bb29-afee3d2f6350",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load saved models\n",
|
||||
"model_1 = load('simple_regression.joblib')\n",
|
||||
"model_2 = load('multiple_regression.joblib')\n",
|
||||
"model_3 = load('decision_tree.joblib')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3023a4a8-7deb-4370-8ad6-8ab61bba514f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Predictions: model 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b424b756-c488-426b-95d1-5757ab2dd413",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_x = np.zeros((1, 1)) # create a 1x1 zero-filled array to hold 'carat'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "851ccc74-7cdf-43af-aed1-d3f3777768a9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[0.7]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"test_x[0, 0] = 0.70 # weight of the diamond\n",
|
||||
"print(test_x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "04fc4367-9022-49b5-a5e6-5721c0671f92",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_y = np.zeros((1, 1)) # create a 1x1 zero-filled array to hold 'price'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "17d43990-28a3-4799-b10e-68373208dc66",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[2751.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"test_y[0, 0] = 2751.00 # price of the diamond\n",
|
||||
"print(test_y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "5eedcc23-376a-4fc4-b4a5-9a20c4674b79",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[2996.33847049]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prediction = model_1.predict(test_x)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "29a7b8ac-31cf-4193-93e6-cd81417a635e",
|
||||
"metadata": {
|
||||
"id": "cQEsaET7Oj8o"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mse = metrics.mean_squared_error(test_y, prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "86a0249c-7987-4510-994a-e4add582c0c5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"245.338470494803"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.sqrt(mse) # root mean squared error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f7821a20-26bd-4703-ad09-0224aa331c07",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Predictions: model 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "802e6f37-b0e1-403d-865e-489d5b85f53f",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "YVeMEFuEn4wi",
|
||||
"outputId": "bbeb9369-e01a-4358-f517-53d57f238b64"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_x = np.zeros((1, 6)) # create a 1x6 zero-filled array to hold the features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "900cfbc7-ea3e-4f55-97e3-4b6a9f2b0cbb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[ 0.7 4. 4. 4. 62. 55. ]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"test_x[0, 0] = 0.70\n",
|
||||
"test_x[0, 1] = 4.00\n",
|
||||
"test_x[0, 2] = 4.00\n",
|
||||
"test_x[0, 3] = 4.00\n",
|
||||
"test_x[0, 4] = 62.00\n",
|
||||
"test_x[0, 5] = 55.00\n",
|
||||
"print(test_x)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "3965a843-809f-492b-9f79-c6577b062c76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scaler = StandardScaler() # instantiate the scaler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "d70c4f75-e644-4a4a-b546-d0eed460f76a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
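||||
"scaled_X = scaler.fit_transform(test_x) # NOTE(review): fitting on this single row maps every feature to 0; apply the scaler that was fitted on the training data instead"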
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7ac2f206-e400-4ac7-be27-1250f48b2d9c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[2751.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# y remains the same\n",
|
||||
"print(test_y)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "53b6f21f-caa9-4d6f-b90b-2b49c00879cd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[2572.08756099]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prediction = model_2.predict(scaled_X)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "855b89da-82e0-4b67-8ad7-0bcffd9bf2d5",
|
||||
"metadata": {
|
||||
"id": "cQEsaET7Oj8o"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mse = metrics.mean_squared_error(test_y, prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "ef1098b5-3941-4df0-b427-d0089736a65c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"178.91243900831523"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.sqrt(mse) # root mean squared error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26996ee9-3b6e-4e37-9f90-7a2fb8c0979d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Predictions: model 3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "2cf9462e-d339-4f0b-bcc5-bacc31890374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test_x remains the same\n",
|
||||
"# scaled_X remains the same\n",
|
||||
"# test_y remains the same"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "be587251-63a5-4e1e-8b95-7da381414699",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[1667.]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but DecisionTreeRegressor was fitted with feature names\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prediction = model_3.predict(scaled_X)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "753d5621-78ff-4be8-9e11-f74473ba48b6",
|
||||
"metadata": {
|
||||
"id": "cQEsaET7Oj8o"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mse = metrics.mean_squared_error(test_y, prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "e50e4073-8da9-4ca2-9551-eedbe9dbf631",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1084.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.sqrt(mse) # root mean squared error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "e76d5e9b-1ac9-477a-9dbc-c30a5df50c4a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Model 2 returned the smallest RMSE (~178.9), followed by model 1 (~245.3), then model 3 (1084.0)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Binary file not shown.
Loading…
Reference in New Issue