added project1 files

This commit is contained in:
gyoza1 2022-12-06 19:29:33 -05:00
parent 8af9c2e8a0
commit bb8c7a0491
6 changed files with 57004 additions and 0 deletions

BIN
decision_tree.joblib Normal file

Binary file not shown.

53941
diamonds.csv Normal file

File diff suppressed because it is too large Load Diff

BIN
multiple_regression.joblib Normal file

Binary file not shown.

File diff suppressed because one or more lines are too long

428
project_1_predictions.ipynb Normal file
View File

@ -0,0 +1,428 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "11e53f32-8614-4020-9fce-b838ad409098",
"metadata": {
"id": "HPsfw4s7kiKk",
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn import metrics\n",
"from joblib import dump, load # sklearn's replacement for pickle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "73cb34f0-0771-4df5-bb29-afee3d2f6350",
"metadata": {},
"outputs": [],
"source": [
"# load saved models\n",
"model_1 = load('simple_regression.joblib')\n",
"model_2 = load('multiple_regression.joblib')\n",
"model_3 = load('decision_tree.joblib')"
]
},
{
"cell_type": "markdown",
"id": "3023a4a8-7deb-4370-8ad6-8ab61bba514f",
"metadata": {},
"source": [
"## Predictions: model 1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b424b756-c488-426b-95d1-5757ab2dd413",
"metadata": {},
"outputs": [],
"source": [
"test_x = np.zeros((1, 1)) # create a 1x1 zero-filled array to hold 'carat'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "851ccc74-7cdf-43af-aed1-d3f3777768a9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.7]]\n"
]
}
],
"source": [
"test_x[0, 0] = 0.70 # weight of the diamond\n",
"print(test_x)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "04fc4367-9022-49b5-a5e6-5721c0671f92",
"metadata": {},
"outputs": [],
"source": [
"test_y = np.zeros((1, 1)) # create a 1x1 zero-filled array to hold 'price'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "17d43990-28a3-4799-b10e-68373208dc66",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[2751.]]\n"
]
}
],
"source": [
"test_y[0, 0] = 2751.00 # price of the diamond\n",
"print(test_y)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5eedcc23-376a-4fc4-b4a5-9a20c4674b79",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2996.33847049]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"prediction = model_1.predict(test_x)\n",
"print(prediction)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "29a7b8ac-31cf-4193-93e6-cd81417a635e",
"metadata": {
"id": "cQEsaET7Oj8o"
},
"outputs": [],
"source": [
"mse = metrics.mean_squared_error(test_y, prediction)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "86a0249c-7987-4510-994a-e4add582c0c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"245.338470494803"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sqrt(mse) # root mean squared error"
]
},
{
"cell_type": "markdown",
"id": "f7821a20-26bd-4703-ad09-0224aa331c07",
"metadata": {},
"source": [
"## Predictions: model 2"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "802e6f37-b0e1-403d-865e-489d5b85f53f",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YVeMEFuEn4wi",
"outputId": "bbeb9369-e01a-4358-f517-53d57f238b64"
},
"outputs": [],
"source": [
"test_x = np.zeros((1, 6)) # create a 1x6 zero-filled array to hold the features"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "900cfbc7-ea3e-4f55-97e3-4b6a9f2b0cbb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.7 4. 4. 4. 62. 55. ]]\n"
]
}
],
"source": [
"test_x[0, 0] = 0.70\n",
"test_x[0, 1] = 4.00\n",
"test_x[0, 2] = 4.00\n",
"test_x[0, 3] = 4.00\n",
"test_x[0, 4] = 62.00\n",
"test_x[0, 5] = 55.00\n",
"print(test_x)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3965a843-809f-492b-9f79-c6577b062c76",
"metadata": {},
"outputs": [],
"source": [
"scaler = StandardScaler() # instantiate the scaler"
]
},
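{
"cell_type": "code",
"execution_count": 13,
"id": "d70c4f75-e644-4a4a-b546-d0eed460f76a",
"metadata": {},
"outputs": [],
"source": [
"scaled_X = scaler.fit_transform(test_x) # NOTE(review): fitting on this single row sets every scaled feature to 0; reuse the scaler fitted on the training data instead -- TODO confirm"
]
},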
{
"cell_type": "code",
"execution_count": 14,
"id": "7ac2f206-e400-4ac7-be27-1250f48b2d9c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[2751.]]\n"
]
}
],
"source": [
"# y remains the same\n",
"print(test_y)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "53b6f21f-caa9-4d6f-b90b-2b49c00879cd",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2572.08756099]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"prediction = model_2.predict(scaled_X)\n",
"print(prediction)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "855b89da-82e0-4b67-8ad7-0bcffd9bf2d5",
"metadata": {
"id": "cQEsaET7Oj8o"
},
"outputs": [],
"source": [
"mse = metrics.mean_squared_error(test_y, prediction)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "ef1098b5-3941-4df0-b427-d0089736a65c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"178.91243900831523"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sqrt(mse) # root mean squared error"
]
},
{
"cell_type": "markdown",
"id": "26996ee9-3b6e-4e37-9f90-7a2fb8c0979d",
"metadata": {},
"source": [
"## Predictions: model 3"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "2cf9462e-d339-4f0b-bcc5-bacc31890374",
"metadata": {},
"outputs": [],
"source": [
"# test_x remains the same\n",
"# scaled_X remains the same\n",
"# test_y remains the same"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "be587251-63a5-4e1e-8b95-7da381414699",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1667.]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tim/Nextcloud/Documents/predictive_analytics/envs/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but DecisionTreeRegressor was fitted with feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"prediction = model_3.predict(scaled_X)\n",
"print(prediction)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "753d5621-78ff-4be8-9e11-f74473ba48b6",
"metadata": {
"id": "cQEsaET7Oj8o"
},
"outputs": [],
"source": [
"mse = metrics.mean_squared_error(test_y, prediction)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "e50e4073-8da9-4ca2-9551-eedbe9dbf631",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"1084.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sqrt(mse) # root mean squared error"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "e76d5e9b-1ac9-477a-9dbc-c30a5df50c4a",
"metadata": {},
"outputs": [],
"source": [
"# Model 2 returned the smallest RMSE, followed by model 1, then model 3"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

BIN
simple_regression.joblib Normal file

Binary file not shown.