From 38d8f0a401190083bfe6306c531b8b902f99bf84 Mon Sep 17 00:00:00 2001 From: Pierre-Edouard Portier Date: Sun, 29 Jan 2023 17:52:53 +0100 Subject: [PATCH] Experiments. Where is the error in nakr? --- ...em_approximation_housing_experiment_code.R | 20 +++++++------- pad.R | 26 +++++++++++++++++++ 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/19_b_nystroem_approximation_housing_experiment_code.R b/19_b_nystroem_approximation_housing_experiment_code.R index 2c7986c..3c6d285 100644 --- a/19_b_nystroem_approximation_housing_experiment_code.R +++ b/19_b_nystroem_approximation_housing_experiment_code.R @@ -99,13 +99,13 @@ X.entr <- hous.dat.nakr$entr$X Y.entr <- hous.dat.nakr$entr$Y X.test <- hous.dat.nakr$test$X Y.test <- hous.dat.nakr$test$Y -hous.dat.ca <- datasetHousing.mca() -hous.cam <- mca(hous.dat.ca) -nb.landmarks <- round(sqrt(nrow(X.entr))) -landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks) -nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks) -nakrm.yh <- predict(nakrm, X.test) -nakrm.mae <- mean(abs(nakrm.yh - Y.test)) -# nakrm.yh.train <- predict(nakrm, X.entr) -# rev(order(abs(nakrm.yh.train - Y.entr)))[1:20] -# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]]) \ No newline at end of file +# hous.dat.ca <- datasetHousing.mca() +# hous.cam <- mca(hous.dat.ca) +# nb.landmarks <- round(sqrt(nrow(X.entr))) +# landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks) +# nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks) +# nakrm.yh <- predict(nakrm, X.test) +# nakrm.mae <- mean(abs(nakrm.yh - Y.test)) +# nakrm.yh.train <- predict(nakrm, X.entr) +# rev(order(abs(nakrm.yh.train - Y.entr)))[1:20] +# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]]) \ No newline at end of file diff --git a/pad.R b/pad.R index 8703faa..bb7f7c9 100644 --- a/pad.R +++ b/pad.R @@ -27,3 +27,29 @@ # adj = 0, cex = 0.6) # points(0, 0, pch = 3) # ``` + +# source("19_b_nystroem_approximation_housing_experiment_code.R") +# rdat <- hous.dat.nakr$dat[sample(nrow(hous.dat.nakr$dat), size=2000, replace=FALSE),] +# X <- rdat[,!(colnames(rdat) %in% c('median_house_value'))] +# Y <- rdat[,c('median_house_value')] +# names(Y) <- rownames(X) +# rsplt <- splitdata(list(X = X, Y = Y), 0.8) +# X.entr <- rsplt$entr$X +# Y.entr <- rsplt$entr$Y +# X.test <- rsplt$test$X +# Y.test <- rsplt$test$Y +# source("18_kernel_ridge_regression_code.R") +# krm <- krr(X.entr, Y.entr) +# krm.yh <- predict(krm, X.test) +# krm.mae <- mean(abs(krm.yh - Y.test)) # 35445.1 +# source("15_loocv_code.R") +# rm <- ridge(X.entr, Y.entr) +# rm.yh <- predict(rm, X.test) +# rm.mae <- mean(abs(rm.yh - Y.test)) # 45786.62 +# library(randomForest) +# rfm <- randomForest(X.entr, Y.entr) +# rfm.yh <- predict(rfm, X.test) +# rfm.mae <- mean(abs(rfm.yh - Y.test)) # 34229.02 +# nakrm <- kfold.nakr(X.entr, Y.entr, nb.landmarks=500) +# nakrm.yh <- predict(nakrm, X.test) +# nakrm.mae <- mean(abs(nakrm.yh - Y.test)) # 65454.18 \ No newline at end of file