Nystroem approximation is now correct (and simplified).
This commit is contained in:
parent
3c83c256b5
commit
5689bf6fef
@ -1,8 +1,7 @@
|
|||||||
source("05_d_svd_mca_code.R")
|
source("15_loocv_code.R")
|
||||||
source("04_validation_croisee_code.R")
|
|
||||||
source("19_nystroem_approximation_code.R")
|
source("19_nystroem_approximation_code.R")
|
||||||
|
|
||||||
datasetHousing.nakr <-
|
dataset.housing <-
|
||||||
function()
|
function()
|
||||||
{
|
{
|
||||||
dat <- read.csv(file="data/housing.csv", header=TRUE)
|
dat <- read.csv(file="data/housing.csv", header=TRUE)
|
||||||
@ -44,68 +43,24 @@ function()
|
|||||||
return(r)
|
return(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
# test.tbl <- table(c(4,2,4,2,1,1,1))
|
print("load dataset")
|
||||||
# all(intersperse(test.tbl) == c(1, 2, 4, 1, 2, 4, 1))
|
dat <- dataset.housing()
|
||||||
intersperse <-
|
|
||||||
function(tbl)
|
|
||||||
{
|
|
||||||
n <- sum(tbl)
|
|
||||||
values <- as.numeric(names(tbl))
|
|
||||||
r <- numeric(n)
|
|
||||||
i <- 1
|
|
||||||
while(i <= n)
|
|
||||||
{
|
|
||||||
for(j in 1:length(tbl))
|
|
||||||
{
|
|
||||||
if(tbl[j] != 0)
|
|
||||||
{
|
|
||||||
r[i] <- values[j]
|
|
||||||
i <- i + 1
|
|
||||||
tbl[j] <- tbl[j] - 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return(r)
|
|
||||||
}
|
|
||||||
|
|
||||||
# sample the landmarks from the clusters of individuals
|
print("Nystroem approx ridge regression")
|
||||||
# obtained after correspondence analysis
|
nakrm <- nakr(dat$entr$X, dat$entr$Y)
|
||||||
landmarks.by.ca.clst <-
|
nakrm.yh <- predict(nakrm, dat$test$X)
|
||||||
function(cam, X, nbLandmarks)
|
nakrm.mae <- mean(abs(nakrm.yh - dat$test$Y))
|
||||||
{
|
print(paste("MAE for Nystroem: ", nakrm.mae))
|
||||||
if(nbLandmarks > nrow(X))
|
|
||||||
{
|
|
||||||
stop("The number of landmarks must be less than the number of training samples.")
|
|
||||||
}
|
|
||||||
landmarks <- numeric(nbLandmarks)
|
|
||||||
clst <- cam$clsti$cluster[as.numeric(rownames(X))]
|
|
||||||
clst.tbl <- table(clst)
|
|
||||||
nb.by.clst <- table((intersperse(clst.tbl))[1:nbLandmarks])
|
|
||||||
clst.id <- as.numeric(names(nb.by.clst))
|
|
||||||
set.seed(1123)
|
|
||||||
clst <- sample(clst)
|
|
||||||
offset <- 0
|
|
||||||
for (i in 1:length(nb.by.clst))
|
|
||||||
{
|
|
||||||
k <- as.numeric(nb.by.clst[i])
|
|
||||||
landmarks[(offset+1):(offset+k)] <- as.numeric(names((clst[clst==clst.id[i]])[1:k]))
|
|
||||||
offset <- offset+k
|
|
||||||
}
|
|
||||||
return(landmarks)
|
|
||||||
}
|
|
||||||
|
|
||||||
hous.dat.nakr <- datasetHousing.nakr()
|
print("Linear ridge regression")
|
||||||
X.entr <- hous.dat.nakr$entr$X
|
rm <- ridge(dat$entr$X, dat$entr$Y)
|
||||||
Y.entr <- hous.dat.nakr$entr$Y
|
rm.yh <- predict(rm, dat$test$X)
|
||||||
X.test <- hous.dat.nakr$test$X
|
rm.mae <- mean(abs(rm.yh - dat$test$Y))
|
||||||
Y.test <- hous.dat.nakr$test$Y
|
print(paste("MAE for Ridge: ", rm.mae))
|
||||||
hous.dat.ca <- datasetHousing.mca()
|
|
||||||
hous.cam <- mca(hous.dat.ca)
|
print("Random forest")
|
||||||
nb.landmarks <- round(sqrt(nrow(X.entr)))
|
library(randomForest)
|
||||||
landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
|
rfm <- randomForest(dat$entr$X, dat$entr$Y)
|
||||||
# nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks)
|
rfm.yh <- predict(rfm, dat$test$X)
|
||||||
# nakrm.yh <- predict(nakrm, X.test)
|
rfm.mae <- mean(abs(rfm.yh - dat$test$Y))
|
||||||
# nakrm.mae <- mean(abs(nakrm.yh - Y.test))
|
print(paste("MAE for Random Forest: ", rfm.mae))
|
||||||
# nakrm.yh.train <- predict(nakrm, X.entr)
|
|
||||||
# rev(order(abs(nakrm.yh.train - Y.entr)))[1:20]
|
|
||||||
# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]])
|
|
@ -3,6 +3,7 @@
|
|||||||
```{r, include=FALSE}
|
```{r, include=FALSE}
|
||||||
source("01_intro_code.R", local = knitr::knit_global())
|
source("01_intro_code.R", local = knitr::knit_global())
|
||||||
source("04_validation_croisee_code.R", local = knitr::knit_global())
|
source("04_validation_croisee_code.R", local = knitr::knit_global())
|
||||||
|
source("15_loocv_code.R", local = knitr::knit_global())
|
||||||
source("19_nystroem_approximation_code.R", local = knitr::knit_global())
|
source("19_nystroem_approximation_code.R", local = knitr::knit_global())
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -103,7 +104,7 @@ $$
|
|||||||
\end{aligned}
|
\end{aligned}
|
||||||
$$
|
$$
|
||||||
|
|
||||||
Ainsi, nous pouvons introduire une matrice $\boldsymbol\Phi$ de dimension $m$ telle que $\mathbf{K} \approx \boldsymbol\Phi \boldsymbol\Phi^T$.
|
Ainsi, nous pouvons introduire une matrice $\boldsymbol\Phi$ de rang $m$ telle que $\mathbf{K} \approx \boldsymbol\Phi \boldsymbol\Phi^T$.
|
||||||
$$
|
$$
|
||||||
\boldsymbol\Phi =
|
\boldsymbol\Phi =
|
||||||
\left[
|
\left[
|
||||||
@ -162,45 +163,7 @@ $$
|
|||||||
\end{aligned}
|
\end{aligned}
|
||||||
$$
|
$$
|
||||||
|
|
||||||
## Régression ridge à noyau et approximation de Nyström
|
Nous voyons que la matrice $\mathbf{L}$ correspond exactement à la matrice $\boldsymbol\Phi$ dont les lignes sont les nouvelles variables $m$-dimensionnelles telles que $\boldsymbol\Phi \boldsymbol\Phi^T$ soit une approximation de rang-$m$ de $\mathbf{K}$. Ainsi, pour opérer une régression ridge à noyau avec approximation de Nyström, il suffit de faire une régression ridge sur ces variables transformées.
|
||||||
|
|
||||||
Dans le cadre de la régression ridge à noyau (voir un précédent module), nous notons : $\mathbf{G} = \mathbf{K} + \lambda\mathbf{I_n}$. Les coefficients du modèle ridge sont alors donnés par : $\boldsymbol\alpha_\lambda = \mathbf{G}^{-1} \mathbf{y}$. Nous cherchons à calculer efficacement $\mathbf{G}^{-1}$ à partir d'une approximation Nyström de rang $m$ de $\mathbf{K} \approx \mathbf{L}\mathbf{L}^T$. Pour ce faire, nous utilisons une forme de l'identité de Woodbury :
|
|
||||||
$$
|
|
||||||
\left(\mathbf{A} + \mathbf{U}\mathbf{C}\mathbf{V}\right)^{-1} = \mathbf{A}^{-1} - \mathbf{A}^{-1}\mathbf{U}\left(\mathbf{C}^{-1} + \mathbf{V}\mathbf{A}^{-1}\mathbf{U}\right)^{-1}\mathbf{V} \mathbf{A}^{-1}
|
|
||||||
$$
|
|
||||||
|
|
||||||
Nous obtenons ainsi :
|
|
||||||
$$
|
|
||||||
\begin{aligned}
|
|
||||||
& \mathbf{G}^{-1} \\
|
|
||||||
= \{& \text{Définition de $\mathbf{G}$} \} \\
|
|
||||||
& \left(\lambda\mathbf{I_n} + \mathbf{K}\right)^{-1} \\
|
|
||||||
\approx \{& \text{Approximation de Nyström} \} \\
|
|
||||||
& \left(\lambda\mathbf{I_n} + \mathbf{L}\mathbf{L}^T\right)^{-1} \\
|
|
||||||
= \{& \text{Identité de Woodbury} \} \\
|
|
||||||
& \lambda^{-1}\mathbf{I_n} - \lambda^{-1}\mathbf{L}\left(\mathbf{I_m}+\lambda^{-1}\mathbf{L}^T\mathbf{L}\right)^{-1}\lambda^{-1}\mathbf{L}^T \\
|
|
||||||
= \{& \text{Algèbre linéaire : si $\mathbf{A}$ et $\mathbf{B}$ sont des matrices carrés inversibles alors $(\mathbf{A}\mathbf{B})^{-1} = \mathbf{B}^{-1}\mathbf{A}^{-1}$} \} \\
|
|
||||||
& \lambda^{-1}\mathbf{I_n} - \lambda^{-1}\mathbf{L}\left(\lambda\mathbf{I_m}+\mathbf{L}^T\mathbf{L}\right)^{-1}\mathbf{L}^T \\
|
|
||||||
\end{aligned}
|
|
||||||
$$
|
|
||||||
|
|
||||||
Montrons comment calculer efficacement la prédiction $\hat{y}_{new}$ d'une nouvelle observation $\mathbf{x_{new}}$. Nous avons montré plus haut que l'approximation de Nyström pouvait s'écrire $\mathbf{K} \approx \mathbf{C} \mathbf{K_{11}}^{-1} \mathbf{C}^T$. Donc la ligne de l'approximation du noyau $\mathbf{K}$ qui correspond à l'observation $\mathbf{x_j}$ du jeu d'entrainement (dont la version non approximée est $[k(\mathbf{x_j},\mathbf{x_1}),\dots,k(\mathbf{x_j},\mathbf{x_n})]$) s'obtient par combinaison linéaire des lignes de $\mathbf{K_{11}}^{-1} \mathbf{C}^T$. Les $m$ coefficients de cette combinaison linéaire sont $(k(\mathbf{x_j},\mathbf{x_1}),\dots,k(\mathbf{x_j},\mathbf{x_m}))$.
|
|
||||||
|
|
||||||
Ainsi, l'approximation d'une "nouvelle ligne" du noyau $\mathbf{K}$ formée des similarités d'une nouvelle observation $\mathbf{x_{new}}$ avec les observations $(\mathbf{x_1},\dots,\mathbf{x_n})$ du jeu d'entrainement est :
|
|
||||||
$$[k(\mathbf{x_{new}},\mathbf{x_1}),\dots,k(\mathbf{x_{new}},\mathbf{x_n})] \approx [k(\mathbf{x_{new}},\mathbf{x_1}),\dots,k(\mathbf{x_{new}},\mathbf{x_m})] \mathbf{K_{11}}^{-1} \mathbf{C}^T$$
|
|
||||||
|
|
||||||
Enfin, la prédiction associée à $\mathbf{x_{new}}$ à partir de la valeur approximée du noyau est :
|
|
||||||
$$
|
|
||||||
\begin{aligned}
|
|
||||||
& \hat{y}_{new} \\
|
|
||||||
\approx \{& \text{voir raisonnement ci-dessus, et } \hat{y} = \mathbf{K} \boldsymbol\alpha_\lambda \} \\
|
|
||||||
& \left[ k(\mathbf{x_{new}},\mathbf{x_1}),\dots,k(\mathbf{x_{new}},\mathbf{x_m}) \right] \left( \mathbf{K_{11}}^{-1} \mathbf{C}^T \boldsymbol\alpha_\lambda \right) \\
|
|
||||||
= \{& \text{introduction de $\boldsymbol\beta_\lambda = \mathbf{K_{11}}^{-1} \mathbf{C}^T \boldsymbol\alpha_\lambda$} \} \\
|
|
||||||
& \left[ k(\mathbf{x_{new}},\mathbf{x_1}),\dots,k(\mathbf{x_{new}},\mathbf{x_m}) \right] \boldsymbol\beta_\lambda \\
|
|
||||||
\end{aligned}
|
|
||||||
$$
|
|
||||||
|
|
||||||
$\boldsymbol\beta_\lambda$ peut être calculé une seule fois à la fin de l'entrainement pour être ensuite réutilisé pour chaque prédiction.
|
|
||||||
|
|
||||||
## Exemple sur un jeu de données synthétique
|
## Exemple sur un jeu de données synthétique
|
||||||
|
|
||||||
@ -214,7 +177,7 @@ Nous reprenons le jeu de données synthétique utilisé depuis le premier module
|
|||||||
entr <- splitres$entr
|
entr <- splitres$entr
|
||||||
test <- splitres$test
|
test <- splitres$test
|
||||||
|
|
||||||
nakrm <- nakr(entr$X,entr$Y, nb.landmarks=25)
|
nakrm <- nakr(entr$X,entr$Y,nb.landmarks=25)
|
||||||
yh <- predict(nakrm,test$X)
|
yh <- predict(nakrm,test$X)
|
||||||
plt(test,f)
|
plt(test,f)
|
||||||
points(test$X, yh, pch=4)
|
points(test$X, yh, pch=4)
|
||||||
|
@ -1,80 +1,38 @@
|
|||||||
# compute the gaussian kernel between each row of X1 and each row of X2
|
|
||||||
# should be done more efficiently (C code, threads)
|
|
||||||
gausskernel.nakr <-
|
|
||||||
function(X1, X2, sigma2)
|
|
||||||
{
|
|
||||||
n1 <- dim(X1)[1]
|
|
||||||
n2 <- dim(X2)[1]
|
|
||||||
K <- matrix(nrow = n1, ncol = n2)
|
|
||||||
for(i in 1:n1)
|
|
||||||
for(j in 1:n2)
|
|
||||||
K[i,j] <- sum((X1[i,] - X2[j,])^2)
|
|
||||||
K <- exp(-1*K/sigma2)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Nystroem Approximation Kernel Ridge Regression
|
# Nystroem Approximation Kernel Ridge Regression
|
||||||
nakr <-
|
nakr <-
|
||||||
function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL)
|
function(X, y, sigma2=NULL, lambdas=NULL, nb.landmarks=NULL)
|
||||||
{
|
{
|
||||||
|
set.seed(1123)
|
||||||
X <- as.matrix(X)
|
X <- as.matrix(X)
|
||||||
n <- nrow(X)
|
n <- nrow(X)
|
||||||
p <- ncol(X)
|
p <- ncol(X)
|
||||||
|
|
||||||
if(is.null(sigma2)) { sigma2 <- p }
|
if(is.null(sigma2)) { sigma2 <- p }
|
||||||
|
if(is.null(nb.landmarks)) { nb.landmarks <- 15*n^(1/3) }
|
||||||
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
|
||||||
|
|
||||||
X <- scale(X)
|
X <- scale(X)
|
||||||
y <- scale(y)
|
y <- scale(y)
|
||||||
|
idx.landmarks <- sample(1:n, nb.landmarks, replace = FALSE)
|
||||||
C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2)
|
S <- X[idx.landmarks, ]
|
||||||
K11 <- C[ldm$idx,]
|
K11 <- gausskernel.nakr(S, S, sigma2)
|
||||||
|
C <- gausskernel.nakr(X, S, sigma2)
|
||||||
svdK11 <- svd(K11)
|
svd.K11 <- svd(K11)
|
||||||
# K11 often ill-formed -> drop small sv
|
ks <- which(svd.K11$d < 1E-12) # K11 often ill-formed -> drop small sv
|
||||||
ks <- which(svdK11$d < 1E-12)
|
if (length(ks)>0) {k <- ks[1]} else {k <- length(svd.K11$d)}
|
||||||
if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)}
|
W <- svd.K11$u[,1:k] %*% diag(1/sqrt(svd.K11$d[1:k]))
|
||||||
|
Phi <- C %*% W
|
||||||
US <- svdK11$u[,1:k] %*% diag(1 / sqrt(svdK11$d[1:k]))
|
ridge <- ridge(Phi, y, lambdas)
|
||||||
L <- C %*% US
|
r <- list(center.X=attr(X,"scaled:center"),
|
||||||
Ginv <- t(L) %*% L
|
scale.X=attr(X,"scaled:scale"),
|
||||||
diag(Ginv) <- diag(Ginv) + lambda
|
center.y=attr(y,"scaled:center"),
|
||||||
Ginv <- chol2inv(chol(Ginv))
|
scale.y=attr(y,"scaled:scale"),
|
||||||
Ginv <- L %*% Ginv %*% t(L)
|
S=S,
|
||||||
Ginv <- - Ginv / lambda
|
|
||||||
diag(Ginv) <- diag(Ginv) + (1/lambda)
|
|
||||||
coef <- Ginv %*% y
|
|
||||||
|
|
||||||
K11inv <- svdK11$v[,1:k] %*% diag(1/svdK11$d[1:k]) %*% t(svdK11$u[,1:k])
|
|
||||||
beta <- K11inv %*% t(C) %*% coef
|
|
||||||
|
|
||||||
r <- list(X=X,
|
|
||||||
y=y,
|
|
||||||
sigma2=sigma2,
|
sigma2=sigma2,
|
||||||
lambda=lambda,
|
W=W,
|
||||||
ldmidx=ldm$idx,
|
ridge=ridge
|
||||||
coef=coef,
|
|
||||||
beta=beta
|
|
||||||
)
|
)
|
||||||
class(r) <- "nakr"
|
class(r) <- "nakr"
|
||||||
return(r)
|
return(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
landmarks.nakr <-
|
|
||||||
function(X, landmarks, nb.landmarks)
|
|
||||||
{
|
|
||||||
n <- nrow(X)
|
|
||||||
if(is.null(landmarks)) {
|
|
||||||
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
|
|
||||||
ldmidx <- sample(1:n, nb.landmarks, replace = FALSE)
|
|
||||||
} else {
|
|
||||||
ldmidx <- which(rownames(X) %in% as.character(landmarks))
|
|
||||||
}
|
|
||||||
ldmidx <- sort(ldmidx)
|
|
||||||
ldmnms <- as.numeric(rownames(X)[ldmidx])
|
|
||||||
return(list(idx=ldmidx, nms=ldmnms))
|
|
||||||
}
|
|
||||||
|
|
||||||
predict.nakr <-
|
predict.nakr <-
|
||||||
function(o, newdata)
|
function(o, newdata)
|
||||||
{
|
{
|
||||||
@ -83,42 +41,30 @@ function(o, newdata)
|
|||||||
UseMethod("predict")
|
UseMethod("predict")
|
||||||
return(invisible(NULL))
|
return(invisible(NULL))
|
||||||
}
|
}
|
||||||
newdata <- as.matrix(newdata)
|
test <- as.matrix(newdata)
|
||||||
if(ncol(o$X)!=ncol(newdata)) {
|
test <- scale(test,center=o$center.X,scale=o$scale.X)
|
||||||
stop("Not the same number of variables btwn fitted nakr object and new data")
|
K.test <- gausskernel.nakr(test, o$S, o$sigma2)
|
||||||
}
|
Phi.test <- K.test %*% o$W
|
||||||
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
|
yh <- predict(o$ridge, Phi.test)
|
||||||
scale=attr(o$X,"scaled:scale"))
|
yh <- yh * o$scale.y + o$center.y
|
||||||
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2)
|
|
||||||
yh <- Ktest %*% o$beta
|
|
||||||
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kfold.nakr <-
|
# compute the gaussian kernel between each row of X1 and each row of X2
|
||||||
function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL)
|
# should be done more efficiently (C code, threads)
|
||||||
|
gausskernel.nakr <-
|
||||||
|
function(X1, X2, sigma2)
|
||||||
{
|
{
|
||||||
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) }
|
if(is(X1,"vector"))
|
||||||
|
X1 <- as.matrix(X1)
|
||||||
n <- nrow(X)
|
if(is(X2,"vector"))
|
||||||
folds <- rep_len(1:K, n)
|
X2 <- as.matrix(X2)
|
||||||
folds <- sample(folds, n)
|
if (!(dim(X1)[2]==dim(X2)[2]))
|
||||||
maes <- matrix(data = NA, nrow = K, ncol = length(lambdas))
|
stop("X1 and X2 must have the same number of columns")
|
||||||
colnames(maes) <- lambdas
|
n1 <- dim(X1)[1]
|
||||||
lambda_idx <- 1
|
n2 <- dim(X2)[1]
|
||||||
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
dotX1 <- rowSums(X1*X1)
|
||||||
for(lambda in lambdas) {
|
dotX2 <- rowSums(X2*X2)
|
||||||
for(k in 1:K) {
|
res <- X1%*%t(X2)
|
||||||
fold <- folds == k
|
for(i in 1:n2) res[,i] <- exp((2*res[,i] - dotX1 - rep(dotX2[i],n1))/sigma2)
|
||||||
ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)]
|
return(res)
|
||||||
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep)
|
|
||||||
pred <- predict(nakrm, X[fold,])
|
|
||||||
maes[k,lambda_idx] <- mean(abs(pred - y[fold]))
|
|
||||||
print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx]))
|
|
||||||
}
|
|
||||||
lambda_idx <- lambda_idx + 1
|
|
||||||
}
|
|
||||||
mmaes <- colMeans(maes)
|
|
||||||
minmmaes <- min(mmaes)
|
|
||||||
bestlambda <- lambdas[which(mmaes == minmmaes)]
|
|
||||||
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms)
|
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ author: "Pierre-Edouard Portier"
|
|||||||
documentclass: book
|
documentclass: book
|
||||||
geometry: margin=2cm
|
geometry: margin=2cm
|
||||||
fontsize: 12pt
|
fontsize: 12pt
|
||||||
date: "5 Mar 2023"
|
date: "19 Mar 2023"
|
||||||
toc: true
|
toc: true
|
||||||
classoption: fleqn
|
classoption: fleqn
|
||||||
bibliography: intro_to_ml.bib
|
bibliography: intro_to_ml.bib
|
||||||
|
26
pad.R
26
pad.R
@ -27,29 +27,3 @@
|
|||||||
# adj = 0, cex = 0.6)
|
# adj = 0, cex = 0.6)
|
||||||
# points(0, 0, pch = 3)
|
# points(0, 0, pch = 3)
|
||||||
# ```
|
# ```
|
||||||
|
|
||||||
# source("19_b_nystroem_approximation_housing_experiment_code.R")
|
|
||||||
# rdat <- hous.dat.nakr$dat[sample(nrow(hous.dat.nakr$dat), size=2000, replace=FALSE),]
|
|
||||||
# X <- rdat[,!(colnames(rdat) %in% c('median_house_value'))]
|
|
||||||
# Y <- rdat[,c('median_house_value')]
|
|
||||||
# names(Y) <- rownames(X)
|
|
||||||
# rsplt <- splitdata(list(X = X, Y = Y), 0.8)
|
|
||||||
# X.entr <- rsplt$entr$X
|
|
||||||
# Y.entr <- rsplt$entr$Y
|
|
||||||
# X.test <- rsplt$test$X
|
|
||||||
# Y.test <- rsplt$test$Y
|
|
||||||
# source("18_kernel_ridge_regression_code.R")
|
|
||||||
# krm <- krr(X.entr, Y.entr)
|
|
||||||
# krm.yh <- predict(krm, X.test)
|
|
||||||
# krm.mae <- mean(abs(krm.yh - Y.test)) # 35445.1
|
|
||||||
# nakrm <- nakr(X.entr, Y.entr, nb.landmarks=1600)
|
|
||||||
# nakrm.yh <- predict(nakrm, X.test)
|
|
||||||
# nakrm.mae <- mean(abs(nakrm.yh - Y.test)) # 65454.18
|
|
||||||
# source("15_loocv_code.R")
|
|
||||||
# rm <- ridge(X.entr, Y.entr)
|
|
||||||
# rm.yh <- predict(rm, X.test)
|
|
||||||
# rm.mae <- mean(abs(rm.yh - Y.test)) # 45786.62
|
|
||||||
# library(randomForest)
|
|
||||||
# rfm <- randomForest(X.entr, Y.entr)
|
|
||||||
# rfm.yh <- predict(rfm, X.test)
|
|
||||||
# rfm.mae <- mean(abs(rfm.yh - Y.test)) # 34229.02
|
|
||||||
|
Loading…
Reference in New Issue
Block a user