progress on nystroem approximation implementation ; there is still a problem: for a small sample size the result of krr should be the same as the one of nystroem approx krr where the number of landmarks is equal to the number of training points. For now, it isn't.

This commit is contained in:
Pierre-Edouard Portier 2023-03-13 13:01:40 +01:00
parent ef6db6e85a
commit 3c83c256b5
2 changed files with 36 additions and 25 deletions

View File

@ -99,13 +99,13 @@ X.entr <- hous.dat.nakr$entr$X
Y.entr <- hous.dat.nakr$entr$Y
X.test <- hous.dat.nakr$test$X
Y.test <- hous.dat.nakr$test$Y
# hous.dat.ca <- datasetHousing.mca()
# hous.cam <- mca(hous.dat.ca)
# nb.landmarks <- round(sqrt(nrow(X.entr)))
# landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
hous.dat.ca <- datasetHousing.mca()
hous.cam <- mca(hous.dat.ca)
nb.landmarks <- round(sqrt(nrow(X.entr)))
landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
# nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks)
# nakrm.yh <- predict(nakrm, X.test)
# nakrm.mae <- mean(abs(nakrm.yh - Y.test))
# nakrm.yh.train <- predict(nakrm, X.entr)
# rev(order(abs(nakrm.yh.train - Y.entr)))[1:20]
# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]])
# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]])

View File

@ -1,5 +1,5 @@
# compute the gaussian kernel between each row of X1 and each row of X2
# should be done more efficiently
# should be done more efficiently (C code, threads)
gausskernel.nakr <-
function(X1, X2, sigma2)
{
@ -14,7 +14,7 @@ function(X1, X2, sigma2)
# Nystroem Approximation Kernel Ridge Regression
nakr <-
function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL)
{
X <- as.matrix(X)
n <- nrow(X)
@ -22,23 +22,16 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
if(is.null(sigma2)) { sigma2 <- p }
if(is.null(landmarks)) {
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
splidx <- sample(1:n, nb.landmarks, replace = FALSE)
} else {
splidx <- which(rownames(X) %in% as.character(landmarks))
nb.landmarks <- length(splidx)
}
splidx <- sort(splidx)
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
X <- scale(X)
y <- scale(y)
C <- gausskernel.nakr(X, as.matrix(X[splidx,]), sigma2)
K11 <- C[splidx,]
C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2)
K11 <- C[ldm$idx,]
svdK11 <- svd(K11)
# K11 will often be ill-formed, thus we drop the bottom singular values
# K11 often ill-formed -> drop small sv
ks <- which(svdK11$d < 1E-12)
if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)}
@ -59,7 +52,7 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
y=y,
sigma2=sigma2,
lambda=lambda,
splidx=splidx,
ldmidx=ldm$idx,
coef=coef,
beta=beta
)
@ -67,6 +60,21 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
return(r)
}
landmarks.nakr <-
function(X, landmarks, nb.landmarks)
{
n <- nrow(X)
if(is.null(landmarks)) {
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
ldmidx <- sample(1:n, nb.landmarks, replace = FALSE)
} else {
ldmidx <- which(rownames(X) %in% as.character(landmarks))
}
ldmidx <- sort(ldmidx)
ldmnms <- as.numeric(rownames(X)[ldmidx])
return(list(idx=ldmidx, nms=ldmnms))
}
predict.nakr <-
function(o, newdata)
{
@ -81,7 +89,7 @@ function(o, newdata)
}
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
scale=attr(o$X,"scaled:scale"))
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$splidx,]), o$sigma2)
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2)
yh <- Ktest %*% o$beta
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
}
@ -91,23 +99,26 @@ function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL
{
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) }
N <- nrow(X)
folds <- rep_len(1:K, N)
folds <- sample(folds, N)
n <- nrow(X)
folds <- rep_len(1:K, n)
folds <- sample(folds, n)
maes <- matrix(data = NA, nrow = K, ncol = length(lambdas))
colnames(maes) <- lambdas
lambda_idx <- 1
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
for(lambda in lambdas) {
for(k in 1:K) {
fold <- folds == k
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks, nb.landmarks)
ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)]
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep)
pred <- predict(nakrm, X[fold,])
maes[k,lambda_idx] <- mean(abs(pred - y[fold]))
print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx]))
}
lambda_idx <- lambda_idx + 1
}
mmaes <- colMeans(maes)
minmmaes <- min(mmaes)
bestlambda <- lambdas[which(mmaes == minmmaes)]
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks, nb.landmarks)
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms)
}