From 3c83c256b5a965f6e31f32ba8be746068f2aa9df Mon Sep 17 00:00:00 2001 From: Pierre-Edouard Portier Date: Mon, 13 Mar 2023 13:01:40 +0100 Subject: [PATCH] progress on nystroem approximation implementation ; there is still a problem: for a small sample size the result of krr should be the same as the one of nystroem approx krr where the number of landmarks is equal to the number of training points. For now, it isn't. --- ...em_approximation_housing_experiment_code.R | 10 ++-- 19_nystroem_approximation_code.R | 51 +++++++++++-------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/19_b_nystroem_approximation_housing_experiment_code.R b/19_b_nystroem_approximation_housing_experiment_code.R index 3c6d285..8af0a3c 100644 --- a/19_b_nystroem_approximation_housing_experiment_code.R +++ b/19_b_nystroem_approximation_housing_experiment_code.R @@ -99,13 +99,13 @@ X.entr <- hous.dat.nakr$entr$X Y.entr <- hous.dat.nakr$entr$Y X.test <- hous.dat.nakr$test$X Y.test <- hous.dat.nakr$test$Y -# hous.dat.ca <- datasetHousing.mca() -# hous.cam <- mca(hous.dat.ca) -# nb.landmarks <- round(sqrt(nrow(X.entr))) -# landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks) +hous.dat.ca <- datasetHousing.mca() +hous.cam <- mca(hous.dat.ca) +nb.landmarks <- round(sqrt(nrow(X.entr))) +landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks) # nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks) # nakrm.yh <- predict(nakrm, X.test) # nakrm.mae <- mean(abs(nakrm.yh - Y.test)) # nakrm.yh.train <- predict(nakrm, X.entr) # rev(order(abs(nakrm.yh.train - Y.entr)))[1:20] -# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]]) \ No newline at end of file +# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]]) diff --git a/19_nystroem_approximation_code.R b/19_nystroem_approximation_code.R index c0a534b..8bc05f0 100644 --- a/19_nystroem_approximation_code.R +++ b/19_nystroem_approximation_code.R @@ -1,5 +1,5 @@ # compute the gaussian kernel between each row of X1 and each row of X2 -# should be done more efficiently +# should be done more efficiently (C code, threads) gausskernel.nakr <- function(X1, X2, sigma2) { @@ -14,7 +14,7 @@ function(X1, X2, sigma2) # Nystroem Approximation Kernel Ridge Regression nakr <- -function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) +function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL) { X <- as.matrix(X) n <- nrow(X) @@ -22,23 +22,16 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) if(is.null(sigma2)) { sigma2 <- p } - if(is.null(landmarks)) { - if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) } - splidx <- sample(1:n, nb.landmarks, replace = FALSE) - } else { - splidx <- which(rownames(X) %in% as.character(landmarks)) - nb.landmarks <- length(splidx) - } - splidx <- sort(splidx) + ldm <- landmarks.nakr(X, landmarks, nb.landmarks) X <- scale(X) y <- scale(y) - C <- gausskernel.nakr(X, as.matrix(X[splidx,]), sigma2) - K11 <- C[splidx,] + C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2) + K11 <- C[ldm$idx,] svdK11 <- svd(K11) - # K11 will often be ill-formed, thus we drop the bottom singular values + # K11 often ill-formed -> drop small sv ks <- which(svdK11$d < 1E-12) if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)} @@ -59,7 +52,7 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) y=y, sigma2=sigma2, lambda=lambda, - splidx=splidx, + ldmidx=ldm$idx, coef=coef, beta=beta ) @@ -67,6 +60,21 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) return(r) } +landmarks.nakr <- +function(X, landmarks, nb.landmarks) +{ + n <- nrow(X) + if(is.null(landmarks)) { + if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) } + ldmidx <- sample(1:n, nb.landmarks, replace = FALSE) + } else { + ldmidx <- which(rownames(X) %in% as.character(landmarks)) + } + ldmidx <- sort(ldmidx) + ldmnms <- as.numeric(rownames(X)[ldmidx]) + return(list(idx=ldmidx, nms=ldmnms)) +} + predict.nakr <- function(o, newdata) { @@ -81,7 +89,7 @@ function(o, newdata) } newdata <- scale(newdata,center=attr(o$X,"scaled:center"), scale=attr(o$X,"scaled:scale")) - Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$splidx,]), o$sigma2) + Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2) yh <- Ktest %*% o$beta yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center") } @@ -91,23 +99,26 @@ function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL { if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) } - N <- nrow(X) - folds <- rep_len(1:K, N) - folds <- sample(folds, N) + n <- nrow(X) + folds <- rep_len(1:K, n) + folds <- sample(folds, n) maes <- matrix(data = NA, nrow = K, ncol = length(lambdas)) colnames(maes) <- lambdas lambda_idx <- 1 + ldm <- landmarks.nakr(X, landmarks, nb.landmarks) for(lambda in lambdas) { for(k in 1:K) { fold <- folds == k - nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks, nb.landmarks) + ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)] + nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep) pred <- predict(nakrm, X[fold,]) maes[k,lambda_idx] <- mean(abs(pred - y[fold])) + print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx])) } lambda_idx <- lambda_idx + 1 } mmaes <- colMeans(maes) minmmaes <- min(mmaes) bestlambda <- lambdas[which(mmaes == minmmaes)] - nakrm <- nakr(X, y, sigma2, bestlambda, landmarks, nb.landmarks) + nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms) }