diff --git a/19_b_nystroem_approximation_housing_experiment_code.R b/19_b_nystroem_approximation_housing_experiment_code.R index 3c6d285..8af0a3c 100644 --- a/19_b_nystroem_approximation_housing_experiment_code.R +++ b/19_b_nystroem_approximation_housing_experiment_code.R @@ -99,13 +99,13 @@ X.entr <- hous.dat.nakr$entr$X Y.entr <- hous.dat.nakr$entr$Y X.test <- hous.dat.nakr$test$X Y.test <- hous.dat.nakr$test$Y -# hous.dat.ca <- datasetHousing.mca() -# hous.cam <- mca(hous.dat.ca) -# nb.landmarks <- round(sqrt(nrow(X.entr))) -# landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks) +hous.dat.ca <- datasetHousing.mca() +hous.cam <- mca(hous.dat.ca) +nb.landmarks <- round(sqrt(nrow(X.entr))) +landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks) # nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks) # nakrm.yh <- predict(nakrm, X.test) # nakrm.mae <- mean(abs(nakrm.yh - Y.test)) # nakrm.yh.train <- predict(nakrm, X.entr) # rev(order(abs(nakrm.yh.train - Y.entr)))[1:20] -# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]]) \ No newline at end of file +# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]]) diff --git a/19_nystroem_approximation_code.R b/19_nystroem_approximation_code.R index c0a534b..8bc05f0 100644 --- a/19_nystroem_approximation_code.R +++ b/19_nystroem_approximation_code.R @@ -1,5 +1,5 @@ # compute the gaussian kernel between each row of X1 and each row of X2 -# should be done more efficiently +# should be done more efficiently (C code, threads) gausskernel.nakr <- function(X1, X2, sigma2) { @@ -14,7 +14,7 @@ function(X1, X2, sigma2) # Nystroem Approximation Kernel Ridge Regression nakr <- -function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) +function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL) { X <- as.matrix(X) n <- nrow(X) @@ -22,23 +22,16 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) if(is.null(sigma2)) { sigma2 <- p } - if(is.null(landmarks)) { - if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) } - splidx <- sample(1:n, nb.landmarks, replace = FALSE) - } else { - splidx <- which(rownames(X) %in% as.character(landmarks)) - nb.landmarks <- length(splidx) - } - splidx <- sort(splidx) + ldm <- landmarks.nakr(X, landmarks, nb.landmarks) X <- scale(X) y <- scale(y) - C <- gausskernel.nakr(X, as.matrix(X[splidx,]), sigma2) - K11 <- C[splidx,] + C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2) + K11 <- C[ldm$idx,] svdK11 <- svd(K11) - # K11 will often be ill-formed, thus we drop the bottom singular values + # K11 often ill-formed -> drop small sv ks <- which(svdK11$d < 1E-12) if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)} @@ -59,7 +52,7 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) y=y, sigma2=sigma2, lambda=lambda, - splidx=splidx, + ldmidx=ldm$idx, coef=coef, beta=beta ) @@ -67,6 +60,21 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL) return(r) } +landmarks.nakr <- +function(X, landmarks, nb.landmarks) +{ + n <- nrow(X) + if(is.null(landmarks)) { + if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) } + ldmidx <- sample(1:n, nb.landmarks, replace = FALSE) + } else { + ldmidx <- which(rownames(X) %in% as.character(landmarks)) + } + ldmidx <- sort(ldmidx) + ldmnms <- as.numeric(rownames(X)[ldmidx]) + return(list(idx=ldmidx, nms=ldmnms)) +} + predict.nakr <- function(o, newdata) { @@ -81,7 +89,7 @@ function(o, newdata) } newdata <- scale(newdata,center=attr(o$X,"scaled:center"), scale=attr(o$X,"scaled:scale")) - Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$splidx,]), o$sigma2) + Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2) yh <- Ktest %*% o$beta yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center") } @@ -91,23 +99,26 @@ function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL { if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) } - N <- nrow(X) - folds <- rep_len(1:K, N) - folds <- sample(folds, N) + n <- nrow(X) + folds <- rep_len(1:K, n) + folds <- sample(folds, n) maes <- matrix(data = NA, nrow = K, ncol = length(lambdas)) colnames(maes) <- lambdas lambda_idx <- 1 + ldm <- landmarks.nakr(X, landmarks, nb.landmarks) for(lambda in lambdas) { for(k in 1:K) { fold <- folds == k - nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks, nb.landmarks) + ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)] + nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep) pred <- predict(nakrm, X[fold,]) maes[k,lambda_idx] <- mean(abs(pred - y[fold])) + print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx])) } lambda_idx <- lambda_idx + 1 } mmaes <- colMeans(maes) minmmaes <- min(mmaes) bestlambda <- lambdas[which(mmaes == minmmaes)] - nakrm <- nakr(X, y, sigma2, bestlambda, landmarks, nb.landmarks) + nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms) }