2023-01-16 23:17:52 +00:00
|
|
|
# compute the gaussian kernel between each row of X1 and each row of X2
|
2023-03-13 12:01:40 +00:00
|
|
|
# should be done more efficiently (C code, threads)
|
2023-01-29 16:53:32 +00:00
|
|
|
gausskernel.nakr <-
|
2023-01-16 23:17:52 +00:00
|
|
|
function(X1, X2, sigma2)
|
|
|
|
{
|
|
|
|
n1 <- dim(X1)[1]
|
|
|
|
n2 <- dim(X2)[1]
|
|
|
|
K <- matrix(nrow = n1, ncol = n2)
|
|
|
|
for(i in 1:n1)
|
|
|
|
for(j in 1:n2)
|
2023-02-20 08:41:21 +00:00
|
|
|
K[i,j] <- sum((X1[i,] - X2[j,])^2)
|
2023-01-16 23:17:52 +00:00
|
|
|
K <- exp(-1*K/sigma2)
|
|
|
|
}
|
|
|
|
|
|
|
|
# Nystroem Approximation Kernel Ridge Regression
|
|
|
|
nakr <-
|
2023-03-13 12:01:40 +00:00
|
|
|
function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL)
|
2023-01-16 23:17:52 +00:00
|
|
|
{
|
|
|
|
X <- as.matrix(X)
|
|
|
|
n <- nrow(X)
|
|
|
|
p <- ncol(X)
|
|
|
|
|
|
|
|
if(is.null(sigma2)) { sigma2 <- p }
|
|
|
|
|
2023-03-13 12:01:40 +00:00
|
|
|
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
2023-01-16 23:17:52 +00:00
|
|
|
|
|
|
|
X <- scale(X)
|
|
|
|
y <- scale(y)
|
|
|
|
|
2023-03-13 12:01:40 +00:00
|
|
|
C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2)
|
|
|
|
K11 <- C[ldm$idx,]
|
2023-01-16 23:17:52 +00:00
|
|
|
|
|
|
|
svdK11 <- svd(K11)
|
2023-03-13 12:01:40 +00:00
|
|
|
# K11 often ill-formed -> drop small sv
|
2023-02-20 08:41:21 +00:00
|
|
|
ks <- which(svdK11$d < 1E-12)
|
|
|
|
if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)}
|
2023-01-22 20:27:38 +00:00
|
|
|
|
2023-01-16 23:17:52 +00:00
|
|
|
US <- svdK11$u[,1:k] %*% diag(1 / sqrt(svdK11$d[1:k]))
|
|
|
|
L <- C %*% US
|
2023-01-22 20:27:38 +00:00
|
|
|
Ginv <- t(L) %*% L
|
|
|
|
diag(Ginv) <- diag(Ginv) + lambda
|
|
|
|
Ginv <- chol2inv(chol(Ginv))
|
|
|
|
Ginv <- L %*% Ginv %*% t(L)
|
|
|
|
Ginv <- - Ginv / lambda
|
|
|
|
diag(Ginv) <- diag(Ginv) + (1/lambda)
|
|
|
|
coef <- Ginv %*% y
|
2023-01-16 23:17:52 +00:00
|
|
|
|
2023-01-22 20:27:38 +00:00
|
|
|
K11inv <- svdK11$v[,1:k] %*% diag(1/svdK11$d[1:k]) %*% t(svdK11$u[,1:k])
|
|
|
|
beta <- K11inv %*% t(C) %*% coef
|
2023-01-16 23:17:52 +00:00
|
|
|
|
|
|
|
r <- list(X=X,
|
|
|
|
y=y,
|
|
|
|
sigma2=sigma2,
|
2023-01-22 20:27:38 +00:00
|
|
|
lambda=lambda,
|
2023-03-13 12:01:40 +00:00
|
|
|
ldmidx=ldm$idx,
|
2023-01-16 23:17:52 +00:00
|
|
|
coef=coef,
|
2023-01-22 20:27:38 +00:00
|
|
|
beta=beta
|
2023-01-16 23:17:52 +00:00
|
|
|
)
|
|
|
|
class(r) <- "nakr"
|
|
|
|
return(r)
|
|
|
|
}
|
|
|
|
|
2023-03-13 12:01:40 +00:00
|
|
|
landmarks.nakr <-
|
|
|
|
function(X, landmarks, nb.landmarks)
|
|
|
|
{
|
|
|
|
n <- nrow(X)
|
|
|
|
if(is.null(landmarks)) {
|
|
|
|
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
|
|
|
|
ldmidx <- sample(1:n, nb.landmarks, replace = FALSE)
|
|
|
|
} else {
|
|
|
|
ldmidx <- which(rownames(X) %in% as.character(landmarks))
|
|
|
|
}
|
|
|
|
ldmidx <- sort(ldmidx)
|
|
|
|
ldmnms <- as.numeric(rownames(X)[ldmidx])
|
|
|
|
return(list(idx=ldmidx, nms=ldmnms))
|
|
|
|
}
|
|
|
|
|
2023-01-16 23:17:52 +00:00
|
|
|
predict.nakr <-
|
|
|
|
function(o, newdata)
|
|
|
|
{
|
|
|
|
if(class(o) != "nakr") {
|
|
|
|
warning("Object is not of class 'nakr'")
|
|
|
|
UseMethod("predict")
|
|
|
|
return(invisible(NULL))
|
|
|
|
}
|
|
|
|
newdata <- as.matrix(newdata)
|
|
|
|
if(ncol(o$X)!=ncol(newdata)) {
|
|
|
|
stop("Not the same number of variables btwn fitted nakr object and new data")
|
|
|
|
}
|
|
|
|
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
|
|
|
|
scale=attr(o$X,"scaled:scale"))
|
2023-03-13 12:01:40 +00:00
|
|
|
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2)
|
2023-01-22 20:27:38 +00:00
|
|
|
yh <- Ktest %*% o$beta
|
2023-01-16 23:17:52 +00:00
|
|
|
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
|
|
|
|
}
|
2023-01-22 20:27:38 +00:00
|
|
|
|
|
|
|
kfold.nakr <-
|
|
|
|
function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL)
|
|
|
|
{
|
|
|
|
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) }
|
|
|
|
|
2023-03-13 12:01:40 +00:00
|
|
|
n <- nrow(X)
|
|
|
|
folds <- rep_len(1:K, n)
|
|
|
|
folds <- sample(folds, n)
|
2023-01-22 20:27:38 +00:00
|
|
|
maes <- matrix(data = NA, nrow = K, ncol = length(lambdas))
|
|
|
|
colnames(maes) <- lambdas
|
|
|
|
lambda_idx <- 1
|
2023-03-13 12:01:40 +00:00
|
|
|
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
2023-01-22 20:27:38 +00:00
|
|
|
for(lambda in lambdas) {
|
|
|
|
for(k in 1:K) {
|
|
|
|
fold <- folds == k
|
2023-03-13 12:01:40 +00:00
|
|
|
ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)]
|
|
|
|
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep)
|
2023-01-22 20:27:38 +00:00
|
|
|
pred <- predict(nakrm, X[fold,])
|
|
|
|
maes[k,lambda_idx] <- mean(abs(pred - y[fold]))
|
2023-03-13 12:01:40 +00:00
|
|
|
print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx]))
|
2023-01-22 20:27:38 +00:00
|
|
|
}
|
|
|
|
lambda_idx <- lambda_idx + 1
|
|
|
|
}
|
|
|
|
mmaes <- colMeans(maes)
|
|
|
|
minmmaes <- min(mmaes)
|
|
|
|
bestlambda <- lambdas[which(mmaes == minmmaes)]
|
2023-03-13 12:01:40 +00:00
|
|
|
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms)
|
2023-02-20 08:41:21 +00:00
|
|
|
}
|