intro_to_ml/19_nystroem_approximation_c...

94 lines
2.3 KiB
R
Raw Normal View History

2023-01-16 23:17:52 +00:00
# compute the gaussian kernel between each row of X1 and each row of X2
# should be done more efficiently
gausskernel <-
function(X1, X2, sigma2)
{
n1 <- dim(X1)[1]
n2 <- dim(X2)[1]
K <- matrix(nrow = n1, ncol = n2)
for(i in 1:n1)
for(j in 1:n2)
K[i,j] <- sum(X1[i,] - X2[j,])^2
K <- exp(-1*K/sigma2)
}
# Nystroem Approximation Kernel Ridge Regression
nakr <-
function(X, y, sigma2=NULL, lambdas=NULL, splidx=NULL, nspl=NULL)
{
X <- as.matrix(X)
n <- nrow(X)
p <- ncol(X)
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2,by=0.5) }
if(is.null(sigma2)) { sigma2 <- p }
if(is.null(splidx)) {
if(is.null(nspl)) { nspl <- round(sqrt(n)) }
splidx <- sample(1:n, nspl, replace = FALSE)
} else {
nspl <- length(splidx)
}
splidx <- sort(splidx)
X <- scale(X)
y <- scale(y)
C <- gausskernel(X, as.matrix(X[splidx,]), sigma2)
K11 <- C[splidx,]
svdK11 <- svd(K11)
# K11 will often be ill-formed, thus we drop the bottom singular values
k <- 0.8 * nspl
US <- svdK11$u[,1:k] %*% diag(1 / sqrt(svdK11$d[1:k]))
L <- C %*% US
LtL <- t(L) %*% L
looe <- double(length(lambdas))
coef <- matrix(data = NA, nrow = n, ncol = length(lambdas))
i <- 1
for(lambda in lambdas) {
Ginv <- LtL
diag(Ginv) <- diag(Ginv) + lambda
Ginv <- solve(Ginv)
Ginv <- L %*% Ginv %*% t(L)
Ginv <- - Ginv / lambda
diag(Ginv) <- diag(Ginv) + (1/lambda)
coef[,i] <- Ginv %*% y
looe[i] <- mean((coef[,i]/diag(Ginv))^2)
i <- i+1
}
looe.min <- min(looe)
lambda <- lambdas[which(looe == looe.min)]
coef <- coef[,which(looe == looe.min)]
r <- list(X=X,
y=y,
sigma2=sigma2,
coef=coef,
looe=looe.min,
lambda=lambda
)
class(r) <- "nakr"
return(r)
}
predict.nakr <-
function(o, newdata)
{
if(class(o) != "nakr") {
warning("Object is not of class 'nakr'")
UseMethod("predict")
return(invisible(NULL))
}
newdata <- as.matrix(newdata)
if(ncol(o$X)!=ncol(newdata)) {
stop("Not the same number of variables btwn fitted nakr object and new data")
}
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
scale=attr(o$X,"scaled:scale"))
Ktest <- gausskernel(newdata, o$X, o$sigma2)
yh <- Ktest %*% o$coef
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
}