progress on nystroem approximation implementation ; there is still a problem: for a small sample size the result of krr should be the same as the one of nystroem approx krr where the number of landmarks is equal to the number of training points. For now, it isn't.
This commit is contained in:
parent
ef6db6e85a
commit
3c83c256b5
@ -99,10 +99,10 @@ X.entr <- hous.dat.nakr$entr$X
|
||||
Y.entr <- hous.dat.nakr$entr$Y
|
||||
X.test <- hous.dat.nakr$test$X
|
||||
Y.test <- hous.dat.nakr$test$Y
|
||||
# hous.dat.ca <- datasetHousing.mca()
|
||||
# hous.cam <- mca(hous.dat.ca)
|
||||
# nb.landmarks <- round(sqrt(nrow(X.entr)))
|
||||
# landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
|
||||
hous.dat.ca <- datasetHousing.mca()
|
||||
hous.cam <- mca(hous.dat.ca)
|
||||
nb.landmarks <- round(sqrt(nrow(X.entr)))
|
||||
landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
|
||||
# nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks)
|
||||
# nakrm.yh <- predict(nakrm, X.test)
|
||||
# nakrm.mae <- mean(abs(nakrm.yh - Y.test))
|
||||
|
@ -1,5 +1,5 @@
|
||||
# compute the gaussian kernel between each row of X1 and each row of X2
|
||||
# should be done more efficiently
|
||||
# should be done more efficiently (C code, threads)
|
||||
gausskernel.nakr <-
|
||||
function(X1, X2, sigma2)
|
||||
{
|
||||
@ -14,7 +14,7 @@ function(X1, X2, sigma2)
|
||||
|
||||
# Nystroem Approximation Kernel Ridge Regression
|
||||
nakr <-
|
||||
function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||
function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL)
|
||||
{
|
||||
X <- as.matrix(X)
|
||||
n <- nrow(X)
|
||||
@ -22,23 +22,16 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||
|
||||
if(is.null(sigma2)) { sigma2 <- p }
|
||||
|
||||
if(is.null(landmarks)) {
|
||||
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
|
||||
splidx <- sample(1:n, nb.landmarks, replace = FALSE)
|
||||
} else {
|
||||
splidx <- which(rownames(X) %in% as.character(landmarks))
|
||||
nb.landmarks <- length(splidx)
|
||||
}
|
||||
splidx <- sort(splidx)
|
||||
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
||||
|
||||
X <- scale(X)
|
||||
y <- scale(y)
|
||||
|
||||
C <- gausskernel.nakr(X, as.matrix(X[splidx,]), sigma2)
|
||||
K11 <- C[splidx,]
|
||||
C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2)
|
||||
K11 <- C[ldm$idx,]
|
||||
|
||||
svdK11 <- svd(K11)
|
||||
# K11 will often be ill-formed, thus we drop the bottom singular values
|
||||
# K11 often ill-formed -> drop small sv
|
||||
ks <- which(svdK11$d < 1E-12)
|
||||
if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)}
|
||||
|
||||
@ -59,7 +52,7 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||
y=y,
|
||||
sigma2=sigma2,
|
||||
lambda=lambda,
|
||||
splidx=splidx,
|
||||
ldmidx=ldm$idx,
|
||||
coef=coef,
|
||||
beta=beta
|
||||
)
|
||||
@ -67,6 +60,21 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||
return(r)
|
||||
}
|
||||
|
||||
landmarks.nakr <-
|
||||
function(X, landmarks, nb.landmarks)
|
||||
{
|
||||
n <- nrow(X)
|
||||
if(is.null(landmarks)) {
|
||||
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
|
||||
ldmidx <- sample(1:n, nb.landmarks, replace = FALSE)
|
||||
} else {
|
||||
ldmidx <- which(rownames(X) %in% as.character(landmarks))
|
||||
}
|
||||
ldmidx <- sort(ldmidx)
|
||||
ldmnms <- as.numeric(rownames(X)[ldmidx])
|
||||
return(list(idx=ldmidx, nms=ldmnms))
|
||||
}
|
||||
|
||||
predict.nakr <-
|
||||
function(o, newdata)
|
||||
{
|
||||
@ -81,7 +89,7 @@ function(o, newdata)
|
||||
}
|
||||
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
|
||||
scale=attr(o$X,"scaled:scale"))
|
||||
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$splidx,]), o$sigma2)
|
||||
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2)
|
||||
yh <- Ktest %*% o$beta
|
||||
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
|
||||
}
|
||||
@ -91,23 +99,26 @@ function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL
|
||||
{
|
||||
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) }
|
||||
|
||||
N <- nrow(X)
|
||||
folds <- rep_len(1:K, N)
|
||||
folds <- sample(folds, N)
|
||||
n <- nrow(X)
|
||||
folds <- rep_len(1:K, n)
|
||||
folds <- sample(folds, n)
|
||||
maes <- matrix(data = NA, nrow = K, ncol = length(lambdas))
|
||||
colnames(maes) <- lambdas
|
||||
lambda_idx <- 1
|
||||
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
||||
for(lambda in lambdas) {
|
||||
for(k in 1:K) {
|
||||
fold <- folds == k
|
||||
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks, nb.landmarks)
|
||||
ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)]
|
||||
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep)
|
||||
pred <- predict(nakrm, X[fold,])
|
||||
maes[k,lambda_idx] <- mean(abs(pred - y[fold]))
|
||||
print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx]))
|
||||
}
|
||||
lambda_idx <- lambda_idx + 1
|
||||
}
|
||||
mmaes <- colMeans(maes)
|
||||
minmmaes <- min(mmaes)
|
||||
bestlambda <- lambdas[which(mmaes == minmmaes)]
|
||||
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks, nb.landmarks)
|
||||
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user