progress on nystroem approximation implementation ; there is still a problem: for a small sample size the result of krr should be the same as the one of nystroem approx krr where the number of landmarks is equal to the number of training points. For now, it isn't.
This commit is contained in:
parent
ef6db6e85a
commit
3c83c256b5
|
@ -99,13 +99,13 @@ X.entr <- hous.dat.nakr$entr$X
|
||||||
Y.entr <- hous.dat.nakr$entr$Y
|
Y.entr <- hous.dat.nakr$entr$Y
|
||||||
X.test <- hous.dat.nakr$test$X
|
X.test <- hous.dat.nakr$test$X
|
||||||
Y.test <- hous.dat.nakr$test$Y
|
Y.test <- hous.dat.nakr$test$Y
|
||||||
# hous.dat.ca <- datasetHousing.mca()
|
hous.dat.ca <- datasetHousing.mca()
|
||||||
# hous.cam <- mca(hous.dat.ca)
|
hous.cam <- mca(hous.dat.ca)
|
||||||
# nb.landmarks <- round(sqrt(nrow(X.entr)))
|
nb.landmarks <- round(sqrt(nrow(X.entr)))
|
||||||
# landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
|
landmarks <- landmarks.by.ca.clst(hous.cam, X.entr, nb.landmarks)
|
||||||
# nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks)
|
# nakrm <- kfold.nakr(X.entr, Y.entr, landmarks=landmarks)
|
||||||
# nakrm.yh <- predict(nakrm, X.test)
|
# nakrm.yh <- predict(nakrm, X.test)
|
||||||
# nakrm.mae <- mean(abs(nakrm.yh - Y.test))
|
# nakrm.mae <- mean(abs(nakrm.yh - Y.test))
|
||||||
# nakrm.yh.train <- predict(nakrm, X.entr)
|
# nakrm.yh.train <- predict(nakrm, X.entr)
|
||||||
# rev(order(abs(nakrm.yh.train - Y.entr)))[1:20]
|
# rev(order(abs(nakrm.yh.train - Y.entr)))[1:20]
|
||||||
# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]])
|
# hist(Y.entr[rev(order(abs(nakrm.yh.train - Y.entr)))[1:200]])
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# compute the gaussian kernel between each row of X1 and each row of X2
|
# compute the gaussian kernel between each row of X1 and each row of X2
|
||||||
# should be done more efficiently
|
# should be done more efficiently (C code, threads)
|
||||||
gausskernel.nakr <-
|
gausskernel.nakr <-
|
||||||
function(X1, X2, sigma2)
|
function(X1, X2, sigma2)
|
||||||
{
|
{
|
||||||
|
@ -14,7 +14,7 @@ function(X1, X2, sigma2)
|
||||||
|
|
||||||
# Nystroem Approximation Kernel Ridge Regression
|
# Nystroem Approximation Kernel Ridge Regression
|
||||||
nakr <-
|
nakr <-
|
||||||
function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
function(X, y, sigma2=NULL, lambda=1E-8, landmarks=NULL, nb.landmarks=NULL)
|
||||||
{
|
{
|
||||||
X <- as.matrix(X)
|
X <- as.matrix(X)
|
||||||
n <- nrow(X)
|
n <- nrow(X)
|
||||||
|
@ -22,23 +22,16 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||||
|
|
||||||
if(is.null(sigma2)) { sigma2 <- p }
|
if(is.null(sigma2)) { sigma2 <- p }
|
||||||
|
|
||||||
if(is.null(landmarks)) {
|
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
||||||
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
|
|
||||||
splidx <- sample(1:n, nb.landmarks, replace = FALSE)
|
|
||||||
} else {
|
|
||||||
splidx <- which(rownames(X) %in% as.character(landmarks))
|
|
||||||
nb.landmarks <- length(splidx)
|
|
||||||
}
|
|
||||||
splidx <- sort(splidx)
|
|
||||||
|
|
||||||
X <- scale(X)
|
X <- scale(X)
|
||||||
y <- scale(y)
|
y <- scale(y)
|
||||||
|
|
||||||
C <- gausskernel.nakr(X, as.matrix(X[splidx,]), sigma2)
|
C <- gausskernel.nakr(X, as.matrix(X[ldm$idx,]), sigma2)
|
||||||
K11 <- C[splidx,]
|
K11 <- C[ldm$idx,]
|
||||||
|
|
||||||
svdK11 <- svd(K11)
|
svdK11 <- svd(K11)
|
||||||
# K11 will often be ill-formed, thus we drop the bottom singular values
|
# K11 often ill-formed -> drop small sv
|
||||||
ks <- which(svdK11$d < 1E-12)
|
ks <- which(svdK11$d < 1E-12)
|
||||||
if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)}
|
if (length(ks)>0) {k <- ks[1]} else {k <- length(svdK11$d)}
|
||||||
|
|
||||||
|
@ -59,7 +52,7 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||||
y=y,
|
y=y,
|
||||||
sigma2=sigma2,
|
sigma2=sigma2,
|
||||||
lambda=lambda,
|
lambda=lambda,
|
||||||
splidx=splidx,
|
ldmidx=ldm$idx,
|
||||||
coef=coef,
|
coef=coef,
|
||||||
beta=beta
|
beta=beta
|
||||||
)
|
)
|
||||||
|
@ -67,6 +60,21 @@ function(X, y, sigma2=NULL, lambda=1E-4, landmarks=NULL, nb.landmarks=NULL)
|
||||||
return(r)
|
return(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
landmarks.nakr <-
|
||||||
|
function(X, landmarks, nb.landmarks)
|
||||||
|
{
|
||||||
|
n <- nrow(X)
|
||||||
|
if(is.null(landmarks)) {
|
||||||
|
if(is.null(nb.landmarks)) { nb.landmarks <- round(sqrt(n)) }
|
||||||
|
ldmidx <- sample(1:n, nb.landmarks, replace = FALSE)
|
||||||
|
} else {
|
||||||
|
ldmidx <- which(rownames(X) %in% as.character(landmarks))
|
||||||
|
}
|
||||||
|
ldmidx <- sort(ldmidx)
|
||||||
|
ldmnms <- as.numeric(rownames(X)[ldmidx])
|
||||||
|
return(list(idx=ldmidx, nms=ldmnms))
|
||||||
|
}
|
||||||
|
|
||||||
predict.nakr <-
|
predict.nakr <-
|
||||||
function(o, newdata)
|
function(o, newdata)
|
||||||
{
|
{
|
||||||
|
@ -81,7 +89,7 @@ function(o, newdata)
|
||||||
}
|
}
|
||||||
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
|
newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
|
||||||
scale=attr(o$X,"scaled:scale"))
|
scale=attr(o$X,"scaled:scale"))
|
||||||
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$splidx,]), o$sigma2)
|
Ktest <- gausskernel.nakr(newdata, as.matrix(o$X[o$ldmidx,]), o$sigma2)
|
||||||
yh <- Ktest %*% o$beta
|
yh <- Ktest %*% o$beta
|
||||||
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
|
yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
|
||||||
}
|
}
|
||||||
|
@ -91,23 +99,26 @@ function(X, y, K=5, lambdas=NULL, sigma2=NULL, landmarks=NULL, nb.landmarks=NULL
|
||||||
{
|
{
|
||||||
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) }
|
if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=1) }
|
||||||
|
|
||||||
N <- nrow(X)
|
n <- nrow(X)
|
||||||
folds <- rep_len(1:K, N)
|
folds <- rep_len(1:K, n)
|
||||||
folds <- sample(folds, N)
|
folds <- sample(folds, n)
|
||||||
maes <- matrix(data = NA, nrow = K, ncol = length(lambdas))
|
maes <- matrix(data = NA, nrow = K, ncol = length(lambdas))
|
||||||
colnames(maes) <- lambdas
|
colnames(maes) <- lambdas
|
||||||
lambda_idx <- 1
|
lambda_idx <- 1
|
||||||
|
ldm <- landmarks.nakr(X, landmarks, nb.landmarks)
|
||||||
for(lambda in lambdas) {
|
for(lambda in lambdas) {
|
||||||
for(k in 1:K) {
|
for(k in 1:K) {
|
||||||
fold <- folds == k
|
fold <- folds == k
|
||||||
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks, nb.landmarks)
|
ldmnms2keep <- ldm$nms[! ldm$idx %in% which(fold)]
|
||||||
|
nakrm <- nakr(X[!fold,], y[!fold], sigma2, lambda, landmarks=ldmnms2keep)
|
||||||
pred <- predict(nakrm, X[fold,])
|
pred <- predict(nakrm, X[fold,])
|
||||||
maes[k,lambda_idx] <- mean(abs(pred - y[fold]))
|
maes[k,lambda_idx] <- mean(abs(pred - y[fold]))
|
||||||
|
print(paste("lbd =", lambda, "; k =", k, "; mae =", maes[k,lambda_idx]))
|
||||||
}
|
}
|
||||||
lambda_idx <- lambda_idx + 1
|
lambda_idx <- lambda_idx + 1
|
||||||
}
|
}
|
||||||
mmaes <- colMeans(maes)
|
mmaes <- colMeans(maes)
|
||||||
minmmaes <- min(mmaes)
|
minmmaes <- min(mmaes)
|
||||||
bestlambda <- lambdas[which(mmaes == minmmaes)]
|
bestlambda <- lambdas[which(mmaes == minmmaes)]
|
||||||
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks, nb.landmarks)
|
nakrm <- nakr(X, y, sigma2, bestlambda, landmarks=ldm$nms)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue