implementation of nystroem approx

2023-01-17 00:17:52 +01:00 · 2023-01-17 00:17:52 +01:00 · 2ad99f1adb
commit 2ad99f1adb
parent 1d0386b45d
2 changed files with 121 additions and 2 deletions
--- a/19_nystroem_approximation.Rmd
+++ b/19_nystroem_approximation.Rmd
@ -3,7 +3,7 @@
 ```{r, include=FALSE}
 source("01_intro_code.R", local = knitr::knit_global())
 source("04_validation_croisee_code.R", local = knitr::knit_global())
-source("18_kernel_ridge_regression_code.R", local = knitr::knit_global())
+source("19_nystroem_approximation_code.R", local = knitr::knit_global())
 ```

 ## Approximation de Nyström d'un noyau
@ -182,4 +182,30 @@ $$
 = \{& \text{Algèbre linéaire : si $\mathbf{A}$ et $\mathbf{B}$ sont des matrices carrés inversibles alors $(\mathbf{A}\mathbf{B})^{-1} = \mathbf{B}^{-1}\mathbf{A}^{-1}$} \} \\
 & \lambda^{-1}\mathbf{I_n} - \lambda^{-1}\mathbf{L}\left(\lambda\mathbf{I_m}+\mathbf{L}^T\mathbf{L}\right)^{-1}\mathbf{L}^T \\
 \end{aligned}
-$$
+$$
+
+## Exemple sur un jeu de données synthétique
+
+Nous reprenons le jeu de données synthétique utilisé depuis le premier module pour tester l'implémentation de la régression ridge à noyau gaussien.
+
+```{r}
+  set.seed(1123)
+  n <- 100
+  data = gendat(n,0.2)
+  splitres <- splitdata(data,0.8)
+  entr <- splitres$entr
+  test <- splitres$test
+
+  nakrm <- nakr(entr$X,entr$Y, nspl=25)
+  yh <- predict(nakrm,test$X)
+  plt(test,f)
+  points(test$X, yh, pch=4)
+  testmae <- mean(abs(yh-test$Y))
+```
+
+Ce modèle atteint une erreur absolue moyenne de `r testmae` sur le jeu de test.
+
+## Annexe code source
+
+```{r, code=readLines("19_nystroem_approximation_code.R"), eval=FALSE}
+```
--- a/19_nystroem_approximation_code.R
+++ b/19_nystroem_approximation_code.R
@ -0,0 +1,93 @@
+# compute the gaussian kernel between each row of X1 and each row of X2
+# should be done more efficiently
+gausskernel <-
+function(X1, X2, sigma2)
+{
+  n1 <- dim(X1)[1]
+  n2 <- dim(X2)[1]
+  K <- matrix(nrow = n1, ncol = n2)
+  for(i in 1:n1)
+    for(j in 1:n2)
+      K[i,j] <- sum(X1[i,] - X2[j,])^2
+  K <- exp(-1*K/sigma2)
+}
+
+# Nystroem Approximation Kernel Ridge Regression
+nakr <-
+function(X, y, sigma2=NULL, lambdas=NULL, splidx=NULL, nspl=NULL)
+{
+  X <- as.matrix(X)
+  n <- nrow(X)
+  p <- ncol(X)
+
+  if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2,by=0.5) }
+  if(is.null(sigma2)) { sigma2 <- p }
+
+  if(is.null(splidx)) {
+    if(is.null(nspl)) { nspl <- round(sqrt(n)) }
+    splidx <- sample(1:n, nspl, replace = FALSE)
+  } else {
+    nspl <- length(splidx)
+  }
+  splidx <- sort(splidx)
+
+  X <- scale(X)
+  y <- scale(y)
+
+  C <- gausskernel(X, as.matrix(X[splidx,]), sigma2)
+  K11 <- C[splidx,]
+
+  svdK11 <- svd(K11)
+  # K11 will often be ill-formed, thus we drop the bottom singular values
+  k <- 0.8 * nspl
+  US <- svdK11$u[,1:k] %*% diag(1 / sqrt(svdK11$d[1:k]))
+  L <- C %*% US
+  LtL <- t(L) %*% L
+
+  looe <- double(length(lambdas))
+  coef <- matrix(data = NA, nrow = n, ncol = length(lambdas))
+  i <- 1
+  for(lambda in lambdas) {
+    Ginv <- LtL
+    diag(Ginv) <- diag(Ginv) + lambda
+    Ginv <- solve(Ginv)
+    Ginv <- L %*% Ginv %*% t(L)
+    Ginv <- - Ginv / lambda
+    diag(Ginv) <- diag(Ginv) + (1/lambda)
+    coef[,i] <- Ginv %*% y
+    looe[i] <- mean((coef[,i]/diag(Ginv))^2)
+    i <- i+1
+  }
+  looe.min <- min(looe)
+  lambda <- lambdas[which(looe == looe.min)]
+  coef <- coef[,which(looe == looe.min)]
+
+  r <- list(X=X,
+            y=y,
+            sigma2=sigma2,
+            coef=coef,
+            looe=looe.min,
+            lambda=lambda
+           )
+  class(r) <- "nakr"
+  return(r)
+}
+
+predict.nakr <-
+function(o, newdata)
+{
+  if(class(o) != "nakr") {
+    warning("Object is not of class 'nakr'")
+    UseMethod("predict")
+    return(invisible(NULL))
+  }
+  newdata <- as.matrix(newdata)
+  if(ncol(o$X)!=ncol(newdata)) {
+    stop("Not the same number of variables btwn fitted nakr object and new data")
+  }
+  newdata <- scale(newdata,center=attr(o$X,"scaled:center"),
+                   scale=attr(o$X,"scaled:scale"))
+  Ktest <- gausskernel(newdata, o$X, o$sigma2)
+  yh <- Ktest %*% o$coef
+  yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
+}