diff --git a/19_nystroem_approximation.Rmd b/19_nystroem_approximation.Rmd
index 09b385f..e12d06c 100644
--- a/19_nystroem_approximation.Rmd
+++ b/19_nystroem_approximation.Rmd
@@ -3,7 +3,7 @@
 ```{r, include=FALSE}
 source("01_intro_code.R", local = knitr::knit_global())
 source("04_validation_croisee_code.R", local = knitr::knit_global())
-source("18_kernel_ridge_regression_code.R", local = knitr::knit_global())
+source("19_nystroem_approximation_code.R", local = knitr::knit_global())
 ```
 
 ## Nyström approximation of a kernel
@@ -182,4 +182,30 @@ $$
 = \{& \text{Linear algebra: if $\mathbf{A}$ and $\mathbf{B}$ are invertible square matrices, then $(\mathbf{A}\mathbf{B})^{-1} = \mathbf{B}^{-1}\mathbf{A}^{-1}$} \} \\
 & \lambda^{-1}\mathbf{I_n} - \lambda^{-1}\mathbf{L}\left(\lambda\mathbf{I_m}+\mathbf{L}^T\mathbf{L}\right)^{-1}\mathbf{L}^T \\
 \end{aligned}
-$$
\ No newline at end of file
+$$
+
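+This identity, $(\lambda\mathbf{I_n}+\mathbf{L}\mathbf{L}^T)^{-1} = \lambda^{-1}\mathbf{I_n} - \lambda^{-1}\mathbf{L}\left(\lambda\mathbf{I_m}+\mathbf{L}^T\mathbf{L}\right)^{-1}\mathbf{L}^T$, can be checked numerically. The sketch below is only an illustration: the dimensions `n` and `m` and the value of `lambda` are arbitrary choices, and the two sides should agree up to floating-point error.
+
+```{r}
+set.seed(1123)
+n <- 8 ; m <- 3 ; lambda <- 0.1
+L <- matrix(rnorm(n * m), nrow = n, ncol = m)
+lhs <- solve(lambda * diag(n) + L %*% t(L))
+rhs <- diag(n) / lambda - (L %*% solve(lambda * diag(m) + t(L) %*% L) %*% t(L)) / lambda
+max(abs(lhs - rhs))
+```
+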
+## Example on a synthetic dataset
+
+We again use the synthetic dataset that has been used since the first module to test this implementation of Gaussian kernel ridge regression with the Nyström approximation.
+
+```{r}
+set.seed(1123)
+n <- 100
+data <- gendat(n, 0.2)
+splitres <- splitdata(data, 0.8)
+entr <- splitres$entr
+test <- splitres$test
+
+nakrm <- nakr(entr$X, entr$Y, nspl = 25)
+yh <- predict(nakrm, test$X)
+plt(test, f)
+points(test$X, yh, pch = 4)
+testmae <- mean(abs(yh - test$Y))
+```
+
+This model achieves a mean absolute error of `r testmae` on the test set.
+
+## Appendix: source code
+
+```{r, code=readLines("19_nystroem_approximation_code.R"), eval=FALSE}
+```
diff --git a/19_nystroem_approximation_code.R b/19_nystroem_approximation_code.R
new file mode 100644
index 0000000..8c524af
--- /dev/null
+++ b/19_nystroem_approximation_code.R
@@ -0,0 +1,93 @@
+# Compute the Gaussian kernel between each row of X1 and each row of X2.
+# Note: the double loop should be vectorized for efficiency.
+gausskernel <-
+function(X1, X2, sigma2)
+{
+    n1 <- dim(X1)[1]
+    n2 <- dim(X2)[1]
+    K <- matrix(nrow = n1, ncol = n2)
+    for(i in 1:n1)
+        for(j in 1:n2)
+            K[i,j] <- sum((X1[i,] - X2[j,])^2)   # squared Euclidean distance
+    exp(-K / sigma2)
+}
+
+# Nystroem Approximation Kernel Ridge Regression
+nakr <-
+function(X, y, sigma2=NULL, lambdas=NULL, splidx=NULL, nspl=NULL)
+{
+    X <- as.matrix(X)
+    n <- nrow(X)
+    p <- ncol(X)
+
+    if(is.null(lambdas)) { lambdas <- 10^seq(-8, 2, by=0.5) }
+    if(is.null(sigma2)) { sigma2 <- p }
+
+    if(is.null(splidx)) {
+        if(is.null(nspl)) { nspl <- round(sqrt(n)) }
+        splidx <- sample(1:n, nspl, replace = FALSE)
+    } else {
+        nspl <- length(splidx)
+    }
+    splidx <- sort(splidx)
+
+    X <- scale(X)
+    y <- scale(y)
+
+    # kernel between all observations and the nspl sampled ones
+    C <- gausskernel(X, as.matrix(X[splidx,]), sigma2)
+    K11 <- C[splidx,]
+
+    # K11 is often ill-conditioned, so we drop its smallest singular values
+    # (see nystroem_check() at the end of this file for a standalone
+    # illustration of the resulting low-rank approximation)
+    svdK11 <- svd(K11)
+    k <- floor(0.8 * nspl)
+    US <- svdK11$u[,1:k] %*% diag(1 / sqrt(svdK11$d[1:k]))
+    L <- C %*% US
+    LtL <- t(L) %*% L
+
+    looe <- double(length(lambdas))
+    coef <- matrix(data = NA, nrow = n, ncol = length(lambdas))
+    i <- 1
+    for(lambda in lambdas) {
+        # apply (lambda I_n + L L^T)^{-1} =
+        #   lambda^{-1} I_n - lambda^{-1} L (lambda I_m + L^T L)^{-1} L^T
+        Ginv <- LtL
+        diag(Ginv) <- diag(Ginv) + lambda
+        Ginv <- solve(Ginv)
+        Ginv <- L %*% Ginv %*% t(L)
+        Ginv <- - Ginv / lambda
+        diag(Ginv) <- diag(Ginv) + (1/lambda)
+        coef[,i] <- Ginv %*% y
+        # leave-one-out error computed from the diagonal of the inverse
+        looe[i] <- mean((coef[,i]/diag(Ginv))^2)
+        i <- i+1
+    }
+    looe.min <- min(looe)
+    best <- which.min(looe)
+    lambda <- lambdas[best]
+    coef <- coef[,best]
+
+    r <- list(X=X,
+              y=y,
+              sigma2=sigma2,
+              coef=coef,
+              looe=looe.min,
+              lambda=lambda
+              )
+    class(r) <- "nakr"
+    return(r)
+}
+
+predict.nakr <-
+function(o, newdata)
+{
+    if(!inherits(o, "nakr")) {
+        warning("Object is not of class 'nakr'")
+        UseMethod("predict")
+        return(invisible(NULL))
+    }
+    newdata <- as.matrix(newdata)
+    if(ncol(o$X) != ncol(newdata)) {
+        stop("Not the same number of variables between fitted nakr object and new data")
+    }
+    newdata <- scale(newdata, center = attr(o$X,"scaled:center"),
+                     scale = attr(o$X,"scaled:scale"))
+    Ktest <- gausskernel(newdata, o$X, o$sigma2)
+    yh <- Ktest %*% o$coef
+    # return the predictions on the original scale of y
+    yh <- (yh * attr(o$y,"scaled:scale")) + attr(o$y,"scaled:center")
+    return(yh)
+}
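+
+# Optional sanity check of the Nystroem approximation itself, added for
+# illustration only (it is not used by nakr or predict.nakr): build the
+# low-rank kernel C K11^+ C^T from nspl sampled columns of the exact Gaussian
+# kernel matrix K and return the largest absolute difference between the two.
+# The error should shrink toward 0 as nspl approaches n. The default values
+# of n, p, nspl and tol are arbitrary.
+nystroem_check <-
+function(n = 50, p = 2, nspl = 10, sigma2 = p, tol = 1e-10)
+{
+    X <- matrix(rnorm(n * p), nrow = n, ncol = p)
+    splidx <- sort(sample(1:n, nspl, replace = FALSE))
+    K <- gausskernel(X, X, sigma2)
+    C <- K[, splidx, drop = FALSE]
+    K11 <- K[splidx, splidx]
+    # pseudo-inverse of K11 restricted to its numerically non-zero spectrum
+    svdK11 <- svd(K11)
+    keep <- svdK11$d > tol * svdK11$d[1]
+    K11inv <- svdK11$u[, keep, drop = FALSE] %*%
+        diag(1 / svdK11$d[keep], sum(keep)) %*%
+        t(svdK11$v[, keep, drop = FALSE])
+    Ktilde <- C %*% K11inv %*% t(C)
+    max(abs(K - Ktilde))
+}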