intro_to_ml/05_b_svd_pca_code.R

# 05 Application du SVD à l'analyse factorielle

fa <-
function(X, nbClst=100, nstart=25)
{
  XStd <- scale(X)
  clst <- kmeans(XStd, nbClst, nstart)
  C <- scale(clst$centers)
  n <- nrow(C)
  Cs <- svd(C)
  fact <- Cs$u %*% diag(Cs$d)
  fact2 <- fact^2
  ctr <- sweep(fact2, 2, colSums(fact2), "/")
  cos2 <- sweep(fact2, 1, rowSums(fact2), "/")
  cos2 <- round(cos2*100,2)
  varctr <- ((Cs$v %*% diag(Cs$d))/sqrt(n-1))^2
  rownames(varctr) <- colnames(X)
  varctr <- round(varctr,2)
  prctPrcp <- round((Cs$d^2 / sum(Cs$d^2))*100, 2)

  r <- list(clst=clst, fact=fact, ctr=ctr, cos2=cos2, varctr=varctr,
            prctPrcp=prctPrcp)
  class(r) <- "fa"
  return(r)
}

print.fa <-
function(o,d1=NULL,d2=NULL)
{
  if(is.null(d1)) d1<-1
  if(is.null(d2)) d2<-2
  n <- dim(o$fact)[1]
  d1BestCtr <- which(o$ctr[,d1] > 1/n)
  d2BestCtr <- which(o$ctr[,d2] > 1/n)
  d1d2BestCtr <- union(d1BestCtr,d2BestCtr)
  plot(o$fact[d1d2BestCtr,d1], o$fact[d1d2BestCtr,d2], pch="",
       xlab=paste("D",d1), ylab=paste("D",d2))
  text(o$fact[d1d2BestCtr,d1], o$fact[d1d2BestCtr,d2], d1d2BestCtr, cex=0.8)
}

away <- function(x,...) UseMethod("away", x)
getCluster <- function(x,...) UseMethod("getCluster", x)

getCluster.fa <-
function(o,clstId)
{
  size <- o$clst$size[clstId]
  names <- names(o$clst$cluster[o$clst$cluster==clstId])

  r <- list(id=clstId, size=size, names=names)
  return(r)
}

away.fa <-
function(o,d=1)
{
  id <- which.max(abs(o$fact[,d]))
  return(getCluster(o,id))
}