intro_to_ml/05_svd_pca.R

60 lines
1.4 KiB
R

# 05 Application du SVD à l'analyse factorielle
fa <-
function(X, nbClst=100, nstart=25)
{
XStd <- scale(X)
clst <- kmeans(XStd, nbClst, nstart)
C <- scale(clst$centers)
n <- nrow(C)
Cs <- svd(C)
fact <- Cs$u %*% diag(Cs$d)
fact2 <- fact^2
ctr <- sweep(fact2, 2, colSums(fact2), "/")
cos2 <- sweep(fact2, 1, rowSums(fact2), "/")
cos2 <- round(cos2*100,2)
varctr <- ((Cs$v %*% diag(Cs$d))/sqrt(n-1))^2
rownames(varctr) <- colnames(X)
varctr <- round(varctr,2)
prctPrcp <- round((Cs$d^2 / sum(Cs$d^2))*100, 2)
r <- list(clst=clst, fact=fact, ctr=ctr, cos2=cos2, varctr=varctr,
prctPrcp=prctPrcp)
class(r) <- "fa"
return(r)
}
print.fa <-
function(o,d1=NULL,d2=NULL)
{
if(is.null(d1)) d1<-1
if(is.null(d2)) d2<-2
n <- dim(o$fact)[1]
d1BestCtr <- which(o$ctr[,d1] > 1/n)
d2BestCtr <- which(o$ctr[,d2] > 1/n)
d1d2BestCtr <- union(d1BestCtr,d2BestCtr)
plot(o$fact[d1d2BestCtr,d1], o$fact[d1d2BestCtr,d2], pch="",
xlab=paste("D",d1), ylab=paste("D",d2))
text(o$fact[d1d2BestCtr,d1], o$fact[d1d2BestCtr,d2], d1d2BestCtr, cex=0.8)
}
away <- function(x,...) UseMethod("away", x)
getCluster <- function(x,...) UseMethod("getCluster", x)
getCluster.fa <-
function(o,clstId)
{
size <- o$clst$size[clstId]
names <- names(o$clst$cluster[o$clst$cluster==clstId])
r <- list(id=clstId, size=size, names=names)
return(r)
}
away.fa <-
function(o,d=1)
{
id <- which.max(abs(o$fact[,d]))
return(getCluster(o,id))
}