intro_to_ml/05_c_svd_ca.R

92 lines
3.0 KiB
R

## -----------------------------------------------------------------------------
set.seed(1123)
## -----------------------------------------------------------------------------
#N <- matrix( c(1,2,1,3,2,5,1,7,3,6,2,8),
# nrow = 4, ncol = 3,
# dimnames = list( c("i1", "i2", "i3", "i4"),
# c("v1", "v2", "v3")) )
N <- matrix( c(4, 3, 6, 2, 2, 3, 0, 0, 2, 0,
2, 4, 4, 0, 5, 3, 0, 2, 1, 1,
4, 2, 5, 5, 0, 1, 0, 0, 1, 4,
4, 2, 2, 1, 1, 0, 0,11, 0, 1,
1, 1, 3, 3, 4, 0, 1, 1, 2, 6,
2, 1, 1, 3, 1, 3, 4, 3, 4, 0,
2, 0, 1, 3, 2, 0, 1,10, 0, 3,
4, 3, 3, 1, 1, 2, 5, 1, 2, 0,
1, 2, 0, 5, 3, 1, 3, 1, 0, 6),
nrow = 10, ncol = 9,
dimnames = list(
c("red", "orange", "yellow", "green",
"blue", "purple", "white", "black",
"pink", "brown"),
c("video", "jazz", "country", "rap",
"pop", "opera", "low F", "high F",
"middle F")) )
n <- sum(sum(N))
print(N, digits=2)
print(n)
## -----------------------------------------------------------------------------
P <- (1/n) * N
print(P, digits=2)
## -----------------------------------------------------------------------------
r <- rowSums(P)
c <- colSums(P)
print(r, digits=2)
print(c, digits=2)
## -----------------------------------------------------------------------------
Dr <- diag(r)
Dc <- diag(c)
print(Dr, digits=2)
print(Dc, digits=2)
## -----------------------------------------------------------------------------
# Pour construire R ou C, il n'est pas nécessaire de construire Dr ou Dc.
# L'utilisation de la fonction sweep est plus efficace.
R <- sweep(P, MARGIN = 1, 1/r, '*')
R <- sweep(P, MARGIN = 1, 1/r, '*')
print(R, digits=2)
print(C, digits=2)
## -----------------------------------------------------------------------------
# On vérifie que la somme des éléments d'une ligne de R ou C est égale à 1.
print(rowSums(R), digits=2)
print(rowSums(C), digits=2)
## -----------------------------------------------------------------------------
scaleR <- 1/sqrt(r)
scaleC <- 1/sqrt(c)
S <- sweep(P - r %*% t(c), 1, scaleR, FUN = "*")
S <- sweep(S, 2, scaleC, FUN = "*")
dec <- svd(S)
Phi <- sweep(dec$u, 1, scaleR, FUN="*")
F <- sweep(Phi, 2, dec$d, FUN="*")
Gam <- sweep(dec$v, 1, scaleC, FUN="*")
G <- sweep(Gam, 2, dec$d, FUN="*")
plot(c(F[,1], G[,1]), c(F[,2], G[,2]), main = "x: d1, y: d2", type = "n",
xlab="", ylab="", asp = 1, xaxt = "n", yaxt = "n")
text(c(F[,1], G[,1]), c(F[,2], G[,2]), c(rownames(P), colnames(P)),
adj = 0, cex = 0.6)
points(0, 0, pch = 3)
## -----------------------------------------------------------------------------
plot(c(F[,1], Gam[,1]), c(F[,2], Gam[,2]), main = "x: d1, y: d2", type = "n",
xlab="", ylab="", asp = 1, xaxt = "n", yaxt = "n")
text(c(F[,1], Gam[,1]), c(F[,2], Gam[,2]), c(rownames(P), colnames(P)),
adj = 0, cex = 0.6)
points(0, 0, pch = 3)