From 652301c761222994801ab112ab2cce9288dfb6c8 Mon Sep 17 00:00:00 2001 From: Pierre-Edouard Portier Date: Fri, 30 Dec 2022 15:45:04 +0100 Subject: [PATCH] code for rounding a list of numbers while preserving the sum --- 05_c_svd_ca.R | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 05_c_svd_ca_code.R | 16 ++++++++ 2 files changed, 107 insertions(+) create mode 100644 05_c_svd_ca.R create mode 100644 05_c_svd_ca_code.R diff --git a/05_c_svd_ca.R b/05_c_svd_ca.R new file mode 100644 index 0000000..b023bf5 --- /dev/null +++ b/05_c_svd_ca.R @@ -0,0 +1,91 @@ +## ----------------------------------------------------------------------------- +set.seed(1123) + + +## ----------------------------------------------------------------------------- +#N <- matrix( c(1,2,1,3,2,5,1,7,3,6,2,8), +# nrow = 4, ncol = 3, +# dimnames = list( c("i1", "i2", "i3", "i4"), +# c("v1", "v2", "v3")) ) + +N <- matrix( c(4, 3, 6, 2, 2, 3, 0, 0, 2, 0, + 2, 4, 4, 0, 5, 3, 0, 2, 1, 1, + 4, 2, 5, 5, 0, 1, 0, 0, 1, 4, + 4, 2, 2, 1, 1, 0, 0,11, 0, 1, + 1, 1, 3, 3, 4, 0, 1, 1, 2, 6, + 2, 1, 1, 3, 1, 3, 4, 3, 4, 0, + 2, 0, 1, 3, 2, 0, 1,10, 0, 3, + 4, 3, 3, 1, 1, 2, 5, 1, 2, 0, + 1, 2, 0, 5, 3, 1, 3, 1, 0, 6), + nrow = 10, ncol = 9, + dimnames = list( + c("red", "orange", "yellow", "green", + "blue", "purple", "white", "black", + "pink", "brown"), + c("video", "jazz", "country", "rap", + "pop", "opera", "low F", "high F", + "middle F")) ) + +n <- sum(sum(N)) +print(N, digits=2) +print(n) + + +## ----------------------------------------------------------------------------- +P <- (1/n) * N +print(P, digits=2) + + +## ----------------------------------------------------------------------------- +r <- rowSums(P) +c <- colSums(P) +print(r, digits=2) +print(c, digits=2) + + +## ----------------------------------------------------------------------------- +Dr <- diag(r) +Dc <- diag(c) +print(Dr, digits=2) +print(Dc, digits=2) + + +## ----------------------------------------------------------------------------- +# Pour construire R ou C, il n'est pas nécessaire de construire Dr ou Dc. +# L'utilisation de la fonction sweep est plus efficace. +R <- sweep(P, MARGIN = 1, 1/r, '*') +R <- sweep(P, MARGIN = 1, 1/r, '*') +print(R, digits=2) +print(C, digits=2) + + +## ----------------------------------------------------------------------------- +# On vérifie que la somme des éléments d'une ligne de R ou C est égale à 1. +print(rowSums(R), digits=2) +print(rowSums(C), digits=2) + + +## ----------------------------------------------------------------------------- +scaleR <- 1/sqrt(r) +scaleC <- 1/sqrt(c) +S <- sweep(P - r %*% t(c), 1, scaleR, FUN = "*") +S <- sweep(S, 2, scaleC, FUN = "*") +dec <- svd(S) +Phi <- sweep(dec$u, 1, scaleR, FUN="*") +F <- sweep(Phi, 2, dec$d, FUN="*") +Gam <- sweep(dec$v, 1, scaleC, FUN="*") +G <- sweep(Gam, 2, dec$d, FUN="*") +plot(c(F[,1], G[,1]), c(F[,2], G[,2]), main = "x: d1, y: d2", type = "n", + xlab="", ylab="", asp = 1, xaxt = "n", yaxt = "n") +text(c(F[,1], G[,1]), c(F[,2], G[,2]), c(rownames(P), colnames(P)), + adj = 0, cex = 0.6) +points(0, 0, pch = 3) + + +## ----------------------------------------------------------------------------- +plot(c(F[,1], Gam[,1]), c(F[,2], Gam[,2]), main = "x: d1, y: d2", type = "n", + xlab="", ylab="", asp = 1, xaxt = "n", yaxt = "n") +text(c(F[,1], Gam[,1]), c(F[,2], Gam[,2]), c(rownames(P), colnames(P)), + adj = 0, cex = 0.6) +points(0, 0, pch = 3) + diff --git a/05_c_svd_ca_code.R b/05_c_svd_ca_code.R new file mode 100644 index 0000000..a7c29e8 --- /dev/null +++ b/05_c_svd_ca_code.R @@ -0,0 +1,16 @@ +# https://www.r-bloggers.com/2016/07/round-values-while-preserve-their-rounded-sum-in-r/ +# E.G. +# > sum(c(0.333, 0.333, 0.334)) +# [1] 1 +# > sum(round(c(0.333, 0.333, 0.334), 2)) +# [1] 0.99 +# > sum(round_preserve_sum(c(0.333, 0.333, 0.334), 2)) +# [1] 1.00 +round_preserve_sum <- function(x, digits = 0) { + up <- 10 ^ digits + x <- x * up + y <- floor(x) + indices <- tail(order(x-y), round(sum(x)) - sum(y)) + y[indices] <- y[indices] + 1 + y / up +} \ No newline at end of file