# `kcuts` cuts variable `x` into `centers` categories with k-means. kcuts <- function(x, centers) { km <- kmeans(x = x, centers = centers) cuts <- unlist(lapply(order(km$centers), function(clustId) { min(x[km$cluster == clustId]) })) cuts <- c(cuts, max(x)) } # ventilate modality `mod` of categorical var `cat` of dataframe `dat` # into the remaining modalities according to their relative frequencies. ventilate <- function(cat, mod) { if (mod == "NA") isna <- TRUE else isna <- FALSE tab <- table(cat) if (isna) { sup_i <- which(is.na(cat)) sup_n <- length(sup_i) act_mod <- tab } else { sup_i <- which(cat == mod) sup_n <- tab[mod] act_mod <- tab[! names(tab) %in% mod] } prob <- act_mod / sum(act_mod) smpl <- sample(names(prob), size = sup_n, replace = TRUE, prob = prob) r <- list(sup_mod = mod, sup_n = sup_n, sup_i = sup_i, smpl = smpl) return(r) }