42 lines
1.0 KiB
R
42 lines
1.0 KiB
R
|
# `kcuts` cuts variable `x` into `centers` categories with k-means.
|
||
|
kcuts <- function(x, centers)
|
||
|
{
|
||
|
km <- kmeans(x = x, centers = centers)
|
||
|
cuts <- unlist(lapply(order(km$centers), function(clustId) {
|
||
|
min(x[km$cluster == clustId]) }))
|
||
|
cuts <- c(cuts, max(x))
|
||
|
}
|
||
|
|
||
|
# ventilate modality `mod` of categorical var `cat` of dataframe `dat`
|
||
|
# into the remaining modalities according to their relative frequencies.
|
||
|
ventilate <- function(cat, mod)
|
||
|
{
|
||
|
if (mod == "NA") isna <- TRUE else isna <- FALSE
|
||
|
tab <- table(cat)
|
||
|
if (isna)
|
||
|
{
|
||
|
sup_i <- which(is.na(cat))
|
||
|
sup_n <- length(sup_i)
|
||
|
act_mod <- tab
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
sup_i <- which(cat == mod)
|
||
|
sup_n <- tab[mod]
|
||
|
act_mod <- tab[! names(tab) %in% mod]
|
||
|
}
|
||
|
prob <- act_mod / sum(act_mod)
|
||
|
smpl <- sample(names(prob), size = sup_n, replace = TRUE, prob = prob)
|
||
|
r <- list(sup_mod = mod,
|
||
|
sup_n = sup_n,
|
||
|
sup_i = sup_i,
|
||
|
smpl = smpl)
|
||
|
return(r)
|
||
|
}
|
||
|
|
||
|
# Plot axes
|
||
|
plaxes <- function(a,b)
|
||
|
{
|
||
|
segments( min(a),0, max(a),0 )
|
||
|
segments( 0,min(b), 0,max(b) )
|
||
|
}
|