Some more AAC stuffs.

from Brad (maintainer)
This commit is contained in:
ajacoutot 2015-10-13 05:44:18 +00:00
parent 4230e621f7
commit a4127afac2
24 changed files with 3072 additions and 270 deletions

View File

@ -1,11 +1,11 @@
# $OpenBSD: Makefile,v 1.130 2015/10/03 19:39:47 sthen Exp $
# $OpenBSD: Makefile,v 1.131 2015/10/13 05:44:18 ajacoutot Exp $
COMMENT= audio/video converter and streamer
V= 20150909
DISTNAME= ffmpeg-git-${V}
PKGNAME= ffmpeg-${V}
REVISION= 6
REVISION= 7
CATEGORIES= graphics multimedia
MASTER_SITES= http://comstyle.com/source/
EXTRACT_SUFX= .tar.xz

View File

@ -0,0 +1,14 @@
$OpenBSD: patch-libavcodec_aac_h,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: Extensive improvements
--- libavcodec/aac.h.orig Mon Oct 12 21:55:54 2015
+++ libavcodec/aac.h Mon Oct 12 22:04:41 2015
@@ -252,6 +252,7 @@ typedef struct SingleChannelElement {
INTFLOAT sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
+ uint8_t can_pns[128]; ///< band is allowed to PNS (informative)
float is_ener[128]; ///< Intensity stereo pos (used by encoder)
float pns_ener[128]; ///< Noise energy values (used by encoder)
DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine

View File

@ -1,4 +1,4 @@
$OpenBSD: patch-libavcodec_aaccoder_c,v 1.3 2015/09/27 06:35:33 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aaccoder_c,v 1.4 2015/10/13 05:44:18 ajacoutot Exp $
aaccoder: use roundf() instead of ceilf()
@ -16,17 +16,29 @@ AAC encoder: tweak PNS usage to be more aggressive
AAC encoder: fix OOB access in search_for_pns
AAC encoder: Extensive improvements
aacenc: shorten name of ff_aac_adjust_common_prediction
--- libavcodec/aaccoder.c.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aaccoder.c Sat Sep 26 13:22:59 2015
@@ -48,18 +48,21 @@
+++ libavcodec/aaccoder.c Mon Oct 12 22:04:45 2015
@@ -33,7 +33,9 @@
#include "libavutil/libm.h" // brought forward to work around cygwin header breakage
#include <float.h>
+
#include "libavutil/mathematics.h"
+#include "mathops.h"
#include "avcodec.h"
#include "put_bits.h"
#include "aac.h"
@@ -48,18 +50,18 @@
#include "aacenc_tns.h"
#include "aacenc_pred.h"
+#include "libavcodec/aaccoder_twoloop.h"
+
/** Frequency in Hz for lower limit of noise substitution **/
-/** Frequency in Hz for lower limit of noise substitution **/
-#define NOISE_LOW_LIMIT 4500
+#define NOISE_LOW_LIMIT 4000
+#include "libavcodec/aaccoder_twoloop.h"
-/* Energy spread threshold value below which no PNS is used, this corresponds to
- * typically around 17Khz, after which PNS usage decays ending at 19Khz */
@ -48,7 +60,16 @@ AAC encoder: fix OOB access in search_for_pns
/**
* structure used in optimal codebook search
*/
@@ -182,138 +185,7 @@ static void encode_window_bands_info(AACEncContext *s,
@@ -121,7 +123,7 @@ static void encode_window_bands_info(AACEncContext *s,
rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
&s->scoefs[start + w*128], size,
sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
- lambda / band->threshold, INFINITY, NULL, 0);
+ lambda / band->threshold, INFINITY, NULL, NULL, 0);
}
cost_stay_here = path[swb][cb].cost + rd;
cost_get_here = minrd + rd + run_bits + 4;
@@ -182,138 +184,7 @@ static void encode_window_bands_info(AACEncContext *s,
}
}
@ -187,7 +208,7 @@ AAC encoder: fix OOB access in search_for_pns
typedef struct TrellisPath {
float cost;
int prev;
@@ -332,11 +204,11 @@ static void set_special_band_scalefactors(AACEncContex
@@ -332,11 +203,11 @@ static void set_special_band_scalefactors(AACEncContex
start = 0;
for (g = 0; g < sce->ics.num_swb; g++) {
if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
@ -201,7 +222,16 @@ AAC encoder: fix OOB access in search_for_pns
minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]);
bands++;
}
@@ -509,156 +381,7 @@ static void search_for_quantizers_anmr(AVCodecContext
@@ -463,7 +334,7 @@ static void search_for_quantizers_anmr(AVCodecContext
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
- q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
+ q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
}
minrd = FFMIN(minrd, dist);
@@ -509,156 +380,7 @@ static void search_for_quantizers_anmr(AVCodecContext
sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
}
@ -358,22 +388,56 @@ AAC encoder: fix OOB access in search_for_pns
static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
SingleChannelElement *sce,
const float lambda)
@@ -863,32 +586,78 @@ static void search_for_quantizers_fast(AVCodecContext
@@ -776,7 +498,7 @@ static void search_for_quantizers_faac(AVCodecContext
ESC_BT,
lambda,
INFINITY,
- &b,
+ &b, NULL,
0);
dist -= b;
}
@@ -863,92 +585,320 @@ static void search_for_quantizers_fast(AVCodecContext
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
{
- int start = 0, w, w2, g;
+ FFPsyBand *band;
+ int w, g, w2, i;
+ int wlen = 1024 / sce->ics.num_windows;
+ int bandwidth, cutoff;
+ float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
+ float *NOR34 = &s->scoefs[3*128];
const float lambda = s->lambda;
const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
- const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
- const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/120.f);
- const float thr_mult = NOISE_LAMBDA_NUMERATOR/lambda;
+ const float freq_mult = avctx->sample_rate*0.5f/wlen;
+ const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
+ const float spread_threshold = NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f);
+ const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+ const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
+ const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
+ int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+
+ /** Keep this in sync with twoloop's cutoff selection */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ frame_bit_rate *= 1.15f;
+
+ if (avctx->cutoff > 0) {
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+
+ memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- start = 0;
@ -386,44 +450,53 @@ AAC encoder: fix OOB access in search_for_pns
- energy += band->energy;
- threshold += band->threshold;
- spread += band->spread;
- }
+ int noise_sfi;
+ float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
+ float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
+ float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
+ float min_energy = -1.0f, max_energy = 0.0f;
+ const int start = wstart+sce->ics.swb_offset[g];
+ const float freq = (start-wstart)*freq_mult;
+ const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
+ if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff)
+ continue;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ sfb_energy += band->energy;
+ spread = FFMIN(spread, band->spread);
+ threshold += band->threshold;
+ if (!w2) {
+ min_energy = max_energy = band->energy;
+ } else {
+ min_energy = FFMIN(min_energy, band->energy);
+ max_energy = FFMAX(max_energy, band->energy);
}
- if (spread > spread_threshold*sce->ics.group_len[w] &&
- ((sce->zeroes[w*16+g] && energy >= threshold) ||
- energy < threshold*thr_mult*sce->ics.group_len[w])) {
- sce->band_type[w*16+g] = NOISE_BT;
- sce->pns_ener[w*16+g] = energy / sce->ics.group_len[w];
- sce->zeroes[w*16+g] = 0;
- }
+ int noise_sfi;
+ float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
+ float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
+ float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f;
+ const int start = wstart+sce->ics.swb_offset[g];
+ const float freq = (start-wstart)*freq_mult;
+ const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
+ if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff)
+ continue;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ sfb_energy += band->energy;
+ spread += band->spread;
+ threshold += band->threshold;
}
- start += sce->ics.swb_sizes[g];
+ }
+
+ /* Ramps down at ~8000Hz and loosens the dist threshold */
+ dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 2.5f);
+ dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
+
+ /* zero and energy close to threshold usually means hole avoidance,
+ * we do want to remain avoiding holes with PNS
+ /* PNS is acceptable when all of these are true:
+ * 1. high spread energy (noise-like band)
+ * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+ * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+ *
+ * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
+ */
+ if (((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.5f/freq_boost)) || spread < spread_threshold ||
+ (sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost)) {
+ (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
+ min_energy < pns_transient_energy_r * max_energy ) {
+ sce->pns_ener[w*16+g] = sfb_energy;
+ continue;
+ }
+
+ pns_tgt_energy = sfb_energy*spread*spread/sce->ics.group_len[w];
+ pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
+ noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
+ noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
@ -444,16 +517,314 @@ AAC encoder: fix OOB access in search_for_pns
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[(w+w2)*16+g],
+ sce->band_alt[(w+w2)*16+g],
+ lambda/band->threshold, INFINITY, NULL, 0);
+ /* Estimate rd on average as 9 bits for CB and sf + spread energy * lambda/thr */
+ dist2 += 9+band->energy/(band->spread*band->spread)*lambda/band->threshold;
+ lambda/band->threshold, INFINITY, NULL, NULL, 0);
+ /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
+ dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
+ }
+ if (g && sce->sf_idx[(w+w2)*16+g-1] == NOISE_BT) {
+ dist2 += 5;
+ } else {
+ dist2 += 9;
+ }
+ energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
+ sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
+ if (energy_ratio > 0.85f && energy_ratio < 1.25f && (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || dist2*dist_thresh < dist1)) {
+ if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
+ sce->band_type[w*16+g] = NOISE_BT;
+ sce->zeroes[w*16+g] = 0;
+ }
+ }
+ }
+}
+
+static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
+{
+ FFPsyBand *band;
+ int w, g, w2;
+ int wlen = 1024 / sce->ics.num_windows;
+ int bandwidth, cutoff;
+ const float lambda = s->lambda;
+ const float freq_mult = avctx->sample_rate*0.5f/wlen;
+ const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+ const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
+
+ int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+
+ /** Keep this in sync with twoloop's cutoff selection */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ frame_bit_rate *= 1.15f;
+
+ if (avctx->cutoff > 0) {
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+
+ memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
+ float min_energy = -1.0f, max_energy = 0.0f;
+ const int start = sce->ics.swb_offset[g];
+ const float freq = start*freq_mult;
+ const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
+ if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
+ sce->can_pns[w*16+g] = 0;
+ continue;
+ }
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ sfb_energy += band->energy;
+ spread = FFMIN(spread, band->spread);
+ threshold += band->threshold;
+ if (!w2) {
+ min_energy = max_energy = band->energy;
+ } else {
+ min_energy = FFMIN(min_energy, band->energy);
+ max_energy = FFMAX(max_energy, band->energy);
}
}
- start += sce->ics.swb_sizes[g];
+
+ /* PNS is acceptable when all of these are true:
+ * 1. high spread energy (noise-like band)
+ * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+ * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+ */
+ sce->pns_ener[w*16+g] = sfb_energy;
+ if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
+ sce->can_pns[w*16+g] = 0;
+ } else {
+ sce->can_pns[w*16+g] = 1;
+ }
}
}
}
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
{
- int start = 0, i, w, w2, g;
+ int start = 0, i, w, w2, g, sid_sf_boost;
float M[128], S[128];
float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
const float lambda = s->lambda;
+ const float mslambda = FFMIN(1.0f, lambda / 120.f);
SingleChannelElement *sce0 = &cpe->ch[0];
SingleChannelElement *sce1 = &cpe->ch[1];
if (!cpe->common_window)
return;
for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+ int min_sf_idx_mid = SCALE_MAX_POS;
+ int min_sf_idx_side = SCALE_MAX_POS;
+ for (g = 0; g < sce0->ics.num_swb; g++) {
+ if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
+ min_sf_idx_mid = FFMIN(min_sf_idx_mid, sce0->sf_idx[w*16+g]);
+ if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
+ min_sf_idx_side = FFMIN(min_sf_idx_side, sce1->sf_idx[w*16+g]);
+ }
+
start = 0;
for (g = 0; g < sce0->ics.num_swb; g++) {
+ float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
+ cpe->ms_mask[w*16+g] = 0;
if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
- float dist1 = 0.0f, dist2 = 0.0f;
+ float Mmax = 0.0f, Smax = 0.0f;
+
+ /* Must compute mid/side SF and book for the whole window group */
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
- FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
- FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
- float minthr = FFMIN(band0->threshold, band1->threshold);
- float maxthr = FFMAX(band0->threshold, band1->threshold);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+ sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
S[i] = M[i]
- sce1->coeffs[start+(w+w2)*128+i];
}
- abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
- abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
- dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
- L34,
- sce0->ics.swb_sizes[g],
- sce0->sf_idx[(w+w2)*16+g],
- sce0->band_type[(w+w2)*16+g],
- lambda / band0->threshold, INFINITY, NULL, 0);
- dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
- R34,
- sce1->ics.swb_sizes[g],
- sce1->sf_idx[(w+w2)*16+g],
- sce1->band_type[(w+w2)*16+g],
- lambda / band1->threshold, INFINITY, NULL, 0);
- dist2 += quantize_band_cost(s, M,
- M34,
- sce0->ics.swb_sizes[g],
- sce0->sf_idx[(w+w2)*16+g],
- sce0->band_type[(w+w2)*16+g],
- lambda / maxthr, INFINITY, NULL, 0);
- dist2 += quantize_band_cost(s, S,
- S34,
- sce1->ics.swb_sizes[g],
- sce1->sf_idx[(w+w2)*16+g],
- sce1->band_type[(w+w2)*16+g],
- lambda / minthr, INFINITY, NULL, 0);
+ abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+ for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
+ Mmax = FFMAX(Mmax, M34[i]);
+ Smax = FFMAX(Smax, S34[i]);
+ }
}
- cpe->ms_mask[w*16+g] = dist2 < dist1;
+
+ for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
+ float dist1 = 0.0f, dist2 = 0.0f;
+ int B0 = 0, B1 = 0;
+ int minidx;
+ int mididx, sididx;
+ int midcb, sidcb;
+
+ minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
+ mididx = av_clip(minidx, min_sf_idx_mid, min_sf_idx_mid + SCALE_MAX_DIFF);
+ sididx = av_clip(minidx - sid_sf_boost * 3, min_sf_idx_side, min_sf_idx_side + SCALE_MAX_DIFF);
+ midcb = find_min_book(Mmax, mididx);
+ sidcb = find_min_book(Smax, sididx);
+
+ if ((mididx > minidx) || (sididx > minidx)) {
+ /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
+ continue;
+ }
+
+ /* No CB can be zero */
+ midcb = FFMAX(1,midcb);
+ sidcb = FFMAX(1,sidcb);
+
+ for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+ FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+ FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+ float minthr = FFMIN(band0->threshold, band1->threshold);
+ int b1,b2,b3,b4;
+ for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+ M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+ + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
+ S[i] = M[i]
+ - sce1->coeffs[start+(w+w2)*128+i];
+ }
+
+ abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+ abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+ dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
+ L34,
+ sce0->ics.swb_sizes[g],
+ sce0->sf_idx[(w+w2)*16+g],
+ sce0->band_type[(w+w2)*16+g],
+ lambda / band0->threshold, INFINITY, &b1, NULL, 0);
+ dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
+ R34,
+ sce1->ics.swb_sizes[g],
+ sce1->sf_idx[(w+w2)*16+g],
+ sce1->band_type[(w+w2)*16+g],
+ lambda / band1->threshold, INFINITY, &b2, NULL, 0);
+ dist2 += quantize_band_cost(s, M,
+ M34,
+ sce0->ics.swb_sizes[g],
+ sce0->sf_idx[(w+w2)*16+g],
+ sce0->band_type[(w+w2)*16+g],
+ lambda / minthr, INFINITY, &b3, NULL, 0);
+ dist2 += quantize_band_cost(s, S,
+ S34,
+ sce1->ics.swb_sizes[g],
+ sce1->sf_idx[(w+w2)*16+g],
+ sce1->band_type[(w+w2)*16+g],
+ mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
+ B0 += b1+b2;
+ B1 += b3+b4;
+ dist1 -= B0;
+ dist2 -= B1;
+ }
+ cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
+ if (cpe->ms_mask[w*16+g]) {
+ /* Setting the M/S mask is useful with I/S, but only the flag */
+ if (!cpe->is_mask[w*16+g]) {
+ sce0->sf_idx[w*16+g] = mididx;
+ sce1->sf_idx[w*16+g] = sididx;
+ sce0->band_type[w*16+g] = midcb;
+ sce1->band_type[w*16+g] = sidcb;
+ }
+ break;
+ } else if (B1 > B0) {
+ /* More boost won't fix this */
+ break;
+ }
+ }
}
start += sce0->ics.swb_sizes[g];
}
@@ -962,11 +912,12 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
quantize_and_encode_band,
ff_aac_encode_tns_info,
ff_aac_encode_main_pred,
- ff_aac_adjust_common_prediction,
+ ff_aac_adjust_common_pred,
ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -978,11 +929,12 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
quantize_and_encode_band,
ff_aac_encode_tns_info,
ff_aac_encode_main_pred,
- ff_aac_adjust_common_prediction,
+ ff_aac_adjust_common_pred,
ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -994,11 +946,12 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
quantize_and_encode_band,
ff_aac_encode_tns_info,
ff_aac_encode_main_pred,
- ff_aac_adjust_common_prediction,
+ ff_aac_adjust_common_pred,
ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,
@@ -1010,11 +963,12 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
quantize_and_encode_band,
ff_aac_encode_tns_info,
ff_aac_encode_main_pred,
- ff_aac_adjust_common_prediction,
+ ff_aac_adjust_common_pred,
ff_aac_apply_main_pred,
ff_aac_apply_tns,
set_special_band_scalefactors,
search_for_pns,
+ mark_pns,
ff_aac_search_for_tns,
search_for_ms,
ff_aac_search_for_is,

View File

@ -1,9 +1,11 @@
$OpenBSD: patch-libavcodec_aaccoder_trellis_h,v 1.1 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aaccoder_trellis_h,v 1.2 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: refactor to resynchronize MIPS port
--- libavcodec/aaccoder_trellis.h.orig Wed Sep 23 03:49:27 2015
+++ libavcodec/aaccoder_trellis.h Wed Sep 23 03:49:43 2015
AAC encoder: Extensive improvements
--- libavcodec/aaccoder_trellis.h.orig Mon Oct 12 21:54:16 2015
+++ libavcodec/aaccoder_trellis.h Mon Oct 12 22:04:49 2015
@@ -0,0 +1,194 @@
+/*
+ * AAC encoder trellis codebook selector
@ -136,7 +138,7 @@ AAC encoder: refactor to resynchronize MIPS port
+ &s->scoefs[start + w*128], size,
+ sce->sf_idx[win*16+swb],
+ aac_cb_out_map[cb],
+ 0, INFINITY, NULL, 0);
+ 0, INFINITY, NULL, NULL, 0);
+ }
+ cost_stay_here = path[swb][cb].cost + bits;
+ cost_get_here = minbits + bits + run_bits + 4;

View File

@ -1,10 +1,12 @@
$OpenBSD: patch-libavcodec_aaccoder_twoloop_h,v 1.1 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aaccoder_twoloop_h,v 1.2 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: refactor to resynchronize MIPS port
--- libavcodec/aaccoder_twoloop.h.orig Wed Sep 23 03:49:33 2015
+++ libavcodec/aaccoder_twoloop.h Wed Sep 23 03:49:47 2015
@@ -0,0 +1,203 @@
AAC encoder: Extensive improvements
--- libavcodec/aaccoder_twoloop.h.orig Mon Oct 12 21:54:16 2015
+++ libavcodec/aaccoder_twoloop.h Mon Oct 12 22:07:00 2015
@@ -0,0 +1,703 @@
+/*
+ * AAC encoder twoloop coder
+ * Copyright (C) 2008-2009 Konstantin Shishkov
@ -29,7 +31,7 @@ AAC encoder: refactor to resynchronize MIPS port
+/**
+ * @file
+ * AAC encoder twoloop coder
+ * @author Konstantin Shishkov
+ * @author Konstantin Shishkov, Claudio Freire
+ */
+
+/**
@ -41,6 +43,7 @@ AAC encoder: refactor to resynchronize MIPS port
+ * - abs_pow34_v
+ * - find_max_val
+ * - find_min_book
+ * - find_form_factor
+ */
+
+#ifndef AVCODEC_AACCODER_TWOLOOP_H
@ -48,6 +51,7 @@ AAC encoder: refactor to resynchronize MIPS port
+
+#include <float.h>
+#include "libavutil/mathematics.h"
+#include "mathops.h"
+#include "avcodec.h"
+#include "put_bits.h"
+#include "aac.h"
@ -56,6 +60,16 @@ AAC encoder: refactor to resynchronize MIPS port
+#include "aacenctab.h"
+#include "aac_tablegen_decl.h"
+
+/** Frequency in Hz for lower limit of noise substitution **/
+#define NOISE_LOW_LIMIT 4000
+
+#define sclip(x) av_clip(x,60,218)
+
+/* Reflects the cost to change codebooks */
+static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
+{
+ return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
+}
+
+/**
+ * two-loop quantizers search taken from ISO 13818-7 Appendix C
@ -65,54 +79,224 @@ AAC encoder: refactor to resynchronize MIPS port
+ SingleChannelElement *sce,
+ const float lambda)
+{
+ int start = 0, i, w, w2, g;
+ int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
+ float dists[128] = { 0 }, uplims[128] = { 0 };
+ float maxvals[128];
+ int fflag, minscaler;
+ int its = 0;
+ int allz = 0;
+ float minthr = INFINITY;
+ int start = 0, i, w, w2, g, recomprd;
+ int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+ / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+ * (lambda / 120.f);
+ int refbits = destbits;
+ int toomanybits, toofewbits;
+ char nzs[128];
+ int maxsf[128];
+ float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
+ float maxvals[128], spread_thr_r[128];
+ float min_spread_thr_r, max_spread_thr_r;
+
+ // for values above this the decoder might end up in an endless loop
+ // due to always having more bits than what can be encoded.
+ /**
+ * rdlambda controls the maximum tolerated distortion. Twoloop
+ * will keep iterating until it fails to lower it or it reaches
+ * ulimit * rdlambda. Keeping it low increases quality on difficult
+ * signals, but lower it too much, and bits will be taken from weak
+ * signals, creating "holes". A balance is necessary.
+ * rdmax and rdmin specify the relative deviation from rdlambda
+ * allowed for tonality compensation
+ */
+ float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
+ const float nzslope = 1.5f;
+ float rdmin = 0.03125f;
+ float rdmax = 1.0f;
+
+ /**
+ * sfoffs controls an offset of optimum allocation that will be
+ * applied based on lambda. Keep it real and modest, the loop
+ * will take care of the rest, this just accelerates convergence
+ */
+ float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
+
+ int fflag, minscaler, maxscaler, nminscaler, minrdsf;
+ int its = 0;
+ int maxits = 30;
+ int allz = 0;
+ int tbits;
+ int cutoff = 1024;
+ int pns_start_pos;
+ int prev;
+
+ /**
+ * zeroscale controls a multiplier of the threshold, if band energy
+ * is below this, a zero is forced. Keep it lower than 1, unless
+ * low lambda is used, because energy < threshold doesn't mean there's
+ * no audible signal outright, it's just energy. Also make it rise
+ * slower than rdlambda, as rdscale has due compensation with
+ * noisy band deprioritization below, whereas zeroing logic is rather dumb
+ */
+ float zeroscale;
+ if (lambda > 120.f) {
+ zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
+ } else {
+ zeroscale = 1.f;
+ }
+
+ if (s->psy.bitres.alloc >= 0) {
+ /**
+ * Psy granted us extra bits to use, from the reservoir
+ * adjust for lambda except what psy already did
+ */
+ destbits = s->psy.bitres.alloc
+ * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
+ }
+
+ if (avctx->flags & CODEC_FLAG_QSCALE) {
+ /**
+ * Constant Q-scale doesn't compensate MS coding on its own
+ * No need to be overly precise, this only controls RD
+ * adjustment CB limits when going overboard
+ */
+ if (s->options.mid_side && s->cur_type == TYPE_CPE)
+ destbits *= 2;
+
+ /**
+ * When using a constant Q-scale, don't adjust bits, just use RD
+ * Don't let it go overboard, though... 8x psy target is enough
+ */
+ toomanybits = 5800;
+ toofewbits = destbits / 16;
+
+ /** Don't offset scalers, just RD */
+ sfoffs = sce->ics.num_windows - 1;
+ rdlambda = sqrtf(rdlambda);
+
+ /** search further */
+ maxits *= 2;
+ } else {
+ /** When using ABR, be strict */
+ toomanybits = destbits + destbits/16;
+ toofewbits = destbits - destbits/4;
+
+ sfoffs = 0;
+ rdlambda = sqrtf(rdlambda);
+ }
+
+ /** and zero out above cutoff frequency */
+ {
+ int wlen = 1024 / sce->ics.num_windows;
+ int bandwidth;
+
+ /**
+ * Scale, psy gives us constant quality, this LP only scales
+ * bitrate by lambda, so we save bits on subjectively unimportant HF
+ * rather than increase quantization noise. Adjust nominal bitrate
+ * to effective bitrate according to encoding parameters,
+ * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
+ */
+ float rate_bandwidth_multiplier = 1.5f;
+ int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+ ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+ : (avctx->bit_rate / avctx->channels);
+
+ /** Compensate for extensions that increase efficiency */
+ if (s->options.pns || s->options.intensity_stereo)
+ frame_bit_rate *= 1.15f;
+
+ if (avctx->cutoff > 0) {
+ bandwidth = avctx->cutoff;
+ } else {
+ bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+ }
+
+ cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+ pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
+ }
+
+ /**
+ * for values above this the decoder might end up in an endless loop
+ * due to always having more bits than what can be encoded.
+ */
+ destbits = FFMIN(destbits, 5800);
+ //XXX: some heuristic to determine initial quantizers will reduce search time
+ //determine zero bands and upper limits
+ toomanybits = FFMIN(toomanybits, 5800);
+ toofewbits = FFMIN(toofewbits, 5800);
+ /**
+ * XXX: some heuristic to determine initial quantizers will reduce search time
+ * determine zero bands and upper distortion limits
+ */
+ min_spread_thr_r = -1;
+ max_spread_thr_r = -1;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ int nz = 0;
+ float uplim = 0.0f, energy = 0.0f;
+ float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ uplim += band->threshold;
+ energy += band->energy;
+ if (band->energy <= band->threshold || band->threshold == 0.0f) {
+ if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
+ sce->zeroes[(w+w2)*16+g] = 1;
+ continue;
+ }
+ nz = 1;
+ }
+ uplims[w*16+g] = uplim *512;
+ if (!nz) {
+ uplim = 0.0f;
+ } else {
+ nz = 0;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+ if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
+ continue;
+ uplim += band->threshold;
+ energy += band->energy;
+ spread += band->spread;
+ nz++;
+ }
+ }
+ uplims[w*16+g] = uplim;
+ energies[w*16+g] = energy;
+ nzs[w*16+g] = nz;
+ sce->zeroes[w*16+g] = !nz;
+ if (nz)
+ minthr = FFMIN(minthr, uplim);
+ allz |= nz;
+ if (nz) {
+ spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
+ if (min_spread_thr_r < 0) {
+ min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
+ } else {
+ min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
+ max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
+ }
+ }
+ }
+ }
+
+ /** Compute initial scalers */
+ minscaler = 65535;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ if (sce->zeroes[w*16+g]) {
+ sce->sf_idx[w*16+g] = SCALE_ONE_POS;
+ continue;
+ }
+ sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
+ /**
+ * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
+ * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
+ * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
+ * more robust.
+ */
+ sce->sf_idx[w*16+g] = av_clip(
+ SCALE_ONE_POS
+ + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
+ + sfoffs,
+ 60, SCALE_MAX_POS);
+ minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+ }
+ }
+
+ /** Clip */
+ minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
+ for (g = 0; g < sce->ics.num_swb; g++)
+ if (!sce->zeroes[w*16+g])
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
+
+ if (!allz)
+ return;
+ abs_pow34_v(s->scoefs, sce->coeffs, 1024);
+ ff_quantize_band_cost_cache_init(s);
+
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ start = w*128;
@ -123,15 +307,66 @@ AAC encoder: refactor to resynchronize MIPS port
+ }
+ }
+
+ /**
+ * Scale uplims to match rate distortion to quality
+ * by applying noisy band deprioritization and tonal band prioritization.
+ * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
+ * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
+ * rate distortion requirements.
+ */
+ memcpy(euplims, uplims, sizeof(euplims));
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ /** psy already prioritizes transients to some extent */
+ float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
+ start = w*128;
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ if (nzs[g] > 0) {
+ float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
+ float energy2uplim = find_form_factor(
+ sce->ics.group_len[w], sce->ics.swb_sizes[g],
+ uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
+ sce->coeffs + start,
+ nzslope * cleanup_factor);
+ energy2uplim *= de_psy_factor;
+ if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+ /** In ABR, we need to prioritize less and let rate control do its thing */
+ energy2uplim = sqrtf(energy2uplim);
+ }
+ energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
+ uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
+ * sce->ics.group_len[w];
+
+ energy2uplim = find_form_factor(
+ sce->ics.group_len[w], sce->ics.swb_sizes[g],
+ uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
+ sce->coeffs + start,
+ 2.0f);
+ energy2uplim *= de_psy_factor;
+ if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+ /** In ABR, we need to prioritize less and let rate control do its thing */
+ energy2uplim = sqrtf(energy2uplim);
+ }
+ energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
+ euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
+ 0.5f, 1.0f);
+ }
+ start += sce->ics.swb_sizes[g];
+ }
+ }
+
+ for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
+ maxsf[i] = SCALE_MAX_POS;
+
+ //perform two-loop search
+ //outer loop - improve quality
+ do {
+ int tbits, qstep;
+ minscaler = sce->sf_idx[0];
+ //inner loop - quantize spectrum to fit into given number of bits
+ qstep = its ? 1 : 32;
+ int overdist;
+ int qstep = its ? 1 : 32;
+ do {
+ int prev = -1;
+ int changed = 0;
+ prev = -1;
+ recomprd = 0;
+ tbits = 0;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ start = w*128;
@ -141,70 +376,337 @@ AAC encoder: refactor to resynchronize MIPS port
+ int bits = 0;
+ int cb;
+ float dist = 0.0f;
+ float qenergy = 0.0f;
+
+ if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+ start += sce->ics.swb_sizes[g];
+ if (sce->can_pns[w*16+g]) {
+ /** PNS isn't free */
+ tbits += ff_pns_bits(sce, w, g);
+ }
+ continue;
+ }
+ minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ dist += quantize_band_cost(s, coefs + w2*128,
+ float sqenergy;
+ dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g],
+ cb,
+ 1.0f,
+ INFINITY,
+ &b,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ dists[w*16+g] = dist - bits;
+ qenergies[w*16+g] = qenergy;
+ if (prev != -1) {
+ bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
+ int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
+ bits += ff_aac_scalefactor_bits[sfdiff];
+ }
+ tbits += bits;
+ start += sce->ics.swb_sizes[g];
+ prev = sce->sf_idx[w*16+g];
+ }
+ }
+ if (tbits > destbits) {
+ for (i = 0; i < 128; i++)
+ if (sce->sf_idx[i] < 218 - qstep)
+ sce->sf_idx[i] += qstep;
+ } else {
+ for (i = 0; i < 128; i++)
+ if (sce->sf_idx[i] > 60 - qstep)
+ sce->sf_idx[i] -= qstep;
+ if (tbits > toomanybits) {
+ recomprd = 1;
+ for (i = 0; i < 128; i++) {
+ if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
+ int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
+ int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
+ if (new_sf != sce->sf_idx[i]) {
+ sce->sf_idx[i] = new_sf;
+ changed = 1;
+ }
+ }
+ }
+ } else if (tbits < toofewbits) {
+ recomprd = 1;
+ for (i = 0; i < 128; i++) {
+ if (sce->sf_idx[i] > SCALE_ONE_POS) {
+ int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
+ if (new_sf != sce->sf_idx[i]) {
+ sce->sf_idx[i] = new_sf;
+ changed = 1;
+ }
+ }
+ }
+ }
+ qstep >>= 1;
+ if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
+ if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
+ qstep = 1;
+ } while (qstep);
+
+ fflag = 0;
+ minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
+ overdist = 1;
+ for (i = 0; i < 2 && (overdist || recomprd); ++i) {
+ if (recomprd) {
+ /** Must recompute distortion */
+ prev = -1;
+ tbits = 0;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ start = w*128;
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ const float *coefs = sce->coeffs + start;
+ const float *scaled = s->scoefs + start;
+ int bits = 0;
+ int cb;
+ float dist = 0.0f;
+ float qenergy = 0.0f;
+
+ if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+ start += sce->ics.swb_sizes[g];
+ if (sce->can_pns[w*16+g]) {
+ /** PNS isn't free */
+ tbits += ff_pns_bits(sce, w, g);
+ }
+ continue;
+ }
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ float sqenergy;
+ dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g],
+ cb,
+ 1.0f,
+ INFINITY,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ dists[w*16+g] = dist - bits;
+ qenergies[w*16+g] = qenergy;
+ if (prev != -1) {
+ int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
+ bits += ff_aac_scalefactor_bits[sfdiff];
+ }
+ tbits += bits;
+ start += sce->ics.swb_sizes[g];
+ prev = sce->sf_idx[w*16+g];
+ }
+ }
+ }
+ if (!i && s->options.pns && its > maxits/2) {
+ float maxoverdist = 0.0f;
+ overdist = recomprd = 0;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ float ovrfactor = 2.f+(maxits-its)*16.f/maxits;
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
+ float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
+ maxoverdist = FFMAX(maxoverdist, ovrdist);
+ overdist++;
+ }
+ }
+ }
+ if (overdist) {
+ /* We have overdistorted bands, trade for zeroes (that can be noise)
+ * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
+ */
+ float minspread = max_spread_thr_r;
+ float maxspread = min_spread_thr_r;
+ float zspread;
+ int zeroable = 0;
+ int zeroed = 0;
+ int maxzeroed;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
+ minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
+ maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
+ zeroable++;
+ }
+ }
+ }
+ zspread = (maxspread-minspread) * 0.0125f + minspread;
+ zspread = FFMIN(maxoverdist, zspread);
+ maxzeroed = zeroable * its / (2 * maxits);
+ for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
+ if (sce->ics.swb_offset[g] < pns_start_pos)
+ continue;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) {
+ sce->zeroes[w*16+g] = 1;
+ sce->band_type[w*16+g] = 0;
+ zeroed++;
+ }
+ }
+ }
+ if (zeroed)
+ recomprd = 1;
+ } else {
+ overdist = 0;
+ }
+ }
+ }
+
+ minscaler = SCALE_MAX_POS;
+ maxscaler = 0;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ if (!sce->zeroes[w*16+g]) {
+ minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+ maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
+ }
+ }
+ }
+
+ fflag = 0;
+ minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
+ minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1);
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ /** Start with big steps, end up fine-tuning */
+ int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
+ int edepth = depth+2;
+ float uplmax = its / (maxits*0.25f) + 1.0f;
+ uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
+ start = w * 128;
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ int prevsc = sce->sf_idx[w*16+g];
+ if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
+ if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
+ sce->sf_idx[w*16+g]--;
+ else //Try to make sure there is some energy in every band
+ sce->sf_idx[w*16+g]-=2;
+ int minrdsfboost = (sce->ics.num_windows > 1) ? av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0);
+ if (!sce->zeroes[w*16+g]) {
+ const float *coefs = sce->coeffs + start;
+ const float *scaled = s->scoefs + start;
+ int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) {
+ /* Try to make sure there is some energy in every nonzero band
+ * NOTE: This algorithm must be forcibly imbalanced, pushing harder
+ * on holes or more distorted bands at first, otherwise there's
+ * no net gain (since the next iteration will offset all bands
+ * on the opposite direction to compensate for extra bits)
+ */
+ for (i = 0; i < edepth; ++i) {
+ int cb, bits;
+ float dist, qenergy;
+ int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ dist = qenergy = 0.f;
+ bits = 0;
+ if (!cb) {
+ maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
+ } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
+ break;
+ }
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ float sqenergy;
+ dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g]-1,
+ cb,
+ 1.0f,
+ INFINITY,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ sce->sf_idx[w*16+g]--;
+ dists[w*16+g] = dist - bits;
+ qenergies[w*16+g] = qenergy;
+ if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || (
+ (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
+ && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
+ ) )) {
+ break;
+ }
+ }
+ } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler
+ && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
+ && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
+ ) {
+ /** Um... over target. Save bits for more important stuff. */
+ for (i = 0; i < depth; ++i) {
+ int cb, bits;
+ float dist, qenergy;
+ cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
+ if (cb > 0) {
+ dist = qenergy = 0.f;
+ bits = 0;
+ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+ int b;
+ float sqenergy;
+ dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+ scaled + w2*128,
+ sce->ics.swb_sizes[g],
+ sce->sf_idx[w*16+g]+1,
+ cb,
+ 1.0f,
+ INFINITY,
+ &b, &sqenergy,
+ 0);
+ bits += b;
+ qenergy += sqenergy;
+ }
+ dist -= bits;
+ if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
+ sce->sf_idx[w*16+g]++;
+ dists[w*16+g] = dist;
+ qenergies[w*16+g] = qenergy;
+ } else {
+ break;
+ }
+ } else {
+ maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
+ break;
+ }
+ }
+ }
+ }
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
+ sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF);
+ sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512);
+ if (sce->sf_idx[w*16+g] != prevsc)
+ fflag = 1;
+ nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
+ sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ start += sce->ics.swb_sizes[g];
+ }
+ }
+ if (nminscaler < minscaler || sce->ics.num_windows > 1) {
+ /** SF difference limit violation risk. Must re-clamp. */
+ minscaler = nminscaler;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ for (g = 0; g < sce->ics.num_swb; g++) {
+ sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
+ sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ }
+ }
+ }
+ its++;
+ } while (fflag && its < 10);
+ } while (fflag && its < maxits);
+
+ prev = -1;
+ for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+ /** Make sure proper codebooks are set */
+ for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+ if (!sce->zeroes[w*16+g]) {
+ sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+ if (sce->band_type[w*16+g] <= 0) {
+ sce->zeroes[w*16+g] = 1;
+ sce->band_type[w*16+g] = 0;
+ }
+ } else {
+ sce->band_type[w*16+g] = 0;
+ }
+ /** Check that there are no SF delta range violations */
+ if (!sce->zeroes[w*16+g]) {
+ if (prev != -1) {
+ int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
+ av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
+ }
+ prev = sce->sf_idx[w*16+g];
+ }
+ }
+ }
+}
+
+#endif /* AVCODEC_AACCODER_TWOLOOP_H */

View File

@ -1,29 +1,135 @@
$OpenBSD: patch-libavcodec_aacenc_c,v 1.4 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenc_c,v 1.5 2015/10/13 05:44:18 ajacoutot Exp $
aacenc: copy PRNG from the decoder
avcodec/aacenc: use AV_OPT_TYPE_BOOL
AAC encoder: tweak rate-distortion logic
AAC encoder: Extensive improvements
AAC encoder: memoize quantize_band_cost
aacenc: add support for encoding 7.1 channel audio
aacenc: add support for changing options based on a profile
aacenc: shorten name of ff_aac_adjust_common_prediction
--- libavcodec/aacenc.c.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenc.c Wed Sep 23 03:54:56 2015
@@ -489,7 +489,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
+++ libavcodec/aacenc.c Mon Oct 12 22:07:21 2015
@@ -46,6 +46,44 @@
#include "psymodel.h"
+struct AACProfileOptions {
+ int profile;
+ struct AACEncOptions opts;
+};
+
+ /**
+ * List of currently supported profiles, anything not listed isn't supported.
+ */
+static const struct AACProfileOptions aacenc_profiles[] = {
+ {FF_PROFILE_AAC_MAIN,
+ { /* Main profile, all advanced encoding abilities enabled */
+ .mid_side = 0,
+ .pns = 1,
+ .tns = 0,
+ .pred = OPT_REQUIRED,
+ .intensity_stereo = 1,
+ },
+ },
+ {FF_PROFILE_AAC_LOW,
+ { /* Default profile, these are the settings that get set by default */
+ .mid_side = 0,
+ .pns = 1,
+ .tns = 0,
+ .pred = OPT_NEEDS_MAIN,
+ .intensity_stereo = 1,
+ },
+ },
+ {FF_PROFILE_MPEG2_AAC_LOW,
+ { /* Strict MPEG 2 Part 7 compliance profile */
+ .mid_side = 0,
+ .pns = OPT_BANNED,
+ .tns = 0,
+ .pred = OPT_BANNED,
+ .intensity_stereo = 1,
+ },
+ },
+};
+
/**
* Make AAC audio config object.
* @see 1.6.2.1 "Syntax - AudioSpecificConfig"
@@ -54,11 +92,12 @@ static void put_audio_specific_config(AVCodecContext *
{
PutBitContext pb;
AACEncContext *s = avctx->priv_data;
+ int channels = s->channels - (s->channels == 8 ? 1 : 0);
init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
put_bits(&pb, 5, s->profile+1); //profile
put_bits(&pb, 4, s->samplerate_index); //sample rate index
- put_bits(&pb, 4, s->channels);
+ put_bits(&pb, 4, channels);
//GASpecificConfig
put_bits(&pb, 1, 0); //frame length - 1024 samples
put_bits(&pb, 1, 0); //does not depend on core coder
@@ -71,6 +110,16 @@ static void put_audio_specific_config(AVCodecContext *
flush_put_bits(&pb);
}
+void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
+{
+ int sf, g;
+ for (sf = 0; sf < 256; sf++) {
+ for (g = 0; g < 128; g++) {
+ s->quantize_band_cost_cache[sf][g].bits = -1;
+ }
+ }
+}
+
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
SingleChannelElement *sce, \
@@ -258,6 +307,8 @@ static void apply_intensity_stereo(ChannelElement *cpe
start += ics->swb_sizes[g];
continue;
}
+ if (cpe->ms_mask[w*16 + g])
+ p *= -1;
for (i = 0; i < ics->swb_sizes[g]; i++) {
float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
cpe->ch[0].coeffs[start+i] = sum;
@@ -279,7 +330,7 @@ static void apply_mid_side_stereo(ChannelElement *cpe)
for (w2 = 0; w2 < ics->group_len[w]; w2++) {
int start = (w+w2) * 128;
for (g = 0; g < ics->num_swb; g++) {
- if (!cpe->ms_mask[w*16 + g]) {
+ if (!cpe->ms_mask[w*16 + g] && !cpe->is_mask[w*16 + g]) {
start += ics->swb_sizes[g];
continue;
}
@@ -489,7 +540,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
float **samples = s->planar_samples, *samples2, *la, *overlap;
ChannelElement *cpe;
SingleChannelElement *sce;
- int i, ch, w, chans, tag, start_ch, ret;
+ int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
+ int target_bits, rate_bits, too_many_bits, too_few_bits;
int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
int chan_el_counter[4];
FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
@@ -581,14 +581,16 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
@@ -581,14 +633,14 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
}
if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
return ret;
+ frame_bits = its = 0;
do {
- int frame_bits;
+ int target_bits, too_many_bits, too_few_bits;
-
init_put_bits(&s->pb, avpkt->data, avpkt->size);
if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
@ -33,7 +139,7 @@ AAC encoder: tweak rate-distortion logic
memset(chan_el_counter, 0, sizeof(chan_el_counter));
for (i = 0; i < s->chan_map[0]; i++) {
FFPsyWindowInfo* wi = windows + start_ch;
@@ -611,7 +613,15 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
@@ -611,9 +663,20 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
if (sce->band_type[w] > RESERVED_BT)
sce->band_type[w] = 0;
}
@ -42,14 +148,46 @@ AAC encoder: tweak rate-distortion logic
s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
+ if (s->psy.bitres.alloc > 0) {
+ /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
+ target_bits += s->psy.bitres.alloc;
+ target_bits += s->psy.bitres.alloc
+ * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
+ s->psy.bitres.alloc /= chans;
+ }
+ s->cur_type = tag;
for (ch = 0; ch < chans; ch++) {
s->cur_channel = start_ch + ch;
+ if (s->options.pns && s->coder->mark_pns)
+ s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
@@ -692,36 +702,69 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
}
if (chans > 1
@@ -655,8 +718,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
s->coder->search_for_pred(s, sce);
if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
}
- if (s->coder->adjust_common_prediction)
- s->coder->adjust_common_prediction(s, cpe);
+ if (s->coder->adjust_common_pred)
+ s->coder->adjust_common_pred(s, cpe);
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
@@ -665,13 +728,11 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
}
s->cur_channel = start_ch;
}
- if (s->options.stereo_mode) { /* Mid/Side stereo */
- if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
+ if (s->options.mid_side) { /* Mid/Side stereo */
+ if (s->options.mid_side == -1 && s->coder->search_for_ms)
s->coder->search_for_ms(s, cpe);
else if (cpe->common_window)
memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
- for (w = 0; w < 128; w++)
- cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
apply_mid_side_stereo(cpe);
}
adjust_frame_information(cpe, chans);
@@ -692,36 +753,73 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
start_ch += chans;
}
@ -72,23 +210,25 @@ AAC encoder: tweak rate-distortion logic
- s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
+ /* rate control stuff
+ * target either the nominal bitrate, or what psy's bit reservoir says to target
+ * whichever is greatest
+ * allow between the nominal bitrate, and what psy's bit reservoir says to target
+ * but drift towards the nominal bitrate always
+ */
+ frame_bits = put_bits_count(&s->pb);
+ target_bits = FFMAX(target_bits, avctx->bit_rate * 1024 / avctx->sample_rate);
+ target_bits = FFMIN(target_bits, 6144 * s->channels - 3);
+
+ rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
+ rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
+ too_many_bits = FFMAX(target_bits, rate_bits);
+ too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
+ too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
+ /* When using ABR, be strict (but only for increasing) */
+ too_many_bits = target_bits + target_bits/2;
+ too_few_bits = target_bits - target_bits/8;
+ too_few_bits = too_few_bits - too_few_bits/8;
+ too_many_bits = too_many_bits + too_many_bits/2;
+
+ if ( its == 0 /* for steady-state Q-scale tracking */
+ || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
+ || frame_bits >= 6144 * s->channels - 3 )
+ {
+ float ratio = ((float)target_bits) / frame_bits;
+ float ratio = ((float)rate_bits) / frame_bits;
+
+ if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
+ /*
@ -106,7 +246,7 @@ AAC encoder: tweak rate-distortion logic
+ s->lambda = FFMIN(s->lambda * ratio, 65536.f);
+
+ /* Keep iterating if we must reduce and lambda is in the sky */
+ if (s->lambda < 300.f || ratio > 0.9f) {
+ if ((s->lambda < 300.f || ratio > 0.9f) && (s->lambda > 10.f || ratio < 1.1f)) {
+ break;
+ } else {
+ if (is_mode || ms_mode || tns_mode || pred_mode) {
@ -128,6 +268,8 @@ AAC encoder: tweak rate-distortion logic
put_bits(&s->pb, 3, TYPE_END);
flush_put_bits(&s->pb);
avctx->frame_bits = put_bits_count(&s->pb);
+ s->lambda_sum += s->lambda;
+ s->lambda_count++;
- // rate control stuff
- if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
@ -139,11 +281,172 @@ AAC encoder: tweak rate-distortion logic
if (!frame)
s->last_frame++;
@@ -861,6 +904,7 @@ static av_cold int aac_encode_init(AVCodecContext *avc
@@ -737,6 +835,8 @@ static av_cold int aac_encode_end(AVCodecContext *avct
{
AACEncContext *s = avctx->priv_data;
+ av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
+
ff_mdct_end(&s->mdct1024);
ff_mdct_end(&s->mdct128);
ff_psy_end(&s->psy);
@@ -790,81 +890,88 @@ alloc_fail:
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
AACEncContext *s = avctx->priv_data;
+ const AACEncOptions *p_opt = NULL;
int i, ret = 0;
const uint8_t *sizes[2];
uint8_t grouping[AAC_MAX_CHANNELS];
int lengths[2];
+ s->channels = avctx->channels;
+ s->chan_map = aac_chan_configs[s->channels-1];
+ s->random_state = 0x1f2e3d4c;
+ s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
+ avctx->extradata_size = 5;
avctx->frame_size = 1024;
+ avctx->initial_padding = 1024;
+ avctx->bit_rate = (int)FFMIN(
+ 6144 * s->channels / 1024.0 * avctx->sample_rate,
+ avctx->bit_rate);
+ avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
+ avctx->profile;
for (i = 0; i < 16; i++)
if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
break;
+ s->samplerate_index = i;
- s->channels = avctx->channels;
-
- ERROR_IF(i == 16 || i >= ff_aac_swb_size_1024_len || i >= ff_aac_swb_size_128_len,
+ ERROR_IF(s->samplerate_index == 16 ||
+ s->samplerate_index >= ff_aac_swb_size_1024_len ||
+ s->samplerate_index >= ff_aac_swb_size_128_len,
"Unsupported sample rate %d\n", avctx->sample_rate);
- ERROR_IF(s->channels > AAC_MAX_CHANNELS,
+ ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
"Unsupported number of channels: %d\n", s->channels);
WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
"Too many bits per frame requested, clamping to max\n");
- if (avctx->profile == FF_PROFILE_AAC_MAIN) {
- s->options.pred = 1;
- } else if ((avctx->profile == FF_PROFILE_AAC_LOW ||
- avctx->profile == FF_PROFILE_UNKNOWN) && s->options.pred) {
- s->profile = 0; /* Main */
- WARN_IF(1, "Prediction requested, changing profile to AAC-Main\n");
- } else if (avctx->profile == FF_PROFILE_AAC_LOW ||
- avctx->profile == FF_PROFILE_UNKNOWN) {
- s->profile = 1; /* Low */
- } else {
- ERROR_IF(1, "Unsupported profile %d\n", avctx->profile);
+
+ for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++) {
+ if (avctx->profile == aacenc_profiles[i].profile) {
+ p_opt = &aacenc_profiles[i].opts;
+ break;
+ }
}
+ ERROR_IF(!p_opt, "Unsupported encoding profile: %d\n", avctx->profile);
+ AAC_OPT_SET(&s->options, p_opt, 1, coder);
+ AAC_OPT_SET(&s->options, p_opt, 0, pns);
+ AAC_OPT_SET(&s->options, p_opt, 0, tns);
+ AAC_OPT_SET(&s->options, p_opt, 0, pred);
+ AAC_OPT_SET(&s->options, p_opt, 1, mid_side);
+ AAC_OPT_SET(&s->options, p_opt, 0, intensity_stereo);
+ if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW)
+ s->profile = FF_PROFILE_AAC_LOW;
+ else
+ s->profile = avctx->profile;
+ s->coder = &ff_aac_coders[s->options.coder];
- if (s->options.aac_coder != AAC_CODER_TWOLOOP) {
+ if (s->options.coder != AAC_CODER_TWOLOOP) {
s->options.intensity_stereo = 0;
s->options.pns = 0;
}
- avctx->bit_rate = (int)FFMIN(
- 6144 * s->channels / 1024.0 * avctx->sample_rate,
- avctx->bit_rate);
-
- s->samplerate_index = i;
-
- s->chan_map = aac_chan_configs[s->channels-1];
-
if ((ret = dsp_init(avctx, s)) < 0)
goto fail;
if ((ret = alloc_buffers(avctx, s)) < 0)
goto fail;
- avctx->extradata_size = 5;
put_audio_specific_config(avctx);
- sizes[0] = ff_aac_swb_size_1024[i];
- sizes[1] = ff_aac_swb_size_128[i];
- lengths[0] = ff_aac_num_swb_1024[i];
- lengths[1] = ff_aac_num_swb_128[i];
+ sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
+ sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
+ lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
+ lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
for (i = 0; i < s->chan_map[0]; i++)
grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
s->chan_map[0], grouping)) < 0)
goto fail;
s->psypp = ff_psy_preprocess_init(avctx);
- s->coder = &ff_aac_coders[s->options.aac_coder];
ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
if (HAVE_MIPSDSPR1)
ff_aac_coder_init_mips(s);
s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
+ s->random_state = 0x1f2e3d4c;
- s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
-
ff_aac_tableinit();
- avctx->initial_padding = 1024;
ff_af_queue_init(avctx, &s->afq);
return 0;
@@ -875,27 +982,16 @@ fail:
#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
static const AVOption aacenc_options[] = {
- {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
- {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
- {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
- {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
- {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
- {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
- {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
- {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
- {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
- {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "aac_pns"},
- {"disable", "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
- {"enable", "Enable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
- {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
- {"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
- {"enable", "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
- {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_tns"},
- {"disable", "Disable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
- {"enable", "Enable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
- {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pred"},
- {"disable", "Disable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
- {"enable", "Enable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
+ {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, -1, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
+ {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+ {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+ {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+ {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+ {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
+ {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
+ {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
+ {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
+ {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = OPT_AUTO}, -1, 1, AACENC_FLAGS},
{NULL}
};

View File

@ -1,12 +1,75 @@
$OpenBSD: patch-libavcodec_aacenc_h,v 1.2 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenc_h,v 1.3 2015/10/13 05:44:18 ajacoutot Exp $
aacenc: copy PRNG from the decoder
AAC encoder: tweak rate-distortion logic
AAC encoder: Extensive improvements
AAC encoder: memoize quantize_band_cost
aacenc: add support for changing options based on a profile
aacenc: increase size of s->planar_samples[] from 6 to 8
aacenc: shorten name of ff_aac_adjust_common_prediction
--- libavcodec/aacenc.h.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenc.h Wed Sep 23 03:54:58 2015
@@ -96,9 +96,12 @@ typedef struct AACEncContext {
+++ libavcodec/aacenc.h Mon Oct 12 22:06:52 2015
@@ -42,11 +42,11 @@ typedef enum AACCoder {
}AACCoder;
typedef struct AACEncOptions {
- int stereo_mode;
- int aac_coder;
+ int coder;
int pns;
int tns;
int pred;
+ int mid_side;
int intensity_stereo;
} AACEncOptions;
@@ -61,11 +61,12 @@ typedef struct AACCoefficientsEncoder {
int scale_idx, int cb, const float lambda, int rtz);
void (*encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce);
void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
- void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
+ void (*adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe);
void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
+ void (*mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe);
void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
@@ -74,6 +75,15 @@ typedef struct AACCoefficientsEncoder {
extern AACCoefficientsEncoder ff_aac_coders[];
+typedef struct AACQuantizeBandCostCacheEntry { /* one memoized quantize_band_cost() result, see quantize_band_cost_cached() */
+ float rd; ///< value returned by quantize_band_cost() when the entry was filled
+ float energy; ///< value written through quantize_band_cost()'s energy out-parameter
+ int bits; ///< -1 means uninitialized entry
+ char cb; ///< cb argument the entry was computed with
+ char rtz; ///< rtz argument the entry was computed with
+ char padding[2]; ///< Keeps the entry size a multiple of 32 bits
+} AACQuantizeBandCostCacheEntry;
+
/**
* AAC encoder context
*/
@@ -84,7 +94,7 @@ typedef struct AACEncContext {
FFTContext mdct1024; ///< long (1024 samples) frame transform context
FFTContext mdct128; ///< short (128 samples) frame transform context
AVFloatDSPContext *fdsp;
- float *planar_samples[6]; ///< saved preprocessed input
+ float *planar_samples[8]; ///< saved preprocessed input
int profile; ///< copied from avctx
LPCContext lpc; ///< used by TNS
@@ -96,18 +106,27 @@ typedef struct AACEncContext {
FFPsyContext psy;
struct FFPsyPreprocessContext* psypp;
AACCoefficientsEncoder *coder;
@ -15,8 +78,23 @@ AAC encoder: tweak rate-distortion logic
int last_frame;
+ int random_state;
float lambda;
+ float lambda_sum; ///< sum(lambda), for Qvg reporting
+ int lambda_count; ///< count(lambda), for Qvg reporting
+ enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
+
AudioFrameQueue afq;
DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
+ AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]; ///< memoization area for quantize_band_cost
+
struct {
float *samples;
} buffer;
} AACEncContext;
void ff_aac_coder_init_mips(AACEncContext *c);
+void ff_quantize_band_cost_cache_init(struct AACEncContext *s);
+
#endif /* AVCODEC_AACENC_H */

View File

@ -1,14 +1,58 @@
$OpenBSD: patch-libavcodec_aacenc_is_c,v 1.1 2015/09/12 09:44:54 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenc_is_c,v 1.2 2015/10/13 05:44:18 ajacoutot Exp $
aacenc_is: ignore PNS band types
aacenc_is: take absolute coefficient value upon energy calculations
--- libavcodec/aacenc_is.c.orig Sat Sep 12 02:07:50 2015
+++ libavcodec/aacenc_is.c Sat Sep 12 02:08:20 2015
@@ -107,10 +107,15 @@ void ff_aac_search_for_is(AACEncContext *s, AVCodecCon
AAC encoder: Extensive improvements
--- libavcodec/aacenc_is.c.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenc_is.c Mon Oct 12 22:05:06 2015
@@ -45,6 +45,11 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext
float dist1 = 0.0f, dist2 = 0.0f;
struct AACISError is_error = {0};
+ if (ener01 <= 0 || ener0 <= 0) {
+ is_error.pass = 0;
+ return is_error;
+ }
+
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
@@ -63,15 +68,15 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext
sce0->ics.swb_sizes[g],
sce0->sf_idx[(w+w2)*16+g],
sce0->band_type[(w+w2)*16+g],
- s->lambda / band0->threshold, INFINITY, NULL, 0);
+ s->lambda / band0->threshold, INFINITY, NULL, NULL, 0);
dist1 += quantize_band_cost(s, &R[start + (w+w2)*128], R34,
sce1->ics.swb_sizes[g],
sce1->sf_idx[(w+w2)*16+g],
sce1->band_type[(w+w2)*16+g],
- s->lambda / band1->threshold, INFINITY, NULL, 0);
+ s->lambda / band1->threshold, INFINITY, NULL, NULL, 0);
dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g],
is_sf_idx, is_band_type,
- s->lambda / minthr, INFINITY, NULL, 0);
+ s->lambda / minthr, INFINITY, NULL, NULL, 0);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
@@ -85,6 +90,7 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext
is_error.error = fabsf(dist1 - dist2);
is_error.dist1 = dist1;
is_error.dist2 = dist2;
+ is_error.ener01 = ener01;
return is_error;
}
@@ -105,27 +111,34 @@ void ff_aac_search_for_is(AACEncContext *s, AVCodecCon
if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) &&
cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
- float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
+ float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f, ener01p = 0.0f;
struct AACISError ph_err1, ph_err2, *erf;
+ if (sce0->band_type[w*16+g] == NOISE_BT ||
+ sce1->band_type[w*16+g] == NOISE_BT) {
@ -19,8 +63,29 @@ aacenc_is: take absolute coefficient value upon energy calculations
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
- float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
- float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
+ float coef0 = fabsf(sce0->pcoeffs[start+(w+w2)*128+i]);
+ float coef1 = fabsf(sce1->pcoeffs[start+(w+w2)*128+i]);
+ float coef0 = fabsf(sce0->coeffs[start+(w+w2)*128+i]);
+ float coef1 = fabsf(sce1->coeffs[start+(w+w2)*128+i]);
ener0 += coef0*coef0;
ener1 += coef1*coef1;
ener01 += (coef0 + coef1)*(coef0 + coef1);
+ ener01p += (coef0 - coef1)*(coef0 - coef1);
}
}
ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
- ener0, ener1, ener01, 0, -1);
+ ener0, ener1, ener01p, 0, -1);
ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
ener0, ener1, ener01, 0, +1);
- erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
+ erf = (ph_err1.pass && ph_err1.error < ph_err2.error) ? &ph_err1 : &ph_err2;
if (erf->pass) {
cpe->is_mask[w*16+g] = 1;
- cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01);
+ cpe->ms_mask[w*16+g] = 0;
+ cpe->ch[0].is_ener[w*16+g] = sqrt(ener0 / erf->ener01);
cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
- cpe->ch[1].band_type[w*16+g] = erf->phase ? INTENSITY_BT : INTENSITY_BT2;
+ cpe->ch[1].band_type[w*16+g] = (erf->phase > 0) ? INTENSITY_BT : INTENSITY_BT2;
count++;
}
}

View File

@ -0,0 +1,14 @@
$OpenBSD: patch-libavcodec_aacenc_is_h,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: Extensive improvements
--- libavcodec/aacenc_is.h.orig Mon Oct 12 21:57:04 2015
+++ libavcodec/aacenc_is.h Mon Oct 12 22:05:07 2015
@@ -39,6 +39,7 @@ struct AACISError {
float error; /* fabs(dist1 - dist2) */
float dist1; /* From original coeffs */
float dist2; /* From IS'd coeffs */
+ float ener01;
};
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,

View File

@ -1,10 +1,23 @@
$OpenBSD: patch-libavcodec_aacenc_pred_c,v 1.1 2015/09/12 09:44:54 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenc_pred_c,v 1.2 2015/10/13 05:44:18 ajacoutot Exp $
aacenc_pred: disable PNS SFBs and disable right predictors with IS
--- libavcodec/aacenc_pred.c.orig Sat Sep 12 02:08:04 2015
+++ libavcodec/aacenc_pred.c Sat Sep 12 02:08:25 2015
@@ -262,7 +262,9 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleCh
AAC encoder: Extensive improvements
aacenc: shorten name of ff_aac_adjust_common_prediction
--- libavcodec/aacenc_pred.c.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenc_pred.c Mon Oct 12 22:05:12 2015
@@ -148,7 +148,7 @@ static inline int update_counters(IndividualChannelStr
return 0;
}
-void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe)
+void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe)
{
int start, w, w2, g, i, count = 0;
SingleChannelElement *sce0 = &cpe->ch[0];
@@ -262,14 +262,16 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleCh
const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
@ -15,3 +28,29 @@ aacenc_pred: disable PNS SFBs and disable right predictors with IS
continue;
/* Normal coefficients */
abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs);
dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
O34, num_coeffs, sce->sf_idx[sfb],
- cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0);
+ cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0);
cost_coeffs += cost1;
/* Encoded coefficients - needed for #bits, band type and quant. error */
@@ -282,7 +284,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleCh
cb_p = cb_n;
quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
- &cost2, 0);
+ &cost2, NULL, 0);
/* Reconstructed coefficients - needed for distortion measurements */
for (i = 0; i < num_coeffs; i++)
@@ -294,7 +296,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleCh
cb_p = cb_n;
dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
P34, num_coeffs, sce->sf_idx[sfb],
- cb_p, s->lambda / band->threshold, INFINITY, NULL, 0);
+ cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0);
for (i = 0; i < num_coeffs; i++)
dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
dist_spec_err *= s->lambda / band->threshold;

View File

@ -0,0 +1,15 @@
$OpenBSD: patch-libavcodec_aacenc_pred_h,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
aacenc: shorten name of ff_aac_adjust_common_prediction
--- libavcodec/aacenc_pred.h.orig Mon Oct 12 22:03:10 2015
+++ libavcodec/aacenc_pred.h Mon Oct 12 22:05:15 2015
@@ -40,7 +40,7 @@
#define PRED_SFB_START 10
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce);
-void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe);
+void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe);
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce);
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce);

View File

@ -1,27 +1,156 @@
$OpenBSD: patch-libavcodec_aacenc_quantization_h,v 1.1 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenc_quantization_h,v 1.2 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: refactor to resynchronize MIPS port
--- libavcodec/aacenc_quantization.h.orig Wed Sep 23 03:50:29 2015
+++ libavcodec/aacenc_quantization.h Wed Sep 23 03:50:38 2015
@@ -249,6 +249,20 @@ static inline float quantize_band_cost(struct AACEncCo
cb, lambda, uplim, bits, rtz);
AAC encoder: Extensive improvements
AAC encoder: memoize quantize_band_cost
aacenc_quantization: fix header description
--- libavcodec/aacenc_quantization.h.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenc_quantization.h Mon Oct 12 22:08:15 2015
@@ -1,5 +1,5 @@
/*
- * AAC encoder intensity stereo
+ * AAC encoder quantizer
* Copyright (C) 2015 Rostislav Pehlivanov
*
* This file is part of FFmpeg.
@@ -43,7 +43,7 @@ static av_always_inline float quantize_and_encode_band
PutBitContext *pb, const float *in, float *out,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int BT_ZERO, int BT_UNSIGNED,
+ int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED,
int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
const float ROUNDING)
{
@@ -54,6 +54,7 @@ static av_always_inline float quantize_and_encode_band
const float CLIPPED_ESCAPE = 165140.0f*IQ;
int i, j;
float cost = 0;
+ float qenergy = 0;
const int dim = BT_PAIR ? 2 : 4;
int resbits = 0;
int off;
@@ -63,6 +64,8 @@ static av_always_inline float quantize_and_encode_band
cost += in[i]*in[i];
if (bits)
*bits = 0;
+ if (energy)
+ *energy = qenergy;
if (out) {
for (i = 0; i < size; i += dim)
for (j = 0; j < dim; j++)
@@ -113,11 +116,13 @@ static av_always_inline float quantize_and_encode_band
out[i+j] = in[i+j] >= 0 ? quantized : -quantized;
if (vec[j] != 0.0f)
curbits++;
+ qenergy += quantized*quantized;
rd += di*di;
}
} else {
for (j = 0; j < dim; j++) {
quantized = vec[j]*IQ;
+ qenergy += quantized*quantized;
if (out)
out[i+j] = quantized;
rd += (in[i+j] - quantized)*(in[i+j] - quantized);
@@ -149,6 +154,8 @@ static av_always_inline float quantize_and_encode_band
if (bits)
*bits = resbits;
+ if (energy)
+ *energy = qenergy;
return cost;
}
@@ -156,7 +163,7 @@ static inline float quantize_and_encode_band_cost_NONE
const float *in, float *quant, const float *scaled,
int size, int scale_idx, int cb,
const float lambda, const float uplim,
- int *bits) {
+ int *bits, float *energy) {
av_assert0(0);
return 0.0f;
}
@@ -167,10 +174,10 @@ static float quantize_and_encode_band_cost_ ## NAME(
PutBitContext *pb, const float *in, float *quant, \
const float *scaled, int size, int scale_idx, \
int cb, const float lambda, const float uplim, \
- int *bits) { \
+ int *bits, float *energy) { \
return quantize_and_encode_band_cost_template( \
s, pb, in, quant, scaled, size, scale_idx, \
- BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \
+ BT_ESC ? ESC_BT : cb, lambda, uplim, bits, energy, \
BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \
ROUNDING); \
}
@@ -190,7 +197,7 @@ static float (*const quantize_and_encode_band_cost_arr
PutBitContext *pb, const float *in, float *quant,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits) = {
+ int *bits, float *energy) = {
quantize_and_encode_band_cost_ZERO,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_SQUAD,
@@ -214,7 +221,7 @@ static float (*const quantize_and_encode_band_cost_rtz
PutBitContext *pb, const float *in, float *quant,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits) = {
+ int *bits, float *energy) = {
quantize_and_encode_band_cost_ZERO,
quantize_and_encode_band_cost_SQUAD,
quantize_and_encode_band_cost_SQUAD,
@@ -235,26 +242,42 @@ static float (*const quantize_and_encode_band_cost_rtz
#define quantize_and_encode_band_cost( \
s, pb, in, quant, scaled, size, scale_idx, cb, \
- lambda, uplim, bits, rtz) \
+ lambda, uplim, bits, energy, rtz) \
((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
s, pb, in, quant, scaled, size, scale_idx, cb, \
- lambda, uplim, bits)
+ lambda, uplim, bits, energy)
static inline float quantize_band_cost(struct AACEncContext *s, const float *in,
const float *scaled, int size, int scale_idx,
int cb, const float lambda, const float uplim,
- int *bits, int rtz)
+ int *bits, float *energy, int rtz)
{
return quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
- cb, lambda, uplim, bits, rtz);
+ cb, lambda, uplim, bits, energy, rtz);
}
+static inline int quantize_band_cost_bits(struct AACEncContext *s, const float *in,
+ const float *scaled, int size, int scale_idx,
+ int cb, const float lambda, const float uplim,
+ int *bits, int rtz)
+ int *bits, float *energy, int rtz)
+{
+ int _bits;
+ int auxbits;
+ quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
+ cb, 0.0f, uplim, &_bits, rtz);
+ cb, 0.0f, uplim, &auxbits, energy, rtz);
+ if (bits) {
+ *bits = _bits;
+ *bits = auxbits;
+ }
+ return _bits;
+ return auxbits;
+}
+
static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
const float *in, float *out, int size, int scale_idx,
int cb, const float lambda, int rtz)
{
quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
- INFINITY, NULL, rtz);
+ INFINITY, NULL, NULL, rtz);
}
+
+#include "aacenc_quantization_misc.h"
#endif /* AVCODEC_AACENC_QUANTIZATION_H */

View File

@ -0,0 +1,59 @@
$OpenBSD: patch-libavcodec_aacenc_quantization_misc_h,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: memoize quantize_band_cost
--- libavcodec/aacenc_quantization_misc.h.orig Mon Oct 12 22:00:26 2015
+++ libavcodec/aacenc_quantization_misc.h Mon Oct 12 22:05:24 2015
@@ -0,0 +1,52 @@
+/*
+ * AAC encoder quantization
+ * Copyright (C) 2015 Claudio Freire
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder quantization misc reusable function templates
+ * @author Claudio Freire ( klaussfreire gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_QUANTIZATION_MISC_H
+#define AVCODEC_AACENC_QUANTIZATION_MISC_H
+
+static inline float quantize_band_cost_cached(struct AACEncContext *s, int w, int g, const float *in,
+ const float *scaled, int size, int scale_idx,
+ int cb, const float lambda, const float uplim,
+ int *bits, float *energy, int rtz)
+{ /* Memoizing front end for quantize_band_cost(): one cache slot per (scale_idx, w*16+g). */
+ AACQuantizeBandCostCacheEntry *entry;
+ av_assert1(scale_idx >= 0 && scale_idx < 256); /* scale_idx indexes the first cache dimension */
+ entry = &s->quantize_band_cost_cache[scale_idx][w*16+g];
+ if (entry->bits < 0 || entry->cb != cb || entry->rtz != rtz) { /* miss: slot unset (bits < 0) or filled for another cb/rtz */
+ entry->rd = quantize_band_cost(s, in, scaled, size, scale_idx,
+ cb, lambda, uplim, &entry->bits, &entry->energy, rtz); /* NOTE(review): in, scaled, size, lambda and uplim are not part of the hit test; presumably callers reset the cache when they change - confirm */
+ entry->cb = cb; /* remember which cb/rtz the slot now holds */
+ entry->rtz = rtz;
+ }
+ if (bits) /* both out-parameters are optional */
+ *bits = entry->bits;
+ if (energy)
+ *energy = entry->energy;
+ return entry->rd; /* cached or freshly computed quantize_band_cost() result */
+}
+
+#endif /* AVCODEC_AACENC_QUANTIZATION_MISC_H */

View File

@ -1,12 +1,20 @@
$OpenBSD: patch-libavcodec_aacenc_utils_h,v 1.2 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenc_utils_h,v 1.3 2015/10/13 05:44:18 ajacoutot Exp $
aacenc: copy PRNG from the decoder
AAC encoder: simplify and speed up find_min_book
AAC encoder: Extensive improvements
aacenc_utils: add 'inline' flag to find_form_factor, silence warning
aacenc: add support for changing options based on a profile
aacenc_utils: fit find_form_factor() below 80 chars per line
--- libavcodec/aacenc_utils.h.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenc_utils.h Wed Sep 23 03:52:58 2015
@@ -89,13 +89,10 @@ static inline int find_min_book(float maxval, int sf)
+++ libavcodec/aacenc_utils.h Mon Oct 12 22:07:45 2015
@@ -89,16 +89,62 @@ static inline int find_min_book(float maxval, int sf)
float Q34 = sqrtf(Q * sqrtf(Q));
int qmaxval, cb;
qmaxval = maxval * Q34 + C_QUANT;
@ -24,10 +32,68 @@ AAC encoder: simplify and speed up find_min_book
return cb;
}
@@ -126,6 +123,15 @@ static inline int quant_array_idx(const float val, con
}
}
+static inline float find_form_factor(int group_len, int swb_size, float thresh,
+ const float *scaled, float nzslope) { /* Returns the weighted mean of a per-window form term over group_len windows, or 1.0f when no window's energy exceeds thresh. */
+ const float iswb_size = 1.0f / swb_size;
+ const float iswb_sizem1 = 1.0f / (swb_size - 1); /* NOTE(review): presumably swb_size == 1 is never passed, since this would divide by zero - confirm */
+ const float ethresh = thresh;
+ float form = 0.0f, weight = 0.0f;
+ int w2, i;
+ for (w2 = 0; w2 < group_len; w2++) { /* window w2 reads scaled[w2*128 .. w2*128 + swb_size - 1] */
+ float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f; /* e: sum of |coef|, e2: sum of coef^2, maxval: largest |coef| */
+ float nzl = 0; /* soft count of lines reaching ethresh */
+ for (i = 0; i < swb_size; i++) {
+ float s = fabsf(scaled[w2*128+i]);
+ maxval = FFMAX(maxval, s);
+ e += s;
+ e2 += s *= s; /* squares s in place: from here on s is the squared magnitude */
+ /* We really don't want a hard non-zero-line count, since
+ * even below-threshold lines do add up towards band spectral power.
+ * So, fall steeply towards zero, but smoothly
+ */
+ if (s >= ethresh) { /* compares the squared magnitude against ethresh */
+ nzl += 1.0f;
+ } else {
+ nzl += powf(s / ethresh, nzslope); /* fractional credit for a line below ethresh */
+ }
+ }
+ if (e2 > thresh) { /* windows whose summed squares do not exceed thresh contribute nothing */
+ float frm;
+ e *= iswb_size; /* e becomes the mean |coef| */
+
+ /** compute variance */
+ for (i = 0; i < swb_size; i++) {
+ float d = fabsf(scaled[w2*128+i]) - e;
+ var += d*d;
+ }
+ var = sqrtf(var * iswb_sizem1); /* square root taken, so var ends up holding a standard deviation */
+
+ e2 *= iswb_size; /* e2 becomes the mean squared coef */
+ frm = e / FFMIN(e+4*var,maxval);
+ form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl); /* nzl is floored at 0.5f before dividing */
+ weight += e2; /* each window is weighted by its mean squared coef */
+ }
+ }
+ if (weight > 0) {
+ return form / weight;
+ } else {
+ return 1.0f; /* no window passed the thresh test */
+ }
+}
+
/** Return the minimum scalefactor where the quantized coef does not clip. */
static inline uint8_t coef2minsf(float coef)
{
@@ -128,6 +174,23 @@ static inline int quant_array_idx(const float val, con
return index;
}
+/**
+ * approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
+ */
+static av_always_inline float bval2bmax(float b)
+{
+ return 0.001f + 0.0035f * (b*b*b) / (15.5f*15.5f*15.5f); /* i.e. 0.001f + 0.0035f * (b / 15.5f)^3; b is not clamped here */
+}
+
+/*
@ -37,6 +103,52 @@ AAC encoder: simplify and speed up find_min_book
+{
+ union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
+ return v.s;
}
+}
+
#define ERROR_IF(cond, ...) \
if (cond) { \
av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
@@ -139,5 +202,43 @@ static inline int quant_array_idx(const float val, con
av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
}
+#define AAC_OPT_SET(e_opt, p_opt, bypass, name) /* Reconciles encoder option `name` with the profile's setting; expands to several statements (no do/while wrapper) and uses avctx and aacenc_profiles from the expansion site. */ \
+ ERROR_IF ((e_opt)->name == 1 && (p_opt)->name == OPT_BANNED, \
+ "Profile %i does not allow %s\n", avctx->profile, #name); \
+ ERROR_IF ((e_opt)->name == 0 && (p_opt)->name == OPT_REQUIRED, \
+ "Option %s is a requirement for this profile (%i)\n", \
+ #name, avctx->profile); \
+ if ((e_opt)->name == 1 && (p_opt)->name == OPT_NEEDS_MAIN && \
+ avctx->profile == FF_PROFILE_AAC_LOW) { \
+ WARN_IF(1, "Profile %i does not allow for %s, setting profile to " \
+ "\"aac_main\"(%i)\n", avctx->profile, #name, \
+ FF_PROFILE_AAC_MAIN); \
+ avctx->profile = FF_PROFILE_AAC_MAIN; /* option explicitly enabled: switch the profile instead of failing */ \
+ p_opt = &aacenc_profiles[FF_PROFILE_AAC_MAIN].opts; /* assigns to the p_opt argument, so it must be a modifiable lvalue */ \
+ } \
+ if ((e_opt)->name == 1 && (p_opt)->name == OPT_NEEDS_LTP && \
+ avctx->profile == FF_PROFILE_AAC_LOW) { \
+ WARN_IF(1, "Profile %i does not allow for %s, setting profile to " \
+ "\"aac_ltp\"(%i)\n", avctx->profile, #name, \
+ FF_PROFILE_AAC_LTP); \
+ avctx->profile = FF_PROFILE_AAC_LTP; \
+ p_opt = &aacenc_profiles[FF_PROFILE_AAC_LTP].opts; \
+ } \
+ if ((e_opt)->name == OPT_AUTO) { /* option left on auto: derive the value from the profile entry */ \
+ if ((p_opt)->name == OPT_BANNED) { \
+ (e_opt)->name = 0; \
+ } else if ((p_opt)->name == OPT_NEEDS_LTP) { \
+ (e_opt)->name = 0; \
+ } else if ((p_opt)->name == OPT_NEEDS_MAIN) { \
+ (e_opt)->name = 0; \
+ } else if ((p_opt)->name == OPT_REQUIRED) { \
+ (e_opt)->name = 1; \
+ } else if (bypass) { \
+ (e_opt)->name = (e_opt)->name; /* self-assignment: the option keeps the value OPT_AUTO */ \
+ } else { \
+ (e_opt)->name = (p_opt)->name; /* otherwise take the profile's own value */ \
+ } \
+ } \
+ av_log(avctx, AV_LOG_VERBOSE, "Option %s set to %i\n", #name, (e_opt)->name);
#endif /* AVCODEC_AACENC_UTILS_H */

View File

@ -1,17 +1,70 @@
$OpenBSD: patch-libavcodec_aacenctab_h,v 1.2 2015/09/27 06:35:33 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacenctab_h,v 1.3 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: simplify and speed up find_min_book
avcodec/aacenctab: Make aac_maxval_cb const
aacenc: add support for encoding 7.1 channel audio
aacenc: add support for changing options based on a profile
--- libavcodec/aacenctab.h.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacenctab.h Fri Sep 25 15:47:31 2015
@@ -110,4 +110,8 @@ static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,
+++ libavcodec/aacenctab.h Mon Oct 12 22:07:27 2015
@@ -36,21 +36,30 @@
/** Total number of codebooks, including special ones **/
#define CB_TOT_ALL 15
-#define AAC_MAX_CHANNELS 6
+/** Profile option settings **/
+#define OPT_AUTO -1
+#define OPT_BANNED -256
+#define OPT_NEEDS_LTP -384
+#define OPT_NEEDS_MAIN -512
+#define OPT_REQUIRED -768
+#define AAC_MAX_CHANNELS 8
+
extern const uint8_t *ff_aac_swb_size_1024[];
extern const int ff_aac_swb_size_1024_len;
extern const uint8_t *ff_aac_swb_size_128[];
extern const int ff_aac_swb_size_128_len;
/** default channel configurations */
-static const uint8_t aac_chan_configs[6][5] = {
- {1, TYPE_SCE}, // 1 channel - single channel element
- {1, TYPE_CPE}, // 2 channels - channel pair
- {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
- {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
- {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
- {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
+static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6] = {
+ {1, TYPE_SCE}, // 1 channel - single channel element
+ {1, TYPE_CPE}, // 2 channels - channel pair
+ {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
+ {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
+ {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
+ {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
+ {0}, // 7 channels - invalid without PCE
+ {5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 8 channels - front center + front stereo + side stereo + back stereo + LFE
};
/**
@@ -63,6 +72,8 @@ static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][A
{ 2, 0, 1, 3 },
{ 2, 0, 1, 3, 4 },
{ 2, 0, 1, 4, 5, 3 },
+ { 0 },
+ { 2, 0, 1, 6, 7, 4, 5, 3 },
};
/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
@@ -109,5 +120,9 @@ static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,
static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
+
+static const unsigned char aac_maxval_cb[] = { /* indexed by a maximum value v in 0..13: entry v is the lowest codebook index whose aac_cb_maxval[] entry is >= v */
+ 0, 1, 3, 5, 5, 7, 7, 7, 9, 9, 9, 9, 9, 11
+};
+
#endif /* AVCODEC_AACENCTAB_H */

View File

@ -1,9 +1,13 @@
$OpenBSD: patch-libavcodec_aacpsy_c,v 1.3 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_aacpsy_c,v 1.4 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: tweak rate-distortion logic
--- libavcodec/aacpsy.c.orig Wed Sep 23 03:55:17 2015
+++ libavcodec/aacpsy.c Wed Sep 23 03:55:25 2015
AAC encoder: Extensive improvements
AAC encoder: cosmetics from last commit
--- libavcodec/aacpsy.c.orig Mon Sep 7 21:58:01 2015
+++ libavcodec/aacpsy.c Mon Oct 12 22:05:35 2015
@@ -87,6 +87,7 @@ enum {
};
@ -12,15 +16,111 @@ AAC encoder: tweak rate-distortion logic
/* LAME psy model constants */
#define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
@@ -687,6 +688,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx
desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
0.85f, 1.15f);
@@ -157,6 +158,7 @@ typedef struct AacPsyContext{
} pe;
AacPsyCoeffs psy_coef[2][64];
AacPsyChannel *ch;
+ float global_quality; ///< normalized global quality taken from avctx
}AacPsyContext;
/**
@@ -299,7 +301,8 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
float bark;
int i, j, g, start;
float prev, minscale, minath, minsnr, pe_min;
- const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels;
+ int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels);
+
const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx);
const float num_bark = calc_bark((float)bandwidth);
@@ -307,9 +310,15 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
if (!ctx->model_priv_data)
return AVERROR(ENOMEM);
pctx = (AacPsyContext*) ctx->model_priv_data;
+ pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
+ if (ctx->avctx->flags & CODEC_FLAG_QSCALE) {
+ /* Use the target average bitrate to compute spread parameters */
+ chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
+ }
+
pctx->chan_bitrate = chan_bitrate;
- pctx->frame_bits = chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate;
+ pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
ctx->bitres.size = 6144 - pctx->frame_bits;
@@ -397,7 +406,7 @@ static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsy
int channel, int prev_type)
{
int i, j;
- int br = ctx->avctx->bit_rate / ctx->avctx->channels;
+ int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
int attack_ratio = br <= 16000 ? 18 : 10;
AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
AacPsyChannel *pch = &pctx->ch[channel];
@@ -507,7 +516,12 @@ static int calc_bit_demand(AacPsyContext *ctx, float p
ctx->pe.max = FFMAX(pe, ctx->pe.max);
ctx->pe.min = FFMIN(pe, ctx->pe.min);
- return FFMIN(ctx->frame_bits * bit_factor, ctx->frame_bits + size - bits);
+ /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
+ * reservoir starvation from producing zero-bit frames
+ */
+ return FFMIN(
+ ctx->frame_bits * bit_factor,
+ FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
}
static float calc_pe_3gpp(AacPsyBand *band)
@@ -677,16 +691,36 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx
/* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
ctx->ch[channel].entropy = pe;
- desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
- desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
- /* NOTE: PE correction is kept simple. During initial testing it had very
- * little effect on the final bitrate. Probably a good idea to come
- * back and do more testing later.
- */
- if (ctx->bitres.bits > 0)
- desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
- 0.85f, 1.15f);
+ if (ctx->avctx->flags & CODEC_FLAG_QSCALE) {
+ /* (2.5 * 120) achieves almost transparent rate, and we want to give
+ * ample room downwards, so we make that equivalent to QSCALE=2.4
+ */
+ desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
+ desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
+ desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
+
+ /* PE slope smoothing */
+ if (ctx->bitres.bits > 0) {
+ desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
+ desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
+ }
+
+ pctx->pe.max = FFMAX(pe, pctx->pe.max);
+ pctx->pe.min = FFMIN(pe, pctx->pe.min);
+ } else {
+ desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
+ desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
+
+ /* NOTE: PE correction is kept simple. During initial testing it had very
+ * little effect on the final bitrate. Probably a good idea to come
+ * back and do more testing later.
+ */
+ if (ctx->bitres.bits > 0)
+ desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
+ 0.85f, 1.15f);
+ }
pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
+ ctx->bitres.alloc = desired_bits;
if (desired_pe < pe) {
/* 5.6.1.3.4 "First Estimation of the reduction value" */
@@ -788,6 +790,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx
@@ -788,6 +822,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx
psy_band->threshold = band->thr;
psy_band->energy = band->energy;
psy_band->spread = band->active_lines * 2.0f / band_sizes[g];

View File

@ -0,0 +1,24 @@
$OpenBSD: patch-libavcodec_lpc_c,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
lpc: correctly apply windowing to the samples in the float-only lpc
--- libavcodec/lpc.c.orig Mon Oct 12 23:03:10 2015
+++ libavcodec/lpc.c Mon Oct 12 23:03:21 2015
@@ -173,11 +173,13 @@ double ff_lpc_calc_ref_coefs_f(LPCContext *s, const fl
int i;
double signal = 0.0f, avg_err = 0.0f;
double autoc[MAX_LPC_ORDER+1] = {0}, error[MAX_LPC_ORDER+1] = {0};
- const double c = (len - 1)/2.0f;
+ const double a = 0.5f, b = 1.0f - a;
- /* Welch window */
- for (i = 0; i < len; i++)
- s->windowed_samples[i] = 1.0f - ((samples[i]-c)/c)*((samples[i]-c)/c);
+ /* Apply windowing */
+ for (i = 0; i < len; i++) {
+ double weight = a - b*cos((2*M_PI*i)/(len - 1));
+ s->windowed_samples[i] = weight*samples[i];
+ }
s->lpc_compute_autocorr(s->windowed_samples, len, order, autoc);
signal = autoc[0];

View File

@ -0,0 +1,18 @@
$OpenBSD: patch-libavcodec_mathops_h,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: Extensive improvements
--- libavcodec/mathops.h.orig Mon Oct 12 21:57:54 2015
+++ libavcodec/mathops.h Mon Oct 12 22:05:41 2015
@@ -233,6 +233,11 @@ static inline av_const unsigned int ff_sqrt(unsigned i
}
#endif
+static inline av_const float ff_sqrf(float a)
+{
+ return a*a;
+}
+
static inline int8_t ff_u8_to_s8(uint8_t a)
{
union {

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,45 @@
$OpenBSD: patch-libavcodec_psymodel_c,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: Extensive improvements
AAC encoder: cosmetics from last commit
--- libavcodec/psymodel.c.orig Mon Oct 12 21:58:16 2015
+++ libavcodec/psymodel.c Mon Oct 12 22:05:51 2015
@@ -109,24 +109,20 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preproce
return NULL;
ctx->avctx = avctx;
- if (avctx->cutoff > 0)
- cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
+ /* AAC has its own LP method */
+ if (avctx->codec_id != AV_CODEC_ID_AAC) {
+ if (avctx->cutoff > 0)
+ cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
- if (!cutoff_coeff && avctx->codec_id == AV_CODEC_ID_AAC)
- cutoff_coeff = 2.0 * AAC_CUTOFF(avctx) / avctx->sample_rate;
-
- if (cutoff_coeff && cutoff_coeff < 0.98)
- ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,
- FF_FILTER_MODE_LOWPASS, FILT_ORDER,
- cutoff_coeff, 0.0, 0.0);
- if (ctx->fcoeffs) {
- ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels);
- if (!ctx->fstate) {
- av_free(ctx);
- return NULL;
+ if (cutoff_coeff && cutoff_coeff < 0.98)
+ ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,
+ FF_FILTER_MODE_LOWPASS, FILT_ORDER,
+ cutoff_coeff, 0.0, 0.0);
+ if (ctx->fcoeffs) {
+ ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
+ for (i = 0; i < avctx->channels; i++)
+ ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
}
- for (i = 0; i < avctx->channels; i++)
- ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
}
ff_iir_filter_init(&ctx->fiir);

View File

@ -1,10 +1,34 @@
$OpenBSD: patch-libavcodec_psymodel_h,v 1.1 2015/09/23 09:16:45 ajacoutot Exp $
$OpenBSD: patch-libavcodec_psymodel_h,v 1.2 2015/10/13 05:44:18 ajacoutot Exp $
AAC encoder: tweak rate-distortion logic
--- libavcodec/psymodel.h.orig Wed Sep 23 03:55:36 2015
+++ libavcodec/psymodel.h Wed Sep 23 03:55:45 2015
@@ -88,6 +88,7 @@ typedef struct FFPsyContext {
AAC encoder: Extensive improvements
--- libavcodec/psymodel.h.orig Mon Sep 7 21:58:02 2015
+++ libavcodec/psymodel.h Mon Oct 12 22:05:51 2015
@@ -29,7 +29,20 @@
/** maximum number of channels */
#define PSY_MAX_CHANS 20
-#define AAC_CUTOFF(s) ((s)->bit_rate ? FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2))
+/* cutoff for VBR is purposely increased, since LP filtering actually
+ * hinders VBR performance rather than the opposite
+ */
+#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) (bit_rate ? FFMIN3(FFMIN3( \
+ FFMAX(bit_rate/channels/5, bit_rate/channels*15/32 - 5500), \
+ 3000 + bit_rate/channels/4, \
+ 12000 + bit_rate/channels/16), \
+ 22000, \
+ sample_rate / 2): (sample_rate / 2))
+#define AAC_CUTOFF(s) ( \
+ (s->flags & CODEC_FLAG_QSCALE) \
+ ? s->sample_rate / 2 \
+ : AAC_CUTOFF_FROM_BITRATE(s->bit_rate, s->channels, s->sample_rate) \
+)
/**
* single band psychoacoustic information
@@ -88,6 +101,7 @@ typedef struct FFPsyContext {
struct {
int size; ///< size of the bitresevoir in bits
int bits; ///< number of bits used in the bitresevoir

View File

@ -0,0 +1,175 @@
$OpenBSD: patch-libavutil_opt_c,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
avutil/opt: add AV_OPT_TYPE_BOOL
--- libavutil/opt.c.orig Mon Oct 12 22:22:56 2015
+++ libavutil/opt.c Mon Oct 12 22:32:30 2015
@@ -67,6 +67,7 @@ static int read_number(const AVOption *o, const void *
case AV_OPT_TYPE_FLAGS: *intnum = *(unsigned int*)dst;return 0;
case AV_OPT_TYPE_PIXEL_FMT: *intnum = *(enum AVPixelFormat *)dst;return 0;
case AV_OPT_TYPE_SAMPLE_FMT:*intnum = *(enum AVSampleFormat*)dst;return 0;
+ case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_INT: *intnum = *(int *)dst;return 0;
case AV_OPT_TYPE_CHANNEL_LAYOUT:
case AV_OPT_TYPE_DURATION:
@@ -103,6 +104,7 @@ static int write_number(void *obj, const AVOption *o,
switch (o->type) {
case AV_OPT_TYPE_PIXEL_FMT: *(enum AVPixelFormat *)dst = llrint(num/den) * intnum; break;
case AV_OPT_TYPE_SAMPLE_FMT:*(enum AVSampleFormat*)dst = llrint(num/den) * intnum; break;
+ case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_FLAGS:
case AV_OPT_TYPE_INT: *(int *)dst= llrint(num/den)*intnum; break;
case AV_OPT_TYPE_DURATION:
@@ -304,6 +306,44 @@ static int set_string_color(void *obj, const AVOption
return 0;
}
+static const char *get_bool_name(int val)
+{
+ if (val < 0)
+ return "auto";
+ return val ? "true" : "false";
+}
+
+static int set_string_bool(void *obj, const AVOption *o, const char *val, int *dst)
+{
+ int n;
+
+ if (!val)
+ return 0;
+
+ if (!strcmp(val, "auto")) {
+ n = -1;
+ } else if (av_match_name(val, "true,y,yes,enable,enabled,on")) {
+ n = 1;
+ } else if (av_match_name(val, "false,n,no,disable,disabled,off")) {
+ n = 0;
+ } else {
+ char *end = NULL;
+ n = strtol(val, &end, 10);
+ if (val + strlen(val) != end)
+ goto fail;
+ }
+
+ if (n < o->min || n > o->max)
+ goto fail;
+
+ *dst = n;
+ return 0;
+
+fail:
+ av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as boolean\n", val);
+ return AVERROR(EINVAL);
+}
+
static int set_string_fmt(void *obj, const AVOption *o, const char *val, uint8_t *dst,
int fmt_nb, int ((*get_fmt)(const char *)), const char *desc)
{
@@ -377,7 +417,7 @@ int av_opt_set(void *obj, const char *name, const char
o->type != AV_OPT_TYPE_PIXEL_FMT && o->type != AV_OPT_TYPE_SAMPLE_FMT &&
o->type != AV_OPT_TYPE_IMAGE_SIZE && o->type != AV_OPT_TYPE_VIDEO_RATE &&
o->type != AV_OPT_TYPE_DURATION && o->type != AV_OPT_TYPE_COLOR &&
- o->type != AV_OPT_TYPE_CHANNEL_LAYOUT))
+ o->type != AV_OPT_TYPE_CHANNEL_LAYOUT && o->type != AV_OPT_TYPE_BOOL))
return AVERROR(EINVAL);
if (o->flags & AV_OPT_FLAG_READONLY)
@@ -385,6 +425,7 @@ int av_opt_set(void *obj, const char *name, const char
dst = ((uint8_t*)target_obj) + o->offset;
switch (o->type) {
+ case AV_OPT_TYPE_BOOL: return set_string_bool(obj, o, val, dst);
case AV_OPT_TYPE_STRING: return set_string(obj, o, val, dst);
case AV_OPT_TYPE_BINARY: return set_string_binary(obj, o, val, dst);
case AV_OPT_TYPE_FLAGS:
@@ -701,6 +742,9 @@ int av_opt_get(void *obj, const char *name, int search
buf[0] = 0;
switch (o->type) {
+ case AV_OPT_TYPE_BOOL:
+ ret = snprintf(buf, sizeof(buf), "%s", (char *)av_x_if_null(get_bool_name(*(int *)dst), "invalid"));
+ break;
case AV_OPT_TYPE_FLAGS: ret = snprintf(buf, sizeof(buf), "0x%08X", *(int *)dst);break;
case AV_OPT_TYPE_INT: ret = snprintf(buf, sizeof(buf), "%d" , *(int *)dst);break;
case AV_OPT_TYPE_INT64: ret = snprintf(buf, sizeof(buf), "%"PRId64, *(int64_t*)dst);break;
@@ -1076,6 +1120,9 @@ static void opt_list(void *obj, void *av_log_obj, cons
case AV_OPT_TYPE_CHANNEL_LAYOUT:
av_log(av_log_obj, AV_LOG_INFO, "%-12s ", "<channel_layout>");
break;
+ case AV_OPT_TYPE_BOOL:
+ av_log(av_log_obj, AV_LOG_INFO, "%-12s ", "<boolean>");
+ break;
case AV_OPT_TYPE_CONST:
default:
av_log(av_log_obj, AV_LOG_INFO, "%-12s ", "");
@@ -1095,6 +1142,7 @@ static void opt_list(void *obj, void *av_log_obj, cons
if (av_opt_query_ranges(&r, obj, opt->name, AV_OPT_SEARCH_FAKE_OBJ) >= 0) {
switch (opt->type) {
+ case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_INT:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_DOUBLE:
@@ -1121,6 +1169,9 @@ static void opt_list(void *obj, void *av_log_obj, cons
!opt->default_val.str)) {
av_log(av_log_obj, AV_LOG_INFO, " (default ");
switch (opt->type) {
+ case AV_OPT_TYPE_BOOL:
+ av_log(av_log_obj, AV_LOG_INFO, "%s", (char *)av_x_if_null(get_bool_name(opt->default_val.i64), "invalid"));
+ break;
case AV_OPT_TYPE_FLAGS:
av_log(av_log_obj, AV_LOG_INFO, "%"PRIX64, opt->default_val.i64);
break;
@@ -1554,6 +1605,7 @@ void *av_opt_ptr(const AVClass *class, void *obj, cons
static int opt_size(enum AVOptionType type)
{
switch(type) {
+ case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_INT:
case AV_OPT_TYPE_FLAGS: return sizeof(int);
case AV_OPT_TYPE_DURATION:
@@ -1671,6 +1723,7 @@ int av_opt_query_ranges_default(AVOptionRanges **range
range->value_max = field->max;
switch (field->type) {
+ case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_INT:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_PIXEL_FMT:
@@ -1754,6 +1807,7 @@ int av_opt_is_set_to_default(void *obj, const AVOption
switch (o->type) {
case AV_OPT_TYPE_CONST:
return 1;
+ case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_FLAGS:
case AV_OPT_TYPE_PIXEL_FMT:
case AV_OPT_TYPE_SAMPLE_FMT:
@@ -1917,6 +1971,9 @@ typedef struct TestContext
float flt;
double dbl;
char *escape;
+ int bool1;
+ int bool2;
+ int bool3;
} TestContext;
#define OFFSET(x) offsetof(TestContext, x)
@@ -1948,6 +2005,9 @@ static const AVOption test_options[]= {
{"num64", "set num 64bit", OFFSET(num64), AV_OPT_TYPE_INT64, {.i64 = 1}, 0, 100, 1 },
{"flt", "set float", OFFSET(flt), AV_OPT_TYPE_FLOAT, {.dbl = 1.0/3}, 0, 100, 1},
{"dbl", "set double", OFFSET(dbl), AV_OPT_TYPE_DOUBLE, {.dbl = 1.0/3}, 0, 100, 1 },
+{"bool1", "set boolean value", OFFSET(bool1), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, 1 },
+{"bool2", "set boolean value", OFFSET(bool2), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, 1 },
+{"bool3", "set boolean value", OFFSET(bool3), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 1 },
{NULL},
};
@@ -2113,6 +2173,8 @@ int main(void)
"dbl=2.2",
"dbl=-1",
"dbl=101",
+ "bool1=true",
+ "bool2=auto",
};
test_ctx.class = &test_class;

View File

@ -0,0 +1,14 @@
$OpenBSD: patch-libavutil_opt_h,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
avutil/opt: add AV_OPT_TYPE_BOOL
--- libavutil/opt.h.orig Mon Oct 12 22:32:35 2015
+++ libavutil/opt.h Mon Oct 12 22:33:51 2015
@@ -236,6 +236,7 @@ enum AVOptionType{
AV_OPT_TYPE_DURATION = MKBETAG('D','U','R',' '),
AV_OPT_TYPE_COLOR = MKBETAG('C','O','L','R'),
AV_OPT_TYPE_CHANNEL_LAYOUT = MKBETAG('C','H','L','A'),
+ AV_OPT_TYPE_BOOL = MKBETAG('B','O','O','L'),
#if FF_API_OLD_AVOPTIONS
FF_OPT_TYPE_FLAGS = 0,
FF_OPT_TYPE_INT,

View File

@ -0,0 +1,104 @@
$OpenBSD: patch-tests_fate_aac_mak,v 1.1 2015/10/13 05:44:18 ajacoutot Exp $
fate: adjust AAC encoder tests values
aacenc_is: take absolute coefficient value upon energy calculations
aaccoder: add frequency scaling and quantization correction for PNS
aaccoder: tweak PNS implementation further
fate: readjust AAC encoder TNS test
aacenc_tns: fix coefficient compression condition
AAC encoder: refactor to resynchronize MIPS port
lpc: correctly apply windowing to the samples in the float-only lpc
AAC encoder: tweak rate-distortion logic
AAC encoder: tweak PNS usage to be more aggressive
AAC encoder: Extensive improvements
AAC encoder tests: increase fuzz for pred test
fate: increase fuzz on fate-aac-tns-encode test
--- tests/fate/aac.mak.orig Mon Oct 12 22:10:58 2015
+++ tests/fate/aac.mak Mon Oct 12 22:11:07 2015
@@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le
fate-aac-aref-encode: CMP = stddev
fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
fate-aac-aref-encode: CMP_SHIFT = -4096
-fate-aac-aref-encode: CMP_TARGET = 594
+fate-aac-aref-encode: CMP_TARGET = 1127
fate-aac-aref-encode: SIZE_TOLERANCE = 2464
fate-aac-aref-encode: FUZZ = 6
@@ -155,42 +155,52 @@ fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $
fate-aac-ln-encode: CMP = stddev
fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-ln-encode: CMP_SHIFT = -4096
-fate-aac-ln-encode: CMP_TARGET = 68
+fate-aac-ln-encode: CMP_TARGET = 80
fate-aac-ln-encode: SIZE_TOLERANCE = 3560
+fate-aac-ln-encode: FUZZ = 30
+FATE_AAC_ENCODE += fate-aac-ln-encode-128k
+fate-aac-ln-encode-128k: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050
+fate-aac-ln-encode-128k: CMP = stddev
+fate-aac-ln-encode-128k: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-ln-encode-128k: CMP_SHIFT = -4096
+fate-aac-ln-encode-128k: CMP_TARGET = 745
+fate-aac-ln-encode-128k: SIZE_TOLERANCE = 3560
+fate-aac-ln-encode-128k: FUZZ = 5
+
FATE_AAC_ENCODE += fate-aac-pns-encode
-fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 1 -aac_is 0 -b:a 128k
+fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 1 -aac_is 0 -b:a 128k -cutoff 22050
fate-aac-pns-encode: CMP = stddev
fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-pns-encode: CMP_SHIFT = -4096
-fate-aac-pns-encode: CMP_TARGET = 646.89
+fate-aac-pns-encode: CMP_TARGET = 695
fate-aac-pns-encode: SIZE_TOLERANCE = 3560
-fate-aac-pns-encode: FUZZ = 5
+fate-aac-pns-encode: FUZZ = 25
FATE_AAC_ENCODE += fate-aac-tns-encode
-fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k
+fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050
fate-aac-tns-encode: CMP = stddev
fate-aac-tns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-tns-encode: CMP_SHIFT = -4096
-fate-aac-tns-encode: CMP_TARGET = 648.50
-fate-aac-tns-encode: FUZZ = 2.8
+fate-aac-tns-encode: CMP_TARGET = 768
+fate-aac-tns-encode: FUZZ = 6
fate-aac-tns-encode: SIZE_TOLERANCE = 3560
FATE_AAC_ENCODE += fate-aac-is-encode
-fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 0 -aac_is 1 -b:a 128k
+fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 0 -aac_is 1 -b:a 128k -cutoff 22050
fate-aac-is-encode: CMP = stddev
fate-aac-is-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-is-encode: CMP_SHIFT = -4096
-fate-aac-is-encode: CMP_TARGET = 613.26
+fate-aac-is-encode: CMP_TARGET = 582
fate-aac-is-encode: SIZE_TOLERANCE = 3560
-fate-aac-is-encode: FUZZ = 35
+fate-aac-is-encode: FUZZ = 1
FATE_AAC_ENCODE += fate-aac-pred-encode
-fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k
+fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050
fate-aac-pred-encode: CMP = stddev
fate-aac-pred-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
fate-aac-pred-encode: CMP_SHIFT = -4096
-fate-aac-pred-encode: CMP_TARGET = 652.60
+fate-aac-pred-encode: CMP_TARGET = 790
fate-aac-pred-encode: FUZZ = 10
fate-aac-pred-encode: SIZE_TOLERANCE = 3560