Update to dav1d-0.4.0.

from Brad (maintainer)
This commit is contained in:
ajacoutot 2019-08-17 08:28:48 +00:00
parent e665576d8b
commit a1f044ac2b
6 changed files with 36 additions and 420 deletions

View File

@@ -1,10 +1,9 @@
-# $OpenBSD: Makefile,v 1.15 2019/07/12 20:47:55 sthen Exp $
+# $OpenBSD: Makefile,v 1.16 2019/08/17 08:28:48 ajacoutot Exp $
 COMMENT= small and fast AV1 decoder
-VER= 0.3.1
+VER= 0.4.0
 DISTNAME= dav1d-${VER}
-REVISION= 0
 CATEGORIES= multimedia
 MASTER_SITES= https://code.videolan.org/videolan/dav1d/-/archive/${VER}/
 EXTRACT_SUFX= .tar.bz2

View File

@@ -1,2 +1,2 @@
-SHA256 (dav1d-0.3.1.tar.bz2) = vlEdRlIjzHSqtQf+Om5OdkUBZi6/Gl4jOFTtBkp64gQ=
-SIZE (dav1d-0.3.1.tar.bz2) = 438676
+SHA256 (dav1d-0.4.0.tar.bz2) = GL+WxRaLjHBEIjh2IP76qVPo29Tqyw8HlsA9bnQfiSQ=
+SIZE (dav1d-0.4.0.tar.bz2) = 493854

View File

@@ -1,15 +0,0 @@
$OpenBSD: patch-src_arm_asm_S,v 1.1 2019/06/02 08:32:12 ajacoutot Exp $
arm: Mark the stack as non-executable on ELF
Index: src/arm/asm.S
--- src/arm/asm.S.orig
+++ src/arm/asm.S
@@ -37,6 +37,7 @@
.fpu neon
.eabi_attribute 10, 0 // suppress Tag_FP_arch
.eabi_attribute 12, 0 // suppress Tag_Advanced_SIMD_arch
+ .section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif
#ifdef _WIN32

View File

@@ -1,73 +0,0 @@
$OpenBSD: patch-src_env_h,v 1.1 2019/06/02 08:32:12 ajacoutot Exp $
Optimize coefficient decoding
Index: src/env.h
--- src/env.h.orig
+++ src/env.h
@@ -609,25 +609,12 @@ static inline int get_coef_skip_ctx(const TxfmInfo *co
}
}
-static inline int get_coef_nz_ctx(uint8_t *const levels, const int scan_idx,
- const int rc, const int is_eob,
+static inline int get_coef_nz_ctx(uint8_t *const levels,
const enum RectTxfmSize tx,
- const enum TxClass tx_class)
+ const enum TxClass tx_class,
+ const int x, const int y,
+ const ptrdiff_t stride)
{
- const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
-
- if (is_eob) {
- if (scan_idx == 0) return 0;
- const int eighth_sz = imin(t_dim->w, 8) * imin(t_dim->h, 8) * 2;
- if (scan_idx <= eighth_sz) return 1;
- const int quart_sz = eighth_sz * 2;
- if (scan_idx <= quart_sz) return 2;
- return 3;
- }
-
- const int x = rc >> (2 + imin(t_dim->lh, 3));
- const int y = rc & (4 * imin(t_dim->h, 8) - 1);
- const ptrdiff_t stride = 4 * (imin(t_dim->h, 8) + 1);
static const uint8_t offsets[3][5][2 /* x, y */] = {
[TX_CLASS_2D] = {
{ 0, 1 }, { 1, 0 }, { 2, 0 }, { 0, 2 }, { 1, 1 }
@@ -643,8 +630,7 @@ static inline int get_coef_nz_ctx(uint8_t *const level
mag += imin(levels[(x + off[i][0]) * stride + (y + off[i][1])], 3);
const int ctx = imin((mag + 1) >> 1, 4);
if (tx_class == TX_CLASS_2D) {
- return !rc ? 0 :
- dav1d_nz_map_ctx_offset[tx][imin(y, 4)][imin(x, 4)] + ctx;
+ return dav1d_nz_map_ctx_offset[tx][imin(y, 4)][imin(x, 4)] + ctx;
} else {
return 26 + imin((tx_class == TX_CLASS_V) ? y : x, 2) * 5 + ctx;
}
@@ -686,13 +672,10 @@ static inline int get_dc_sign_ctx(const TxfmInfo *cons
}
static inline int get_br_ctx(const uint8_t *const levels,
- const int rc, const enum RectTxfmSize tx,
- const enum TxClass tx_class)
+ const int ac, const enum TxClass tx_class,
+ const int x, const int y,
+ const ptrdiff_t stride)
{
- const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
- const int x = rc >> (imin(t_dim->lh, 3) + 2);
- const int y = rc & (4 * imin(t_dim->h, 8) - 1);
- const int stride = 4 * (imin(t_dim->h, 8) + 1);
int mag = 0;
static const uint8_t offsets_from_txclass[3][3][2] = {
[TX_CLASS_2D] = { { 0, 1 }, { 1, 0 }, { 1, 1 } },
@@ -704,7 +687,7 @@ static inline int get_br_ctx(const uint8_t *const leve
mag += levels[(x + offsets[i][1]) * stride + y + offsets[i][0]];
mag = imin((mag + 1) >> 1, 6);
- if (rc == 0) return mag;
+ if (!ac) return mag;
switch (tx_class) {
case TX_CLASS_2D:
if (y < 2 && x < 2) return mag + 7;

View File

@@ -1,327 +0,0 @@
$OpenBSD: patch-src_recon_tmpl_c,v 1.1 2019/06/02 08:32:12 ajacoutot Exp $
Optimize coefficient decoding
Index: src/recon_tmpl.c
--- src/recon_tmpl.c.orig
+++ src/recon_tmpl.c
@@ -69,19 +69,19 @@ static int decode_coefs(Dav1dTileContext *const t,
const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
const int dbg = DEBUG_BLOCK_INFO && plane && 0;
- if (dbg) printf("Start: r=%d\n", ts->msac.rng);
+ if (dbg)
+ printf("Start: r=%d\n", ts->msac.rng);
// does this block have any non-zero coefficients
const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
const int all_skip = dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.coef.skip[t_dim->ctx][sctx]);
if (dbg)
- printf("Post-non-zero[%d][%d][%d]: r=%d\n",
- t_dim->ctx, sctx, all_skip, ts->msac.rng);
+ printf("Post-non-zero[%d][%d][%d]: r=%d\n",
+ t_dim->ctx, sctx, all_skip, ts->msac.rng);
if (all_skip) {
*res_ctx = 0x40;
- *txtp = f->frame_hdr->segmentation.lossless[b->seg_id] ? WHT_WHT :
- DCT_DCT;
+ *txtp = f->frame_hdr->segmentation.lossless[b->seg_id] ? WHT_WHT : DCT_DCT;
return -1;
}
@@ -111,9 +111,9 @@ static int decode_coefs(Dav1dTileContext *const t,
dav1d_msac_decode_symbol_adapt16)(&ts->msac, txtp_cdf, set_cnt);
if (dbg)
- printf("Post-txtp[%d->%d][%d->%d][%d][%d->%d]: r=%d\n",
- set, set_idx, tx, t_dim->min, intra ? (int)y_mode_nofilt : -1,
- idx, dav1d_tx_types_per_set[set][idx], ts->msac.rng);
+ printf("Post-txtp[%d->%d][%d->%d][%d][%d->%d]: r=%d\n",
+ set, set_idx, tx, t_dim->min, intra ? (int)y_mode_nofilt : -1,
+ idx, dav1d_tx_types_per_set[set][idx], ts->msac.rng);
}
*txtp = dav1d_tx_types_per_set[set][idx];
}
@@ -140,26 +140,20 @@ static int decode_coefs(Dav1dTileContext *const t,
#undef case_sz
}
if (dbg)
- printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
- 16 << tx2dszctx, chroma, is_1d, eob_bin, ts->msac.rng);
+ printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
+ 16 << tx2dszctx, chroma, is_1d, eob_bin, ts->msac.rng);
int eob;
if (eob_bin > 1) {
- eob = 1 << (eob_bin - 1);
uint16_t *const eob_hi_bit_cdf =
ts->cdf.coef.eob_hi_bit[t_dim->ctx][chroma][eob_bin];
- const int eob_hi_bit = dav1d_msac_decode_bool_adapt(&ts->msac,
- eob_hi_bit_cdf);
+ const int eob_hi_bit = dav1d_msac_decode_bool_adapt(&ts->msac, eob_hi_bit_cdf);
if (dbg)
- printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
- t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
- unsigned mask = eob >> 1;
- if (eob_hi_bit) eob |= mask;
- for (mask >>= 1; mask; mask >>= 1) {
- const int eob_bit = dav1d_msac_decode_bool_equi(&ts->msac);
- if (eob_bit) eob |= mask;
- }
+ printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
+ t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
+ eob = ((eob_hi_bit | 2) << (eob_bin - 2)) |
+ dav1d_msac_decode_bools(&ts->msac, eob_bin - 2);
if (dbg)
- printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
+ printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
} else {
eob = eob_bin;
}
@@ -168,98 +162,180 @@ static int decode_coefs(Dav1dTileContext *const t,
uint16_t (*const br_cdf)[5] =
ts->cdf.coef.br_tok[imin(t_dim->ctx, 3)][chroma];
const int16_t *const scan = dav1d_scans[tx][tx_class];
- uint8_t levels[36 * 36];
- ptrdiff_t stride = 4 * (imin(t_dim->h, 8) + 1);
- memset(levels, 0, stride * 4 * (imin(t_dim->w, 8) + 1));
- const int shift = 2 + imin(t_dim->lh, 3), mask = 4 * imin(t_dim->h, 8) - 1;
- unsigned cul_level = 0;
- for (int i = eob, is_last = 1; i >= 0; i--, is_last = 0) {
- const int rc = scan[i], x = rc >> shift, y = rc & mask;
+ int dc_tok;
- // lo tok
- const int ctx = get_coef_nz_ctx(levels, i, rc, is_last, tx, tx_class);
- uint16_t *const lo_cdf = is_last ?
- ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma][ctx] :
- ts->cdf.coef.base_tok[t_dim->ctx][chroma][ctx];
- int tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf,
- 4 - is_last) + is_last;
+ if (eob) {
+ ALIGN_STK_16(uint8_t, levels, 36 * 36,);
+ const int sw = imin(t_dim->w, 8), sh = imin(t_dim->h, 8);
+ const ptrdiff_t stride = 4 * (sh + 1);
+ memset(levels, 0, stride * 4 * (sw + 1));
+ const int shift = 2 + imin(t_dim->lh, 3), mask = 4 * sh - 1;
+
+ { // eob
+ const int rc = scan[eob], x = rc >> shift, y = rc & mask;
+
+ const int ctx = 1 + (eob > sw * sh * 2) + (eob > sw * sh * 4);
+ uint16_t *const lo_cdf = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma][ctx];
+ int tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 3) + 1;
+ if (dbg)
+ printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n",
+ t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng);
+
+ if (tok == 3) {
+ const int br_ctx = get_br_ctx(levels, 1, tx_class, x, y, stride);
+ do {
+ const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ if (dbg)
+ printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",
+ imin(t_dim->ctx, 3), chroma, br_ctx,
+ eob, rc, tok_br, tok, ts->msac.rng);
+ tok += tok_br;
+ if (tok_br < 3) break;
+ } while (tok < 15);
+ }
+
+ cf[rc] = tok;
+ levels[x * stride + y] = (uint8_t) tok;
+ }
+ for (int i = eob - 1; i > 0; i--) { // ac
+ const int rc = scan[i], x = rc >> shift, y = rc & mask;
+
+ // lo tok
+ const int ctx = get_coef_nz_ctx(levels, tx, tx_class, x, y, stride);
+ uint16_t *const lo_cdf = ts->cdf.coef.base_tok[t_dim->ctx][chroma][ctx];
+ int tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 4);
+ if (dbg)
+ printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n",
+ t_dim->ctx, chroma, ctx, i, rc, tok, ts->msac.rng);
+
+ // hi tok
+ if (tok == 3) {
+ const int br_ctx = get_br_ctx(levels, 1, tx_class, x, y, stride);
+ do {
+ const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ if (dbg)
+ printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",
+ imin(t_dim->ctx, 3), chroma, br_ctx,
+ i, rc, tok_br, tok, ts->msac.rng);
+ tok += tok_br;
+ if (tok_br < 3) break;
+ } while (tok < 15);
+ }
+
+ cf[rc] = tok;
+ levels[x * stride + y] = (uint8_t) tok;
+ }
+ { // dc
+ int ctx = 0;
+ if (tx_class != TX_CLASS_2D)
+ ctx = get_coef_nz_ctx(levels, tx, tx_class, 0, 0, stride);
+ uint16_t *const lo_cdf = ts->cdf.coef.base_tok[t_dim->ctx][chroma][ctx];
+ dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 4);
+ if (dbg)
+ printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
+ t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng);
+
+ if (dc_tok == 3) {
+ const int br_ctx = get_br_ctx(levels, 0, tx_class, 0, 0, stride);
+ do {
+ const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
+ br_cdf[br_ctx], 4);
+ if (dbg)
+ printf("Post-dc_hi_tok[%d][%d][%d][%d->%d]: r=%d\n",
+ imin(t_dim->ctx, 3), chroma, br_ctx,
+ tok_br, dc_tok, ts->msac.rng);
+ dc_tok += tok_br;
+ if (tok_br < 3) break;
+ } while (dc_tok < 15);
+ }
+ }
+ } else { // dc-only
+ uint16_t *const lo_cdf = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma][0];
+ dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf, 3) + 1;
if (dbg)
- printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n",
- t_dim->ctx, chroma, ctx, i, rc, tok, ts->msac.rng);
- if (!tok) continue;
+ printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
+ t_dim->ctx, chroma, 0, dc_tok, ts->msac.rng);
- // hi tok
- if (tok == 3) {
- const int br_ctx = get_br_ctx(levels, rc, tx, tx_class);
+ if (dc_tok == 3) {
do {
const int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac,
- br_cdf[br_ctx], 4);
+ br_cdf[0], 4);
if (dbg)
- printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",
- imin(t_dim->ctx, 3), chroma, br_ctx,
- i, rc, tok_br, tok, ts->msac.rng);
- tok += tok_br;
+ printf("Post-dc_hi_tok[%d][%d][%d][%d->%d]: r=%d\n",
+ imin(t_dim->ctx, 3), chroma, 0,
+ tok_br, dc_tok, ts->msac.rng);
+ dc_tok += tok_br;
if (tok_br < 3) break;
- } while (tok < 15);
+ } while (dc_tok < 15);
}
-
- cf[rc] = tok;
- levels[x * stride + y] = (uint8_t) cf[rc];
}
// residual and sign
- int dc_sign = 1;
+ int dc_sign = 1 << 6;
const int lossless = f->frame_hdr->segmentation.lossless[b->seg_id];
const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
const uint8_t *const qm_tbl = f->qm[lossless || is_1d || *txtp == IDTX][tx][plane];
const int dq_shift = imax(0, t_dim->ctx - 2);
const int bitdepth = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
- const int cf_min = -(1 << (7 + bitdepth));
const int cf_max = (1 << (7 + bitdepth)) - 1;
- for (int i = 0; i <= eob; i++) {
+ unsigned cul_level = 0;
+
+ if (dc_tok) { // dc
+ const int dc_sign_ctx = get_dc_sign_ctx(t_dim, a, l);
+ uint16_t *const dc_sign_cdf =
+ ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
+ const int sign = dav1d_msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
+ const unsigned dq = (dq_tbl[0] * qm_tbl[0] + 16) >> 5;
+ if (dbg)
+ printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
+ chroma, dc_sign_ctx, sign, ts->msac.rng);
+ dc_sign = (sign - 1) & (2 << 6);
+
+ if (dc_tok == 15) {
+ dc_tok += read_golomb(&ts->msac);
+ if (dbg)
+ printf("Post-dc_residual[%d->%d]: r=%d\n",
+ dc_tok - 15, dc_tok, ts->msac.rng);
+
+ dc_tok &= 0xfffff;
+ }
+
+ cul_level += dc_tok;
+ dc_tok = ((dq * dc_tok) & 0xffffff) >> dq_shift;
+ cf[0] = imin(dc_tok - sign, cf_max) ^ -sign;
+ }
+ for (int i = 1; i <= eob; i++) { // ac
const int rc = scan[i];
int tok = cf[rc];
if (!tok) continue;
- int dq;
// sign
- int sign;
- if (i == 0) {
- const int dc_sign_ctx = get_dc_sign_ctx(t_dim, a, l);
- uint16_t *const dc_sign_cdf =
- ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
- sign = dav1d_msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
- if (dbg)
- printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
- chroma, dc_sign_ctx, sign, ts->msac.rng);
- dc_sign = sign ? 0 : 2;
- dq = (dq_tbl[0] * qm_tbl[0] + 16) >> 5;
- } else {
- sign = dav1d_msac_decode_bool_equi(&ts->msac);
- if (dbg)
+ const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
+ const unsigned dq = (dq_tbl[1] * qm_tbl[rc] + 16) >> 5;
+ if (dbg)
printf("Post-sign[%d=%d=%d]: r=%d\n", i, rc, sign, ts->msac.rng);
- dq = (dq_tbl[1] * qm_tbl[rc] + 16) >> 5;
- }
// residual
if (tok == 15) {
tok += read_golomb(&ts->msac);
if (dbg)
- printf("Post-residual[%d=%d=%d->%d]: r=%d\n",
- i, rc, tok - 15, tok, ts->msac.rng);
+ printf("Post-residual[%d=%d=%d->%d]: r=%d\n",
+ i, rc, tok - 15, tok, ts->msac.rng);
+
+ // coefficient parsing, see 5.11.39
+ tok &= 0xfffff;
}
- // coefficient parsing, see 5.11.39
- tok &= 0xfffff;
-
// dequant, see 7.12.3
cul_level += tok;
- tok = (((int64_t)dq * tok) & 0xffffff) >> dq_shift;
- cf[rc] = iclip(sign ? -tok : tok, cf_min, cf_max);
+ tok = ((dq * tok) & 0xffffff) >> dq_shift;
+ cf[rc] = imin(tok - sign, cf_max) ^ -sign;
}
// context
- *res_ctx = imin(cul_level, 63) | (dc_sign << 6);
+ *res_ctx = imin(cul_level, 63) | dc_sign;
return eob;
}
@@ -644,7 +720,7 @@ static int obmc(Dav1dTileContext *const t,
if (a_r->ref[0] > 0) {
const int ow4 = iclip(a_b_dim[0], 2, b_dim[0]);
const int oh4 = imin(b_dim[1], 16) >> 1;
- res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, oh4,
+ res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, (oh4 * 3 + 3) >> 2,
t->bx + x, t->by, pl, a_r->mv[0],
&f->refp[a_r->ref[0] - 1], a_r->ref[0] - 1,
dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);

View File

@@ -0,0 +1,32 @@
$OpenBSD: patch-tools_meson_build,v 1.1 2019/08/17 08:28:48 ajacoutot Exp $
Index: tools/meson.build
--- tools/meson.build.orig
+++ tools/meson.build
@@ -85,26 +85,3 @@ dav1dplay_sources = files(
'input/ivf.c',
)
-sdl2_dependency = dependency('sdl2', version: '>= 2.0.1', required: false)
-
-if sdl2_dependency.found()
- placebo_dependency = dependency('libplacebo', version: '>= 1.18.0', required: false)
- vulkan_dependency = dependency('vulkan', required: false)
- sdl_has_vulkan = cc.has_header('SDL_vulkan.h', dependencies: [sdl2_dependency])
- cflag_placebo = []
- deps_placebo = []
- if placebo_dependency.found() and vulkan_dependency.found() and sdl_has_vulkan
- cflag_placebo += '-DHAVE_PLACEBO_VULKAN=1'
- deps_placebo = [vulkan_dependency, placebo_dependency]
- endif
- dav1dplay = executable('dav1dplay',
- dav1dplay_sources,
- rev_target,
-
- link_with : libdav1d,
- include_directories : [dav1d_inc_dirs],
- dependencies : [getopt_dependency, sdl2_dependency, deps_placebo],
- install : true,
- c_args : cflag_placebo,
- )
-endif