Spaces:
Runtime error
Runtime error
/*
 * Copyright (c) 2016 Martin Storsjo
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 * 32-bit masks, one entry per tested bit depth.  The loop-filter checks in
 * this file index pixel_mask_lf with `bit_depth - 8`, i.e. entry 0 is used
 * for 8 bit, entry 1 for 9 bit and entry 2 for 10 bit.
 *
 * pixel_mask:    entry 0 keeps all 32 bits; entries 1 and 2 keep the low
 *                9 / 10 bits of each 16-bit half.
 *                NOTE(review): not referenced by name in the visible code;
 *                presumably consumed by a macro defined elsewhere and
 *                indexed the same way -- confirm before changing.
 * pixel_mask_lf: entry 0 is the byte pattern ff 0f ff 0f; entries 1 and 2
 *                keep the low 9 / 10 bits of the upper 16-bit half and only
 *                the low 4 bits of the lower half.
 */
static const uint32_t pixel_mask[3]    = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };
/*
 * Instantiate the 4x4 and 8x8 DCT helper macros once per coefficient type.
 * The dct4x4_impl / dct8x8_impl macros are defined outside this view; since
 * dct4x4() and dct8x8() call dct4x4_16/dct4x4_32 and dct8x8_16/dct8x8_32,
 * each line presumably expands to the static function of that name.
 */
dct4x4_impl(16, int16_t)
dct4x4_impl(32, int32_t)
dct8x8_impl(16, int16_t)
dct8x8_impl(32, int32_t)
/*
 * Dispatch to the 4x4 DCT helper matching the coefficient width.
 *
 * coef:      coefficient buffer handed to the helper.  For bit_depth != 8
 *            it is reinterpreted as int32_t, so the caller must provide
 *            storage that is large enough and suitably aligned for that.
 * bit_depth: 8 selects the int16_t helper, anything else the int32_t one.
 */
static void dct4x4(int16_t *coef, int bit_depth)
{
    if (bit_depth == 8) {
        dct4x4_16(coef);
    } else {
        dct4x4_32((int32_t *) coef);
    }
}
/*
 * Dispatch to the 8x8 DCT helper matching the coefficient width.
 *
 * coef:      coefficient buffer handed to the helper.  For bit_depth != 8
 *            it is reinterpreted as int32_t, so the caller must provide
 *            storage that is large enough and suitably aligned for that.
 * bit_depth: 8 selects the int16_t helper, anything else the int32_t one.
 */
static void dct8x8(int16_t *coef, int bit_depth)
{
    if (bit_depth == 8) {
        dct8x8_16(coef);
    } else {
        dct8x8_32((int32_t *) coef);
    }
}
static void check_idct(void) | |
{ | |
LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst1_base, [8 * 8 * 2 + 32]); | |
LOCAL_ALIGNED_16(int16_t, coef, [8 * 8 * 2]); | |
LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]); | |
LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]); | |
H264DSPContext h; | |
int bit_depth, sz, align, dc; | |
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride); | |
for (bit_depth = 8; bit_depth <= 10; bit_depth++) { | |
ff_h264dsp_init(&h, bit_depth, 1); | |
for (sz = 4; sz <= 8; sz += 4) { | |
randomize_buffers(); | |
if (sz == 4) | |
dct4x4(coef, bit_depth); | |
else | |
dct8x8(coef, bit_depth); | |
for (dc = 0; dc <= 1; dc++) { | |
void (*idct)(uint8_t *, int16_t *, int) = NULL; | |
switch ((sz << 1) | dc) { | |
case (4 << 1) | 0: idct = h.h264_idct_add; break; | |
case (4 << 1) | 1: idct = h.h264_idct_dc_add; break; | |
case (8 << 1) | 0: idct = h.h264_idct8_add; break; | |
case (8 << 1) | 1: idct = h.h264_idct8_dc_add; break; | |
} | |
if (check_func(idct, "h264_idct%d_add%s_%dbpp", sz, dc ? "_dc" : "", bit_depth)) { | |
for (align = 0; align < 16; align += sz * SIZEOF_PIXEL) { | |
uint8_t *dst1 = dst1_base + align; | |
if (dc) { | |
memset(subcoef0, 0, sz * sz * SIZEOF_COEF); | |
memcpy(subcoef0, coef, SIZEOF_COEF); | |
} else { | |
memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF); | |
} | |
memcpy(dst0, dst, sz * PIXEL_STRIDE); | |
memcpy(dst1, dst, sz * PIXEL_STRIDE); | |
memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF); | |
call_ref(dst0, subcoef0, PIXEL_STRIDE); | |
call_new(dst1, subcoef1, PIXEL_STRIDE); | |
if (memcmp(dst0, dst1, sz * PIXEL_STRIDE) || | |
memcmp(subcoef0, subcoef1, sz * sz * SIZEOF_COEF)) | |
fail(); | |
bench_new(dst1, subcoef1, sz * SIZEOF_PIXEL); | |
} | |
} | |
} | |
} | |
} | |
} | |
static void check_idct_multiple(void) | |
{ | |
LOCAL_ALIGNED_16(uint8_t, dst_full, [16 * 16 * 2]); | |
LOCAL_ALIGNED_16(int16_t, coef_full, [16 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst0, [16 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16 * 2]); | |
LOCAL_ALIGNED_16(int16_t, coef0, [16 * 16 * 2]); | |
LOCAL_ALIGNED_16(int16_t, coef1, [16 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, nnzc, [15 * 8]); | |
H264DSPContext h; | |
int bit_depth, i, y, func; | |
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15*8]); | |
for (bit_depth = 8; bit_depth <= 10; bit_depth++) { | |
ff_h264dsp_init(&h, bit_depth, 1); | |
for (func = 0; func < 3; func++) { | |
void (*idct)(uint8_t *, const int *, int16_t *, int, const uint8_t[]) = NULL; | |
const char *name; | |
int sz = 4, intra = 0; | |
int block_offset[16] = { 0 }; | |
switch (func) { | |
case 0: | |
idct = h.h264_idct_add16; | |
name = "h264_idct_add16"; | |
break; | |
case 1: | |
idct = h.h264_idct_add16intra; | |
name = "h264_idct_add16intra"; | |
intra = 1; | |
break; | |
case 2: | |
idct = h.h264_idct8_add4; | |
name = "h264_idct8_add4"; | |
sz = 8; | |
break; | |
} | |
memset(nnzc, 0, 15 * 8); | |
memset(coef_full, 0, 16 * 16 * SIZEOF_COEF); | |
for (i = 0; i < 16 * 16; i += sz * sz) { | |
uint8_t src[8 * 8 * 2]; | |
uint8_t dst[8 * 8 * 2]; | |
int16_t coef[8 * 8 * 2]; | |
int index = i / sz; | |
int block_y = (index / 16) * sz; | |
int block_x = index % 16; | |
int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL; | |
int nnz = rnd() % 3; | |
randomize_buffers(); | |
if (sz == 4) | |
dct4x4(coef, bit_depth); | |
else | |
dct8x8(coef, bit_depth); | |
for (y = 0; y < sz; y++) | |
memcpy(&dst_full[offset + y * 16 * SIZEOF_PIXEL], | |
&dst[PIXEL_STRIDE * y], sz * SIZEOF_PIXEL); | |
if (nnz > 1) | |
nnz = sz * sz; | |
memcpy(&coef_full[i * SIZEOF_COEF/sizeof(coef[0])], | |
coef, nnz * SIZEOF_COEF); | |
if (intra && nnz == 1) | |
nnz = 0; | |
nnzc[scan8[i / 16]] = nnz; | |
block_offset[i / 16] = offset; | |
} | |
if (check_func(idct, "%s_%dbpp", name, bit_depth)) { | |
memcpy(coef0, coef_full, 16 * 16 * SIZEOF_COEF); | |
memcpy(coef1, coef_full, 16 * 16 * SIZEOF_COEF); | |
memcpy(dst0, dst_full, 16 * 16 * SIZEOF_PIXEL); | |
memcpy(dst1, dst_full, 16 * 16 * SIZEOF_PIXEL); | |
call_ref(dst0, block_offset, coef0, 16 * SIZEOF_PIXEL, nnzc); | |
call_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); | |
if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL) || | |
memcmp(coef0, coef1, 16 * 16 * SIZEOF_COEF)) | |
fail(); | |
bench_new(dst1, block_offset, coef1, 16 * SIZEOF_PIXEL, nnzc); | |
} | |
} | |
} | |
} | |
static void check_loop_filter(void) | |
{ | |
LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]); | |
H264DSPContext h; | |
int bit_depth; | |
int alphas[36], betas[36]; | |
int8_t tc0[36][4]; | |
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride, | |
int alpha, int beta, int8_t *tc0); | |
for (bit_depth = 8; bit_depth <= 10; bit_depth++) { | |
int i, j, a, c; | |
uint32_t mask = pixel_mask_lf[bit_depth - 8]; | |
ff_h264dsp_init(&h, bit_depth, 1); | |
for (i = 35, a = 255, c = 250; i >= 0; i--) { | |
alphas[i] = a << (bit_depth - 8); | |
betas[i] = (i + 1) / 2 << (bit_depth - 8); | |
tc0[i][0] = tc0[i][3] = (c + 6) / 10; | |
tc0[i][1] = (c + 7) / 15; | |
tc0[i][2] = (c + 9) / 20; | |
a = a*9/10; | |
c = c*9/10; | |
} | |
CHECK_LOOP_FILTER(h264_v_loop_filter_luma, 1,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_luma, 0,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff, 0,); | |
CHECK_LOOP_FILTER(h264_v_loop_filter_chroma, 1,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0,); | |
ff_h264dsp_init(&h, bit_depth, 2); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma, 0, 422); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff, 0, 422); | |
} | |
} | |
static void check_loop_filter_intra(void) | |
{ | |
LOCAL_ALIGNED_16(uint8_t, dst, [32 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 16 * 2]); | |
LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 16 * 2]); | |
H264DSPContext h; | |
int bit_depth; | |
int alphas[36], betas[36]; | |
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride, | |
int alpha, int beta); | |
for (bit_depth = 8; bit_depth <= 10; bit_depth++) { | |
int i, j, a; | |
uint32_t mask = pixel_mask_lf[bit_depth - 8]; | |
ff_h264dsp_init(&h, bit_depth, 1); | |
for (i = 35, a = 255; i >= 0; i--) { | |
alphas[i] = a << (bit_depth - 8); | |
betas[i] = (i + 1) / 2 << (bit_depth - 8); | |
a = a*9/10; | |
} | |
CHECK_LOOP_FILTER(h264_v_loop_filter_luma_intra, 1,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_luma_intra, 0,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_luma_mbaff_intra, 0,); | |
CHECK_LOOP_FILTER(h264_v_loop_filter_chroma_intra, 1,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0,); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0,); | |
ff_h264dsp_init(&h, bit_depth, 2); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_intra, 0, 422); | |
CHECK_LOOP_FILTER(h264_h_loop_filter_chroma_mbaff_intra, 0, 422); | |
} | |
} | |
/*
 * Entry point for the H.264 DSP checks: runs each group of checks and calls
 * report() with the group's name afterwards.  Both IDCT checks are covered
 * by the single "idct" report.  report() is defined outside this view.
 */
void checkasm_check_h264dsp(void)
{
    check_idct();
    check_idct_multiple();
    report("idct");

    check_loop_filter();
    report("loop_filter");

    check_loop_filter_intra();
    report("loop_filter_intra");
}