Spaces:
Runtime error
Runtime error
/*
* Copyright (c) 2001 Michel Lespinasse
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* NOTE: This code is based on GPL code from the libmpeg2 project. The
* author, Michel Lespinasse, has given explicit permission to release
* under LGPL as part of FFmpeg.
* | |
* FFmpeg integration by Dieter Shirley | |
* | |
* This file is a direct copy of the AltiVec IDCT module from the libmpeg2 | |
* project. I've deleted all of the libmpeg2-specific code, renamed the | |
* functions and reordered the function parameters. The only change to the | |
* IDCT function itself was to factor out the partial transposition, and to | |
* perform a full transpose at the end of the function. */ | |
/*
 * Table of five constant vectors, eight 16-bit values each, for the AltiVec
 * IDCT in this file.
 *
 * Rows 1-4 all follow the lane pattern { a, b, c, d, a, d, c, b }; row 0
 * holds a mix of individual values instead.
 *
 * NOTE(review): presumably read by name from the IDCT macro, whose
 * definition is not visible in this chunk. Do not reorder rows or lanes
 * without checking that macro.
 */
static const vec_s16 constants[5] = {
{ 23170, 13573, 6518, 21895, -23170, -21895, 32, 31 },
{ 16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725 },
{ 22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521 },
{ 21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692 },
{ 19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722 }
};
/*
 * In-place IDCT of one coefficient block.
 *
 * blk is reinterpreted as an array of eight vec_s16 rows. After the IDCT
 * macro has run, the values in vx0..vx7 are written back over rows 0..7 of
 * that same buffer, so the input coefficients are overwritten.
 *
 * NOTE(review): blk is accessed through a vec_s16 pointer, so it presumably
 * has to meet vector alignment requirements - confirm with the callers.
 */
static void idct_altivec(int16_t *blk)
{
vec_s16 *block = (vec_s16 *) blk;
/* IDCT is a macro defined outside this chunk. vx0..vx7 are not declared
 * anywhere in this function, so the macro presumably declares them and
 * reads the input through the local named `block`; keep that name. */
IDCT;
/* Store the eight result rows back into the caller's buffer. */
block[0] = vx0;
block[1] = vx1;
block[2] = vx2;
block[3] = vx3;
block[4] = vx4;
block[5] = vx5;
block[6] = vx6;
block[7] = vx7;
}
/*
 * Run the IDCT on one coefficient block and write the result to dest.
 *
 * dest:   first destination row.
 * stride: byte distance between consecutive destination rows; dest is
 *         advanced by this amount after each of the first seven rows.
 * blk:    coefficient block, viewed as eight vec_s16 rows.
 *
 * Each result row vx0..vx7 is handed to the COPY macro together with the
 * current destination row pointer.
 *
 * NOTE(review): COPY is defined outside this chunk; presumably it converts
 * a 16-bit row to 8-bit samples and stores it at dest - confirm against the
 * macro definition.
 */
static void idct_put_altivec(uint8_t *dest, ptrdiff_t stride, int16_t *blk)
{
vec_s16 *block = (vec_s16 *) blk;
/* Not referenced directly below; presumably scratch storage that COPY
 * uses by name, so it must stay declared under this name. */
vec_u8 tmp;
/* Macro defined outside this chunk; presumably reads `block` and
 * introduces vx0..vx7, which are not declared in this function. */
IDCT;
/* One COPY per output row, stepping dest down one row between calls. */
COPY(dest, vx0);
dest += stride;
COPY(dest, vx1);
dest += stride;
COPY(dest, vx2);
dest += stride;
COPY(dest, vx3);
dest += stride;
COPY(dest, vx4);
dest += stride;
COPY(dest, vx5);
dest += stride;
COPY(dest, vx6);
dest += stride;
COPY(dest, vx7);
}
/*
 * Run the IDCT on one coefficient block and combine the result with the
 * existing contents of dest via the ADD macro.
 *
 * dest:   first destination row.
 * stride: byte distance between consecutive destination rows; dest is
 *         advanced by this amount after each of the first seven rows.
 * blk:    coefficient block, viewed as eight vec_s16 rows.
 *
 * Two permute vectors are prepared up front: perm0 from the address of row
 * 0 and perm1 from the address of row 1. Even rows are processed with perm0
 * and odd rows with perm1.
 *
 * NOTE(review): reusing perm0/perm1 for every second row appears to assume
 * that rows two strides apart share the same alignment within a 16-byte
 * vector - confirm against the ADD definition and the callers' stride.
 * NOTE(review): ADD is defined outside this chunk; presumably it loads the
 * destination row, adds the IDCT row and stores the 8-bit result.
 */
static void idct_add_altivec(uint8_t *dest, ptrdiff_t stride, int16_t *blk)
{
vec_s16 *block = (vec_s16 *) blk;
/* tmp, tmp2 and tmp3 are not referenced directly below; presumably they
 * are scratch variables that ADD uses by name. */
vec_u8 tmp;
vec_s16 tmp2, tmp3;
vec_u8 perm0;
vec_u8 perm1;
vec_u8 p0, p1, p;
/* Macro defined outside this chunk; presumably reads `block` and
 * introduces vx0..vx7, which are not declared in this function. */
IDCT;
/* vec_lvsl yields a permute control vector derived from the alignment of
 * the given address: p0 for the first row, p1 for the row one stride on. */
p0 = vec_lvsl(0, dest);
p1 = vec_lvsl(stride, dest);
/* Every byte of p is 0xFF. */
p = vec_splat_u8(-1);
/* Interleave 0xFF bytes with the leading alignment indices, giving one
 * control vector for even rows and one for odd rows. */
perm0 = vec_mergeh(p, p0);
perm1 = vec_mergeh(p, p1);
ADD(dest, vx0, perm0);
dest += stride;
ADD(dest, vx1, perm1);
dest += stride;
ADD(dest, vx2, perm0);
dest += stride;
ADD(dest, vx3, perm1);
dest += stride;
ADD(dest, vx4, perm0);
dest += stride;
ADD(dest, vx5, perm1);
dest += stride;
ADD(dest, vx6, perm0);
dest += stride;
ADD(dest, vx7, perm1);
}
/**
 * Install the AltiVec IDCT routines into an IDCT DSP context.
 *
 * The context is left untouched unless all of the following hold:
 *  - the runtime CPU flags report AltiVec,
 *  - high_bit_depth is zero and avctx->lowres is 0,
 *  - avctx->idct_algo is FF_IDCT_ALTIVEC, or it is FF_IDCT_AUTO and
 *    AV_CODEC_FLAG_BITEXACT is not set in avctx->flags.
 *
 * @param c              context whose idct, idct_add, idct_put and perm_type
 *                       fields are overwritten on success
 * @param avctx          codec context supplying lowres, idct_algo and flags
 * @param high_bit_depth non-zero disables the AltiVec routines
 */
av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
    /* No AltiVec at runtime: nothing to install. */
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    /* The routines are only installed for regular bit depth at full
     * resolution. */
    if (high_bit_depth || avctx->lowres != 0)
        return;

    /* Bail out unless AltiVec was requested explicitly, or selection is
     * automatic and bit-exact output is not demanded. */
    if (avctx->idct_algo != FF_IDCT_ALTIVEC &&
        (avctx->idct_algo != FF_IDCT_AUTO ||
         (avctx->flags & AV_CODEC_FLAG_BITEXACT)))
        return;

    c->perm_type = FF_IDCT_PERM_TRANSPOSE;
    c->idct_put  = idct_put_altivec;
    c->idct_add  = idct_add_altivec;
    c->idct      = idct_altivec;
}