/*
 * Copyright (C) 2009 David Conrad
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
 * 16.16 fixed-point cosine table: entry k (k = 1..7) is
 * round(65536 * cos(k * pi / 16)); entry 0 is an unused placeholder so that
 * the lane index matches k.
 *
 * Entries 1..5 are >= 32768 and therefore do not fit a signed 16-bit lane;
 * they end up stored as (value - 65536).  M16() compensates for that by
 * adding the multiplicand back.  Entries 6 and 7 fit in 15 bits and can be
 * multiplied with M15() directly.
 */
static const vec_s16 constants =
    {0, 64277, 60547, 54491, 46341, 36410, 25080, 12785};
/*
 * vec_perm() byte selector used by M15(): from two vectors of 32-bit
 * products (indices 0..15 address the first vector, 16..31 the second) it
 * picks the high 16 bits of every product and interleaves them, so that
 * result lane 2i comes from word i of the first vector and lane 2i+1 from
 * word i of the second.
 *
 * The high half of a 32-bit word is bytes 0-1 on big-endian and bytes 2-3 on
 * little-endian, hence two tables.  Exactly one of them may be defined;
 * defining both under the same name is a redefinition error.
 *
 * NOTE(review): HAVE_BIGENDIAN is expected to be provided (as 0 or 1) by the
 * project's build configuration header - confirm it is included before this
 * point, otherwise the little-endian table is selected unconditionally.
 */
#if HAVE_BIGENDIAN
static const vec_u8 interleave_high =
    {0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29};
#else
static const vec_u8 interleave_high =
    {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
#endif
// M15() and M16() both compute (a * C) >> 16 for every 16-bit lane.
// Things are tricky because a is signed, but C is conceptually unsigned
// while the multiply instructions treat both operands as signed.
//   M15 is used if C fits in 15 bits unsigned (C6, C7): the signed
//       multiply already gives the right product.
//   M16 is used if C requires 16 bits unsigned.

/*
 * Per-lane (a * C) >> 16 for constants that fit in 15 bits.
 *
 * vec_mule()/vec_mulo() produce the full 32-bit products of the even- and
 * odd-numbered lanes respectively; vec_perm() with interleave_high then keeps
 * only the upper 16 bits of each product and puts them back in lane order.
 */
static inline vec_s16 M15(vec_s16 a, vec_s16 C)
{
    return (vec_s16)vec_perm(vec_mule(a, C), vec_mulo(a, C), interleave_high);
}
/*
 * Per-lane (a * C) >> 16 for constants that need all 16 unsigned bits.
 *
 * Such a constant sits in its signed 16-bit lane as (C - 65536), so the
 * signed multiply in M15() yields ((a * C) >> 16) - a.  Adding a back gives
 * the intended (a * C) >> 16.
 */
static inline vec_s16 M16(vec_s16 a, vec_s16 C)
{
    return vec_add(a, M15(a, C));
}
static void vp3_idct_put_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64]) | |
{ | |
vec_u8 t; | |
IDCT_START | |
// pixels are signed; so add 128*16 in addition to the normal 8 | |
vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11)); | |
eight = vec_add(eight, v2048); | |
IDCT_1D(NOP, NOP) | |
TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7); | |
IDCT_1D(ADD8, SHIFT4) | |
PUT(b0) dst += stride; | |
PUT(b1) dst += stride; | |
PUT(b2) dst += stride; | |
PUT(b3) dst += stride; | |
PUT(b4) dst += stride; | |
PUT(b5) dst += stride; | |
PUT(b6) dst += stride; | |
PUT(b7) | |
memset(block, 0, sizeof(*block) * 64); | |
} | |
static void vp3_idct_add_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64]) | |
{ | |
LOAD_ZERO; | |
vec_u8 t, vdst; | |
vec_s16 vdst_16; | |
vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst)); | |
IDCT_START | |
IDCT_1D(NOP, NOP) | |
TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7); | |
IDCT_1D(ADD8, SHIFT4) | |
ADD(b0) dst += stride; | |
ADD(b1) dst += stride; | |
ADD(b2) dst += stride; | |
ADD(b3) dst += stride; | |
ADD(b4) dst += stride; | |
ADD(b5) dst += stride; | |
ADD(b6) dst += stride; | |
ADD(b7) | |
memset(block, 0, sizeof(*block) * 64); | |
} | |
/*
 * Install the AltiVec IDCT routines into the VP3 DSP context.
 *
 * c     - context whose idct_put / idct_add function pointers are
 *         overwritten; left untouched when the runtime CPU flags do not
 *         report AltiVec.
 * flags - not used by this function.
 */
av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
{
    // runtime gate: keep whatever implementations are already installed
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    c->idct_put = vp3_idct_put_altivec;
    c->idct_add = vp3_idct_add_altivec;
}