/*
 * FFT/IFFT transforms
 * AltiVec-enabled
 * Copyright (c) 2009 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/** | |
* Do a complex FFT with the parameters defined in ff_fft_init(). | |
* The input data must be permuted before with s->revtab table. | |
* No 1.0 / sqrt(n) normalization is done. | |
* AltiVec-enabled: | |
* This code assumes that the 'z' pointer is 16 bytes-aligned. | |
* It also assumes all FFTComplex are 8 bytes-aligned pairs of floats. | |
*/ | |
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); | |
void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z); | |
/**
 * AltiVec implementation of the half-size inverse MDCT.
 *
 * Runs in three stages: a pre-rotation pass over `input`, one in-place
 * complex FFT on `output`, and a post-rotation + reordering pass that
 * rewrites `output` in place.
 *
 * CMULA, STORE8 and CMULB are macros defined outside this excerpt. They
 * operate directly on the locals declared in each loop body (and on `j`,
 * `k`, `pin`, `tcos`, `tsin`, `revtabj`, `revtabk`), so those declarations
 * must stay even where they look unused here.
 * NOTE(review): the exact loads/stores performed by those macros are not
 * visible from this function; confirm against their definitions.
 *
 * Both loops are do/while and therefore execute at least once; the function
 * relies on n32 >= 1, i.e. s->mdct_bits >= 5, which is the condition under
 * which ff_fft_init_ppc() installs it.
 *
 * @param s      transform context; mdct_bits, revtab, tcos and tsin are read
 * @param output destination buffer, also used as FFT workspace
 * @param input  source coefficients
 */
static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
{
    int j, k;
    int n   = 1 << s->mdct_bits;
    int n4  = n >> 2;
    int n8  = n >> 3;
    int n32 = n >> 5;
    const uint16_t *revtabj = s->revtab;
    const uint16_t *revtabk = s->revtab+n4;
    /* Vector views centred on the middle of each table/buffer so that the
     * loops can index symmetrically with k and -k-1. */
    const vec_f *tcos = (const vec_f*)(s->tcos+n8);
    const vec_f *tsin = (const vec_f*)(s->tsin+n8);
    const vec_f *pin = (const vec_f*)(input+n4);
    vec_f *pout = (vec_f*)(output+n4);

    /* pre rotation */
    k = n32-1;
    do {
        vec_f cos,sin,cos0,sin0,cos1,sin1,re,im,r0,i0,r1,i1,a,b,c,d;
        cos0 = tcos[k];
        sin0 = tsin[k];
        cos1 = tcos[-k-1];
        sin1 = tsin[-k-1];
        CMULA(0, 0,1,2,3);
        CMULA(1, 2,3,0,1);
        STORE8(0);
        STORE8(1);
        revtabj += 4;
        revtabk -= 4;
        k--;
    } while(k >= 0);

    /* Exactly one FFT must run on the buffer: pick the VSX routine when the
     * build enables it, the AltiVec one otherwise. Calling both in sequence
     * would transform the data twice. */
#if HAVE_VSX
    ff_fft_calc_vsx(s, (FFTComplex*)output);
#else
    ff_fft_calc_altivec(s, (FFTComplex*)output);
#endif

    /* post rotation + reordering */
    j = -n32;
    k = n32-1;
    do {
        vec_f cos,sin,re,im,a,b,c,d;
        CMULB(a,b,j);
        CMULB(c,d,k);
        /* Interleave results from the two mirrored positions j and k. */
        pout[2*j]   = vec_perm(a, d, vcprm(0,s3,1,s2));
        pout[2*j+1] = vec_perm(a, d, vcprm(2,s1,3,s0));
        pout[2*k]   = vec_perm(c, b, vcprm(0,s3,1,s2));
        pout[2*k+1] = vec_perm(c, b, vcprm(2,s1,3,s0));
        j++;
        k--;
    } while(k >= 0);
}
/**
 * AltiVec implementation of the full inverse MDCT.
 *
 * First computes the half IMDCT into the middle of `output` (starting at
 * output + n/4), then fills the two outer quarters by mirroring:
 *   - the first quarter receives the second quarter in reversed order with
 *     the top bit of every 32-bit lane flipped (presumably a float sign
 *     negation; FFTSample's type is not visible here),
 *   - the last quarter receives the third quarter in reversed order.
 *
 * @param s      transform context; mdct_bits is read here
 * @param output destination buffer holding n = 1 << mdct_bits samples
 * @param input  source coefficients, forwarded to imdct_half_altivec()
 */
static void imdct_calc_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
{
    int k;
    int n   = 1 << s->mdct_bits;
    int n4  = n >> 2;
    int n16 = n >> 4;
    /* Mask selecting bit 31 of each 32-bit lane. */
    vec_u32 sign = {1U<<31,1U<<31,1U<<31,1U<<31};
    /* p0 marks the boundary between quarters 1 and 2, p1 between 3 and 4. */
    vec_u32 *p0 = (vec_u32*)(output+n4);
    vec_u32 *p1 = (vec_u32*)(output+n4*3);

    imdct_half_altivec(s, output + n4, input);

    /* Each iteration handles one 4-lane vector on either side of p0 and p1;
     * n16 vectors cover a full quarter of n4 samples. */
    for (k = 0; k < n16; k++) {
        vec_u32 a = p0[k] ^ sign;
        vec_u32 b = p1[-k-1];
        p0[-k-1] = vec_perm(a, a, vcprm(3,2,1,0));
        p1[k]    = vec_perm(b, b, vcprm(3,2,1,0));
    }
}
/**
 * Install the PowerPC-optimised FFT/IMDCT routines into an FFT context.
 *
 * Does nothing when the runtime CPU flags do not report AltiVec support.
 * Otherwise sets s->fft_calc, and, for transforms with mdct_bits >= 5, also
 * sets s->imdct_calc and s->imdct_half to the AltiVec implementations.
 *
 * @param s context whose function pointers are updated
 */
av_cold void ff_fft_init_ppc(FFTContext *s)
{
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    /* Select a single FFT backend at build time; assigning both in sequence
     * would make the first assignment a dead store. */
#if HAVE_VSX
    s->fft_calc = ff_fft_calc_interleave_vsx;
#else
    s->fft_calc = ff_fft_calc_interleave_altivec;
#endif
    /* The vectorised IMDCT loops need at least one iteration's worth of
     * data (n >> 5 >= 1), hence the lower bound on mdct_bits. */
    if (s->mdct_bits >= 5) {
        s->imdct_calc = imdct_calc_altivec;
        s->imdct_half = imdct_half_altivec;
    }
}