;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;* Copyright (c) 2014 James Almer <jamrial@gmail.com>
;* Copyright (c) 2014 Ronald S. Bultje <rsbultje@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"

; 'pointer' reserves one pointer-sized struc field: a qword when assembling
; for x86-64, a dword otherwise.
%if ARCH_X86_64
%define pointer resq
%else
%define pointer resd
%endif
; Field offsets of the leading part of ResampleContext, as read and written by
; the routines in this file. Two pointer-sized fields are followed by 32-bit
; fields.
; NOTE(review): presumably this has to match the C-side declaration of the
; context field for field -- verify against that declaration before editing.
struc ResampleContext
    .av_class:              pointer 1
    .filter_bank:           pointer 1
    .filter_length:         resd 1
    .filter_alloc:          resd 1
    .ideal_dst_incr:        resd 1
    .dst_incr:              resd 1
    .dst_incr_div:          resd 1
    .dst_incr_mod:          resd 1
    .index:                 resd 1
    .frac:                  resd 1
    .src_incr:              resd 1
    .compensation_distance: resd 1
    .phase_count:           resd 1

    ; there's a few more here but we only care about the first few
endstruc
SECTION_RODATA

; 1.0 constants: the float/double instantiations of RESAMPLE_FNS pass one of
; these labels as the macro's fifth argument, where it is divided by src_incr.
pf_1:      dd 1.0
pdbl_1:    dq 1.0
; Initial value of the int16 accumulators; the sums are later shifted right
; by 15, so this acts as a rounding bias of half an output step.
pd_0x4000: dd 0x4000

SECTION .text
; FIXME remove unneeded variables (index_incr, phase_mask)

;------------------------------------------------------------------------------
; RESAMPLE_FNS format, bps, log2_bps [, suffix, one]
;
; Emits two functions for the instruction set selected by the preceding
; INIT_XMM / INIT_YMM:
;   resample_common_<format>  one filter dot product per output sample
;   resample_linear_<format>  two dot products (filter at 'index' and the
;                             filter filter_alloc elements after it), blended
;                             by frac / src_incr
;
; Macro arguments:
;   1  format name, used in the symbol name and to select the int16 or the
;      float/double code paths
;   2  bytes per sample
;   3  log2 of bytes per sample
;   4  packed/scalar float op suffix (s or d); float/double only
;   5  label of a 1.0 constant in the matching precision; float/double only
;
; Both functions keep their loop state in registers (x86-64) or in registers
; plus pushed stack slots (x86-32). The src and filter_bank pointers are
; advanced by filter_length samples and the tap loop runs a negative byte
; offset up to zero, so a single add + js both steps and terminates it.
;------------------------------------------------------------------------------
%macro RESAMPLE_FNS 3-5 ; format [float or int16], bps, log2_bps, float op suffix [s or d], 1.0 constant
; int resample_common_$format(ResampleContext *ctx, $format *dst,
;                             const $format *src, int size, int update_ctx)
%if ARCH_X86_64 ; unix64 and win64
cglobal resample_common_%1, 0, 15, 2, ctx, dst, src, phase_count, index, frac, \
                                      dst_incr_mod, size, min_filter_count_x4, \
                                      min_filter_len_x4, dst_incr_div, src_incr, \
                                      phase_mask, dst_end, filter_bank

    ; use red-zone for variable storage
    ; NOTE(review): ctx_stackq and src_stackq live below rsp on win64 as well,
    ; where the ABI defines no red zone -- confirm this is acceptable.
%define ctx_stackq            [rsp-0x8]
%define src_stackq            [rsp-0x10]
%if WIN64
%define update_context_stackd r4m
%else ; unix64
%define update_context_stackd [rsp-0x14]
%endif

    ; load as many variables in registers as possible; for the rest, store
    ; on stack so that we have 'ctx' available as one extra register
    mov             sized, r3d
%if UNIX64
    mov             update_context_stackd, r4d
%endif
    mov             indexd, [ctxq+ResampleContext.index]
    mov             fracd, [ctxq+ResampleContext.frac]
    mov             dst_incr_modd, [ctxq+ResampleContext.dst_incr_mod]
    mov             filter_bankq, [ctxq+ResampleContext.filter_bank]
    mov             src_incrd, [ctxq+ResampleContext.src_incr]
    mov             ctx_stackq, ctxq
    mov             min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
    mov             dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
    shl             min_filter_len_x4d, %3          ; filter length in bytes
    lea             dst_endq, [dstq+sizeq*%2]       ; loop ends when dst reaches this

    ; Last two reads through ctx. The second load of each pair overwrites the
    ; register that held ctx, after which the registers are renamed.
%if UNIX64
    mov             ecx, [ctxq+ResampleContext.phase_count]
    mov             edi, [ctxq+ResampleContext.filter_alloc]

    DEFINE_ARGS filter_alloc, dst, src, phase_count, index, frac, dst_incr_mod, \
                filter, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
                src_incr, phase_mask, dst_end, filter_bank
%elif WIN64
    mov             R9d, [ctxq+ResampleContext.filter_alloc]
    mov             ecx, [ctxq+ResampleContext.phase_count]

    DEFINE_ARGS phase_count, dst, src, filter_alloc, index, frac, dst_incr_mod, \
                filter, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
                src_incr, phase_mask, dst_end, filter_bank
%endif

    ; Negate the byte length and bias both pointers forward by it, so that
    ; [ptr + negative offset] walks the taps from first to last.
    neg             min_filter_len_x4q
    sub             filter_bankq, min_filter_len_x4q
    sub             srcq, min_filter_len_x4q
    mov             src_stackq, srcq                ; biased start, used for the return value
%else ; x86-32
cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
                                     index, min_filter_length_x4, filter_bank

    ; push temp variables to stack
%define ctx_stackq            r0mp
%define src_stackq            r2mp
%define update_context_stackd r4m

    mov             dstq, r1mp
    mov             r3, r3mp
    lea             r3, [dstq+r3*%2]                ; r3 = dst_end
    PUSH            dword [ctxq+ResampleContext.dst_incr_div]
    PUSH            dword [ctxq+ResampleContext.dst_incr_mod]
    PUSH            dword [ctxq+ResampleContext.filter_alloc]
    PUSH            r3
    PUSH            dword [ctxq+ResampleContext.phase_count]  ; unneeded replacement for phase_mask
    PUSH            dword [ctxq+ResampleContext.src_incr]
    mov             min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
    mov             indexd, [ctxq+ResampleContext.index]
    shl             min_filter_length_x4d, %3
    mov             fracd, [ctxq+ResampleContext.frac]
    neg             min_filter_length_x4q
    mov             filter_bankq, [ctxq+ResampleContext.filter_bank]
    sub             r2mp, min_filter_length_x4q     ; bias the src argument slot in place
    sub             filter_bankq, min_filter_length_x4q
    PUSH            min_filter_length_x4q
    PUSH            filter_bankq
    mov             phase_countd, [ctxq+ResampleContext.phase_count]

    DEFINE_ARGS src, phase_count, dst, frac, index, min_filter_count_x4, filter

    ; Names for the eight dwords pushed above (last pushed is at rsp+0).
%define filter_bankq          dword [rsp+0x0]
%define min_filter_length_x4q dword [rsp+0x4]
%define src_incrd             dword [rsp+0x8]
%define phase_maskd           dword [rsp+0xc]
%define dst_endq              dword [rsp+0x10]
%define filter_allocd         dword [rsp+0x14]
%define dst_incr_modd         dword [rsp+0x18]
%define dst_incr_divd         dword [rsp+0x1c]

    mov             srcq, r2mp
%endif

.loop:
    ; filter = biased filter_bank + index * filter_alloc samples
    mov             filterd, filter_allocd
    imul            filterd, indexd
%if ARCH_X86_64
    mov             min_filter_count_x4q, min_filter_len_x4q
    lea             filterq, [filter_bankq+filterq*%2]
%else ; x86-32
    ; min_filter_count_x4 doubles as a temporary for the filter_bank stack slot
    mov             min_filter_count_x4q, filter_bankq
    lea             filterq, [min_filter_count_x4q+filterq*%2]
    mov             min_filter_count_x4q, min_filter_length_x4q
%endif
%ifidn %1, int16
    movd            m0, [pd_0x4000]                 ; accumulator starts at the rounding bias
%else ; float/double
    xorps           m0, m0, m0
%endif

    align 16
.inner_loop:
    ; accumulate one vector of src * filter products per iteration
    movu            m1, [srcq+min_filter_count_x4q*1]
%ifidn %1, int16
%if cpuflag(xop)
    vpmadcswd       m0, m1, [filterq+min_filter_count_x4q*1], m0
%else
    pmaddwd         m1, [filterq+min_filter_count_x4q*1]
    paddd           m0, m1
%endif
%else ; float/double
%if cpuflag(fma4) || cpuflag(fma3)
    fmaddp%4        m0, m1, [filterq+min_filter_count_x4q*1], m0
%else
    mulp%4          m1, m1, [filterq+min_filter_count_x4q*1]
    addp%4          m0, m0, m1
%endif ; cpuflag
%endif
    add             min_filter_count_x4q, mmsize
    js .inner_loop                                  ; offset still negative: more taps

    ; Reduce the accumulator to one sample and store it; the frac/index
    ; advance is interleaved with the reduction.
%ifidn %1, int16
    HADDD           m0, m1
    psrad           m0, 15
    add             fracd, dst_incr_modd
    packssdw        m0, m0
    add             indexd, dst_incr_divd
    ; movd stores 32 bits although dst advances by one 16-bit sample.
    ; NOTE(review): the final store therefore touches 2 bytes past dst_end;
    ; confirm the caller's buffer allows for that.
    movd            [dstq], m0
%else ; float/double
    ; horizontal sum & store
%if mmsize == 32
    vextractf128    xm1, m0, 0x1
    addp%4          xm0, xm1
%endif
    movhlps         xm1, xm0
%ifidn %1, float
    addps           xm0, xm1
    shufps          xm1, xm0, xm0, q0001
%endif
    add             fracd, dst_incr_modd
    addp%4          xm0, xm1
    add             indexd, dst_incr_divd
    movs%4          [dstq], xm0
%endif
    ; carry: once frac reaches src_incr, wrap it and bump index by one
    cmp             fracd, src_incrd
    jl .skip
    sub             fracd, src_incrd
    inc             indexd

%if UNIX64
    DEFINE_ARGS filter_alloc, dst, src, phase_count, index, frac, dst_incr_mod, \
                index_incr, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
                src_incr, phase_mask, dst_end, filter_bank
%elif WIN64
    DEFINE_ARGS phase_count, dst, src, filter_alloc, index, frac, dst_incr_mod, \
                index_incr, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
                src_incr, phase_mask, dst_end, filter_bank
%else ; x86-32
    DEFINE_ARGS src, phase_count, dst, frac, index, index_incr
%endif

.skip:
    add             dstq, %2
    ; while (index >= phase_count) { index -= phase_count; src++; }
    cmp             indexd, phase_countd
    jb .index_skip
.index_while:
    sub             indexd, phase_countd
    lea             srcq, [srcq+%2]
    cmp             indexd, phase_countd
    jnb .index_while
.index_skip:
    cmp             dstq, dst_endq
    jne .loop

%if ARCH_X86_64
    DEFINE_ARGS ctx, dst, src, phase_count, index, frac
%else ; x86-32
    DEFINE_ARGS src, ctx, update_context, frac, index
%endif

    cmp             dword update_context_stackd, 0
    jz .skip_store
    ; strictly speaking, the function should always return the consumed
    ; amount of input (computed below as the src byte advance shifted right
    ; by log2_bps); however, we only use the value if update_context
    ; is true, so let's just leave it uninitialized otherwise
    mov             ctxq, ctx_stackq
    movifnidn       rax, srcq
    mov             [ctxq+ResampleContext.frac ], fracd
    sub             rax, src_stackq
    mov             [ctxq+ResampleContext.index], indexd
    shr             rax, %3

.skip_store:
%if ARCH_X86_32
    ADD             rsp, 0x20                       ; drop the eight pushed dwords
%endif
    RET

; int resample_linear_$format(ResampleContext *ctx, float *dst,
;                             const float *src, int size, int update_ctx)
%if ARCH_X86_64 ; unix64 and win64
%if UNIX64
cglobal resample_linear_%1, 0, 15, 5, ctx, dst, phase_mask, phase_count, index, frac, \
                                      size, dst_incr_mod, min_filter_count_x4, \
                                      min_filter_len_x4, dst_incr_div, src_incr, \
                                      src, dst_end, filter_bank

    mov             srcq, r2mp
%else ; win64
cglobal resample_linear_%1, 0, 15, 5, ctx, phase_mask, src, phase_count, index, frac, \
                                      size, dst_incr_mod, min_filter_count_x4, \
                                      min_filter_len_x4, dst_incr_div, src_incr, \
                                      dst, dst_end, filter_bank

    mov             dstq, r1mp
%endif

    ; use red-zone for variable storage
    ; NOTE(review): ctx_stackq, src_stackq and phase_mask_stackd live below
    ; rsp on win64 as well, where the ABI defines no red zone -- confirm this
    ; is acceptable.
%define ctx_stackq            [rsp-0x8]
%define src_stackq            [rsp-0x10]
%define phase_mask_stackd     [rsp-0x14]
%if WIN64
%define update_context_stackd r4m
%else ; unix64
%define update_context_stackd [rsp-0x18]
%endif

    ; load as many variables in registers as possible; for the rest, store
    ; on stack so that we have 'ctx' available as one extra register
    mov             sized, r3d
%if UNIX64
    mov             update_context_stackd, r4d
%endif
    mov             indexd, [ctxq+ResampleContext.index]
    mov             fracd, [ctxq+ResampleContext.frac]
    mov             dst_incr_modd, [ctxq+ResampleContext.dst_incr_mod]
    mov             filter_bankq, [ctxq+ResampleContext.filter_bank]
    mov             src_incrd, [ctxq+ResampleContext.src_incr]
    mov             ctx_stackq, ctxq
    mov             min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
%ifidn %1, int16
    movd            m4, [pd_0x4000]                 ; m4 = rounding bias, copied into the accumulators
%else ; float/double
    ; xm4 = 1.0 / src_incr, kept for the whole loop
    cvtsi2s%4       xm0, src_incrd
    movs%4          xm4, [%5]
    divs%4          xm4, xm0
%endif
    mov             dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
    shl             min_filter_len_x4d, %3          ; filter length in bytes
    lea             dst_endq, [dstq+sizeq*%2]       ; loop ends when dst reaches this

    ; Last two reads through ctx. The second load of each pair overwrites the
    ; register that held ctx, after which the registers are renamed.
%if UNIX64
    mov             ecx, [ctxq+ResampleContext.phase_count]
    mov             edi, [ctxq+ResampleContext.filter_alloc]

    DEFINE_ARGS filter_alloc, dst, filter2, phase_count, index, frac, filter1, \
                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
                dst_incr_div, src_incr, src, dst_end, filter_bank
%elif WIN64
    mov             R9d, [ctxq+ResampleContext.filter_alloc]
    mov             ecx, [ctxq+ResampleContext.phase_count]

    DEFINE_ARGS phase_count, filter2, src, filter_alloc, index, frac, filter1, \
                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
                dst_incr_div, src_incr, dst, dst_end, filter_bank
%endif

    ; Negate the byte length and bias both pointers forward by it, so that
    ; [ptr + negative offset] walks the taps from first to last.
    neg             min_filter_len_x4q
    sub             filter_bankq, min_filter_len_x4q
    sub             srcq, min_filter_len_x4q
    mov             src_stackq, srcq                ; biased start, used for the return value
%else ; x86-32
cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
                                     frac, index, dst, filter_bank

    ; push temp variables to stack
%define ctx_stackq            r0mp
%define src_stackq            r2mp
%define update_context_stackd r4m

    mov             dstq, r1mp
    mov             r3, r3mp
    lea             r3, [dstq+r3*%2]                ; r3 = dst_end
    PUSH            dword [ctxq+ResampleContext.dst_incr_div]
    PUSH            r3
    mov             r3, dword [ctxq+ResampleContext.filter_alloc]
    PUSH            dword [ctxq+ResampleContext.dst_incr_mod]
    PUSH            r3                              ; filter_alloc in samples
    shl             r3, %3
    PUSH            r3                              ; filter_alloc in bytes
    mov             r3, dword [ctxq+ResampleContext.src_incr]
    PUSH            dword [ctxq+ResampleContext.phase_count]  ; unneeded replacement of phase_mask
    PUSH            r3d
%ifidn %1, int16
    movd            m4, [pd_0x4000]                 ; m4 = rounding bias, copied into the accumulators
%else ; float/double
    ; xm4 = 1.0 / src_incr, kept for the whole loop
    cvtsi2s%4       xm0, r3d
    movs%4          xm4, [%5]
    divs%4          xm4, xm0
%endif
    mov             min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
    mov             indexd, [ctxq+ResampleContext.index]
    shl             min_filter_length_x4d, %3
    mov             fracd, [ctxq+ResampleContext.frac]
    neg             min_filter_length_x4q
    mov             filter_bankq, [ctxq+ResampleContext.filter_bank]
    sub             r2mp, min_filter_length_x4q     ; bias the src argument slot in place
    sub             filter_bankq, min_filter_length_x4q
    PUSH            min_filter_length_x4q
    PUSH            filter_bankq
    PUSH            dword [ctxq+ResampleContext.phase_count]

    DEFINE_ARGS filter1, min_filter_count_x4, filter2, frac, index, dst, src

    ; Names for the ten dwords pushed above (last pushed is at rsp+0).
%define phase_count_stackd    dword [rsp+0x0]
%define filter_bankq          dword [rsp+0x4]
%define min_filter_length_x4q dword [rsp+0x8]
%define src_incrd             dword [rsp+0xc]
%define phase_mask_stackd     dword [rsp+0x10]
%define filter_alloc_x4q      dword [rsp+0x14]
%define filter_allocd         dword [rsp+0x18]
%define dst_incr_modd         dword [rsp+0x1c]
%define dst_endq              dword [rsp+0x20]
%define dst_incr_divd         dword [rsp+0x24]

    mov             srcq, r2mp
%endif

.loop:
    ; filter1 = biased filter_bank + index * filter_alloc samples
    ; filter2 = filter1 + filter_alloc samples
    mov             filter1d, filter_allocd
    imul            filter1d, indexd
%if ARCH_X86_64
    mov             min_filter_count_x4q, min_filter_len_x4q
    lea             filter1q, [filter_bankq+filter1q*%2]
    lea             filter2q, [filter1q+filter_allocq*%2]
%else ; x86-32
    ; min_filter_count_x4 doubles as a temporary for the filter_bank stack slot
    mov             min_filter_count_x4q, filter_bankq
    lea             filter1q, [min_filter_count_x4q+filter1q*%2]
    mov             min_filter_count_x4q, min_filter_length_x4q
    mov             filter2q, filter1q
    add             filter2q, filter_alloc_x4q
%endif
%ifidn %1, int16
    mova            m0, m4
    mova            m2, m4
%else ; float/double
    xorps           m0, m0, m0
    xorps           m2, m2, m2
%endif

    align 16
.inner_loop:
    ; one src vector feeds both accumulators: m0 with filter1, m2 with filter2
    movu            m1, [srcq+min_filter_count_x4q*1]
%ifidn %1, int16
%if cpuflag(xop)
    vpmadcswd       m2, m1, [filter2q+min_filter_count_x4q*1], m2
    vpmadcswd       m0, m1, [filter1q+min_filter_count_x4q*1], m0
%else
    pmaddwd         m3, m1, [filter2q+min_filter_count_x4q*1]
    pmaddwd         m1, [filter1q+min_filter_count_x4q*1]
    paddd           m2, m3
    paddd           m0, m1
%endif ; cpuflag
%else ; float/double
%if cpuflag(fma4) || cpuflag(fma3)
    fmaddp%4        m2, m1, [filter2q+min_filter_count_x4q*1], m2
    fmaddp%4        m0, m1, [filter1q+min_filter_count_x4q*1], m0
%else
    mulp%4          m3, m1, [filter2q+min_filter_count_x4q*1]
    mulp%4          m1, m1, [filter1q+min_filter_count_x4q*1]
    addp%4          m2, m2, m3
    addp%4          m0, m0, m1
%endif ; cpuflag
%endif
    add             min_filter_count_x4q, mmsize
    js .inner_loop                                  ; offset still negative: more taps

%ifidn %1, int16
    ; horizontally reduce both accumulators to one dword each
%if mmsize == 16
%if cpuflag(xop)
    vphadddq        m2, m2
    vphadddq        m0, m0
%endif
    pshufd          m3, m2, q0032
    pshufd          m1, m0, q0032
    paddd           m2, m3
    paddd           m0, m1
%endif
%if notcpuflag(xop)
    PSHUFLW         m3, m2, q0032
    PSHUFLW         m1, m0, q0032
    paddd           m2, m3
    paddd           m0, m1
%endif
    psubd           m2, m0                          ; m2 = v2 - val
    ; This is probably a really bad idea on atom and other machines with a
    ; long transfer latency between GPRs and XMMs (atom). However, it does
    ; make the clip a lot simpler...
    ; eax = (v2 - val) * frac / src_incr, via the edx:eax forms of imul/idiv
    movd            eax, m2
    add             indexd, dst_incr_divd
    imul            fracd
    idiv            src_incrd
    movd            m1, eax
    add             fracd, dst_incr_modd
    paddd           m0, m1
    psrad           m0, 15
    packssdw        m0, m0
    ; movd stores 32 bits although dst advances by one 16-bit sample.
    ; NOTE(review): the final store therefore touches 2 bytes past dst_end;
    ; confirm the caller's buffer allows for that.
    movd            [dstq], m0

    ; note that for imul/idiv, I need to move filter to edx/eax for each:
    ; - 32bit: eax=r0[filter1], edx=r2[filter2]
    ; - win64: eax=r6[filter1], edx=r1[todo]
    ; - unix64: eax=r6[filter1], edx=r2[todo]
%else ; float/double
    ; val += (v2 - val) * (FELEML) frac / c->src_incr;
%if mmsize == 32
    vextractf128    xm1, m0, 0x1
    vextractf128    xm3, m2, 0x1
    addp%4          xm0, xm1
    addp%4          xm2, xm3
%endif
    ; xm1 = frac * (1.0 / src_incr) in every lane; the blend is applied per
    ; lane, before the horizontal sum
    cvtsi2s%4       xm1, fracd
    subp%4          xm2, xm0
    mulp%4          xm1, xm4
    shufp%4         xm1, xm1, q0000
%if cpuflag(fma4) || cpuflag(fma3)
    fmaddp%4        xm0, xm2, xm1, xm0
%else
    mulp%4          xm2, xm1
    addp%4          xm0, xm2
%endif ; cpuflag

    ; horizontal sum & store
    movhlps         xm1, xm0
%ifidn %1, float
    addps           xm0, xm1
    shufps          xm1, xm0, xm0, q0001
%endif
    add             fracd, dst_incr_modd
    addp%4          xm0, xm1
    add             indexd, dst_incr_divd
    movs%4          [dstq], xm0
%endif
    ; carry: once frac reaches src_incr, wrap it and bump index by one
    cmp             fracd, src_incrd
    jl .skip
    sub             fracd, src_incrd
    inc             indexd

%if UNIX64
    DEFINE_ARGS filter_alloc, dst, filter2, phase_count, index, frac, index_incr, \
                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
                dst_incr_div, src_incr, src, dst_end, filter_bank
%elif WIN64
    DEFINE_ARGS phase_count, filter2, src, filter_alloc, index, frac, index_incr, \
                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
                dst_incr_div, src_incr, dst, dst_end, filter_bank
%else ; x86-32
    DEFINE_ARGS filter1, phase_count, index_incr, frac, index, dst, src
%endif

.skip:
%if ARCH_X86_32
    ; phase_count has no dedicated register here; reload it from its stack slot
    mov             phase_countd, phase_count_stackd
%endif
    add             dstq, %2
    ; while (index >= phase_count) { index -= phase_count; src++; }
    cmp             indexd, phase_countd
    jb .index_skip
.index_while:
    sub             indexd, phase_countd
    lea             srcq, [srcq+%2]
    cmp             indexd, phase_countd
    jnb .index_while
.index_skip:
    cmp             dstq, dst_endq
    jne .loop

%if UNIX64
    DEFINE_ARGS ctx, dst, filter2, phase_count, index, frac, index_incr, \
                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
                dst_incr_div, src_incr, src, dst_end, filter_bank
%elif WIN64
    DEFINE_ARGS ctx, filter2, src, phase_count, index, frac, index_incr, \
                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
                dst_incr_div, src_incr, dst, dst_end, filter_bank
%else ; x86-32
    DEFINE_ARGS filter1, ctx, update_context, frac, index, dst, src
%endif

    cmp             dword update_context_stackd, 0
    jz .skip_store
    ; strictly speaking, the function should always return the consumed
    ; amount of input (computed below as the src byte advance shifted right
    ; by log2_bps); however, we only use the value if update_context
    ; is true, so let's just leave it uninitialized otherwise
    mov             ctxq, ctx_stackq
    movifnidn       rax, srcq
    mov             [ctxq+ResampleContext.frac ], fracd
    sub             rax, src_stackq
    mov             [ctxq+ResampleContext.index], indexd
    shr             rax, %3

.skip_store:
%if ARCH_X86_32
    ADD             rsp, 0x28                       ; drop the ten pushed dwords
%endif
    RET
%endmacro
; Instantiate resample_common_* / resample_linear_* once per sample format and
; instruction set. The optional variants are only assembled when the matching
; HAVE_*_EXTERNAL symbol is non-zero.

; float: 4 bytes per sample, log2 = 2, "s" op suffix, 1.0 taken from pf_1
INIT_XMM sse
RESAMPLE_FNS float, 4, 2, s, pf_1

%if HAVE_AVX_EXTERNAL
INIT_YMM avx
RESAMPLE_FNS float, 4, 2, s, pf_1
%endif
%if HAVE_FMA3_EXTERNAL
INIT_YMM fma3
RESAMPLE_FNS float, 4, 2, s, pf_1
%endif
%if HAVE_FMA4_EXTERNAL
INIT_XMM fma4
RESAMPLE_FNS float, 4, 2, s, pf_1
%endif

; int16: 2 bytes per sample, log2 = 1, no float suffix or 1.0 constant
INIT_XMM sse2
RESAMPLE_FNS int16, 2, 1
%if HAVE_XOP_EXTERNAL
INIT_XMM xop
RESAMPLE_FNS int16, 2, 1
%endif

; double: 8 bytes per sample, log2 = 3, "d" op suffix, 1.0 taken from pdbl_1
INIT_XMM sse2
RESAMPLE_FNS double, 8, 3, d, pdbl_1

%if HAVE_AVX_EXTERNAL
INIT_YMM avx
RESAMPLE_FNS double, 8, 3, d, pdbl_1
%endif
%if HAVE_FMA3_EXTERNAL
INIT_YMM fma3
RESAMPLE_FNS double, 8, 3, d, pdbl_1
%endif