diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c index 776f564aa28c..643836626c0f 100644 --- a/third_party/pffft/src/pffft.c +++ b/third_party/pffft/src/pffft.c @@ -59,7 +59,6 @@ #include "pffft.h" #include -// #include #include #include @@ -75,11 +74,14 @@ # define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) # define RESTRICT __restrict # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; +# define VLA_ARRAY_ON_STACK_FREE(varname__) #elif defined(COMPILER_MSVC) +#include # define ALWAYS_INLINE(return_type) __forceinline return_type # define NEVER_INLINE(return_type) __declspec(noinline) return_type # define RESTRICT __restrict -# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) +# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_malloca(size__ * sizeof(type__)) +# define VLA_ARRAY_ON_STACK_FREE(varname__) _freea(varname__) #endif @@ -219,35 +221,24 @@ void validate_pffft_simd() { memcpy(a3.f, f+12, 4*sizeof(float)); t = a0; u = a1; t.v = VZERO(); - // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0); t.v = VADD(a1.v, a2.v); - // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18); t.v = VMUL(a1.v, a2.v); - // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); t.v = VMADD(a1.v, a2.v,a0.v); - // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); INTERLEAVE2(a1.v,a2.v,t.v,u.v); - // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); UNINTERLEAVE2(a1.v,a2.v,t.v,u.v); - // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11); t.v=LD_PS1(f[15]); - // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 15, 15, 15, 15); t.v = VSWAPHL(a1.v, a2.v); - // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 8, 9, 6, 7); VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v); - // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", - // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], - // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); } #endif //!PFFFT_SIMD_DISABLE @@ -1674,6 +1665,8 @@ void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *fo ib = !ib; } assert(buff[ib] == voutput); + + VLA_ARRAY_ON_STACK_FREE(scratch_on_stack); } void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) { @@ -1851,6 +1844,8 @@ void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, flo ib = !ib; } assert(buff[ib] == output); + + VLA_ARRAY_ON_STACK_FREE(scratch_on_stack); } #define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate