Bump to WebRTC M120 release
Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it's gone upstream, but maybe we'll want to drop that soon.
This commit is contained in:
7
webrtc/third_party/pffft/BUILD.gn
vendored
7
webrtc/third_party/pffft/BUILD.gn
vendored
@ -1,4 +1,4 @@
|
||||
# Copyright 2019 The Chromium Authors. All rights reserved.
|
||||
# Copyright 2019 The Chromium Authors
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
@ -7,6 +7,8 @@ import("//testing/libfuzzer/fuzzer_test.gni")
|
||||
import("//testing/test.gni")
|
||||
|
||||
config("common_config") {
|
||||
cflags = [ "-Wno-shadow" ]
|
||||
|
||||
if (is_win) {
|
||||
defines = [
|
||||
# Required to use math constants from math.h.
|
||||
@ -17,7 +19,8 @@ config("common_config") {
|
||||
# PFFFT doesn't support SIMD on some cpus, so build a scalar version.
|
||||
if ((current_cpu == "arm" && !arm_use_neon) || current_cpu == "mipsel" ||
|
||||
current_cpu == "mips64el" || current_cpu == "ppc64" ||
|
||||
current_cpu == "s390x") {
|
||||
current_cpu == "riscv64" || current_cpu == "s390x" ||
|
||||
current_cpu == "loong64") {
|
||||
defines = [ "PFFFT_SIMD_DISABLE" ]
|
||||
}
|
||||
}
|
||||
|
46
webrtc/third_party/pffft/LICENSE
vendored
46
webrtc/third_party/pffft/LICENSE
vendored
@ -1 +1,45 @@
|
||||
Q29weXJpZ2h0IChjKSAyMDEzICBKdWxpZW4gUG9tbWllciAoIHBvbW1pZXJAbW9kYXJ0dC5jb20gKQoKQmFzZWQgb24gb3JpZ2luYWwgZm9ydHJhbiA3NyBjb2RlIGZyb20gRkZUUEFDS3Y0IGZyb20gTkVUTElCLAphdXRob3JlZCBieSBEciBQYXVsIFN3YXJ6dHJhdWJlciBvZiBOQ0FSLCBpbiAxOTg1LgoKQXMgY29uZmlybWVkIGJ5IHRoZSBOQ0FSIGZmdHBhY2sgc29mdHdhcmUgY3VyYXRvcnMsIHRoZSBmb2xsb3dpbmcKRkZUUEFDS3Y1IGxpY2Vuc2UgYXBwbGllcyB0byBGRlRQQUNLdjQgc291cmNlcy4gTXkgY2hhbmdlcyBhcmUKcmVsZWFzZWQgdW5kZXIgdGhlIHNhbWUgdGVybXMuCgpGRlRQQUNLIGxpY2Vuc2U6CgpodHRwOi8vd3d3LmNpc2wudWNhci5lZHUvY3NzL3NvZnR3YXJlL2ZmdHBhY2s1L2Z0cGsuaHRtbAoKQ29weXJpZ2h0IChjKSAyMDA0IHRoZSBVbml2ZXJzaXR5IENvcnBvcmF0aW9uIGZvciBBdG1vc3BoZXJpYwpSZXNlYXJjaCAoIlVDQVIiKS4gQWxsIHJpZ2h0cyByZXNlcnZlZC4gRGV2ZWxvcGVkIGJ5IE5DQVIncwpDb21wdXRhdGlvbmFsIGFuZCBJbmZvcm1hdGlvbiBTeXN0ZW1zIExhYm9yYXRvcnksIFVDQVIsCnd3dy5jaXNsLnVjYXIuZWR1LgoKUmVkaXN0cmlidXRpb24gYW5kIHVzZSBvZiB0aGUgU29mdHdhcmUgaW4gc291cmNlIGFuZCBiaW5hcnkgZm9ybXMsCndpdGggb3Igd2l0aG91dCBtb2RpZmljYXRpb24sIGlzIHBlcm1pdHRlZCBwcm92aWRlZCB0aGF0IHRoZQpmb2xsb3dpbmcgY29uZGl0aW9ucyBhcmUgbWV0OgoKLSBOZWl0aGVyIHRoZSBuYW1lcyBvZiBOQ0FSJ3MgQ29tcHV0YXRpb25hbCBhbmQgSW5mb3JtYXRpb24gU3lzdGVtcwpMYWJvcmF0b3J5LCB0aGUgVW5pdmVyc2l0eSBDb3Jwb3JhdGlvbiBmb3IgQXRtb3NwaGVyaWMgUmVzZWFyY2gsCm5vciB0aGUgbmFtZXMgb2YgaXRzIHNwb25zb3JzIG9yIGNvbnRyaWJ1dG9ycyBtYXkgYmUgdXNlZCB0bwplbmRvcnNlIG9yIHByb21vdGUgcHJvZHVjdHMgZGVyaXZlZCBmcm9tIHRoaXMgU29mdHdhcmUgd2l0aG91dApzcGVjaWZpYyBwcmlvciB3cml0dGVuIHBlcm1pc3Npb24uCgotIFJlZGlzdHJpYnV0aW9ucyBvZiBzb3VyY2UgY29kZSBtdXN0IHJldGFpbiB0aGUgYWJvdmUgY29weXJpZ2h0Cm5vdGljZXMsIHRoaXMgbGlzdCBvZiBjb25kaXRpb25zLCBhbmQgdGhlIGRpc2NsYWltZXIgYmVsb3cuCgotIFJlZGlzdHJpYnV0aW9ucyBpbiBiaW5hcnkgZm9ybSBtdXN0IHJlcHJvZHVjZSB0aGUgYWJvdmUgY29weXJpZ2h0Cm5vdGljZSwgdGhpcyBsaXN0IG9mIGNvbmRpdGlvbnMsIGFuZCB0aGUgZGlzY2xhaW1lciBiZWxvdyBpbiB0aGUKZG9jdW1lbnRhdGlvbiBhbmQvb3Igb3RoZXIgbWF0ZXJpYWxzIHByb3ZpZGVkIHdpdGggdGhlCmRpc3RyaWJ1dGlvbi4KClRISVMgU09GVFdBUkUgSVMgUFJPVklERUQgIkFTIElTIiwgV0lUSE9VVCBXQVJSQU5UWSBPRiBBTlkgS0lORCwKRVhQUkVTUyBPUiBJTVBMSUVELCBJTkNMVURJTkcsIEJVVCBOT1QgTElNSVRF
RCBUTyBUSEUgV0FSUkFOVElFUyBPRgpNRVJDSEFOVEFCSUxJVFksIEZJVE5FU1MgRk9SIEEgUEFSVElDVUxBUiBQVVJQT1NFIEFORApOT05JTkZSSU5HRU1FTlQuIElOIE5PIEVWRU5UIFNIQUxMIFRIRSBDT05UUklCVVRPUlMgT1IgQ09QWVJJR0hUCkhPTERFUlMgQkUgTElBQkxFIEZPUiBBTlkgQ0xBSU0sIElORElSRUNULCBJTkNJREVOVEFMLCBTUEVDSUFMLApFWEVNUExBUlksIE9SIENPTlNFUVVFTlRJQUwgREFNQUdFUyBPUiBPVEhFUiBMSUFCSUxJVFksIFdIRVRIRVIgSU4gQU4KQUNUSU9OIE9GIENPTlRSQUNULCBUT1JUIE9SIE9USEVSV0lTRSwgQVJJU0lORyBGUk9NLCBPVVQgT0YgT1IgSU4KQ09OTkVDVElPTiBXSVRIIFRIRSBTT0ZUV0FSRSBPUiBUSEUgVVNFIE9SIE9USEVSIERFQUxJTkdTIFdJVEggVEhFClNPRlRXQVJFLgo=
|
||||
Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
|
||||
|
||||
Based on original fortran 77 code from FFTPACKv4 from NETLIB,
|
||||
authored by Dr Paul Swarztrauber of NCAR, in 1985.
|
||||
|
||||
As confirmed by the NCAR fftpack software curators, the following
|
||||
FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
|
||||
released under the same terms.
|
||||
|
||||
FFTPACK license:
|
||||
|
||||
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
|
||||
|
||||
Copyright (c) 2004 the University Corporation for Atmospheric
|
||||
Research ("UCAR"). All rights reserved. Developed by NCAR's
|
||||
Computational and Information Systems Laboratory, UCAR,
|
||||
www.cisl.ucar.edu.
|
||||
|
||||
Redistribution and use of the Software in source and binary forms,
|
||||
with or without modification, is permitted provided that the
|
||||
following conditions are met:
|
||||
|
||||
- Neither the names of NCAR's Computational and Information Systems
|
||||
Laboratory, the University Corporation for Atmospheric Research,
|
||||
nor the names of its sponsors or contributors may be used to
|
||||
endorse or promote products derived from this Software without
|
||||
specific prior written permission.
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notices, this list of conditions, and the disclaimer below.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions, and the disclaimer below in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
||||
|
69
webrtc/third_party/pffft/README.md
vendored
Normal file
69
webrtc/third_party/pffft/README.md
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
# Notes on PFFFT
|
||||
We strongly recommend **reading this file** before using PFFFT and **always wrapping** the original C library within a C++ wrapper.
|
||||
|
||||
[Example of PFFFT wrapper](https://cs.chromium.org/chromium/src/third_party/webrtc/modules/audio_processing/utility/pffft_wrapper.h).
|
||||
|
||||
## Scratch buffer
|
||||
The caller can optionally provide a scratch buffer. When not provided, VLA is used to provide a thread-safe option.
|
||||
However, it is recommended to write a C++ wrapper which allocates its own scratch buffer.
|
||||
Note that the scratch buffer has the same memory alignment requirements as the input and output vectors.
|
||||
|
||||
## Output layout
|
||||
PFFFT computes the forward transform with two possible output layouts:
|
||||
1. ordered
|
||||
2. unordered
|
||||
|
||||
### Ordered layout
|
||||
Calling `pffft_transform_ordered` produces an array of **interleaved real and imaginary parts**.
|
||||
The last Fourier coefficient is purely real and stored in the imaginary part of the DC component (which is also purely real).
|
||||
|
||||
### Unordered layout
|
||||
Calling `pffft_transform` produces an array with a more complex structure, but in a more efficient way than `pffft_transform_ordered`.
|
||||
Below, the output produced by Matlab and that produced by PFFFT are compared.
|
||||
The comparison is made for a 32 point transform of a 16 sample buffer.
|
||||
A 32 point transform has been chosen as this is the minimum supported by PFFFT.
|
||||
|
||||
Important notes:
|
||||
- In Matlab the DC (Matlab index 1 [R1, I1]) and Nyquist (Matlab index 17 [R17, I17]) values are not repeated as complex conjugates.
|
||||
- In PFFFT the Nyquist real and imaginary parts ([R17, I17]) are omitted entirely.
|
||||
- In PFFFT the final 8 values (4 real and 4 imaginary) are not in the same order as all of the others.
|
||||
- In PFFFT all imaginary parts are stored as negatives (like second half in Matlab).
|
||||
|
||||
```
|
||||
+-------+-----------+-------+-------+
|
||||
| Index | Matlab | Index | PFFFT |
|
||||
+-------+-----------+-------+-------+
|
||||
| 1 | R1 + I1 | 0 | R1 |
|
||||
| 2 | R2 + I2 | 1 | R2 |
|
||||
| 3 | R3 + I3 | 2 | R3 |
|
||||
| 4 | R4 + I4 | 3 | R4 |
|
||||
| 5 | R5 + I5 | 4 | -I1 |
|
||||
| 6 | R6 + I6 | 5 | -I2 |
|
||||
| 7 | R7 + I7 | 6 | -I3 |
|
||||
| 8 | R8 + I8 | 7 | -I4 |
|
||||
| 9 | R9 + I9 | 8 | R5 |
|
||||
| 10 | R10 + I10 | 9 | R6 |
|
||||
| 11 | R11 + I11 | 10 | R7 |
|
||||
| 12 | R12 + I12 | 11 | R8 |
|
||||
| 13 | R13 + I13 | 12 | -I5 |
|
||||
| 14 | R14 + I14 | 13 | -I6 |
|
||||
| 15 | R15 + I15 | 14 | -I7 |
|
||||
| 16 | R16 + I16 | 15 | -I8 |
|
||||
| 17 | R17 + I17 | 16 | R9 |
|
||||
| 18 | R16 - I16 | 17 | R10 |
|
||||
| 19 | R15 - I15 | 18 | R11 |
|
||||
| 20 | R14 - I14 | 19 | R12 |
|
||||
| 21 | R13 - I13 | 20 | -I9 |
|
||||
| 22 | R12 - I12 | 21 | -I10 |
|
||||
| 23 | R11 - I11 | 22 | -I11 |
|
||||
| 24 | R10 - I10 | 23 | -I12 |
|
||||
| 25 | R9 - I9 | 24 | R13 |
|
||||
| 26 | R8 - I8 | 25 | R16 |
|
||||
| 27 | R7 - I7 | 26 | R15 |
|
||||
| 28 | R6 - I6 | 27 | R14 |
|
||||
| 29 | R5 - I5 | 28 | -I13 |
|
||||
| 30 | R4 - I4 | 29 | -I16 |
|
||||
| 31 | R3 - I3 | 30 | -I15 |
|
||||
| 32 | R2 - I2 | 31 | -I14 |
|
||||
+-------+-----------+-------+-------+
|
||||
```
|
379
webrtc/third_party/pffft/README.txt
vendored
Normal file
379
webrtc/third_party/pffft/README.txt
vendored
Normal file
@ -0,0 +1,379 @@
|
||||
PFFFT: a pretty fast FFT.
|
||||
|
||||
TL;DR
|
||||
--
|
||||
|
||||
PFFFT does 1D Fast Fourier Transforms, of single precision real and
|
||||
complex vectors. It tries to do it fast, it tries to be correct, and it
|
||||
tries to be small. Computations do take advantage of SSE1 instructions
|
||||
on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. The
|
||||
license is BSD-like.
|
||||
|
||||
|
||||
Why does it exist:
|
||||
--
|
||||
|
||||
I was in search of a good performing FFT library, preferably very
|
||||
small and with a very liberal license.
|
||||
|
||||
When one says "fft library", FFTW ("Fastest Fourier Transform in the
|
||||
West") is probably the first name that comes to mind -- I guess that
|
||||
99% of open-source projects that need a FFT do use FFTW, and are happy
|
||||
with it. However, it is quite a large library, which does everything
|
||||
fft related (2d transforms, 3d transforms, other transformations such
|
||||
as discrete cosine, or fast hartley). And it is licensed under the
|
||||
GNU GPL, which means that it cannot be used in non open-source
|
||||
products.
|
||||
|
||||
An alternative to FFTW that is really small, is the venerable FFTPACK
|
||||
v4, which is available on NETLIB. A more recent version (v5) exists,
|
||||
but it is larger as it deals with multi-dimensional transforms. This
|
||||
is a library that is written in FORTRAN 77, a language that is now
|
||||
considered as a bit antiquated by many. FFTPACKv4 was written in 1985,
|
||||
by Dr Paul Swarztrauber of NCAR, more than 25 years ago ! And despite
|
||||
its age, benchmarks show that it is still a very good performing FFT
|
||||
library, see for example the 1d single precision benchmarks here:
|
||||
http://www.fftw.org/speed/opteron-2.2GHz-32bit/ . It is however not
|
||||
competitive with the fastest ones, such as FFTW, Intel MKL, AMD ACML,
|
||||
Apple vDSP. The reason for that is that those libraries do take
|
||||
advantage of the SSE SIMD instructions available on Intel CPUs,
|
||||
available since the days of the Pentium III. These instructions deal
|
||||
with small vectors of 4 floats at a time, instead of a single float
|
||||
for a traditional FPU, so when using these instructions one may expect
|
||||
a 4-fold performance improvement.
|
||||
|
||||
The idea was to take this fortran fftpack v4 code, translate to C,
|
||||
modify it to deal with those SSE instructions, and check that the
|
||||
final performance is not completely ridiculous when compared to other
|
||||
SIMD FFT libraries. Translation to C was performed with f2c (
|
||||
http://www.netlib.org/f2c/ ). The resulting file was a bit edited in
|
||||
order to remove the thousands of gotos that were introduced by
|
||||
f2c. You will find the fftpack.h and fftpack.c sources in the
|
||||
repository; this is a complete translation of
|
||||
http://www.netlib.org/fftpack/ , with the discrete cosine transform
|
||||
and the test program. There is no license information in the netlib
|
||||
repository, but it was confirmed to me by the fftpack v5 curators that
|
||||
the same terms do apply to fftpack v4:
|
||||
http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html . This is a
|
||||
"BSD-like" license, it is compatible with proprietary projects.
|
||||
|
||||
Adapting fftpack to deal with the SIMD 4-element vectors instead of
|
||||
scalar single precision numbers was more complex than I originally
|
||||
thought, especially with the real transforms, and I ended up writing
|
||||
more code than I planned.
|
||||
|
||||
|
||||
The code:
|
||||
--
|
||||
|
||||
Only two files, in good old C, pffft.c and pffft.h . The API is very
|
||||
very simple, just make sure that you read the comments in pffft.h.
|
||||
|
||||
|
||||
Comparison with other FFTs:
|
||||
--
|
||||
|
||||
The idea was not to break speed records, but to get a decently fast
|
||||
fft that is at least 50% as fast as the fastest FFT -- especially on
|
||||
slowest computers. I'm more focused on getting the best performance
|
||||
on slow cpus (Atom, Intel Core 1, old Athlons, ARM Cortex-A9...), than
|
||||
on getting top performance on today's fastest cpus.
|
||||
|
||||
It can be used in a real-time context as the fft functions do not
|
||||
perform any memory allocation -- that is why they accept a 'work'
|
||||
array in their arguments.
|
||||
|
||||
It is also a bit focused on performing 1D convolutions, that is why it
|
||||
provides "unordered" FFTs, and a fourier domain convolution
|
||||
operation.
|
||||
|
||||
|
||||
Benchmark results (cpu tested: core i7 2600, core 2 quad, core 1 duo, atom N270, cortex-A9)
|
||||
--
|
||||
|
||||
The benchmark shows the performance of various fft implementations measured in
|
||||
MFlops, with the number of floating point operations being defined as 5Nlog2(N)
|
||||
for a length N complex fft, and 2.5*Nlog2(N) for a real fft.
|
||||
See http://www.fftw.org/speed/method.html for an explanation of these formulas.
|
||||
|
||||
MacOS Lion, gcc 4.2, 64-bit, fftw 3.3 on a 3.4 GHz core i7 2600
|
||||
|
||||
Built with:
|
||||
|
||||
gcc-4.2 -o test_pffft -arch x86_64 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -DHAVE_VECLIB -framework veclib -DHAVE_FFTW -lfftw3f
|
||||
|
||||
| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT |
|
||||
|-----------+------------+------------+------------+------------| |------------+------------+------------+------------|
|
||||
| 64 | 2816 | 8596 | 7329 | 8187 | | 2887 | 14898 | 14668 | 11108 |
|
||||
| 96 | 3298 | n/a | 8378 | 7727 | | 3953 | n/a | 15680 | 10878 |
|
||||
| 128 | 3507 | 11575 | 9266 | 10108 | | 4233 | 17598 | 16427 | 12000 |
|
||||
| 160 | 3391 | n/a | 9838 | 10711 | | 4220 | n/a | 16653 | 11187 |
|
||||
| 192 | 3919 | n/a | 9868 | 10956 | | 4297 | n/a | 15770 | 12540 |
|
||||
| 256 | 4283 | 13179 | 10694 | 13128 | | 4545 | 19550 | 16350 | 13822 |
|
||||
| 384 | 3136 | n/a | 10810 | 12061 | | 3600 | n/a | 16103 | 13240 |
|
||||
| 480 | 3477 | n/a | 10632 | 12074 | | 3536 | n/a | 11630 | 12522 |
|
||||
| 512 | 3783 | 15141 | 11267 | 13838 | | 3649 | 20002 | 16560 | 13580 |
|
||||
| 640 | 3639 | n/a | 11164 | 13946 | | 3695 | n/a | 15416 | 13890 |
|
||||
| 768 | 3800 | n/a | 11245 | 13495 | | 3590 | n/a | 15802 | 14552 |
|
||||
| 800 | 3440 | n/a | 10499 | 13301 | | 3659 | n/a | 12056 | 13268 |
|
||||
| 1024 | 3924 | 15605 | 11450 | 15339 | | 3769 | 20963 | 13941 | 15467 |
|
||||
| 2048 | 4518 | 16195 | 11551 | 15532 | | 4258 | 20413 | 13723 | 15042 |
|
||||
| 2400 | 4294 | n/a | 10685 | 13078 | | 4093 | n/a | 12777 | 13119 |
|
||||
| 4096 | 4750 | 16596 | 11672 | 15817 | | 4157 | 19662 | 14316 | 14336 |
|
||||
| 8192 | 3820 | 16227 | 11084 | 12555 | | 3691 | 18132 | 12102 | 13813 |
|
||||
| 9216 | 3864 | n/a | 10254 | 12870 | | 3586 | n/a | 12119 | 13994 |
|
||||
| 16384 | 3822 | 15123 | 10454 | 12822 | | 3613 | 16874 | 12370 | 13881 |
|
||||
| 32768 | 4175 | 14512 | 10662 | 11095 | | 3881 | 14702 | 11619 | 11524 |
|
||||
| 262144 | 3317 | 11429 | 6269 | 9517 | | 2810 | 11729 | 7757 | 10179 |
|
||||
| 1048576 | 2913 | 10551 | 4730 | 5867 | | 2661 | 7881 | 3520 | 5350 |
|
||||
|-----------+------------+------------+------------+------------| |------------+------------+------------+------------|
|
||||
|
||||
|
||||
Debian 6, gcc 4.4.5, 64-bit, fftw 3.3.1 on a 3.4 GHz core i7 2600
|
||||
|
||||
Built with:
|
||||
gcc -o test_pffft -DHAVE_FFTW -msse2 -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L$HOME/local/lib -I$HOME/local/include/ -lfftw3f -lm
|
||||
|
||||
| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT |
|
||||
|------------------+--------------+--------------+--------------| |--------------+--------------+--------------|
|
||||
| 64 | 3840 | 7680 | 8777 | | 4389 | 20480 | 11171 |
|
||||
| 96 | 4214 | 9633 | 8429 | | 4816 | 22477 | 11238 |
|
||||
| 128 | 3584 | 10240 | 10240 | | 5120 | 23893 | 11947 |
|
||||
| 192 | 4854 | 11095 | 12945 | | 4854 | 22191 | 14121 |
|
||||
| 256 | 4096 | 11703 | 16384 | | 5120 | 23406 | 13653 |
|
||||
| 384 | 4395 | 14651 | 12558 | | 4884 | 19535 | 14651 |
|
||||
| 512 | 5760 | 13166 | 15360 | | 4608 | 23040 | 15360 |
|
||||
| 768 | 4907 | 14020 | 16357 | | 4461 | 19628 | 14020 |
|
||||
| 1024 | 5120 | 14629 | 14629 | | 5120 | 20480 | 15754 |
|
||||
| 2048 | 5632 | 14080 | 18773 | | 4693 | 12516 | 16091 |
|
||||
| 4096 | 5120 | 13653 | 17554 | | 4726 | 7680 | 14456 |
|
||||
| 8192 | 4160 | 7396 | 13312 | | 4437 | 14791 | 13312 |
|
||||
| 9216 | 4210 | 6124 | 13473 | | 4491 | 7282 | 14970 |
|
||||
| 16384 | 3976 | 11010 | 14313 | | 4210 | 11450 | 13631 |
|
||||
| 32768 | 4260 | 10224 | 10954 | | 4260 | 6816 | 11797 |
|
||||
| 262144 | 3736 | 6896 | 9961 | | 2359 | 8965 | 9437 |
|
||||
| 1048576 | 2796 | 4534 | 6453 | | 1864 | 3078 | 5592 |
|
||||
|------------------+--------------+--------------+--------------| |--------------+--------------+--------------|
|
||||
|
||||
|
||||
|
||||
MacOS Snow Leopard, gcc 4.0, 32-bit, fftw 3.3 on a 1.83 GHz core 1 duo
|
||||
|
||||
Built with:
|
||||
|
||||
gcc -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework veclib
|
||||
|
||||
| input len |real FFTPack| real vDSP | real FFTW | real PFFFT | |cplx FFTPack| cplx vDSP | cplx FFTW | cplx PFFFT |
|
||||
|-----------+------------+------------+------------+------------| |------------+------------+------------+------------|
|
||||
| 64 | 745 | 2145 | 1706 | 2028 | | 961 | 3356 | 3313 | 2300 |
|
||||
| 96 | 877 | n/a | 1976 | 1978 | | 1059 | n/a | 3333 | 2233 |
|
||||
| 128 | 951 | 2808 | 2213 | 2279 | | 1202 | 3803 | 3739 | 2494 |
|
||||
| 192 | 1002 | n/a | 2456 | 2429 | | 1186 | n/a | 3701 | 2508 |
|
||||
| 256 | 1065 | 3205 | 2641 | 2793 | | 1302 | 4013 | 3912 | 2663 |
|
||||
| 384 | 845 | n/a | 2759 | 2499 | | 948 | n/a | 3729 | 2504 |
|
||||
| 512 | 900 | 3476 | 2956 | 2759 | | 974 | 4057 | 3954 | 2645 |
|
||||
| 768 | 910 | n/a | 2912 | 2737 | | 975 | n/a | 3837 | 2614 |
|
||||
| 1024 | 936 | 3583 | 3107 | 3009 | | 1006 | 4124 | 3821 | 2697 |
|
||||
| 2048 | 1057 | 3585 | 3091 | 2837 | | 1089 | 3889 | 3701 | 2513 |
|
||||
| 4096 | 1083 | 3524 | 3092 | 2733 | | 1039 | 3617 | 3462 | 2364 |
|
||||
| 8192 | 874 | 3252 | 2967 | 2363 | | 911 | 3106 | 2789 | 2302 |
|
||||
| 9216 | 898 | n/a | 2420 | 2290 | | 865 | n/a | 2676 | 2204 |
|
||||
| 16384 | 903 | 2892 | 2506 | 2421 | | 899 | 3026 | 2797 | 2289 |
|
||||
| 32768 | 965 | 2837 | 2550 | 2358 | | 920 | 2922 | 2763 | 2240 |
|
||||
| 262144 | 738 | 2422 | 1589 | 1708 | | 610 | 2038 | 1436 | 1091 |
|
||||
| 1048576 | 528 | 1207 | 845 | 880 | | 606 | 1020 | 669 | 1036 |
|
||||
|-----------+------------+------------+------------+------------| |------------+------------+------------+------------|
|
||||
|
||||
|
||||
|
||||
Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.2 on a 2.66 GHz core 2 quad
|
||||
|
||||
Built with:
|
||||
gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
|
||||
|
||||
| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT |
|
||||
|-----------+------------+------------+------------| |------------+------------+------------|
|
||||
| 64 | 1920 | 3614 | 5120 | | 2194 | 7680 | 6467 |
|
||||
| 96 | 1873 | 3549 | 5187 | | 2107 | 8429 | 5863 |
|
||||
| 128 | 2240 | 3773 | 5514 | | 2560 | 7964 | 6827 |
|
||||
| 192 | 1765 | 4569 | 7767 | | 2284 | 9137 | 7061 |
|
||||
| 256 | 2048 | 5461 | 7447 | | 2731 | 9638 | 7802 |
|
||||
| 384 | 1998 | 5861 | 6762 | | 2313 | 9253 | 7644 |
|
||||
| 512 | 2095 | 6144 | 7680 | | 2194 | 10240 | 7089 |
|
||||
| 768 | 2230 | 5773 | 7549 | | 2045 | 10331 | 7010 |
|
||||
| 1024 | 2133 | 6400 | 8533 | | 2133 | 10779 | 7877 |
|
||||
| 2048 | 2011 | 7040 | 8665 | | 1942 | 10240 | 7768 |
|
||||
| 4096 | 2194 | 6827 | 8777 | | 1755 | 9452 | 6827 |
|
||||
| 8192 | 1849 | 6656 | 6656 | | 1752 | 7831 | 6827 |
|
||||
| 9216 | 1871 | 5858 | 6416 | | 1643 | 6909 | 6266 |
|
||||
| 16384 | 1883 | 6223 | 6506 | | 1664 | 7340 | 6982 |
|
||||
| 32768 | 1826 | 6390 | 6667 | | 1631 | 7481 | 6971 |
|
||||
| 262144 | 1546 | 4075 | 5977 | | 1299 | 3415 | 3551 |
|
||||
| 1048576 | 1104 | 2071 | 1730 | | 1104 | 1149 | 1834 |
|
||||
|-----------+------------+------------+------------| |------------+------------+------------|
|
||||
|
||||
|
||||
|
||||
Ubuntu 11.04, gcc 4.5, 32-bit, fftw 3.3 on a 1.6 GHz Atom N270
|
||||
|
||||
Built with:
|
||||
gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
|
||||
|
||||
| N (input length) | real FFTPack | real FFTW | real PFFFT | | cplx FFTPack | cplx FFTW | cplx PFFFT |
|
||||
|------------------+--------------+--------------+--------------| |--------------+--------------+--------------|
|
||||
| 64 | 452 | 1041 | 1336 | | 549 | 2318 | 1781 |
|
||||
| 96 | 444 | 1297 | 1297 | | 503 | 2408 | 1686 |
|
||||
| 128 | 527 | 1525 | 1707 | | 543 | 2655 | 1886 |
|
||||
| 192 | 498 | 1653 | 1849 | | 539 | 2678 | 1942 |
|
||||
| 256 | 585 | 1862 | 2156 | | 594 | 2777 | 2244 |
|
||||
| 384 | 499 | 1870 | 1998 | | 511 | 2586 | 1890 |
|
||||
| 512 | 562 | 2095 | 2194 | | 542 | 2973 | 2194 |
|
||||
| 768 | 545 | 2045 | 2133 | | 545 | 2365 | 2133 |
|
||||
| 1024 | 595 | 2133 | 2438 | | 569 | 2695 | 2179 |
|
||||
| 2048 | 587 | 2125 | 2347 | | 521 | 2230 | 1707 |
|
||||
| 4096 | 495 | 1890 | 1834 | | 492 | 1876 | 1672 |
|
||||
| 8192 | 469 | 1548 | 1729 | | 438 | 1740 | 1664 |
|
||||
| 9216 | 468 | 1663 | 1663 | | 446 | 1585 | 1531 |
|
||||
| 16384 | 453 | 1608 | 1767 | | 398 | 1476 | 1664 |
|
||||
| 32768 | 456 | 1420 | 1503 | | 387 | 1388 | 1345 |
|
||||
| 262144 | 309 | 385 | 726 | | 262 | 415 | 840 |
|
||||
| 1048576 | 280 | 351 | 739 | | 261 | 313 | 797 |
|
||||
|------------------+--------------+--------------+--------------| |--------------+--------------+--------------|
|
||||
|
||||
|
||||
|
||||
Windows 7, visual c++ 2010 on a 1.6 GHz Atom N270
|
||||
|
||||
Built with:
|
||||
cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c
|
||||
|
||||
(visual c++ is definitely not very good with SSE intrinsics...)
|
||||
|
||||
| N (input length) | real FFTPack | real PFFFT | | cplx FFTPack | cplx PFFFT |
|
||||
|------------------+--------------+--------------| |--------------+--------------|
|
||||
| 64 | 173 | 1009 | | 174 | 1159 |
|
||||
| 96 | 169 | 1029 | | 188 | 1201 |
|
||||
| 128 | 195 | 1242 | | 191 | 1275 |
|
||||
| 192 | 178 | 1312 | | 184 | 1276 |
|
||||
| 256 | 196 | 1591 | | 186 | 1281 |
|
||||
| 384 | 172 | 1409 | | 181 | 1281 |
|
||||
| 512 | 187 | 1640 | | 181 | 1313 |
|
||||
| 768 | 171 | 1614 | | 176 | 1258 |
|
||||
| 1024 | 186 | 1812 | | 178 | 1223 |
|
||||
| 2048 | 190 | 1707 | | 186 | 1099 |
|
||||
| 4096 | 182 | 1446 | | 177 | 975 |
|
||||
| 8192 | 175 | 1345 | | 169 | 1034 |
|
||||
| 9216 | 165 | 1271 | | 168 | 1023 |
|
||||
| 16384 | 166 | 1396 | | 165 | 949 |
|
||||
| 32768 | 172 | 1311 | | 161 | 881 |
|
||||
| 262144 | 136 | 632 | | 134 | 629 |
|
||||
| 1048576 | 134 | 698 | | 127 | 623 |
|
||||
|------------------+--------------+--------------| |--------------+--------------|
|
||||
|
||||
|
||||
|
||||
Ubuntu 12.04, gcc-4.7.3, 32-bit, with fftw 3.3.3 (built with --enable-neon), on a 1.2GHz ARM Cortex A9 (Tegra 3)
|
||||
|
||||
Built with:
|
||||
gcc-4.7 -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f
|
||||
|
||||
| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT |
|
||||
|-----------+------------+------------+------------| |------------+------------+------------|
|
||||
| 64 | 549 | 452 | 731 | | 512 | 602 | 640 |
|
||||
| 96 | 421 | 272 | 702 | | 496 | 571 | 602 |
|
||||
| 128 | 498 | 512 | 815 | | 597 | 618 | 652 |
|
||||
| 160 | 521 | 536 | 815 | | 586 | 669 | 625 |
|
||||
| 192 | 539 | 571 | 883 | | 485 | 597 | 626 |
|
||||
| 256 | 640 | 539 | 975 | | 569 | 611 | 671 |
|
||||
| 384 | 499 | 610 | 879 | | 499 | 602 | 637 |
|
||||
| 480 | 518 | 507 | 877 | | 496 | 661 | 616 |
|
||||
| 512 | 524 | 591 | 1002 | | 549 | 678 | 668 |
|
||||
| 640 | 542 | 612 | 955 | | 568 | 663 | 645 |
|
||||
| 768 | 557 | 613 | 981 | | 491 | 663 | 598 |
|
||||
| 800 | 514 | 353 | 882 | | 514 | 360 | 574 |
|
||||
| 1024 | 640 | 640 | 1067 | | 492 | 683 | 602 |
|
||||
| 2048 | 587 | 640 | 908 | | 486 | 640 | 552 |
|
||||
| 2400 | 479 | 368 | 777 | | 422 | 376 | 518 |
|
||||
| 4096 | 511 | 614 | 853 | | 426 | 640 | 534 |
|
||||
| 8192 | 415 | 584 | 708 | | 386 | 622 | 516 |
|
||||
| 9216 | 419 | 571 | 687 | | 364 | 586 | 506 |
|
||||
| 16384 | 426 | 577 | 716 | | 398 | 606 | 530 |
|
||||
| 32768 | 417 | 572 | 673 | | 399 | 572 | 468 |
|
||||
| 262144 | 219 | 380 | 293 | | 255 | 431 | 343 |
|
||||
| 1048576 | 202 | 274 | 237 | | 265 | 282 | 355 |
|
||||
|-----------+------------+------------+------------| |------------+------------+------------|
|
||||
|
||||
Same platform as above, but this time pffft and fftpack are built with clang 3.2:
|
||||
|
||||
clang -O3 -DHAVE_FFTW -march=armv7-a -mtune=cortex-a9 -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm -I/usr/local/include/ -L/usr/local/lib/ -lfftw3f
|
||||
|
||||
| input len |real FFTPack| real FFTW | real PFFFT | |cplx FFTPack| cplx FFTW | cplx PFFFT |
|
||||
|-----------+------------+------------+------------| |------------+------------+------------|
|
||||
| 64 | 427 | 452 | 853 | | 427 | 602 | 1024 |
|
||||
| 96 | 351 | 276 | 843 | | 337 | 571 | 963 |
|
||||
| 128 | 373 | 512 | 996 | | 390 | 618 | 1054 |
|
||||
| 160 | 426 | 536 | 987 | | 375 | 669 | 914 |
|
||||
| 192 | 404 | 571 | 1079 | | 388 | 588 | 1079 |
|
||||
| 256 | 465 | 539 | 1205 | | 445 | 602 | 1170 |
|
||||
| 384 | 366 | 610 | 1099 | | 343 | 594 | 1099 |
|
||||
| 480 | 356 | 507 | 1140 | | 335 | 651 | 931 |
|
||||
| 512 | 411 | 591 | 1213 | | 384 | 649 | 1124 |
|
||||
| 640 | 398 | 612 | 1193 | | 373 | 654 | 901 |
|
||||
| 768 | 409 | 613 | 1227 | | 383 | 663 | 1044 |
|
||||
| 800 | 411 | 348 | 1073 | | 353 | 358 | 809 |
|
||||
| 1024 | 427 | 640 | 1280 | | 413 | 692 | 1004 |
|
||||
| 2048 | 414 | 626 | 1126 | | 371 | 640 | 853 |
|
||||
| 2400 | 399 | 373 | 898 | | 319 | 368 | 653 |
|
||||
| 4096 | 404 | 602 | 1059 | | 357 | 633 | 778 |
|
||||
| 8192 | 332 | 584 | 792 | | 308 | 616 | 716 |
|
||||
| 9216 | 322 | 561 | 783 | | 299 | 586 | 687 |
|
||||
| 16384 | 344 | 568 | 778 | | 314 | 617 | 745 |
|
||||
| 32768 | 342 | 564 | 737 | | 314 | 552 | 629 |
|
||||
| 262144 | 201 | 383 | 313 | | 227 | 435 | 413 |
|
||||
| 1048576 | 187 | 262 | 251 | | 228 | 281 | 409 |
|
||||
|-----------+------------+------------+------------| |------------+------------+------------|
|
||||
|
||||
So it looks like, on ARM, gcc 4.7 is the best at scalar floating point
|
||||
(the fftpack performance numbers are better with gcc), while clang is
|
||||
the best with neon intrinsics (see how pffft perf has improved with
|
||||
clang 3.2).
|
||||
|
||||
|
||||
NVIDIA Jetson TK1 board, gcc-4.8.2. The cpu is a 2.3GHz cortex A15 (Tegra K1).
|
||||
|
||||
Built with:
|
||||
gcc -O3 -march=armv7-a -mtune=native -mfloat-abi=hard -mfpu=neon -ffast-math test_pffft.c pffft.c -o test_pffft_arm fftpack.c -lm
|
||||
|
||||
| input len |real FFTPack| real PFFFT | |cplx FFTPack| cplx PFFFT |
|
||||
|-----------+------------+------------| |------------+------------|
|
||||
| 64 | 1735 | 3308 | | 1994 | 3744 |
|
||||
| 96 | 1596 | 3448 | | 1987 | 3572 |
|
||||
| 128 | 1807 | 4076 | | 2255 | 3960 |
|
||||
| 160 | 1769 | 4083 | | 2071 | 3845 |
|
||||
| 192 | 1990 | 4233 | | 2017 | 3939 |
|
||||
| 256 | 2191 | 4882 | | 2254 | 4346 |
|
||||
| 384 | 1878 | 4492 | | 2073 | 4012 |
|
||||
| 480 | 1748 | 4398 | | 1923 | 3951 |
|
||||
| 512 | 2030 | 5064 | | 2267 | 4195 |
|
||||
| 640 | 1918 | 4756 | | 2094 | 4184 |
|
||||
| 768 | 2099 | 4907 | | 2048 | 4297 |
|
||||
| 800 | 1822 | 4555 | | 1880 | 4063 |
|
||||
| 1024 | 2232 | 5355 | | 2187 | 4420 |
|
||||
| 2048 | 2176 | 4983 | | 2027 | 3602 |
|
||||
| 2400 | 1741 | 4256 | | 1710 | 3344 |
|
||||
| 4096 | 1816 | 3914 | | 1851 | 3349 |
|
||||
| 8192 | 1716 | 3481 | | 1700 | 3255 |
|
||||
| 9216 | 1735 | 3589 | | 1653 | 3094 |
|
||||
| 16384 | 1567 | 3483 | | 1637 | 3244 |
|
||||
| 32768 | 1624 | 3240 | | 1655 | 3156 |
|
||||
| 262144 | 1012 | 1898 | | 983 | 1503 |
|
||||
| 1048576 | 876 | 1154 | | 868 | 1341 |
|
||||
|-----------+------------+------------| |------------+------------|
|
||||
|
||||
The performance on the tegra K1 is pretty impressive. I'm not
|
||||
including the FFTW numbers as they are slightly below the scalar
|
||||
fftpack numbers, so something must be wrong (however it seems to be
|
||||
correctly configured and is using neon simd instructions).
|
||||
|
||||
When using clang 3.4 the pffft version is even a bit faster, reaching
|
||||
5.7 GFlops for real ffts of size 1024.
|
60
webrtc/third_party/pffft/patches/01-rmv_printf.diff
vendored
Normal file
60
webrtc/third_party/pffft/patches/01-rmv_printf.diff
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
|
||||
index 7934db448a09..2e0c2f651438 100644
|
||||
--- a/third_party/pffft/src/pffft.c
|
||||
+++ b/third_party/pffft/src/pffft.c
|
||||
@@ -59,7 +59,7 @@
|
||||
|
||||
#include "pffft.h"
|
||||
#include <stdlib.h>
|
||||
-#include <stdio.h>
|
||||
+// #include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
@@ -222,31 +222,35 @@ void validate_pffft_simd() {
|
||||
memcpy(a3.f, f+12, 4*sizeof(float));
|
||||
|
||||
t = a0; u = a1; t.v = VZERO();
|
||||
- printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0);
|
||||
+ // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
+ assertv4(t, 0, 0, 0, 0);
|
||||
t.v = VADD(a1.v, a2.v);
|
||||
- printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18);
|
||||
+ // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
+ assertv4(t, 12, 14, 16, 18);
|
||||
t.v = VMUL(a1.v, a2.v);
|
||||
- printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77);
|
||||
+ // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
+ assertv4(t, 32, 45, 60, 77);
|
||||
t.v = VMADD(a1.v, a2.v,a0.v);
|
||||
- printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80);
|
||||
+ // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
+ assertv4(t, 32, 46, 62, 80);
|
||||
|
||||
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
|
||||
- printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
||||
+ // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
||||
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
|
||||
UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
|
||||
- printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
||||
+ // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
||||
assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
|
||||
|
||||
t.v=LD_PS1(f[15]);
|
||||
- printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
+ // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 15, 15, 15, 15);
|
||||
t.v = VSWAPHL(a1.v, a2.v);
|
||||
- printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
+ // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 8, 9, 6, 7);
|
||||
VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
|
||||
- printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
|
||||
- a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
|
||||
- a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
|
||||
+ // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
|
||||
+ // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
|
||||
+ // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
|
||||
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
|
||||
}
|
||||
#endif //!PFFFT_SIMD_DISABLE
|
16
webrtc/third_party/pffft/patches/02-decl_validate_simd.diff
vendored
Normal file
16
webrtc/third_party/pffft/patches/02-decl_validate_simd.diff
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
diff --git a/third_party/pffft/src/pffft.h b/third_party/pffft/src/pffft.h
|
||||
index 2bfa7b3ebcfb..bb6f78d4b795 100644
|
||||
--- a/third_party/pffft/src/pffft.h
|
||||
+++ b/third_party/pffft/src/pffft.h
|
||||
@@ -83,6 +83,11 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
+#ifndef PFFFT_SIMD_DISABLE
|
||||
+ // Detects compiler bugs with respect to simd instruction.
|
||||
+ void validate_pffft_simd();
|
||||
+#endif
|
||||
+
|
||||
/* opaque struct holding internal stuff (precomputed twiddle factors)
|
||||
this struct can be shared by many threads as it contains only
|
||||
read-only data.
|
82
webrtc/third_party/pffft/patches/03-malloca.diff
vendored
Normal file
82
webrtc/third_party/pffft/patches/03-malloca.diff
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
|
||||
index 776f564aa28c..643836626c0f 100644
|
||||
--- a/third_party/pffft/src/pffft.c
|
||||
+++ b/third_party/pffft/src/pffft.c
|
||||
@@ -59,7 +59,6 @@
|
||||
|
||||
#include "pffft.h"
|
||||
#include <stdlib.h>
|
||||
-// #include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
@@ -75,11 +74,14 @@
|
||||
# define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
|
||||
# define RESTRICT __restrict
|
||||
# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
|
||||
+# define VLA_ARRAY_ON_STACK_FREE(varname__)
|
||||
#elif defined(COMPILER_MSVC)
|
||||
+#include <malloc.h>
|
||||
# define ALWAYS_INLINE(return_type) __forceinline return_type
|
||||
# define NEVER_INLINE(return_type) __declspec(noinline) return_type
|
||||
# define RESTRICT __restrict
|
||||
-# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__))
|
||||
+# define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_malloca(size__ * sizeof(type__))
|
||||
+# define VLA_ARRAY_ON_STACK_FREE(varname__) _freea(varname__)
|
||||
#endif
|
||||
|
||||
|
||||
@@ -219,35 +221,24 @@ void validate_pffft_simd() {
|
||||
memcpy(a3.f, f+12, 4*sizeof(float));
|
||||
|
||||
t = a0; u = a1; t.v = VZERO();
|
||||
- // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 0, 0, 0, 0);
|
||||
t.v = VADD(a1.v, a2.v);
|
||||
- // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 12, 14, 16, 18);
|
||||
t.v = VMUL(a1.v, a2.v);
|
||||
- // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 32, 45, 60, 77);
|
||||
t.v = VMADD(a1.v, a2.v,a0.v);
|
||||
- // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 32, 46, 62, 80);
|
||||
|
||||
INTERLEAVE2(a1.v,a2.v,t.v,u.v);
|
||||
- // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
||||
assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11);
|
||||
UNINTERLEAVE2(a1.v,a2.v,t.v,u.v);
|
||||
- // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]);
|
||||
assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11);
|
||||
|
||||
t.v=LD_PS1(f[15]);
|
||||
- // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 15, 15, 15, 15);
|
||||
t.v = VSWAPHL(a1.v, a2.v);
|
||||
- // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]);
|
||||
assertv4(t, 8, 9, 6, 7);
|
||||
VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v);
|
||||
- // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n",
|
||||
- // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3],
|
||||
- // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]);
|
||||
assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15);
|
||||
}
|
||||
#endif //!PFFFT_SIMD_DISABLE
|
||||
@@ -1674,6 +1665,8 @@ void pffft_transform_internal(PFFFT_Setup *setup, const float *finput, float *fo
|
||||
ib = !ib;
|
||||
}
|
||||
assert(buff[ib] == voutput);
|
||||
+
|
||||
+ VLA_ARRAY_ON_STACK_FREE(scratch_on_stack);
|
||||
}
|
||||
|
||||
void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, float *ab, float scaling) {
|
||||
@@ -1851,6 +1844,8 @@ void pffft_transform_internal_nosimd(PFFFT_Setup *setup, const float *input, flo
|
||||
ib = !ib;
|
||||
}
|
||||
assert(buff[ib] == output);
|
||||
+
|
||||
+ VLA_ARRAY_ON_STACK_FREE(scratch_on_stack);
|
||||
}
|
||||
|
||||
#define pffft_zconvolve_accumulate_nosimd pffft_zconvolve_accumulate
|
48
webrtc/third_party/pffft/patches/04-fix_ptr_cast.diff
vendored
Normal file
48
webrtc/third_party/pffft/patches/04-fix_ptr_cast.diff
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
|
||||
index 643836626c0f..3033e61b813e 100644
|
||||
--- a/third_party/pffft/src/pffft.c
|
||||
+++ b/third_party/pffft/src/pffft.c
|
||||
@@ -58,6 +58,7 @@
|
||||
*/
|
||||
|
||||
#include "pffft.h"
|
||||
+#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
@@ -125,7 +126,7 @@ inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_p
|
||||
x3 = vec_mergel(y1, y3); \
|
||||
}
|
||||
# define VSWAPHL(a,b) vec_perm(a,b, (vector unsigned char)(16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15))
|
||||
-# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
|
||||
+# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0xF) == 0)
|
||||
|
||||
/*
|
||||
SSE1 support macros
|
||||
@@ -145,7 +146,7 @@ typedef __m128 v4sf;
|
||||
# define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; }
|
||||
# define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3)
|
||||
# define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))
|
||||
-# define VALIGNED(ptr) ((((long)(ptr)) & 0xF) == 0)
|
||||
+# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0xF) == 0)
|
||||
|
||||
/*
|
||||
ARM NEON support macros
|
||||
@@ -172,7 +173,7 @@ typedef float32x4_t v4sf;
|
||||
// marginally faster version
|
||||
//# define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); }
|
||||
# define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))
|
||||
-# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
|
||||
+# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0x3) == 0)
|
||||
#else
|
||||
# if !defined(PFFFT_SIMD_DISABLE)
|
||||
# warning "building with simd disabled !\n";
|
||||
@@ -190,7 +191,7 @@ typedef float v4sf;
|
||||
# define VMADD(a,b,c) ((a)*(b)+(c))
|
||||
# define VSUB(a,b) ((a)-(b))
|
||||
# define LD_PS1(p) (p)
|
||||
-# define VALIGNED(ptr) ((((long)(ptr)) & 0x3) == 0)
|
||||
+# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0x3) == 0)
|
||||
#endif
|
||||
|
||||
// shortcuts for complex multiplcations
|
22
webrtc/third_party/pffft/patches/05-fix-arch-detection.diff
vendored
Normal file
22
webrtc/third_party/pffft/patches/05-fix-arch-detection.diff
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c
|
||||
index 3033e61b813e..bdac4d784999 100644
|
||||
--- a/third_party/pffft/src/pffft.c
|
||||
+++ b/third_party/pffft/src/pffft.c
|
||||
@@ -131,7 +131,7 @@ inline v4sf ld_ps1(const float *p) { v4sf v=vec_lde(0,p); return vec_splat(vec_p
|
||||
/*
|
||||
SSE1 support macros
|
||||
*/
|
||||
-#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86))
|
||||
+#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || defined(_M_IX86))
|
||||
|
||||
#include <xmmintrin.h>
|
||||
typedef __m128 v4sf;
|
||||
@@ -151,7 +151,7 @@ typedef __m128 v4sf;
|
||||
/*
|
||||
ARM NEON support macros
|
||||
*/
|
||||
-#elif !defined(PFFFT_SIMD_DISABLE) && defined(__arm__)
|
||||
+#elif !defined(PFFFT_SIMD_DISABLE) && (defined(__arm__) || defined(__ARMEL__) || defined(__aarch64__) || defined(_M_ARM64))
|
||||
# include <arm_neon.h>
|
||||
typedef float32x4_t v4sf;
|
||||
# define SIMD_SZ 4
|
18
webrtc/third_party/rnnoise/BUILD.gn
vendored
18
webrtc/third_party/rnnoise/BUILD.gn
vendored
@ -1 +1,17 @@
|
||||
IyBDb3B5cmlnaHQgMjAxOCBUaGUgQ2hyb21pdW0gQXV0aG9ycy4gQWxsIHJpZ2h0cyByZXNlcnZlZC4KIyBVc2Ugb2YgdGhpcyBzb3VyY2UgY29kZSBpcyBnb3Zlcm5lZCBieSBhIEJTRC1zdHlsZSBsaWNlbnNlIHRoYXQgY2FuIGJlCiMgZm91bmQgaW4gdGhlIExJQ0VOU0UgZmlsZS4KCmltcG9ydCgiLy90ZXN0aW5nL3Rlc3QuZ25pIikKCmdyb3VwKCJybm5vaXNlIikgewogIGRlcHMgPSBbICI6cm5uX3ZhZCIgXQp9Cgpzb3VyY2Vfc2V0KCJybm5fdmFkIikgewogIHNvdXJjZXMgPSBbCiAgICAic3JjL3Jubl9hY3RpdmF0aW9ucy5oIiwKICAgICJzcmMvcm5uX3ZhZF93ZWlnaHRzLmNjIiwKICAgICJzcmMvcm5uX3ZhZF93ZWlnaHRzLmgiLAogIF0KfQo=
|
||||
# Copyright 2018 The Chromium Authors
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
import("//testing/test.gni")
|
||||
|
||||
group("rnnoise") {
|
||||
deps = [ ":rnn_vad" ]
|
||||
}
|
||||
|
||||
source_set("rnn_vad") {
|
||||
sources = [
|
||||
"src/rnn_activations.h",
|
||||
"src/rnn_vad_weights.cc",
|
||||
"src/rnn_vad_weights.h",
|
||||
]
|
||||
}
|
||||
|
Reference in New Issue
Block a user